Scraping the data:
```
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup        # HTML parsing / data extraction
import re                            # regular expressions for text matching
import urllib.request, urllib.error  # build the URL and fetch the page (fallback, see askURL)
import xlwt                          # Excel output
from selenium import webdriver
import time
# from Selenium4R import Chrome      # alternative driver helper (unused)
# import sqlite3                     # SQLite storage (unused)

# Regular expressions for the fields of one job card
findLink = re.compile(r'<span class="job-name"><a href="(.*?)" target="_blank">')   # job detail link
findImgSrc = re.compile(r'<div class="company-text">(.*)</div>', re.S)              # company block
findTitle = re.compile(r'<em class="vline"></em>(.*)</p>')                          # education requirement
findRating = re.compile(r'</a><em class="vline"></em>(.*)<em class="vline"></em>')  # company size
findJudge = re.compile(r'<div class="info-desc">(.*)</div>')                        # benefits
findInq = re.compile(r'<span class="red">(.*)</span>')                              # salary
findarea = re.compile(r'<span class="job-area">(.*)</span>')                        # job location
findname = re.compile(r'<span class="job-name">(.*)</span>')                        # job title
findjyan = re.compile(r'<p>(.*)<em class="vline"></em>')                            # experience requirement

# re.S: without it, patterns are matched line by line and a match never crosses a line break;
# with it, the whole string is treated as one block and "\n" is just another character.
# The r prefix marks the pattern as a raw string.
# re.sub replaces matches in a string. Its required parameters are: (1) pattern: the regex;
# (2) repl: the replacement string (or a function); (3) string: the original string to process.
# str.strip() removes the given characters (whitespace/newlines by default) from the two ends
# of a string only; it cannot remove characters from the middle.


def main():
    baseurl = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page="  # page to scrape
    # 1. scrape the pages
    datalist = getData(baseurl)
    time_stamp = time.strftime('%m%d%H%M%S', time.localtime(time.time()))
    savepath = "boss直聘爬虫岗" + time_stamp + ".xls"  # XLS file created in the current directory
    # dbpath = "movie.db"  # alternatively store into a database
    # 3. save the data (only one of the two storage options is needed)
    saveData(datalist, savepath)
    # saveData2DB(datalist, dbpath)


# Scrape the pages
def getData(baseurl):
    datalist = []  # holds the scraped records
    for i in range(0, 3):  # fetch 3 pages
        url = baseurl + str(i + 1) + '&ka=page-' + str(i + 1)
        # html = askURL(url)  # plain urllib fallback
        driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
        driver.get(url)
        time.sleep(8)
        html = driver.page_source
        driver.quit()

        # 2. parse the page
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.find_all('div', class_="job-primary"):  # every job card
            data = []  # all fields of one job posting
            item = str(item)
            link = re.findall(findInq, item)[0]     # salary
            linka = re.findall(findTitle, item)[0]  # education
            regex_str = ".*?([\u4E00-\u9FA5]+).*?"  # keep only the Chinese characters
            linka = "".join(re.findall(regex_str, linka))
            data.append(link)
            data.append(linka)
            linkb = re.findall(findjyan, item)[0]   # experience
            data.append(linkb)
            linkc = re.findall(findarea, item)[0]   # location
            data.append(linkc)
            linkd = re.findall(findname, item)[0]   # job title
            data.append(linkd)
            imgSrc = re.findall(findImgSrc, item)[0]
            imgSrc = re.findall('target="_blank">(.*)</a></h3>', imgSrc)[0]  # company name
            data.append(imgSrc)
            datalist.append(data)
    return datalist


# Fetch the content of one URL with urllib (unused when Selenium is used)
def askURL(url):
    head = {
        # Browser headers: tell the server what kind of client we are and what it can receive
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36",
        "cookie": ""
    }
    request = urllib.request.Request(url, headers=head)
    html = ""
    try:
        response = urllib.request.urlopen(request)
        html = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    return html


# Save the data to a spreadsheet
def saveData(datalist, savepath):
    print("save.......")
    print(datalist)
    book = xlwt.Workbook(encoding="utf-8", style_compression=0)       # create the workbook
    sheet = book.add_sheet('boss直聘爬虫岗', cell_overwrite_ok=True)   # create the worksheet
    col = ("薪资", "学历", "工作经验", "工作地点", "岗位名称", "公司名称")
    for i in range(0, 6):
        sheet.write(0, i, col[i])  # header row
    for i in range(len(datalist)):
        data = datalist[i]
        for j in range(0, 6):
            sheet.write(i + 1, j, data[j])  # data rows
    book.save(savepath)  # save the file


if __name__ == "__main__":
    main()
    # init_db("movietest.db")
    print("爬取完毕!")
```
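The comment block above describes re.S, re.sub and str.strip() only in words; the short standalone sketch below (the sample strings are made up) shows the behaviour it refers to.
```
import re

html = '<div class="info-desc">\n五险一金 带薪年假\n</div>'

# Without re.S the pattern is matched line by line, so ".*" cannot cross the "\n".
print(re.findall(r'<div class="info-desc">(.*)</div>', html))        # []
# With re.S the string is one block and "\n" matches like any other character.
print(re.findall(r'<div class="info-desc">(.*)</div>', html, re.S))  # ['\n五险一金 带薪年假\n']

# re.sub(pattern, repl, string): replace every match of pattern in string with repl.
print(re.sub(r'[A-Za-z]', '', 'Python3年经验'))  # '3年经验'

# str.strip() only trims the ends, never characters in the middle.
print('  10-15K  '.strip())                      # '10-15K'
```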
*****

2:
```
from bs4 import BeautifulSoup
from selenium import webdriver
import csv
import time


def fillPostList(postlist, html):
    # Parse one page of results and append every job posting to postlist
    try:
        soup = BeautifulSoup(html, "html.parser")
        job_all = soup.find_all('div', {"class": "job-primary"})
        for job in job_all:
            position = job.find('span', {"class": "job-name"}).get_text()
            address = job.find('span', {'class': "job-area"}).get_text()
            company = job.find('div', {'class': 'company-text'}).find('h3', {'class': "name"}).get_text()
            salary = job.find('span', {'class': 'red'}).get_text()
            diploma = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[-2:]
            experience = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[:-2]
            labels = job.find('a', {'class': 'false-link'}).get_text()
            postlist.append([position, address, company, salary, diploma, experience, labels])
    except IndexError:
        pass


def main():
    jobinfo = []
    driver = webdriver.Chrome()
    url = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=1&ka=page-1"
    driver.get(url)
    time.sleep(8)
    html = driver.page_source
    fillPostList(jobinfo, html)

    # Write the jobinfo list to a csv file
    headers = ["职位", "工作地址", "公司全称", "薪水", "学历", "工作经验", "行业标签"]
    with open('job.csv', 'w', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(jobinfo)
    driver.quit()


main()
```
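fillPostList splits education and experience by slicing the job-limit paragraph: the last two characters are taken as the degree, everything before them as the experience range. A tiny illustration with a made-up sample string:
```
text = "3-5年本科"      # example of the job-limit <p> text: experience followed by the degree
diploma = text[-2:]      # last two characters  -> "本科"
experience = text[:-2]   # the rest of the text -> "3-5年"
print(diploma, experience)
```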
3:
```
# Boss直聘
from bs4 import BeautifulSoup
import requests
import xlwt
from selenium import webdriver
from lxml import etree
import time

begin = int(input("输入起始页:"))
end = int(input("输入终止页:"))
base_url = "https://www.zhipin.com"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4557.4 Safari/537.36',
    'cookie': '__g=-; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1628342274,1628476062,1628559147; lastCity=100010000; __c=1628559147; __l=l=%2Fwww.zhipin.com%2Fc100010000%2F%3Fpage%3D1%26ka%3Dpage-1&r=&g=&s=3&friend_source=0&s=3&friend_source=0; __a=51751789.1628342272.1628476062.1628559147.80.3.2.80; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1628559375; __zp_stoken__=44fccEA1HA2tYaygfIi87Y39AOV8QMShFLTJsCThyVHN4TQUcEithWCZrdEBRdGB%2BT3s1cRw9fggUJQYnIEMHSE0rHHpfbE0yGiREN2IMbHcNX3s6dg5iIzgCdHxZREcDf1glTGc4AHw%2FcjoH',
    'referer': 'https://www.zhipin.com/c100010000/?page=2&ka=page-2'
}

names = []
locations = []
salarys = []
requirements = []
educations = []
companys = []
links = []
items = []

for page in range(begin, end + 1):
    # Build the URL of the page to fetch
    url = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=" + str(page) + "&ka=page-" + str(page)
    # response = requests.get(url, params={'page': page}, headers=headers)  # plain requests fallback
    driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
    driver.get(url)
    time.sleep(8)
    html = driver.page_source
    driver.quit()
    # print(html)
    root = etree.HTML(html)
    name = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[1]/a/text()')
    names.extend(name)
    location = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[2]/span/text()')
    locations.extend(location)
    salary = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/span/text()')
    salarys.extend(salary)
    requirement = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/p/text()[1]')
    requirements.extend(requirement)
    education = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/p/text()[2]')
    educations.extend(education)
    company = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[2]/div/h3/a/text()')
    companys.extend(company)
    link = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[1]/a/@href')
    for i in range(0, len(link)):
        link[i] = base_url + link[i]  # turn the relative detail link into an absolute one
    links.extend(link)

items.append(names)
items.append(locations)
items.append(salarys)
items.append(requirements)
items.append(educations)
items.append(companys)
items.append(links)
# print(items)

book = xlwt.Workbook(encoding='utf-8')
sheet = book.add_sheet('items')
head = ['职位名称', '工作地点', '薪水', '工作经验', '学历', '公司', '详情链接']
for i in range(0, 7):
    sheet.write(0, i, head[i])
for i in range(0, 7):
    a = items[i]  # one column of values
    for j in range(len(a)):
        sheet.write(j + 1, i, a[j])
book.save('Boss直聘12.xls')
```
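Script 3 relies on long absolute XPath expressions copied from the browser. As a quick reference, the sketch below runs the same text()/@href extraction pattern on a small, hypothetical job-card fragment so the idea is easier to see.
```
from lxml import etree

# A made-up, cut-down job card just to demonstrate text() and @href extraction.
snippet = """
<ul>
  <li><div>
    <span class="job-name"><a href="/job_detail/abc.html">爬虫工程师</a></span>
    <span class="job-area">上海·浦东新区</span>
  </div></li>
</ul>
"""
root = etree.HTML(snippet)
print(root.xpath('//span[@class="job-name"]/a/text()'))  # ['爬虫工程师']
print(root.xpath('//span[@class="job-name"]/a/@href'))   # ['/job_detail/abc.html']
print(root.xpath('//span[@class="job-area"]/text()'))    # ['上海·浦东新区']
```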
4:
```
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup  # HTML parsing / data extraction
from selenium import webdriver
import time
import csv
# import sqlite3  # SQLite storage (unused)

begin = int(input("输入起始页:"))
end = int(input("输入终止页:"))


def main():
    baseurl = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page="  # page to scrape
    # 1. scrape the pages
    datalist = getData(baseurl)
    time_stamp = time.strftime('%m%d%H%M%S', time.localtime(time.time()))
    savepath = "boss直聘爬虫岗" + time_stamp + ".csv"  # CSV file created in the current directory
    # 3. save the data
    saveData(datalist, savepath)


# Scrape the pages
def getData(baseurl):
    datalist = []  # holds the scraped records
    for i in range(begin, end + 1):
        url = baseurl + str(i) + '&ka=page-' + str(i)
        driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
        driver.get(url)
        time.sleep(8)
        html = driver.page_source
        driver.quit()

        # 2. parse the page
        soup = BeautifulSoup(html, "html.parser")
        for job in soup.find_all('div', {"class": "job-primary"}):  # every job card
            data = []  # all fields of one job posting
            position = job.find('span', {"class": "job-name"}).get_text()
            address = job.find('span', {'class': "job-area"}).get_text()
            company = job.find('div', {'class': 'company-text'}).find('h3', {'class': "name"}).get_text()
            salary = job.find('span', {'class': 'red'}).get_text()
            diploma = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[-2:]
            experience = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[:-2]
            labels = job.find('a', {'class': 'false-link'}).get_text()
            data.append(position)
            data.append(address)
            data.append(company)
            data.append(salary)
            data.append(diploma)
            data.append(experience)
            data.append(labels)
            datalist.append(data)
    return datalist


# Save the data to a CSV file
def saveData(datalist, savepath):
    print("save.......")
    print(datalist)
    headers = ["职位", "工作地址", "公司全称", "薪水", "学历", "工作经验", "行业标签"]
    with open(savepath, 'w', encoding='utf-8', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(datalist)


if __name__ == "__main__":
    main()
    print("爬取完毕!")
```
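A practical aside: a CSV written with plain utf-8 often shows garbled Chinese when opened directly in Excel. If that matters, writing with the BOM-prefixed variant is usually enough; a minimal sketch (the file name and sample rows are made up):
```
import csv

rows = [["职位", "薪水"], ["python爬虫工程师", "10-15K"]]  # made-up sample rows
# 'utf-8-sig' prepends a BOM so Excel detects the encoding correctly.
with open('job_excel.csv', 'w', encoding='utf-8-sig', newline='') as f:
    csv.writer(f).writerows(rows)
```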
6:
```
from pyspider.libs.base_handler import *
import pymysql
import random
import time
import re

count = 0


class Handler(BaseHandler):
    # Add request headers, otherwise the site answers with a 403
    crawl_config = {'headers': {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}}

    def __init__(self):
        # Connect to the database
        self.db = pymysql.connect(host='127.0.0.1', user='root', password='774110919', port=3306,
                                  db='boss_job', charset='utf8mb4')

    def add_Mysql(self, id, job_title, job_salary, job_city, job_experience, job_education,
                  company_name, company_type, company_status, company_people):
        # Write one record into the database
        try:
            cursor = self.db.cursor()
            sql = 'insert into job(id, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people) values ("%d", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s")' % (
                id, job_title, job_salary, job_city, job_experience, job_education, company_name,
                company_type, company_status, company_people)
            print(sql)
            cursor.execute(sql)
            print(cursor.lastrowid)
            self.db.commit()
        except Exception as e:
            print(e)
            self.db.rollback()

    @every(minutes=24 * 60)
    def on_start(self):
        # pyspider defaults to plain HTTP; for HTTPS requests validate_cert=False is needed,
        # otherwise a 599/SSL error is raised
        self.crawl('https://www.zhipin.com/job_detail/?query=%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90&scity=100010000&industry=&position=',
                   callback=self.index_page, validate_cert=False)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        time.sleep(random.randint(2, 5))
        for i in response.doc('li > div').items():
            # global counter, used as the record id
            global count
            count += 1
            # job title
            job_title = i('.job-title').text()
            print(job_title)
            # salary
            job_salary = i('.red').text()
            print(job_salary)
            # job location
            city_result = re.search('(.*?)<em class=', i('.info-primary > p').html())
            job_city = city_result.group(1).split(' ')[0]
            print(job_city)
            # experience requirement
            experience_result = re.search('<em class="vline"/>(.*?)<em class="vline"/>',
                                          i('.info-primary > p').html())
            job_experience = experience_result.group(1)
            print(job_experience)
            # education requirement
            job_education = i('.info-primary > p').text().replace(' ', '') \
                .replace(city_result.group(1).replace(' ', ''), '') \
                .replace(experience_result.group(1).replace(' ', ''), '')
            print(job_education)
            # company name
            company_name = i('.info-company a').text()
            print(company_name)
            # company type
            company_type_result = re.search('(.*?)<em class=', i('.info-company p').html())
            company_type = company_type_result.group(1)
            print(company_type)
            # financing status
            company_status_result = re.search('<em class="vline"/>(.*?)<em class="vline"/>',
                                              i('.info-company p').html())
            if company_status_result:
                company_status = company_status_result.group(1)
            else:
                company_status = '无信息'
            print(company_status)
            # company size
            company_people = i('.info-company p').text().replace(company_type, '').replace(company_status, '')
            print(company_people + '\n')
            # write the record to the database
            self.add_Mysql(count, job_title, job_salary, job_city, job_experience, job_education,
                           company_name, company_type, company_status, company_people)
        # follow the next page
        next = response.doc('.next').attr.href
        if next != 'javascript:;':
            self.crawl(next, callback=self.index_page, validate_cert=False)
        else:
            print("The Work is Done")
        # Detail pages are not crawled because of the request limit
        # for each in response.doc('.name > a').items():
        #     url = each.attr.href
        #     self.crawl(each.attr.href, callback=self.detail_page, validate_cert=False)

    @config(priority=2)
    def detail_page(self, response):
        # Detail-page extraction; unused because of the request limit
        message_job = response.doc('div > .info-primary > p').text()
        city_result = re.findall('城市:(.*?)经验', message_job)
        experience_result = re.findall('经验:(.*?)学历', message_job)
        education_result = re.findall('学历:(.*)', message_job)
        message_company = response.doc('.info-company > p').text().replace(
            response.doc('.info-company > p > a').text(), '')
        status_result = re.findall(r'(.*?)\d', message_company.split(' ')[0])
        people_result = message_company.split(' ')[0].replace(status_result[0], '')
        return {
            "job_title": response.doc('h1').text(),
            "job_salary": response.doc('.info-primary .badge').text(),
            "job_city": city_result[0],
            "job_experience": experience_result[0],
            "job_education": education_result[0],
            "job_skills": response.doc('.info-primary > .job-tags > span').text(),
            "job_detail": response.doc('div').filter('.text').eq(0).text().replace('\n', ''),
            "company_name": response.doc('.info-company > .name > a').text(),
            "company_status": status_result[0],
            "company_people": people_result,
            "company_type": response.doc('.info-company > p > a').text(),
        }
```
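add_Mysql assumes the boss_job database already contains a job table, but its schema is not shown here. The sketch below is only a guessed-at DDL (the column names follow the INSERT statement; the types are assumptions) executed through pymysql so the INSERT has somewhere to land:
```
import pymysql

# Assumed schema: column names match the INSERT in add_Mysql, the types are guesses.
ddl = """
CREATE TABLE IF NOT EXISTS job (
    id INT PRIMARY KEY,
    job_title VARCHAR(255),
    job_salary VARCHAR(64),
    job_city VARCHAR(64),
    job_experience VARCHAR(64),
    job_education VARCHAR(64),
    company_name VARCHAR(255),
    company_type VARCHAR(128),
    company_status VARCHAR(128),
    company_people VARCHAR(64)
) DEFAULT CHARSET=utf8mb4
"""

db = pymysql.connect(host='127.0.0.1', user='root', password='774110919',
                     port=3306, db='boss_job', charset='utf8mb4')
with db.cursor() as cursor:
    cursor.execute(ddl)
db.commit()
db.close()
```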
*****

Data visualization:
```
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pylab import mpl
import matplotlib.font_manager as fm

dfs = pd.read_csv('boss直聘爬虫岗1217135602.csv', encoding='utf-8')
data_df = pd.DataFrame(dfs)
# print("\n查看是否有缺失值\n", data_df.isnull().sum())  # check for missing values
data_df_del_empty = data_df.dropna(subset=['职位'], axis=0)  # drop rows without a job title
# keep only the rows whose job title mentions 爬虫 / python / Python
data_df_python_keyword = data_df_del_empty.loc[data_df_del_empty['职位'].str.contains('爬虫|python|Python')]
# lower bound of the salary range, e.g. "10-15K" -> "10K"
data_df_python_keyword_salary = data_df_python_keyword['薪水'].str.split('-', expand=True)[0] + 'K'
# insert the minimum salary as a new column at position 7
data_df_python_keyword.insert(7, '最小薪资', data_df_python_keyword_salary)

Fre_f = pd.DataFrame(dfs["学历"].value_counts())           # frequency of each education level
Fre_x = data_df_python_keyword["最小薪资"].value_counts()  # frequency of each minimum salary


def Bar_1(data, title, is_a):
    # use a Chinese font for every text element
    my_font = fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc")
    mpl.rcParams['font.sans-serif'] = my_font.get_name()
    mpl.rcParams["axes.unicode_minus"] = False
    # bar chart
    p = plt.figure(figsize=(20, 8), dpi=300)
    ax = p.add_subplot(1, 1, 1)  # one subplot in a 1x1 grid
    # hide the top and right spines
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    x = list(data.index)
    if is_a == 1:
        y = list(data)
    else:
        y = list(data['学历'])
    plt.bar(range(len(x)), y, color="#4CAF50", width=0.5)
    plt.xticks(range(len(x)), x, font=my_font, fontsize=8, color="red")
    plt.yticks(font=my_font, fontsize=8, color="#006400")
    # background grid
    plt.grid(axis="y", linestyle="--", color="#FFA500", alpha=0.5)
    # annotate every bar: the arguments are the text, the annotated point and the text position
    color_list = ["#4B0082", "#2F4F4F", "#32CD32", "#808000", "#B22222", "#808000"]
    for i in range(len(y)):
        plt.annotate(y[i], xy=(i, y[i]), xytext=(i, y[i]), font=my_font, fontsize=8, color="#808000")
    # axis labels
    plt.xlabel("\n学历", font=my_font, fontsize=20)
    plt.ylabel("招聘人数\n", font=my_font, fontsize=20)
    # title
    plt.title(title, font=my_font, fontsize=15, color="#FFD700")
    # plt.savefig("C:/Users/HUAWEI/Desktop/大数据就业与学历关系直方图.png")
    plt.show()


def Bie_1(data, title, is_a):
    plt.figure(figsize=(10, 5), dpi=150)  # figure size
    x = list(data.index)
    if is_a == 1:
        y = list(data)
    else:
        y = list(data['学历'])
    labels = x  # slice labels
    sizes = y   # slice values
    colors = ['red', 'yellowgreen', 'lightskyblue', 'yellow', 'blue']  # slice colours
    explode = (0.1, 0.05, 0.05, 0.05, 1.2)  # how far each slice is pulled out of the pie
    patches, text1, text2 = plt.pie(sizes,
                                    labels=labels,
                                    autopct='%3.2f%%',  # percentage format with two decimals
                                    shadow=False,       # no shadow
                                    startangle=0,       # counter-clockwise start angle
                                    pctdistance=0.6)    # distance of the percentage labels from the centre
    # patches are the wedges, text1 the labels outside the pie, text2 the labels inside it
    plt.axis('equal')  # equal x/y scaling keeps the pie circular
    # legend
    my_font1 = fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc", size=10)
    plt.legend(prop=my_font1)
    # title
    my_font2 = fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc", size=20)
    plt.title(title, font=my_font2, color='#32CD32')
    plt.show()


Bie_1(Fre_f, "爬虫就业与学历关系", 0)
# Bar_1(Fre_x, "爬虫就业与学历关系", 1)

# annotate adds annotations, e.g. the count printed above each bar
# bar draws the bar chart itself
# value_counts() counts how many times each distinct value appears in a column
```
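Since the closing comments only describe value_counts() in words, here is a tiny standalone example of the kind of Series that Fre_f is built from (the sample data is made up):
```
import pandas as pd

s = pd.Series(["本科", "本科", "大专", "硕士", "本科"])  # made-up education column
print(s.value_counts())
# 本科    3
# 大专    1
# 硕士    1
# The index becomes the x labels and the counts become the bar heights / pie sizes above.
```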