Scraping the data:
```
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup # 网页解析,获取数据
import re # 正则表达式,进行文字匹配
import urllib.request, urllib.error # 制定URL,获取网页数据
import xlwt # 进行excel操作
# from Selenium4R import Chrome # 仅在使用下方已注释的缓存版 Chrome 时需要,未安装该包时保持注释
from selenium import webdriver
import time
#import sqlite3 # 进行SQLite数据库操作
findLink = re.compile(r'<span class="job-name"><a href="(.*?)" target="_blank">') # 创建正则表达式对象,表示匹配规则:岗位详情链接
findImgSrc = re.compile(r'<div class="company-text">(.*)</div>',re.S) #公司名称
findTitle = re.compile(r'<em class="vline"></em>(.*)</p>') #学历要求
findRating = re.compile(r'</a><em class="vline"></em>(.*)<em class="vline"></em>') #公司人员估值
findJudge = re.compile(r'<div class="info-desc">(.*)</div>') #福利
findInq = re.compile(r'<span class="red">(.*)</span>') #薪资
findarea = re.compile(r'<span class="job-area">(.*)</span>') #公司地点
findname = re.compile(r'<span class="job-name">(.*)</span>') #岗位名称
findjyan = re.compile(r'<p>(.*)<em class="vline"></em>') #工作经验要求
# findBd = re.compile(r'<p class="">(.*?)</p>', re.S)
# findImgSrc = re.compile(r'<img.*src="(.*?)"', re.S)
# re.S:不加 re.S 时,"." 只在每一行内匹配,一行匹配不到就换下一行,不会跨行;加上 re.S 后,整个字符串被当作一个整体,"\n" 被视为普通字符参与匹配
# r:表示原始字符串,反斜杠不做转义处理
# re.sub(pattern, repl, string[, count, flags]):用于替换字符串中的匹配项。pattern 为正则模式,repl 为替换内容(也可以是函数),string 为被处理的原始字符串,count、flags 为可选参数
# strip():移除字符串头尾指定的字符(默认为空格或换行符)或字符序列。注意:只能删除开头或结尾的字符,不能删除中间部分的字符
def main():
baseurl = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=" #要爬取的网页链接
# 1.爬取网页
datalist = getData(baseurl)
time_stamp = time.strftime('%m%d%H%M%S',time.localtime(time.time()))
savepath = "boss直聘爬虫岗"+time_stamp+".xls" #当前目录新建XLS,存储进去
# dbpath = "movie.db" #当前目录新建数据库,存储进去
# 3.保存数据
saveData(datalist,savepath) #2种存储方式可以只选择一种
# saveData2DB(datalist,dbpath)
# 爬取网页
def getData(baseurl):
datalist = [] #用来存储爬取的网页信息
    for i in range(0, 3): # 调用获取页面信息的函数,共爬取 3 页
url = baseurl + str(i + 1) + '&ka=page-' + str(i + 1)
# html = askURL(url) # 保存获取到的网页源码
# driver = Chrome(cache_path=r"E:\Temp")
driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
# url = "https://www.zhipin.com/c101020100/e_102/?query=web%E5%89%8D%E7%AB%AF&page=1&ka=page-1"
driver.get(url)
time.sleep(8)
# js = "window.open("+url+")"
# driver.execute_script(js)
html = driver.page_source
# 2.逐一解析数据
soup = BeautifulSoup(html, "html.parser")
for item in soup.find_all('div', class_="job-primary"): # 查找符合要求的字符串
            data = [] # 保存一条招聘信息的所有字段
item = str(item)
            link = re.findall(findInq, item)[0] # 薪资
            linka = re.findall(findTitle, item)[0] # 学历要求
# linka = re.sub('[A-Za-z]',"", linka)
# linka = re.sub('[\s+\.\!\/_,$%^*(+\"|<>]+',"", linka)
            regex_str = ".*?([\u4E00-\u9FA5]+).*?"
            linka = "".join(re.findall(regex_str, linka)) # 只保留中文字符;re.findall 返回列表,拼成字符串再写入
data.append(link)
data.append(linka)
linkb = re.findall(findjyan, item)[0]
data.append(linkb)
linkc = re.findall(findarea, item)[0]
data.append(linkc)
linkd = re.findall(findname, item)[0]
data.append(linkd)
imgSrc = re.findall(findImgSrc, item)[0]
# imgtest = re.compile(r'<h3 class="name"></h3>',re.S)
            imgSrc = re.findall('target="_blank">(.*)</a></h3>', imgSrc)
            data.append(imgSrc[0] if imgSrc else "") # 公司名称;re.findall 返回列表,取第一个元素写入
# titles = re.findall(findTitle, item)
# if (len(titles) == 2):
# ctitle = titles[0]
# data.append(ctitle)
# otitle = titles[1].replace("/", "") #消除转义字符
# data.append(otitle)
# else:
# data.append(titles[0])
# data.append(' ')
# rating = re.findall(findRating, item)[0]
# data.append(rating)
# judgeNum = re.findall(findJudge, item)[0]
# data.append(judgeNum)
# inq = re.findall(findInq, item)
# if len(inq) != 0:
# inq = inq[0].replace("。", "")
# data.append(inq)
# else:
# data.append(" ")
# bd = re.findall(findBd, item)[0]
# bd = re.sub('<br(\s+)?/>(\s+)?', "", bd)
# bd = re.sub('/', "", bd)
# data.append(bd.strip())
datalist.append(data)
return datalist
# 得到指定一个URL的网页内容
def askURL(url):
    head = {  # 模拟浏览器头部信息,向目标服务器发送请求
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36",
        "cookie": ""
    }
    # 用户代理:告诉服务器我们是什么类型的机器和浏览器(即我们能接收什么水平的文件内容)
request = urllib.request.Request(url, headers=head)
html = ""
try:
response = urllib.request.urlopen(request)
html = response.read().decode("utf-8")
except urllib.error.URLError as e:
if hasattr(e, "code"):
print(e.code)
if hasattr(e, "reason"):
print(e.reason)
return html
# 保存数据到表格
def saveData(datalist,savepath):
print("save.......")
print(datalist)
book = xlwt.Workbook(encoding="utf-8",style_compression=0) #创建workbook对象
    sheet = book.add_sheet('boss直聘爬虫岗', cell_overwrite_ok=True) #创建工作表
    col = ("薪资","学历要求","工作经验","工作地点","岗位名称","公司名称") # 与 getData 中追加字段的顺序一致
    for i in range(0,6):
        sheet.write(0,i,col[i]) #列名
    for i in range(len(datalist)): # 按实际抓取的条数写入,避免写死 90 条导致越界
# print("第%d条" %(i+1)) #输出语句,用来测试
data = datalist[i]
for j in range(0,6):
sheet.write(i+1,j,data[j]) #数据
book.save(savepath) #保存
if __name__ == "__main__": # 当程序执行时
# 调用函数
main()
# init_db("movietest.db")
print("爬取完毕!")
```
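The long comments near the top of the script above explain `re.S`, raw strings, `re.sub`, and `strip()`. Below is a minimal, self-contained sketch of how those regexes pull fields out of a single `job-primary` block; the sample HTML is hand-written to mirror the structure the patterns expect, not captured from zhipin.com:
```
import re

# Hand-made sample that mimics the markup the regexes above expect.
sample = '''
<div class="job-primary">
  <span class="job-name"><a href="/job_detail/abc.html" target="_blank">爬虫工程师</a></span>
  <span class="job-area">上海·浦东新区</span>
  <p>3-5年<em class="vline"></em>本科</p>
  <span class="red">15-25K</span>
  <div class="company-text"><h3 class="name"><a target="_blank">某某科技</a></h3></div>
</div>
'''

findInq = re.compile(r'<span class="red">(.*)</span>')        # salary
findTitle = re.compile(r'<em class="vline"></em>(.*)</p>')    # education
findjyan = re.compile(r'<p>(.*)<em class="vline"></em>')      # experience
findarea = re.compile(r'<span class="job-area">(.*)</span>')  # location

print(re.findall(findInq, sample)[0])    # 15-25K
print(re.findall(findTitle, sample)[0])  # 本科
print(re.findall(findjyan, sample)[0])   # 3-5年
print(re.findall(findarea, sample)[0])   # 上海·浦东新区

# re.S demo: without re.S, '.' stops at '\n', so a pattern spanning lines only matches with DOTALL.
text = "<div>a\nb</div>"
print(re.findall(r'<div>(.*)</div>', text))        # []
print(re.findall(r'<div>(.*)</div>', text, re.S))  # ['a\nb']
```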
*****
2:
```
from bs4 import BeautifulSoup
from selenium import webdriver
import csv
import time
def fillPostList(postlist,html):
try:
soup = BeautifulSoup(html,"html.parser")
job_all = soup.find_all('div', {"class": "job-primary"})
for job in job_all:
position = job.find('span', {"class": "job-name"}).get_text()
address = job.find('span', {'class': "job-area"}).get_text()
company = job.find('div', {'class': 'company-text'}).find('h3', {'class': "name"}).get_text()
salary = job.find('span', {'class': 'red'}).get_text()
diploma = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[-2:]
experience = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[:-2]
labels = job.find('a', {'class': 'false-link'}).get_text()
postlist.append([position,address,company,salary,diploma,experience,labels])
    except (AttributeError, IndexError): # 某个字段缺失时跳过本页剩余内容;更稳妥的逐字段处理见下方示例
pass
def main():
jobinfo = []
driver = webdriver.Chrome()
url = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=1&ka=page-1"
driver.get(url)
time.sleep(8)
html = driver.page_source
fillPostList(jobinfo,html)
#将jobinfo列表信息写入csv文件
headers = ["职位","工作地址","公司全称","薪水","学历","工作经验","行业标签"]
    with open('job.csv', 'w', newline='', encoding='utf-8') as f:
f_csv = csv.writer(f)
f_csv.writerow(headers)
f_csv.writerows(jobinfo)
driver.quit()
main()
```
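A note on script 2: `job.find(...)` returns `None` when a node is missing, which raises `AttributeError`, so an `except IndexError` guard never fires and one bad card silently discards the rest of the page. Below is a more defensive sketch of `fillPostList` that keeps the class names and the crude `[-2:]` / `[:-2]` slicing used above:
```
from bs4 import BeautifulSoup

def safe_text(node):
    # Return the stripped text of a bs4 node, or "" when the node is missing.
    return node.get_text(strip=True) if node else ""

def fill_post_list(postlist, html):
    soup = BeautifulSoup(html, "html.parser")
    for job in soup.find_all('div', {"class": "job-primary"}):
        position = safe_text(job.find('span', {"class": "job-name"}))
        address = safe_text(job.find('span', {"class": "job-area"}))
        company_node = job.find('div', {"class": "company-text"})
        company = safe_text(company_node.find('h3', {"class": "name"})) if company_node else ""
        salary = safe_text(job.find('span', {"class": "red"}))
        limit_node = job.find('div', {"class": "job-limit clearfix"})
        limit = safe_text(limit_node.find('p')) if limit_node else ""
        diploma, experience = limit[-2:], limit[:-2]  # same slicing convention as above
        labels = safe_text(job.find('a', {"class": "false-link"}))
        postlist.append([position, address, company, salary, diploma, experience, labels])
```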
3:
```
# Boss直聘
from bs4 import BeautifulSoup
import requests
import xlwt
from selenium import webdriver
from lxml import etree
import time
begin = int(input("输入起始页:"))
end = int(input("输入终止页:"))
url = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=1&ka=page-1"
base_url="https://www.zhipin.com"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4557.4 Safari/537.36',
'cookie': '__g=-; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1628342274,1628476062,1628559147; lastCity=100010000; __c=1628559147; __l=l=%2Fwww.zhipin.com%2Fc100010000%2F%3Fpage%3D1%26ka%3Dpage-1&r=&g=&s=3&friend_source=0&s=3&friend_source=0; __a=51751789.1628342272.1628476062.1628559147.80.3.2.80; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1628559375; __zp_stoken__=44fccEA1HA2tYaygfIi87Y39AOV8QMShFLTJsCThyVHN4TQUcEithWCZrdEBRdGB%2BT3s1cRw9fggUJQYnIEMHSE0rHHpfbE0yGiREN2IMbHcNX3s6dg5iIzgCdHxZREcDf1glTGc4AHw%2FcjoH',
'referer': 'https://www.zhipin.com/c100010000/?page=2&ka=page-2'
}
names = []
locations = []
salarys = []
requirements = []
educations = []
companys = []
links = []
items = []
for page in range(begin, end+1):
param = {
'page': page
}
# response = requests.get(url, params=param, headers=headers)
driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
# url = "https://www.zhipin.com/c101020100/e_102/?query=web%E5%89%8D%E7%AB%AF&page=1&ka=page-1"
    driver.get(url.format(page, page)) # 按页码请求,否则每次循环抓到的都是第 1 页
time.sleep(8)
# js = "window.open("+url+")"
# driver.execute_script(js)
    html = driver.page_source
    driver.quit() # 关闭本页使用的浏览器,避免循环中窗口越开越多
#print(html)
root=etree.HTML(html)
name=root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[1]/a/text()')
names.extend(name)
    location = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[2]/span/text()')
    locations.extend(location)
    salary = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/span/text()')
    salarys.extend(salary)
    requirement = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/p/text()[1]')
    requirements.extend(requirement)
    education = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[2]/p/text()[2]')
    educations.extend(education)
    company = root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[2]/div/h3/a/text()')
companys.extend(company)
link=root.xpath('//*[@id="main"]/div/div[2]/ul/li/div/div[1]/div[1]/div/div[1]/span[1]/a/@href')
for i in range(0,len(link)):
link[i]=base_url+link[i]
links.extend(link)
items.append(names)
items.append(locations)
items.append(salarys)
items.append(requirements)
items.append(educations)
items.append(companys)
items.append(links)
#print(items)
book = xlwt.Workbook(encoding='utf-8')
sheet = book.add_sheet('items')
head = ['职位名称', '工作地点', '薪水', '工作经验', '学历', '公司','详情链接']
for i in range(0, 7):
sheet.write(0, i, head[i])
for i in range(0, 7):
a = items[i]
for j in range(len(a)):
sheet.write(j + 1, i, a[j])
book.save('Boss直聘12.xls')
```
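Script 3's absolute XPaths (`//*[@id="main"]/div/div[2]/ul/li/...`) break as soon as the page layout shifts, and they keep seven parallel lists aligned only by position. Relative, class-based paths scoped to each job card are shorter and keep one card's fields together. A sketch follows, with the class names assumed from the other scripts rather than verified against the live page:
```
from lxml import etree

def first(node, xp):
    # First match of an XPath under the given node, stripped, or "".
    found = node.xpath(xp)
    return found[0].strip() if found else ""

def parse_jobs(html, base_url="https://www.zhipin.com"):
    # One row per job card, using class-based relative XPaths scoped to the card.
    rows = []
    for card in etree.HTML(html).xpath('//div[@class="job-primary"]'):
        href = first(card, './/span[@class="job-name"]/a/@href')
        rows.append([
            first(card, './/span[@class="job-name"]/a/text()'),                       # position
            first(card, './/span[@class="job-area"]/text()'),                         # location
            first(card, './/span[@class="red"]/text()'),                              # salary
            first(card, './/div[@class="company-text"]//h3[@class="name"]/a/text()'), # company
            base_url + href if href else "",                                          # detail link
        ])
    return rows
```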
4:
```
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup # 网页解析,获取数据
import re # 正则表达式,进行文字匹配
import urllib.request, urllib.error # 制定URL,获取网页数据
import xlwt # 进行excel操作
# from Selenium4R import Chrome # 本脚本未使用,保持注释以免缺少该第三方包时报错
from selenium import webdriver
import time
import csv
#import sqlite3 # 进行SQLite数据库操作
begin = int(input("输入起始页:"))
end = int(input("输入终止页:"))
def main():
baseurl = "https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=" #要爬取的网页链接
# 1.爬取网页
datalist = getData(baseurl)
time_stamp = time.strftime('%m%d%H%M%S',time.localtime(time.time()))
savepath = "boss直聘爬虫岗"+time_stamp+".csv" #当前目录新建XLS,存储进去
# dbpath = "movie.db" #当前目录新建数据库,存储进去
# 3.保存数据
saveData(datalist,savepath) #2种存储方式可以只选择一种
# 爬取网页
def getData(baseurl):
datalist = [] #用来存储爬取的网页信息
    for i in range(begin, end+1): # 按输入的起始/终止页逐页抓取
url = baseurl + str(i) + '&ka=page-' + str(i)
driver = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
driver.get(url)
time.sleep(8)
html = driver.page_source
# 2.逐一解析数据
soup = BeautifulSoup(html, "html.parser")
for job in soup.find_all('div', {"class": "job-primary"}): # 查找符合要求的字符串
            data = [] # 保存一条招聘信息的所有字段
# item = str(item)
position = job.find('span', {"class": "job-name"}).get_text()
address = job.find('span', {'class': "job-area"}).get_text()
company = job.find('div', {'class': 'company-text'}).find('h3', {'class': "name"}).get_text()
salary = job.find('span', {'class': 'red'}).get_text()
diploma = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[-2:]
experience = job.find('div', {'class': 'job-limit clearfix'}).find('p').get_text()[:-2]
labels = job.find('a', {'class': 'false-link'}).get_text()
# company_status_result = re.search(r'<em class="vline"/>(.*?)<em class="vline"/>', job)[0]
# if company_status_result:
# company_status = company_status_result.group(1)
# else:
# company_status = '无信息'
# data.append([position,address,company,salary,diploma,experience,labels])
data.append(position)
data.append(address)
data.append(company)
data.append(salary)
data.append(diploma)
data.append(experience)
data.append(labels)
datalist.append(data)
return datalist
# 保存数据到表格
def saveData(datalist,savepath):
print("save.......")
print(datalist)
# book = csv.Workbook(encoding="utf-8",style_compression=0) #创建workbook对象
# sheet = book.add_sheet('豆瓣电影Top250', cell_overwrite_ok=True) #创建工作表
# f = open(savepath,'w',encoding='utf-8',newline = '')
# csv_writer = csv.writer(f)
col = ("岗位名称","招聘地点","企业名","薪资","学历","经验要求","类型")
headers = ["职位","工作地址","公司全称","薪水","学历","工作经验","行业标签"]
# csv_writer.writerow(headers)
with open(savepath,'w',encoding='utf-8',newline = '') as f:
f_csv = csv.writer(f)
f_csv.writerow(headers)
f_csv.writerows(datalist)
# for i in range(0,7):
# # csv_writer.writerow(0,i,col[i]) #列名
# for i in range(len(datalist)):
# # print("第%d条" %(i+1)) #输出语句,用来测试
# data = datalist[i]
# for j in range(0,7):
# csv_writer.writerows(data[j])
# csv_writer.writerow(i+1,j,data[j]) #数据
# book.save(savepath) #保存
if __name__ == "__main__": # 当程序执行时
# 调用函数
main()
# init_db("movietest.db")
print("爬取完毕!")
```
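The first script and scripts 3 and 4 launch a fresh `webdriver.Chrome` for every page and pause a flat `time.sleep(8)`. Reusing one browser and waiting only until the job list has rendered is faster and less flaky. Here is a sketch using Selenium's explicit waits; the chromedriver path and the `job-primary` class are carried over from the scripts above:
```
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def fetch_pages(baseurl, begin, end,
                driver_path=r'C:\Program Files\Google\Chrome\Application\chromedriver.exe'):
    # Yield the rendered HTML of each result page, reusing a single browser.
    driver = webdriver.Chrome(driver_path)  # Selenium 3 style; Selenium 4 uses service=Service(driver_path)
    try:
        for page in range(begin, end + 1):
            driver.get(baseurl + str(page) + '&ka=page-' + str(page))
            # Wait up to 15 s for at least one job card instead of sleeping a fixed 8 s.
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "job-primary"))
            )
            yield driver.page_source
    finally:
        driver.quit()

# Usage mirroring getData() above:
# for html in fetch_pages("https://www.zhipin.com/c101280100/?query=%E7%88%AC%E8%99%AB&page=", 1, 3):
#     ...  # parse html
```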
6:
```
from pyspider.libs.base_handler import *
import pymysql
import random
import time
import re
count = 0
class Handler(BaseHandler):
# 添加请求头,否则出现403报错
crawl_config = {'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}}
def __init__(self):
# 连接数据库
self.db = pymysql.connect(host='127.0.0.1', user='root', password='774110919', port=3306, db='boss_job', charset='utf8mb4')
def add_Mysql(self, id, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people):
# 将数据写入数据库中
try:
cursor = self.db.cursor()
            sql = ('insert into job(id, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people) '
                   'values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)')  # 参数化查询,由 pymysql 负责转义,避免手动 % 拼接带来的引号/注入问题
            print(sql)
            cursor.execute(sql, (id, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people))
print(cursor.lastrowid)
self.db.commit()
except Exception as e:
print(e)
self.db.rollback()
@every(minutes=24 * 60)
def on_start(self):
# 因为pyspider默认是HTTP请求,对于HTTPS(加密)请求,需要添加validate_cert=False,否则599/SSL报错
self.crawl('https://www.zhipin.com/job_detail/?query=%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90&scity=100010000&industry=&position=', callback=self.index_page, validate_cert=False)
@config(age=10 * 24 * 60 * 60)
def index_page(self, response):
time.sleep(random.randint(2, 5))
for i in response.doc('li > div').items():
# 设置全局变量
global count
count += 1
# 岗位名称
job_title = i('.job-title').text()
print(job_title)
# 岗位薪水
job_salary = i('.red').text()
print(job_salary)
# 岗位地点
city_result = re.search('(.*?)<em class=', i('.info-primary > p').html())
job_city = city_result.group(1).split(' ')[0]
print(job_city)
# 岗位经验
experience_result = re.search('<em class="vline"/>(.*?)<em class="vline"/>', i('.info-primary > p').html())
job_experience = experience_result.group(1)
print(job_experience)
# 岗位学历
job_education = i('.info-primary > p').text().replace(' ', '').replace(city_result.group(1).replace(' ', ''), '').replace(experience_result.group(1).replace(' ', ''),'')
print(job_education)
# 公司名称
company_name = i('.info-company a').text()
print(company_name)
# 公司类型
company_type_result = re.search('(.*?)<em class=', i('.info-company p').html())
company_type = company_type_result.group(1)
print(company_type)
# 公司状态
company_status_result = re.search('<em class="vline"/>(.*?)<em class="vline"/>', i('.info-company p').html())
if company_status_result:
company_status = company_status_result.group(1)
else:
company_status = '无信息'
print(company_status)
# 公司规模
company_people = i('.info-company p').text().replace(company_type, '').replace(company_status,'')
print(company_people + '\n')
# 写入数据库中
self.add_Mysql(count, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people)
# 获取下一页信息
next = response.doc('.next').attr.href
if next != 'javascript:;':
self.crawl(next, callback=self.index_page, validate_cert=False)
else:
print("The Work is Done")
# 详情页信息获取,由于访问次数有限制,不使用
#for each in response.doc('.name > a').items():
#url = each.attr.href
#self.crawl(each.attr.href, callback=self.detail_page, validate_cert=False)
@config(priority=2)
def detail_page(self, response):
# 详情页信息获取,由于访问次数有限制,不使用
message_job = response.doc('div > .info-primary > p').text()
city_result = re.findall('城市:(.*?)经验', message_job)
experience_result = re.findall('经验:(.*?)学历', message_job)
education_result = re.findall('学历:(.*)', message_job)
message_company = response.doc('.info-company > p').text().replace(response.doc('.info-company > p > a').text(),'')
status_result = re.findall('(.*?)\d', message_company.split(' ')[0])
people_result = message_company.split(' ')[0].replace(status_result[0], '')
return {
"job_title": response.doc('h1').text(),
"job_salary": response.doc('.info-primary .badge').text(),
"job_city": city_result[0],
"job_experience": experience_result[0],
"job_education": education_result[0],
"job_skills": response.doc('.info-primary > .job-tags > span').text(),
"job_detail": response.doc('div').filter('.text').eq(0).text().replace('\n', ''),
"company_name": response.doc('.info-company > .name > a').text(),
"company_status": status_result[0],
"company_people": people_result,
"company_type": response.doc('.info-company > p > a').text(),
}
```
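The pyspider handler assumes a `boss_job.job` table already exists, but its schema is never shown. Below is a guessed DDL that matches the columns `add_Mysql()` writes (column types and lengths are assumptions, adjust as needed), wrapped in a small script so it stays in Python:
```
import pymysql

# Assumed schema matching the columns add_Mysql() inserts; not taken from the original post.
DDL = """
CREATE TABLE IF NOT EXISTS job (
    id INT PRIMARY KEY,
    job_title VARCHAR(255),
    job_salary VARCHAR(64),
    job_city VARCHAR(64),
    job_experience VARCHAR(64),
    job_education VARCHAR(64),
    company_name VARCHAR(255),
    company_type VARCHAR(128),
    company_status VARCHAR(64),
    company_people VARCHAR(64)
) DEFAULT CHARSET = utf8mb4
"""

db = pymysql.connect(host='127.0.0.1', user='root', password='774110919',
                     port=3306, db='boss_job', charset='utf8mb4')
try:
    with db.cursor() as cursor:
        cursor.execute(DDL)
    db.commit()
finally:
    db.close()
```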
*****
Data visualization:
```
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl # 用于设置 rcParams
import matplotlib.font_manager as fm
dfs = pd.read_csv('boss直聘爬虫岗1217135602.csv',encoding='utf-8')
data_df = pd.DataFrame(dfs)
# df['prince'].fillna(df['prince'].mean())
# print("\n查看是否有缺失值\n", data_df.isnull().sum())
data_df_del_empty = data_df.dropna(subset=['职位'], axis=0)
data_df_python_keyword = data_df_del_empty.loc[data_df_del_empty['职位'].str.contains('爬虫|python|Python')].copy() # copy() 避免后面 insert 新列时触发 SettingWithCopyWarning
# print(data_df_python_keyword)#筛选带有python的行
# 区间最小薪资
data_df_python_keyword_salary = data_df_python_keyword['薪水'].str.split('-', expand=True)[0] + 'K'
# print(data_df_python_keyword_salary) # 区间最小薪资
# DataFrame 新增一列:在第 7 列位置插入名为"最小薪资"的一列数据
data_df_python_keyword.insert(7, '最小薪资', data_df_python_keyword_salary)
# print(data_df_python_keyword['学历'])
Fre_f=pd.DataFrame(dfs["学历"].value_counts())
Fre_x=data_df_python_keyword["最小薪资"].value_counts()
def Bar_1(data,title,is_a):
#设置全景中文字体
my_font=fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc")
mpl.rcParams['font.sans-serif'] = my_font.get_name()
mpl.rcParams["axes.unicode_minus"] = False
#画直方图
#定义图片大小
p=plt.figure(figsize=(20,8),dpi=300)
ax=p.add_subplot(1,1,1) #创建一个1行1列的子图,并开始绘制第1幅
#去掉子图的上,右边框
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
x=list(data.index)
if(is_a == 1):
y=list(data)
else:
y=list(data['学历'])
plt.bar(range(len(x)),y,color="#4CAF50",width = 0.5)
plt.xticks(range(len(x)),x,font=my_font,fontsize=8,color="red")
plt.yticks(font=my_font,fontsize=8,color="#006400")
#定义背景网格线
plt.grid(axis="y",linestyle="--",color="#FFA500",alpha=0.5)
#加上注释()中的属性分别为文本内容,注释坐标,文本坐标
color_list=["#4B0082","#2F4F4F","#32CD32","#808000","#B22222","#808000"]
# plt.show()
# print(y[0])
# return;
for i in range(len(y)):
plt.annotate(y[i],xy=(i,y[i]),xytext=(i,y[i]),font=my_font,fontsize=8,color="#808000")
#加上x,y轴的标签
plt.xlabel("\n学历",font=my_font,fontsize=20)
plt.ylabel("招聘人数\n",font=my_font,fontsize=20)
#加上标题
plt.title(title,font=my_font,fontsize=15,color="#FFD700")
# plt.savefig("C:/Users/HUAWEI/Desktop/大数据就业与学历关系直方图.png")
plt.show()
def Bie_1(data,title,is_a):
plt.figure(figsize=(10,5),dpi=150) #调节图形大小
x=list(data.index)
if(is_a == 1):
y=list(data)
else:
y=list(data['学历'])
labels = x #定义标签
sizes = y #每块值
colors = ['red','yellowgreen','lightskyblue','yellow','blue'] #每块颜色定义
explode = (0.1,0.05,0.05,0.05,1.2) #将某一块分割出来,值越大分割出的间隙越大
patches,text1,text2 = plt.pie(sizes,
labels=labels,
autopct = '%3.2f%%', #数值保留固定小数位
shadow = False, #无阴影设置
startangle =0, #逆时针起始角度设置
pctdistance = 0.6) #数值距圆心半径倍数的距离
#patches饼图的返回值,texts1饼图外label的文本,texts2饼图内部的文本
# x,y轴刻度设置一致,保证饼图为圆形
plt.axis('equal')
#设置图列
my_font1=fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc",size=10)
plt.legend(prop=my_font1)
#设置标题
my_font2=fm.FontProperties(fname="C:/Windows/Fonts/msyhl.ttc",size=20)
plt.title(title,font=my_font2,color='#32CD32')
plt.show()
Bie_1(Fre_f,"爬虫就业与学历关系",0)
# Bar_1(Fre_x,"爬虫就业与学历关系",1)
# annotate 主要是添加注释,如柱状图顶部添加数字注释等
# bar 主要用来绘制柱形图
# value_counts() 是查看某列中有多少个不同取值的快捷方法,并统计每个不同取值在该列中出现的次数。
```
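The visualization keeps 最小薪资 as a string such as "15K", which is fine for `value_counts()` but not for numeric plots or averages. Below is a small sketch that parses the lower bound of the salary strings into a number; the column names 薪水/最小薪资 are the ones used above, and the sample frame is hand-made:
```
import re
import pandas as pd

def min_salary_k(s):
    # Lower bound of a Boss直聘 salary string in K, e.g. '15-25K·13薪' -> 15.0; None if unparseable.
    if not isinstance(s, str):
        return None
    m = re.match(r'(\d+(?:\.\d+)?)', s)
    return float(m.group(1)) if m else None

# Tiny hand-made frame mirroring the CSV columns used above.
df = pd.DataFrame({'职位': ['python爬虫工程师', '数据分析'],
                   '薪水': ['15-25K·13薪', '8-12K']})
df['最小薪资K'] = df['薪水'].map(min_salary_k)
print(df)
print(df['最小薪资K'].mean())
```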