案例 · Python爬虫

爬取 https://www.buxiuse.com/?page=1 的图片。 <br/> ```python """ @Date 2021/3/18 """ from lxml import etree import requests class BuXiuSe(object): def __init__(self): self.url = "https://www.buxiuse.com/?page=1" self.header = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 \ (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36" } def get_img_list(self): r = requests.get(self.url, headers=self.header) if r.status_code != 200: return None # 将获取的页面转换为Element对象 html = etree.HTML(r.text) # 调用xpath函数匹配需要的属性 img_list = html.xpath('//li[@class="span3"]//img//@src') for i in range(len(img_list)): r2 = requests.get(img_list[i], headers=self.header) if r2.status_code != 200: continue self.save_img_to_disk(r2.content, "E:/python/buxiuse/{}.jpg".format(i)) def save_img_to_disk(self, content, save_path): with open(save_path, "wb") as f: f.write(content) if __name__ == "__main__": buXiuSe = BuXiuSe() buXiuSe.get_img_list() ```