多应用+插件架构,代码干净,二开方便,首家独创一键云编译技术,文档视频完善,免费商用码云13.8K 广告
"""Download the first listing pages of a Baidu Tieba forum as HTML files.

Date: 2021/2/22

Listing pages are addressed through the ``pn`` query parameter, which grows
by 50 from one page to the next:

    http://tieba.baidu.com/f?kw=python&ie=utf-8&pn=0      page 1
    http://tieba.baidu.com/f?kw=python&ie=utf-8&pn=50     page 2
    http://tieba.baidu.com/f?kw=python&ie=utf-8&pn=100    page 3
"""
import os
from urllib.parse import quote


class TiebaSpider(object):
    """Fetch consecutive listing pages of one Tieba forum and store them.

    Args:
        tieba_name: Forum name; used as the ``kw`` query value and as the
            prefix of every saved file name.
        page_count: Number of listing pages to download (default 10).
        save_dir: Directory the HTML files are written to. It is created
            on first save if it does not exist.
    """

    def __init__(self, tieba_name, page_count=10, save_dir='E:/python/html'):
        self.tieba_name = tieba_name
        self.page_count = page_count
        self.save_dir = save_dir
        # Percent-encode the name: a raw '&' or '#' would corrupt the query
        # string, and raw '{' / '}' would break the str.format() call that
        # fills in the page offset later.
        self.url = (
            "http://tieba.baidu.com/f?kw="
            + quote(tieba_name, safe='')
            + "&ie=utf-8&pn={}"
        )

    def get_url_list(self):
        """Return the listing-page URLs, one per page, in page order."""
        # Page i (0-based) starts at offset i * 50.
        return [self.url.format(i * 50) for i in range(self.page_count)]

    def parse_url(self, url, timeout=10):
        """Send a GET request to *url* and return the response body as text.

        Args:
            url: Address to fetch.
            timeout: Seconds passed to ``requests.get``; without a timeout
                a stalled connection would block the run indefinitely.

        Raises:
            requests.exceptions.RequestException: On connection failure or
                timeout.
        """
        # Imported here so URL construction and file saving remain usable
        # when the third-party HTTP dependency is not installed.
        import requests

        response = requests.get(url, timeout=timeout)
        return response.text

    def save_html(self, page_num, tb_html):
        """Write *tb_html* to ``<save_dir>/<tieba_name>-第<page_num>页.html``.

        Args:
            page_num: 1-based page number used in the file name.
            tb_html: Page content to write (UTF-8 encoded on disk).

        Returns:
            The path of the file that was written.
        """
        os.makedirs(self.save_dir, exist_ok=True)
        file_name = '{}-第{}页.html'.format(self.tieba_name, page_num)
        file_path = os.path.join(self.save_dir, file_name)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(tb_html)
        return file_path

    # The method was originally published under this misspelled name; keep
    # it as an alias so existing callers continue to work.
    svae_html = save_html

    def run(self):
        """Download every listing page and save each one to disk."""
        # enumerate() supplies the 1-based page number directly instead of
        # searching the URL list for the current URL on every iteration.
        for page_num, tburl in enumerate(self.get_url_list(), start=1):
            print(tburl)
            tb_html = self.parse_url(tburl)
            self.save_html(page_num, tb_html)


if __name__ == '__main__':
    tb_spider = TiebaSpider('lol')
    tb_spider.run()