多应用+插件架构,代码干净,二开方便,首家独创一键云编译技术,文档视频完善,免费商用码云13.8K 广告
获取免费代理IP
~~~
import requests
from scrapy.selector import Selector
import pymysql

# One shared connection/cursor for the whole module; every method below
# executes its statements through this cursor.
conn = pymysql.connect(host="127.0.0.1", user="root", passwd="", db="fy", charset="utf8")
cursor = conn.cursor()

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
}

# Seconds to wait for a listing page before giving up; without a timeout
# requests.get() can block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 10


class GetRandomIp(object):
    """Collect proxies into the ``proxy_ip`` table and hand out working ones."""

    def parse(self, next_url='/nn/1'):
        """Crawl the proxy listing from ``next_url`` and follow its pagination.

        Each page's ``#ip_list`` rows are passed to :meth:`parse_detail`.
        Crawling stops when a page has no ``a.next_page`` link.

        :param next_url: site-relative path of the first page to fetch.
        """
        # Loop instead of recursing once per page, so a long listing cannot
        # exhaust Python's recursion limit.
        while next_url:
            response = requests.get(
                url='http://www.xicidaili.com{}'.format(next_url),
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
            selector = Selector(text=response.text)
            tr_list = selector.css('#ip_list tr')
            if tr_list:
                self.parse_detail(tr_list)
            next_url = selector.css('.pagination a.next_page::attr(href)').extract_first()

    def parse_detail(self, tr_list):
        """Store every proxy in ``tr_list`` whose reported speed is below 1.

        :param tr_list: selector list of the ``#ip_list`` table rows; the
            first row is skipped (presumably the table header).
        """
        for tr in tr_list[1:]:
            ip_addr = tr.css('td:nth-child(2)::text').extract_first()
            port = tr.css('td:nth-child(3)::text').extract_first()
            proxy_type = tr.css('td:nth-child(6)::text').extract_first()
            speed_title = tr.css('td:nth-child(7) div::attr(title)').extract_first()
            # extract_first() returns None when a cell is missing; skip such
            # rows instead of failing on None[:-1] or storing a useless entry.
            if not ip_addr or not port or not speed_title:
                continue
            try:
                # The last character of the title is dropped before parsing
                # (presumably a unit suffix -- verify against the live page).
                speed = float(speed_title[:-1])
            except ValueError:
                continue
            if speed < 1:
                self.insert_sql(ip_addr, port, proxy_type)

    def insert_sql(self, ip='', port='', type=''):
        """Insert one proxy row into ``proxy_ip`` and commit.

        :param ip: proxy host.
        :param port: proxy port.
        :param type: proxy scheme label as scraped; ``None`` is stored as NULL.
        """
        # Values come from a scraped page, so they are passed as query
        # parameters rather than formatted into the SQL text.
        cursor.execute(
            "insert proxy_ip(ip, port, type) VALUES(%s, %s, %s)",
            (ip, port, type),
        )
        conn.commit()

    def get_ip(self):
        """Return the URL of a randomly chosen proxy that passes :meth:`check_ip`.

        Proxies that fail the check are deleted from the table and another
        random row is tried.

        :return: proxy URL of the form ``scheme://ip:port``.
        :raises RuntimeError: when the ``proxy_ip`` table has no rows left.
        """
        sql = "select id, ip, port, type from proxy_ip ORDER BY RAND() LIMIT 1"
        # Loop rather than recurse: each dead proxy used to add a stack frame.
        while True:
            cursor.execute(sql)
            row = cursor.fetchone()
            conn.commit()
            if row is None:
                # fetchone() returns None on an empty table; fail with a clear
                # message instead of an unpacking TypeError.
                raise RuntimeError('proxy_ip table is empty; run parse() to collect proxies')
            row_id, ip_addr, port, proxy_type = row
            proxy_type = proxy_type.lower() if proxy_type else 'http'
            proxy_url = '{0}://{1}:{2}'.format(proxy_type, ip_addr, port)
            if self.check_ip(proxy_type, proxy_url):
                return proxy_url
            self.delete_ip(row_id)

    def check_ip(self, type, proxy_url):
        """Report whether a test request through ``proxy_url`` returns HTTP 200.

        :param type: proxy scheme label; kept for interface compatibility.
        :param proxy_url: full proxy URL to test.
        :return: ``True`` if the test request succeeded with status 200,
            ``False`` on any error or other status.
        """
        request_url = 'http://hf.58.com/ershoufang/0'
        # requests selects a proxy by the scheme of the *target* URL. Keying
        # the mapping by the proxy's own type meant an 'https' entry was never
        # used for this http:// test URL, so such proxies always "passed"
        # via a direct connection. Map both schemes to force use of the proxy.
        proxies = {'http': proxy_url, 'https': proxy_url}
        try:
            response = requests.get(url=request_url, proxies=proxies, allow_redirects=False, timeout=2)
        except Exception:
            # Deliberately broad: any failure at all means the proxy is unusable.
            print('invalid ip and port')
            return False
        if response.status_code == 200:
            return True
        print('invalid ip and port')
        return False

    def delete_ip(self, id):
        """Delete the proxy row with primary key ``id`` and commit.

        :param id: value of the ``id`` column of the row to remove.
        """
        cursor.execute("delete from proxy_ip where id = %s", (id,))
        conn.commit()


# Shared instance imported by other modules.
ip = GetRandomIp()

if __name__ == '__main__':
    print(ip.get_ip())
~~~
在middlewares.py文件中定义IP中间件
~~~
# Import the shared GetRandomIp instance, not the module: a bare ``import ip``
# binds the module object, which has no ``get_ip`` attribute.
# Assumes the snippet above is saved as ip.py -- adjust the path to your project.
from ip import ip


class EsfIpMiddleware(object):
    """Downloader middleware that assigns a checked proxy to every request."""

    def process_request(self, request, spider):
        """Set ``request.meta["proxy"]`` to a proxy URL from ``ip.get_ip()``."""
        request.meta["proxy"] = ip.get_ip()
~~~
在settings.py配置文件DOWNLOADER_MIDDLEWARES中添加该中间件