spiders · scrapy

~~~ # -*- coding: utf-8 -*- import scrapy import json, re from selenium import webdriver class TaobaoSpider(scrapy.Spider): name = 'taobao' # allowed_domains = ['https://s.taobao.com/search?q=%E9%AB%98%E6%95%B0'] start_urls = ['https://s.taobao.com/search?data-key=s&data-value=132&ajax=true&_ksTS=1520127920911_1426&callback=jsonp1427&initiative_id=tbindexz_20170306&ie=utf8&spm=a21bo.2017.201856-taobao-item.2&sourceId=tb.index&search_type=item&ssid=s5-e&commend=all&imgfile=&q=%E8%80%83%E7%A0%94&suggest=history_1&_input_charset=utf-8&wq=&suggest_query=&source=suggest&bcoffset=4&ntoffset=4&p4ppushleft=1%2C48&s=88'] def __init__(self): self.driver = webdriver.Chrome(executable_path='F:/python/chromedriver.exe') def parse(self, response): text = response.text text_json = re.match('.*jsonp\d+\((.*)?\);', text, re.DOTALL) if text_json: _json = text_json.group(1) response_json = json.loads(_json) mods = response_json.get('mods', None) itemlist = mods.get('itemlist', None) data = itemlist.get('data', None) auctions = data.get('auctions', None) pass ~~~