~~~
# -*- coding: utf-8 -*-
import scrapy
import json, re
from selenium import webdriver
class TaobaoSpider(scrapy.Spider):
    """Spider that requests one Taobao search URL and decodes its JSONP reply.

    The start URL asks for a ``jsonpNNNN(...)`` callback response.  ``parse``
    strips that wrapper, decodes the JSON payload and walks down to
    ``mods -> itemlist -> data -> auctions``.

    A Selenium Chrome driver is created when the spider is instantiated and
    is shut down again in ``closed``.
    """

    name = 'taobao'
    # NOTE: if ``allowed_domains`` is ever enabled it must contain bare domain
    # names (e.g. 's.taobao.com'), not full URLs with a scheme and query.
    start_urls = ['https://s.taobao.com/search?data-key=s&data-value=132&ajax=true&_ksTS=1520127920911_1426&callback=jsonp1427&initiative_id=tbindexz_20170306&ie=utf8&spm=a21bo.2017.201856-taobao-item.2&sourceId=tb.index&search_type=item&ssid=s5-e&commend=all&imgfile=&q=%E8%80%83%E7%A0%94&suggest=history_1&_input_charset=utf-8&wq=&suggest_query=&source=suggest&bcoffset=4&ntoffset=4&p4ppushleft=1%2C48&s=88']

    # Path of the ChromeDriver binary handed to Selenium.  Kept as a class
    # attribute so it can be changed in one place; Scrapy's documented
    # behaviour of copying spider arguments onto the instance should also
    # allow ``-a chromedriver_path=...`` to override it -- verify on the
    # Scrapy version in use.
    chromedriver_path = 'F:/python/chromedriver.exe'

    # Captures everything between ``jsonp<digits>(`` and the final ``);``.
    # Raw string: the pattern is unchanged, but ``\d`` / ``\(`` are no longer
    # parsed as (invalid) string escapes.
    _JSONP_RE = re.compile(r'.*jsonp\d+\((.*)?\);', re.DOTALL)

    # Keys followed, in order, from the decoded payload down to the listings.
    _AUCTIONS_PATH = ('mods', 'itemlist', 'data', 'auctions')

    def __init__(self, *args, **kwargs):
        """Initialise the base spider, then start the Chrome driver.

        Positional and keyword arguments are forwarded to ``scrapy.Spider``
        so spider arguments keep working.
        """
        # The base initialiser runs first so any spider arguments are applied
        # before ``chromedriver_path`` is read.
        super(TaobaoSpider, self).__init__(*args, **kwargs)
        self.driver = webdriver.Chrome(executable_path=self.chromedriver_path)

    def closed(self, reason):
        """Quit the Chrome driver when Scrapy closes the spider.

        Args:
            reason: Close reason supplied by Scrapy; not used here.
        """
        driver = getattr(self, 'driver', None)
        if driver is not None:
            driver.quit()
            self.driver = None

    def parse(self, response):
        """Decode the JSONP response and locate the ``auctions`` entry.

        Args:
            response: The Scrapy response for the search URL.

        Returns:
            None.  Nothing is emitted yet.

        Raises:
            ValueError: If the text inside the JSONP wrapper is not valid
                JSON (propagated from ``json.loads``).
        """
        auctions = self._extract_auctions(response.text)
        if auctions is None:
            self.logger.warning(
                'No auctions found in response from %s', response.url)
            return
        # TODO: ``auctions`` is located but not yet turned into items.

    @classmethod
    def _extract_auctions(cls, text):
        """Return the ``auctions`` value from a JSONP body, or None if absent."""
        wrapped = cls._JSONP_RE.match(text)
        if not wrapped:
            return None
        node = json.loads(wrapped.group(1))
        for key in cls._AUCTIONS_PATH:
            # Any missing or non-mapping level means there is nothing to
            # read; stop instead of calling ``.get`` on None.
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        return node
~~~