# Scrapy 模拟登录的三种方法
1. 发送一次请求,获取登陆所需参数 ~~~ # -*- coding: utf-8 -*- import scrapy # 正统模拟登录方法: # 首先发送登录页面的get请求,获取到页面里的登录必须的参数,比如说zhihu的 _xsrf # 然后和账户密码一起post到服务器,登录成功 class Renren2Spider(scrapy.Spider): name = "renren2" allowed_domains = ["renren.com"] start_urls = ( "http://www.renren.com/PLogin.do", ) def parse(self, response): #_xsrf = response.xpath("//_xsrf").extract()[0] yield scrapy.FormRequest.from_response( response, formdata = {"email" : "mr_mao_hacker@163.com", "password" : "alarmchime"},#, "_xsrf" = _xsrf}, callback = self.parse_page ) def parse_page(self, response): print "=========1===" + response.url #with open("mao.html", "w") as filename: # filename.write(response.body) url = "http://www.renren.com/422167102/profile" yield scrapy.Request(url, callback = self.parse_newpage) def parse_newpage(self, response): print "===========2====" + response.url with open("xiao.html", "w") as filename: filename.write(response.body) ~~~ 2. 只需要post数据的 ~~~ # -*- coding: utf-8 -*- import scrapy # 只要是需要提供post数据的,就可以用这种方法, # 下面示例:post数据是账户密码 class Renren1Spider(scrapy.Spider): name = "renren1" allowed_domains = ["renren.com"] def start_requests(self): url = 'http://www.renren.com/PLogin.do' yield scrapy.FormRequest( url = url, formdata = {"email" : "mr_mao_hacker@163.com", "password" : "alarmchime"}, callback = self.parse_page) def parse_page(self, response): with open("mao2.html", "w") as filename: filename.write(response.body) ~~~ 3. 
终极必杀 cookie

* 这种就用浏览器登陆一下,取得cookie,然后用在程序里

~~~
# -*- coding: utf-8 -*-
import scrapy

# Last-resort login method: log in once with a real browser, copy the
# session cookies, and replay them from the spider. More manual work,
# but essentially a 100% success rate.


class RenrenSpider(scrapy.Spider):
    name = "renren"
    allowed_domains = ["renren.com"]
    start_urls = (
        "http://www.renren.com/xxxxx",
        "http://www.renren.com/11111",
        "http://www.renren.com/xx",
    )

    # Session cookies captured from a logged-in browser.
    # NOTE: the original listed "jebe_key" twice; in a dict literal the
    # later entry silently overwrites the earlier one, so only the
    # effective (second) value is kept here.
    cookies = {
        "anonymid": "ixrna3fysufnwv",
        "_r01_": "1",
        "ap": "327550029",
        "JSESSIONID": "abciwg61A_RvtaRS3GjOv",
        "depovince": "GW",
        "springskin": "set",
        "jebe_key": "f6fb270b-d06d-42e6-8b53-e67c3156aa7e%7Cc13c37f53bca9e1e7132d4b58ce00fa3%7C1484060607478%7C1%7C1486198619601",
        "ver": "7.0",
        "XNESSESSIONID": "e703b11f8809",
        "jebecookies": "98c7c881-779f-4da8-a57c-7464175cd469|||||",
        "ick_login": "4b4a254a-9f25-4d4a-b686-a41fda73e173",
        "_de": "BF09EE3A28DED52E6B65F6A4705D973F1383380866D39FF5",
        "p": "ea5541736f993365a23d04c0946c10e29",
        "first_login_flag": "1",
        "ln_uact": "mr_mao_hacker@163.com",
        "ln_hurl": "http://hdn.xnimg.cn/photos/hdn521/20140529/1055/h_main_9A3Z_e0c300019f6a195a.jpg",
        "t": "691808127750a83d33704a565d8340ae9",
        "societyguester": "691808127750a83d33704a565d8340ae9",
        "id": "327550029",
        "xnsid": "f42b25cf",
        "loginfrom": "syshome",
    }

    def start_requests(self):
        """Issue the start requests with the captured cookies attached."""
        for url in self.start_urls:
            # url = "http://www.renren.com/410043129/profile"
            # No form data is posted, so a plain GET Request suffices
            # (the original used FormRequest, which without formdata
            # behaves identically).
            yield scrapy.Request(
                url, cookies=self.cookies, callback=self.parse_page
            )

    def parse_page(self, response):
        """Dump each fetched page to disk."""
        print("===========" + response.url)
        # response.body is bytes, so the file must be opened in binary mode
        with open("deng.html", "wb") as filename:
            filename.write(response.body)
~~~