在 pipelines.py 中定义下载图片的 pipeline:
~~~
from scrapy.pipelines.images import ImagesPipeline
class ImgPipeline(ImagesPipeline):
    """Image pipeline that records the stored image paths on the item."""

    def item_completed(self, results, item, info):
        """Store the paths of the downloaded images in ``item['img_path']``.

        Args:
            results: Iterable of ``(success, value)`` 2-tuples, one per
                requested image. Per the Scrapy media-pipeline docs,
                ``value`` is a dict containing a ``path`` key on success
                and a Twisted ``Failure`` on error.
            item: The scraped item; must accept the ``img_path`` key.
            info: Unused here; kept for the pipeline hook signature.

        Returns:
            The same ``item``. ``item['img_path']`` is a list of stored
            paths, or ``''`` when no image was downloaded.
        """
        # Check the success flag before inspecting the value: a failed
        # download carries a Failure object, on which "'path' in value"
        # would raise instead of returning False.
        paths = [
            value['path']
            for ok, value in (results or ())
            if ok and 'path' in value
        ]
        # Keep the existing "empty string means no image" convention, but
        # never overwrite paths already collected because one image failed.
        item['img_path'] = paths or ''
        return item
~~~
在 settings.py 的 ITEM_PIPELINES 中添加此 pipeline,
并定义下载图片的保存路径:
~~~
import os

# Pitfall: never use __file__ here. It conflicts with scrapyd, and using it
# raises an exception when the project is deployed and run. The base
# directory is therefore taken as the parent of the current working directory.
dir_path = os.path.dirname(os.path.abspath(os.curdir))
IMAGES_URLS_FIELD = 'img_url'  # name of the item field holding the image URLs
IMAGES_STORE = os.path.join(dir_path, 'images')
~~~