数据存储 Json
一、JsonLinesItemExporter保存json
from scrapy.exporters import JsonLinesItemExporter


class JsonLinesItemExporterPipeline(object):
    """Item pipeline that writes every item to ``jsonfile.json`` as JSON Lines.

    Each exported item is serialized by Scrapy's ``JsonLinesItemExporter``.
    """

    def __init__(self):
        # The exporter writes bytes, so the file must be opened in binary mode.
        self.file = open('jsonfile.json', 'wb')
        self.exporter = JsonLinesItemExporter(
            self.file, ensure_ascii=False, encoding='utf-8'
        )

    def process_item(self, item, spider):
        """Export ``item`` and hand it on to the next pipeline stage.

        Returns:
            The same ``item``. Returning it matters: a pipeline stage that
            returns ``None`` passes ``None`` to every later stage.
        """
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        """Close the output file when the spider finishes.

        This is the hook name Scrapy actually calls on pipelines.
        """
        self.file.close()

    # Earlier versions of this class exposed the cleanup under this name;
    # keep it so any code calling it explicitly still works.
    close_item = close_spider
二、自定义方法保存json文件
import json


class QsbkDemoPipeline(object):
    """Item pipeline that saves items as JSON Lines using the ``json`` module.

    Every processed item becomes one JSON object on its own line of the
    output file.
    """

    def __init__(self, path='qsbk.json'):
        """Open the output file for writing.

        Args:
            path: Destination file. Defaults to ``'qsbk.json'``, so
                constructing the pipeline without arguments behaves as before.
        """
        self.file = open(path, 'w', encoding='utf-8')

    def open_spider(self, spider):
        """Announce that the crawl has started."""
        print('爬虫开始了...')

    def process_item(self, item, spider):
        """Serialize ``item`` as one JSON line and append it to the file.

        Returns:
            The unchanged ``item``, so later pipeline stages can use it.
        """
        # The item has to be converted to a plain dict before json can dump it.
        # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes.
        item_json = json.dumps(dict(item), ensure_ascii=False)
        self.file.write(item_json + '\n')
        return item

    def close_spider(self, spider):
        """Close the output file and announce that the crawl has ended."""
        self.file.close()
        print('爬虫结束了...')
三、JsonItemExporter保存json
from scrapy.exporters import JsonItemExporter


class JsonExporterPipeline(object):
    """Item pipeline that saves items to ``qsbk_1.json`` with Scrapy's own
    ``JsonItemExporter``.

    The export is bracketed by ``start_exporting()`` in ``__init__`` and
    ``finish_exporting()`` in ``close_spider``.
    """

    def __init__(self):
        # The exporter writes bytes, so the file must be opened in binary mode.
        self.file = open('qsbk_1.json', 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False
        )
        # Begin the export.
        self.exporter.start_exporting()

    def open_spider(self, spider):
        """Announce that the crawl has started."""
        print('爬虫开始')

    def process_item(self, item, spider):
        """Export ``item`` and return it for later pipeline stages."""
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        """Finish the export and close the output file."""
        try:
            # Complete the export before the file is closed.
            self.exporter.finish_exporting()
        finally:
            # Close the handle even if finishing the export raised.
            self.file.close()