| 1234567891011121314151617181920212223242526272829303132333435363738394041 |
- # coding=utf-8
- '''
- Created on 2016-03-09
- @author: ChenHao
- '''
- from pymongo.mongo_client import MongoClient
- import json
- import codecs
# Connect to the local MongoDB server and select the "spider" database.
cli = MongoClient("mongodb://localhost:27017/")
db = cli.spider

# Load every pending list-page record into memory in a single pass.
listPage_list = list(db.kindlist_todo.find())

# Take the count from the records actually fetched: it is guaranteed to match
# the list that gets sliced later, needs no second round trip, and avoids
# Collection.count(), which newer PyMongo releases no longer provide.
count = len(listPage_list)

# Number of complete 20000-record chunks. Floor division keeps this an int on
# both Python 2 and Python 3, so it can be passed to range().
c = count // 20000
def _creat_file(index, ls, out_dir="F:/spider_download/listTask/"):
    """Write one task file containing the given list-page records as JSON.

    Each record is reduced to its "id", "number" and "url" fields, and the
    resulting list is serialized as a single JSON array (non-ASCII characters
    are written as-is, UTF-8 encoded) to "<out_dir>/<index>.txt".

    Args:
        index: Number used as the file name (converted with ``str``).
        ls: Iterable of mappings, each providing "id", "number" and "url".
        out_dir: Directory the task file is written to. Defaults to the
            original hard-coded location; the directory must already exist.

    Raises:
        KeyError: If a dict record lacks one of the three required keys.
        IOError: If the task file cannot be opened or written.
    """
    # Function-scope import so the block does not rely on a file-level import.
    import os

    # Keep only the fields a downstream task needs; everything else in the
    # record (e.g. MongoDB's "_id", which json cannot serialize) is dropped.
    out_list = [
        {"id": record["id"], "number": record["number"], "url": record["url"]}
        for record in ls
    ]
    st = json.dumps(out_list, ensure_ascii=False)

    file_name = os.path.join(out_dir, str(index) + ".txt")
    # The context manager closes the file even when write() raises.
    with codecs.open(file_name, 'w', encoding='utf-8') as fout:
        fout.write(st)
# Split the fetched records into task files of at most 20000 entries each,
# numbered 1.txt, 2.txt, ... in order.
# Stepping over the list itself (rather than a precomputed chunk count) means
# no empty task file is ever written: not after the last chunk when the record
# count is an exact multiple of 20000, and not when there are no records.
# The client is closed even if writing one of the files fails.
try:
    for file_index, start in enumerate(range(0, len(listPage_list), 20000), 1):
        _creat_file(file_index, listPage_list[start:start + 20000])
finally:
    cli.close()
|