create_taskfile_for_listPage.py 924 B

1234567891011121314151617181920212223242526272829303132333435363738394041
  1. # coding=utf-8
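'''
Export the documents of the MongoDB collection ``spider.kindlist_todo`` as
JSON task files of up to 20000 records each.

Created on 2016-03-09
@author: ChenHao
'''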
  6. from pymongo.mongo_client import MongoClient
  7. import json
  8. import codecs
  9. # 连接数据库
  10. cli = MongoClient("mongodb://localhost:27017/")
  11. db = cli.spider
  12. listPage_list = list()
  13. for kl in db.kindlist_todo.find():
  14. listPage_list.append(kl)
  15. count = db.kindlist_todo.count()
  16. c = count/20000
  17. def _creat_file(index, ls):
  18. # 生成任务文件
  19. out_list = list()
  20. for l in ls:
  21. d = dict()
  22. d["id"] = l["id"]
  23. d["number"] = l["number"]
  24. d["url"] = l["url"]
  25. out_list.append(d)
  26. st = json.dumps(out_list, ensure_ascii=False)
  27. fileName = "F:/spider_download/listTask/" + str(index) + ".txt"
  28. fout = codecs.open(fileName, 'w', encoding='utf-8')
  29. fout.write(st)
  30. fout.close()
  31. for i in range((c+1)):
  32. list_10000 = listPage_list[20000*i: 20000*(i + 1)]
  33. _creat_file((i+1), list_10000)
  34. cli.close()