pyInstallerTest.py 1004 B

1234567891011121314151617181920212223242526272829303132333435363738
  1. # coding=utf-8
  2. '''
  3. Created on 2016年3月14日
  4. @author: ChenHao
  5. '''
  6. from util_common import Constant, html_parser
  7. from pymongo.mongo_client import MongoClient
  8. from bs4 import BeautifulSoup
  9. import random
  10. praser = html_parser.HtmlParser()
  11. # 连接数据库
  12. cli = MongoClient(Constant.MONGODB_URL)
  13. db = cli.spider
  14. # 读取kindlist待生成detail任务的
  15. new_product_urls = set()
  16. rs = db.kindlist_todo.find_one({"creatDetailTask": Constant.TODO})
  17. listPage = rs;
  18. # 生成任务并保存
  19. html_cont = listPage["str_html"]
  20. soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
  21. detail_urls = praser._get_detail_urls_from_listPage(soup)
  22. # 组装detail任务
  23. task_list = list()
  24. for detail_url in detail_urls:
  25. d = dict()
  26. d["url"] = detail_url
  27. d["random"] = random.random()
  28. d["status"] = Constant.TODO
  29. task_list.append(d)
  30. db.detail_todo.insert_many(task_list)
  31. # 将此listPage修改状态
  32. listPage["creatDetailTask"] = Constant.DONE
  33. db.kindlist_todo.save(listPage)
  34. cli.close()