createDetailTaskTimeTest.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536
  1. # coding=utf-8
  2. '''
  3. Created on 2016年3月29日
  4. 测试detail任务生成大概需要多久
  5. @author: ChenHao
  6. '''
  7. from util_common import Constant, html_parser
  8. from pymongo.mongo_client import MongoClient
  9. from bs4 import BeautifulSoup
  10. import random
  11. import time
  12. # cli = MongoClient(Constant.MONGODB_URL)
  13. cli = MongoClient("mongodb://localhost:27017/")
  14. db = cli.spider
  15. timeStar = time.time()
  16. for i in range(1, 10):
  17. rs = db.kindlist_todo.find_one({"url": "http://www.mouser.cn/Embedded-Solutions/Solid-State-Drives-SSD/_/N-d0ro0/?No=925"})
  18. html_cont = rs["str_html"]
  19. praser = html_parser.HtmlParser()
  20. soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
  21. detail_urls = praser._get_detail_urls_from_listPage(soup)
  22. # 组装detail任务
  23. task_list = list()
  24. for detail_url in detail_urls:
  25. d = dict()
  26. d["url"] = detail_url
  27. d["random"] = random.random()
  28. d["status"] = Constant.TODO
  29. task_list.append(d)
  30. db.detail_todo.insert_many(task_list)
  31. rs["status"] = Constant.DONE
  32. db.kindlist_todo.save(rs)
  33. timeEnd = time.time()
  34. print (timeEnd - timeStar)