| 123456789101112131415161718192021222324252627282930313233343536 |
# coding=utf-8
"""
Created on 2016-03-29

Benchmark script: measures roughly how long it takes to generate detail
tasks from one stored list page.

Each repetition loads the stored list-page document from
``spider.kindlist_todo``, extracts the detail URLs from its HTML, inserts one
task document per URL into ``spider.detail_todo`` and marks the list-page
document as done. The total wall-clock time of all repetitions is printed.

@author: ChenHao
"""
from util_common import Constant, html_parser
from pymongo.mongo_client import MongoClient
from bs4 import BeautifulSoup
import random
import time

# Alternative connection string kept for reference:
# cli = MongoClient(Constant.MONGODB_URL)
cli = MongoClient("mongodb://localhost:27017/")
db = cli.spider

# The list page whose stored HTML is used as the benchmark input.
LIST_PAGE_URL = "http://www.mouser.cn/Embedded-Solutions/Solid-State-Drives-SSD/_/N-d0ro0/?No=925"

timeStar = time.time()
for i in range(1, 10):  # range(1, 10) yields nine repetitions
    rs = db.kindlist_todo.find_one({"url": LIST_PAGE_URL})
    if rs is None:
        # find_one returns None when nothing matches; fail with a clear
        # message instead of a TypeError on the subscript below.
        raise RuntimeError(
            "no kindlist_todo document found for url: %s" % LIST_PAGE_URL)
    html_cont = rs["str_html"]
    praser = html_parser.HtmlParser()
    soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
    detail_urls = praser._get_detail_urls_from_listPage(soup)
    # Assemble the detail tasks.
    task_list = [
        {"url": detail_url, "random": random.random(), "status": Constant.TODO}
        for detail_url in detail_urls
    ]
    # insert_many rejects an empty document list, so skip it when the page
    # produced no detail URLs.
    if task_list:
        db.detail_todo.insert_many(task_list)
    rs["status"] = Constant.DONE
    # Collection.save() was deprecated in PyMongo 3.0 and removed in 4.0;
    # replace_one with upsert=True is the equivalent write for a document
    # that already carries an _id.
    db.kindlist_todo.replace_one({"_id": rs["_id"]}, rs, upsert=True)
timeEnd = time.time()
print(timeEnd - timeStar)
|