# coding=utf-8
'''
Created on 2016-03-29

Measure roughly how long it takes to generate "detail" tasks from one
stored list page.

@author: ChenHao
'''
from util_common import Constant, html_parser
from pymongo.mongo_client import MongoClient
from bs4 import BeautifulSoup
import random
import time

# Alternative connection using the project-wide setting:
# cli = MongoClient(Constant.MONGODB_URL)
cli = MongoClient("mongodb://localhost:27017/")
db = cli.spider

# List page whose stored HTML is used as the benchmark input.
LIST_PAGE_URL = "http://www.mouser.cn/Embedded-Solutions/Solid-State-Drives-SSD/_/N-d0ro0/?No=925"

timeStar = time.time()
# range(1, 10) yields nine passes; each pass repeats the whole
# lookup / parse / insert / write-back cycle so the printed total covers
# all of them.
for _ in range(1, 10):
    rs = db.kindlist_todo.find_one({"url": LIST_PAGE_URL})
    if rs is None:
        # Fail with a clear message instead of a TypeError on rs[...] below.
        raise RuntimeError(
            "no kindlist_todo document found for url: %s" % LIST_PAGE_URL)

    html_cont = rs["str_html"]
    praser = html_parser.HtmlParser()
    soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
    detail_urls = praser._get_detail_urls_from_listPage(soup)

    # Assemble one detail task per extracted URL.
    task_list = [
        {
            "url": detail_url,
            "random": random.random(),
            "status": Constant.TODO,
        }
        for detail_url in detail_urls
    ]
    # insert_many() rejects an empty document list, so skip it when the
    # page produced no detail URLs.
    if task_list:
        db.detail_todo.insert_many(task_list)

    rs["status"] = Constant.DONE
    # Collection.save() was deprecated in PyMongo 3.0 and removed in 4.0;
    # replace_one(..., upsert=True) keyed on _id is the equivalent call for
    # a document that already has an _id.
    db.kindlist_todo.replace_one({"_id": rs["_id"]}, rs, upsert=True)
timeEnd = time.time()

# Total elapsed wall-clock time in seconds for all passes.
print(timeEnd - timeStar)