| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647 |
# coding=utf-8
"""Populate the temporary brand collection from raw property values.

Created on 2016-03-30.

One-off helper that was originally written to obtain the new "kind" data on
a temporary basis.  In its current form it reads every distinct ``value`` of
the ``propertyvalue_temp`` documents whose ``propertyid`` is ``"40"`` and
writes one document per value into ``brand_temp``.

@author: ChenHao
"""
import time

from pymongo.mongo_client import MongoClient

from util_common import Constant

TimeStart = time.time()

# For a local run, connect to "mongodb://localhost:27017/" instead.
cli = MongoClient(Constant.MONGODB_URL)
try:
    db = cli.spider

    # The rawest brand data has to be cleaned once up front, because the
    # "trademark name" property was introduced.
    rs = db.propertyvalue_temp.find({"propertyid": "40"}).distinct("value")

    # Ids are 1-based; the raw value is stored as both the English and the
    # Chinese name.
    brands = [
        {"id": index, "nameEn": r, "nameCn": r}
        for index, r in enumerate(rs, start=1)
    ]

    # A single bulk write replaces one round trip per brand.  insert_many
    # rejects an empty document list, so skip the write when nothing matched.
    if brands:
        db.brand_temp.insert_many(brands)
finally:
    # Release the connection even if the query or the insert fails.
    cli.close()

TimeEnd = time.time()
# "耗时" is the "elapsed time" label printed in front of the duration.
print("耗时", TimeEnd - TimeStart)
|