| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- # coding=utf-8
'''
Created on 2016-04-06
Category translation mapping
mouser -> uu
@author: ChenHao
'''
- from pymongo.mongo_client import MongoClient
- from pip._vendor.distlib.util import CONSTRAINTS
- from util_common import Constant
# Shared MongoDB handles used by the rest of the script; the connection URL
# is read from the project's Constant module.
cli = MongoClient(Constant.MONGODB_URL)
db = cli["spider"]

# CSV file holding the category mapping rows (relative path, so it is resolved
# against the current working directory).
file_path = "../spider_download/Other/kind_translate.csv"
# Step 1: store the category mapping in MongoDB.
# Start by reading every raw row of the mapping file into memory. The
# with-block guarantees the file handle is released even if reading fails.
with open(file_path, "r") as fin:
    lines = fin.readlines()
# Drop malformed rows: keep only the lines whose first two comma-separated
# fields both parse as integers (e.g. a non-numeric header or an empty line
# is skipped). Kept rows have those two fields converted to int in place.
relation_list = []
for line in lines:
    arr_line = line.replace("\n", "").split(",")
    try:
        arr_line[0] = int(arr_line[0])
        arr_line[1] = int(arr_line[1])
    except (ValueError, IndexError):
        # ValueError: a field is not numeric; IndexError: fewer than two fields.
        continue
    relation_list.append(arr_line)
# Build one document per distinct first-column id ("mouserid"); when an id
# appears on several rows, only the first row wins and the rest are ignored.
temp_set = set()
relation_final_list = []
for rl in relation_list:
    if rl[0] in temp_set:
        continue
    temp_set.add(rl[0])
    relation_final_list.append({"mouserid": rl[0], "uuid": rl[1]})
print(relation_final_list)
# insert_many() raises TypeError for an empty document list, so skip the
# write when no valid row survived the filtering above.
if relation_final_list:
    db.kind_translate.insert_many(relation_final_list)
# Walk every leaf category in kind_temp_2 and collect those whose "id" has no
# entry in the mapping (i.e. is absent from temp_set).
# NOTE(review): temp_set holds ints; this assumes kind_temp_2 stores "id" as
# an integer as well - confirm against the collection.
not_relate_list = []
leaf_count = 0
rs = db.kind_temp_2.find({"isLeaf": True})
for r in rs:
    # Count while iterating instead of calling Cursor.count(), which was
    # removed in PyMongo 4 and costs an extra server round trip.
    leaf_count += 1
    if r["id"] not in temp_set:
        not_relate_list.append(r)
print(len(temp_set))
print(leaf_count)
print(len(not_relate_list))

# NOTE: earlier version of the import, kept for reference only. It inserted
# the rows one at a time and stored both ids as unparsed strings.
# cli = MongoClient(Constant.MONGODB_URL)
# db = cli.spider
# for line in lines:
# temp_str = line.replace("\n", "")
# temp_list = temp_str.split(",")
# d = dict()
# d["mouserid"] = temp_list[0]
# d["uuid"] = temp_list[1]
# db.kind_translate.insert_one(d)
#

# Release the MongoDB connection now that all work is done.
cli.close()
|