| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879 |
- # coding=utf-8
- '''
- Created on 2016年4月6日
- 新增新的类目转换关系
- @author: ChenHao
- '''
- from pymongo.mongo_client import MongoClient
- from pip._vendor.distlib.util import CONSTRAINTS
- from util_common import Constant
# Connect to MongoDB (URL supplied by the project's Constant module) and
# select the "spider" database that holds the kind_translate collection.
cli = MongoClient(Constant.MONGODB_URL)
db = cli.spider
file_path = "../spider_download/Other/newTranslate20160427145412.csv"

try:
    # Step 1: read the translation relations from the CSV file.
    # Each non-blank line is parsed as "<mouser id>,<uu id>" (two integers).
    with open(file_path, "r") as fin:
        lines = fin.readlines()

    # Step 2: collect the mouser ids that are already stored, so existing
    # relations are not inserted a second time.
    old_kindid_mouser_set = set(
        r["mouserid"] for r in db.kind_translate.find({})
    )
    print(old_kindid_mouser_set)

    # Ids to skip: everything already in the collection plus every id accepted
    # from this file so far. Tracking the latter keeps a mouser id that is
    # repeated inside the CSV from being inserted twice (first row wins).
    seen_kindid_mouser_set = set(old_kindid_mouser_set)

    # Step 3: build the list of documents that still need to be inserted.
    relation_final_list = list()
    for line in lines:
        str_line = line.strip()
        if not str_line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing on int("").
            continue
        arr_line = str_line.split(",")
        print(str_line)
        # A malformed row (non-integer or missing column) still raises, so
        # bad input is noticed before anything is written to the database.
        kindid_mouser = int(arr_line[0])
        kindid_uu = int(arr_line[1])

        if kindid_mouser in seen_kindid_mouser_set:
            continue
        seen_kindid_mouser_set.add(kindid_mouser)
        relation_final_list.append(
            {"mouserid": kindid_mouser, "uuid": kindid_uu}
        )
    print(relation_final_list)
    print(len(relation_final_list))

    # Step 4: insert the new relations. insert_many() rejects an empty list,
    # so skip the call when there is nothing new to add.
    if relation_final_list:
        db.kind_translate.insert_many(relation_final_list)
finally:
    # Always release the connection, even if reading or parsing failed.
    cli.close()
|