# coding=utf-8
'''
init_kind_translate_20160427145412.py

Created on 2016-04-06
Add new category translation relations.

Reads "mouserid,uuid" rows from a CSV file and inserts into the
``spider.kind_translate`` MongoDB collection every row whose mouserid is
not already stored there.

@author: ChenHao
'''
from pymongo.mongo_client import MongoClient
# NOTE(review): CONSTRAINTS is not referenced anywhere in this script; it
# looks like an accidental IDE auto-import -- confirm before removing.
from pip._vendor.distlib.util import CONSTRAINTS
from util_common import Constant


def _parse_new_relations(lines, known_mouser_ids):
    '''
    Build the list of documents to insert from raw CSV lines.

    :param lines: iterable of text lines; the first two comma-separated
        columns of each line are parsed as integers (mouserid, uuid).
        Any further columns are ignored.
    :param known_mouser_ids: mouserid values that must not be inserted
        again. The collection passed in is not modified.
    :return: list of ``{"mouserid": int, "uuid": int}`` dicts, in file
        order, containing each new mouserid at most once.
    :raises ValueError: if one of the first two columns is not an integer.
    :raises IndexError: if a non-blank line has fewer than two columns.
    '''
    # Work on a copy so ids added while de-duplicating the file itself do
    # not leak back into the caller's set.
    seen_ids = set(known_mouser_ids)
    relations = []
    for line in lines:
        str_line = line.strip()
        if not str_line:
            # Blank lines (typically a trailing newline at the end of the
            # file) carry no data; int("") would otherwise abort the run.
            continue
        print(str_line)
        arr_line = str_line.split(",")
        kindid_mouser = int(arr_line[0])
        kindid_uu = int(arr_line[1])
        if kindid_mouser in seen_ids:
            continue
        # Remember the id so a mouserid repeated inside the CSV is only
        # inserted once (the first occurrence wins).
        seen_ids.add(kindid_mouser)
        relations.append({"mouserid": kindid_mouser, "uuid": kindid_uu})
    return relations


cli = MongoClient(Constant.MONGODB_URL)
db = cli.spider
file_path = "../spider_download/Other/newTranslate20160427145412.csv"

try:
    # 1. Store the translation relations in MongoDB.
    with open(file_path, "r") as fin:
        lines = fin.readlines()

    # Skip rows whose mouserid already exists in the collection.
    rs_old = db.kind_translate.find({})
    old_kindid_mouser_set = set(r["mouserid"] for r in rs_old)
    print(old_kindid_mouser_set)

    relation_final_list = _parse_new_relations(lines, old_kindid_mouser_set)
    print(relation_final_list)
    print(len(relation_final_list))

    # insert_many() rejects an empty document list, so only call it when
    # there is actually something new to store.
    if relation_final_list:
        db.kind_translate.insert_many(relation_final_list)
finally:
    # Always release the connection, even if reading or parsing failed.
    cli.close()