# init_kind_uu.py (1.3 KB)
# coding=utf-8
'''
Created on 2016-04-05
Derive the uu categories from the kind_uu.csv file.
@author: ChenHao
'''
  7. from analysis import detailAnalysis_kind
  8. from pymongo.mongo_client import MongoClient
  9. from util_common import Constant
  10. file_path = "../spider_download/Other/kind_uu.csv"
  11. '''
  12. 1、根据提供的分类数据,得到解析所需的数组
  13. PS:在wps里面将文件转成csv时,考虑到内容中","的影响,先手动将全文的","(English)替换为","(汉字);在存储时还原
  14. '''
  15. fin = open(file_path, "r")
  16. lines = fin.readlines()
  17. fin.close()
  18. kinds_list = list()
  19. for index, line in enumerate(lines):
  20. temp_str = line.replace("\n", "")
  21. temp_list = temp_str.split(",")
  22. kind_list = list()
  23. for tl in temp_list[2:]:
  24. if tl is not "":
  25. kind_list.append(tl.replace(",", ","))
  26. print (index, kind_list)
  27. kinds_list.append(kind_list)
  28. '''
  29. 2、解析得到分类并存入mongodb数据库
  30. '''
  31. kindAnalysis = detailAnalysis_kind.DetailAnalysisKind(None)
  32. for kind_list in kinds_list:
  33. kindAnalysis._add_into_out_list(kind_list)
  34. kindAnalysis._set_detno_and_uuidForCmp()
  35. kindAnalysis._set_isLeaf()
  36. kind_uu = kindAnalysis.out_list
  37. print (kind_uu)
  38. '''
  39. 3、存入数据库
  40. '''
  41. cli = MongoClient(Constant.MONGODB_URL)
  42. db = cli.spider
  43. db.kind_uu.insert_many(kind_uu)
  44. cli.close()