detail_analysis.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
# coding=utf-8
'''
Created on 2016-03-14
@author: ChenHao
'''
  6. from analysis import detailAnalysis_kind
  7. from util_common import Constant
  8. from pymongo.mongo_client import MongoClient
  9. class DetailAnalysis():
  10. def __init__(self):
  11. self.analysis_kind = detailAnalysis_kind.DetailAnalysisKind()
  12. # 处理存储类目数据,得到已经分配好id计算好父子id
  13. def _handle_kind(self, db):
  14. # 处理存储类目数据,得到已经分配好id计算好父子id
  15. kinds = self.analysis_kind.craw(db)
  16. # 将得到的类目存入kind集合
  17. db.kind.insert_many(kinds)
  18. '''
  19. @date 2016年3月17日16:17:26
  20. # 处理存储品牌数据,得到已经分配好id的品牌
  21. # 25 商标名; 40 商标
  22. # 优先取商标
  23. '''
  24. '''
  25. @date 2016年3月30日16:17:48
  26. # 根据统计结果(test/brandAndBrandNameTest.py)
  27. # 2696条有效测试数据中
  28. # 每个器件都有brand属性
  29. # 极少的器件带有brandName属性
  30. ### 所以这里只录入brand的结果
  31. '''
  32. def _handle_brand(self, db):
  33. brand_set = set()
  34. rs_brand = db.propertyvalue.find({"propertyid" : 40})
  35. for propertyvalue in rs_brand:
  36. brand_set.add(propertyvalue["value"])
  37. brand_list = list()
  38. for index, brandName in enumerate(brand_set):
  39. d = dict()
  40. d["id"] = index + 1
  41. d["nameEn"] = brandName
  42. # 因为系统全部用的Cn,所以这里先装模作样地赋值
  43. d["nameCn"] = brandName
  44. brand_list.append(d)
  45. db.brand.insert_many(brand_list)
  46. # 依据器件类目数据,为原始数据添加kindid属性
  47. def _add_kindid_for_component(self, db):
  48. '''
  49. # 对detail_json循环的时候第二次会丢失temp_kinds,所以需要先取出来
  50. '''
  51. temp_kinds = db.kind.find()
  52. kinds = list()
  53. for kind in temp_kinds:
  54. kinds.append(kind)
  55. detail_json_list = db.detail_json.find()
  56. for detail_json in detail_json_list:
  57. kindid = self.analysis_kind._get_kindid_by_kindName(detail_json['lastkind'], kinds)
  58. detail_json["kindid"] = kindid
  59. # 保存进detail_kindid集合
  60. db.detail_kindid.save(detail_json)
  61. # 获得所有的label并存入property集合
  62. def _create_property(self, db):
  63. property_set = set()
  64. detail_json_list = db.detail_json.find()
  65. for detail_json in detail_json_list:
  66. property_list = detail_json["properties"]
  67. for pro in property_list:
  68. '''
  69. @todo 之前写错了,这里在后面需要改为
  70. label = pro["label"]
  71. '''
  72. label = pro["label"]
  73. property_set.add(label)
  74. for index, pro in enumerate(property_set):
  75. d = dict()
  76. d["id"] = index + 1
  77. d["labelCn"] = pro
  78. db.property.insert_one(d)
  79. # 为器件生成propertyvalue集合数据
  80. def _create_propertyvalue(self, db):
  81. temp_property = db.property.find()
  82. property_list = list()
  83. for kind in temp_property:
  84. property_list.append(kind)
  85. def _get_propertyid_by_label(label):
  86. for pro in property_list:
  87. if pro["labelCn"] == label:
  88. return pro["id"]
  89. detail_kindid_list = db.detail_json.find()
  90. for detail_kindid in detail_kindid_list:
  91. componentid = detail_kindid["id"]
  92. propertyValues = detail_kindid["properties"]
  93. for inde, propertyValue in enumerate(propertyValues):
  94. d = dict()
  95. d["componentid"] = componentid
  96. d["propertyid"] = _get_propertyid_by_label(propertyValue["lable"])
  97. d["detno"] = inde + 1
  98. d["value"] = propertyValue["value"]
  99. db.propertyvalue.insert_one(d)
  100. if __name__ == '__main__':
  101. cli = MongoClient(Constant.MONGODB_URL)
  102. db = cli.spider
  103. detailAnalysis_main = DetailAnalysis()
  104. # detailAnalysis_main._handle_kind(db)
  105. # detailAnalysis_main._add_kindid_for_component(db)
  106. # detailAnalysis_main._create_property(db)
  107. # detailAnalysis_main._create_propertyvalue(db)
  108. detailAnalysis_main._handle_brand(db)
  109. cli.close()