123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132 |
- # coding=utf-8
- '''
- Created on 2016年3月14日
- @author: ChenHao
- '''
- from analysis import detailAnalysis_kind
- from util_common import Constant
- from pymongo.mongo_client import MongoClient
class DetailAnalysis():
    '''
    Post-processing steps that turn crawled component details stored in
    MongoDB into the derived collections ``kind``, ``brand``,
    ``detail_kindid``, ``property`` and ``propertyvalue``.

    Every step receives the database handle as ``db`` and accesses its
    collections as attributes (``db.kind``, ``db.detail_json``, ...).
    '''

    def __init__(self):
        self.analysis_kind = detailAnalysis_kind.DetailAnalysisKind()

    @staticmethod
    def _store_document(collection, document):
        '''
        Write ``document`` into ``collection``: replace the stored document
        with the same ``_id`` (inserting it if absent), or plainly insert
        when the document carries no ``_id``.

        Stands in for ``Collection.save``, which PyMongo deprecated in 3.0
        and removed in 4.0.
        '''
        if "_id" in document:
            collection.replace_one({"_id": document["_id"]}, document, upsert=True)
        else:
            collection.insert_one(document)

    def _handle_kind(self, db):
        '''
        Build the category ("kind") documents and store them in ``db.kind``.

        The documents come from ``self.analysis_kind.craw(db)``; per the
        original note they already have ids assigned and parent/child ids
        computed.
        '''
        kinds = self.analysis_kind.craw(db)
        # insert_many rejects an empty document list, so skip the write
        # when there is nothing to store.
        if not kinds:
            return
        db.kind.insert_many(kinds)

    def _handle_brand(self, db):
        '''
        Collect the distinct brand values and store them in ``db.brand``
        with ids counting up from 1.

        Notes carried over from the original author:

        * 2016-03-17: property 25 is the brand name, property 40 is the
          brand; the brand is preferred.
        * 2016-03-30: according to test/brandAndBrandNameTest.py, out of
          2696 valid test records every component has a brand attribute and
          very few have a brandName attribute, so only the brand values
          (propertyid 40) are recorded here.
        '''
        # dict.fromkeys de-duplicates like a set; on Python 3.7+ it also keeps
        # first-seen order, which makes the assigned ids reproducible.
        brand_names = dict.fromkeys(
            row["value"] for row in db.propertyvalue.find({"propertyid": 40})
        )
        brands = [
            # The rest of the system only reads the Cn field, so the same
            # name is stored there as a placeholder.
            {"id": brand_id, "nameEn": name, "nameCn": name}
            for brand_id, name in enumerate(brand_names, 1)
        ]
        # insert_many rejects an empty document list.
        if brands:
            db.brand.insert_many(brands)

    def _add_kindid_for_component(self, db):
        '''
        Copy every ``db.detail_json`` document into ``db.detail_kindid``
        with an added ``kindid`` field, resolved from the document's
        ``lastkind`` value against the stored categories.
        '''
        # Materialise the categories once: a cursor can only be consumed a
        # single time, and the list is needed for every detail document.
        kinds = list(db.kind.find())

        for detail in db.detail_json.find():
            detail["kindid"] = self.analysis_kind._get_kindid_by_kindName(
                detail['lastkind'], kinds)
            self._store_document(db.detail_kindid, detail)

    def _create_property(self, db):
        '''
        Collect every distinct property label found in ``db.detail_json``
        and store them in ``db.property`` with ids counting up from 1.
        '''
        # Entries are keyed by "label"; the original TODO records that an
        # earlier revision had this key wrong.
        labels = dict.fromkeys(
            prop["label"]
            for detail in db.detail_json.find()
            for prop in detail["properties"]
        )
        properties = [
            {"id": property_id, "labelCn": label}
            for property_id, label in enumerate(labels, 1)
        ]
        # insert_many rejects an empty document list.
        if properties:
            db.property.insert_many(properties)

    def _create_propertyvalue(self, db):
        '''
        Generate the ``db.propertyvalue`` rows for every component in
        ``db.detail_json``.

        Each row holds the component id, the id of the matching
        ``db.property`` document (``None`` when the label is unknown), the
        1-based position of the property within the component (``detno``)
        and the property value.
        '''
        # Label -> id lookup table; setdefault keeps the first id seen for a
        # label, matching a first-match linear search.
        property_ids = {}
        for prop in db.property.find():
            property_ids.setdefault(prop["labelCn"], prop["id"])

        for detail in db.detail_json.find():
            component_id = detail["id"]
            rows = []
            for detno, prop_value in enumerate(detail["properties"], 1):
                # "label" is the key _create_property reads; the misspelled
                # "lable" is still accepted so older records keep working.
                if "label" in prop_value:
                    label = prop_value["label"]
                else:
                    label = prop_value["lable"]
                rows.append({
                    "componentid": component_id,
                    "propertyid": property_ids.get(label),
                    "detno": detno,
                    "value": prop_value["value"],
                })
            # insert_many rejects an empty document list.
            if rows:
                db.propertyvalue.insert_many(rows)
-
if __name__ == '__main__':
    # Script entry point: connect to MongoDB and run the enabled analysis
    # step(s) against the "spider" database.
    cli = MongoClient(Constant.MONGODB_URL)
    try:
        db = cli.spider
        detailAnalysis_main = DetailAnalysis()

        # The steps are listed in their original order; the commented-out
        # ones are disabled and can be re-enabled individually.

        # detailAnalysis_main._handle_kind(db)

        # detailAnalysis_main._add_kindid_for_component(db)

        # detailAnalysis_main._create_property(db)

        # detailAnalysis_main._create_propertyvalue(db)

        detailAnalysis_main._handle_brand(db)
    finally:
        # Close the client even when a step raises, so the connection is
        # not left open.
        cli.close()
-
|