# coding=utf-8
'''
Created on 2016-03-14

@author: ChenHao
'''
from analysis import detailAnalysis_kind
from util_common import Constant
from pymongo.mongo_client import MongoClient


# Property id queried in the propertyvalue collection to collect brands.
# Original author's note: 25 = brand name, 40 = brand; brand is preferred.
# NOTE(review): property ids are assigned by _create_property from the
# iteration order of a set, so this value presumably matches one specific
# run of that step -- confirm before re-generating the property collection.
_BRAND_PROPERTY_ID = 40


def _property_label(prop):
    '''Return the label of one entry of a component's "properties" list.

    The methods of this file historically disagreed on the key name
    ("label" vs. the misspelled "lable"), so the correct spelling is
    preferred and the legacy one is used as a fallback.

    Raises KeyError if neither key is present.
    '''
    if "label" in prop:
        return prop["label"]
    return prop["lable"]


def _save_document(collection, document):
    '''Insert or overwrite ``document`` in ``collection``.

    Stand-in for the ``Collection.save`` call the original code used:
    a document that carries an ``_id`` replaces the stored document with
    that ``_id`` (inserting it if absent); one without ``_id`` is inserted.
    '''
    if "_id" in document:
        collection.replace_one({"_id": document["_id"]}, document, upsert=True)
    else:
        collection.insert_one(document)


class DetailAnalysis():
    '''Builds the kind / brand / property / propertyvalue collections
    from the crawled ``detail_json`` collection.'''

    def __init__(self):
        self.analysis_kind = detailAnalysis_kind.DetailAnalysisKind()

    def _handle_kind(self, db):
        '''Compute the category (kind) documents and store them in ``db.kind``.

        The documents come from ``DetailAnalysisKind.craw``; according to
        the original author's note they already have their ids assigned
        and their parent/child ids computed.
        '''
        kinds = self.analysis_kind.craw(db)
        # Store the resulting categories in the kind collection.
        db.kind.insert_many(kinds)

    def _handle_brand(self, db):
        '''Collect the distinct brand values and store them in ``db.brand``.

        Brands are read from ``db.propertyvalue`` entries whose
        ``propertyid`` is ``_BRAND_PROPERTY_ID``; each distinct value gets
        an id starting at 1.

        Original author's notes:
          * 2016-03-17: 25 = brand name, 40 = brand; brand is preferred.
          * 2016-03-30: according to test/brandAndBrandNameTest.py, in 2696
            valid test records every component has the brand property and
            very few have brandName, so only brand is recorded here.
        '''
        brand_names = set()
        for property_value in db.propertyvalue.find(
                {"propertyid": _BRAND_PROPERTY_ID}):
            brand_names.add(property_value["value"])

        brands = []
        for index, brand_name in enumerate(brand_names):
            brands.append({
                "id": index + 1,
                "nameEn": brand_name,
                # The rest of the system only reads the Cn field, so it is
                # filled with the same value as a placeholder.
                "nameCn": brand_name,
            })

        # insert_many rejects an empty document list, so skip it when no
        # brand value was found.
        if brands:
            db.brand.insert_many(brands)

    def _add_kindid_for_component(self, db):
        '''Copy every ``detail_json`` document into ``db.detail_kindid``
        with a ``kindid`` field resolved from its ``lastkind`` name.'''
        # Materialize the kinds once: a cursor can only be consumed a
        # single time, and the list is needed for every component.
        kinds = list(db.kind.find())

        for detail_json in db.detail_json.find():
            detail_json["kindid"] = self.analysis_kind._get_kindid_by_kindName(
                detail_json['lastkind'], kinds)
            # Save into the detail_kindid collection.
            _save_document(db.detail_kindid, detail_json)

    def _create_property(self, db):
        '''Collect every distinct property label found in ``detail_json``
        and store it in ``db.property`` with an id starting at 1.'''
        labels = set()
        for detail_json in db.detail_json.find():
            for prop in detail_json["properties"]:
                labels.add(_property_label(prop))

        properties = [
            {"id": index + 1, "labelCn": label}
            for index, label in enumerate(labels)
        ]
        if properties:
            db.property.insert_many(properties)

    def _create_propertyvalue(self, db):
        '''Generate the ``db.propertyvalue`` documents for every component.

        Each entry of a component's ``properties`` list becomes one
        document holding the component id, the id of the matching
        ``db.property`` entry (``None`` when the label is unknown), the
        1-based position of the entry (``detno``) and its value.
        '''
        # Label -> property id, built once instead of scanning the property
        # list for every value. setdefault keeps the first id seen for a
        # label, matching the previous first-match lookup.
        property_id_by_label = {}
        for prop in db.property.find():
            property_id_by_label.setdefault(prop["labelCn"], prop["id"])

        for component in db.detail_json.find():
            component_id = component["id"]
            documents = []
            for index, property_value in enumerate(component["properties"]):
                documents.append({
                    "componentid": component_id,
                    "propertyid": property_id_by_label.get(
                        _property_label(property_value)),
                    "detno": index + 1,
                    "value": property_value["value"],
                })
            # One batched write per component; insert_many rejects an
            # empty list, so components without properties are skipped.
            if documents:
                db.propertyvalue.insert_many(documents)


if __name__ == '__main__':
    cli = MongoClient(Constant.MONGODB_URL)
    try:
        db = cli.spider
        detailAnalysis_main = DetailAnalysis()
        # The pipeline steps are run one at a time; uncomment the step to
        # execute, in this order.
        # detailAnalysis_main._handle_kind(db)
        # detailAnalysis_main._add_kindid_for_component(db)
        # detailAnalysis_main._create_property(db)
        # detailAnalysis_main._create_propertyvalue(db)
        detailAnalysis_main._handle_brand(db)
    finally:
        # Release the connection even when a step fails.
        cli.close()