# coding=utf-8
'''
Created on 2016-02-26

Analyse category ("kind") paths and flatten them into a list of category
records linked by parent id.

@author: ChenHao
'''


class DetailAnalysisKind(object):
    '''
    Build a flat list of category records from category-name paths.

    Every record in ``out_list`` is a dict with the keys ``id``, ``nameCn``,
    ``parentid`` and ``level``.  ``_set_detno_and_uuidForCmp`` adds ``detno``
    and ``uuidForKind``; ``_set_isLeaf`` adds ``isLeaf`` and ``uuidForCmp``.
    '''

    def __init__(self, out_list_input=None):
        '''
        :param out_list_input: existing list of category records to extend.
            The list is used as-is (not copied), so it is modified in place.
            ``None`` (the default) starts with an empty list.
        '''
        if out_list_input is not None:
            self.out_list = out_list_input
        else:
            self.out_list = []

    def _is_in_out_list(self, kindName):
        '''Return True if a record named ``kindName`` is already stored.'''
        return any(kind["nameCn"] == kindName for kind in self.out_list)

    def _get_id_form_out_list(self, kindName):
        '''
        Return the id of the first record named ``kindName``.

        Returns ``False`` (not ``None``) when no record has that name; this
        return value is kept for existing callers.
        '''
        for kind in self.out_list:
            if kind["nameCn"] == kindName:
                return kind["id"]
        return False

    def _add_one_into_out_list(self, kindName, parentid, level):
        '''
        Append one new record; its id is the current list length plus one.
        '''
        self.out_list.append({
            "id": len(self.out_list) + 1,
            "nameCn": kindName,
            "parentid": parentid,
            "level": level,
        })

    def _add_into_out_list(self, kind_list):
        '''
        Add every not-yet-known name of one category path.

        :param kind_list: category names ordered from the top level down to
            the leaf.  Names are matched by name only, so a name that is
            already stored is never added again, whatever its parent is.
        '''
        for depth, kindName in enumerate(kind_list):
            # Skip names that are already part of the result.
            if self._is_in_out_list(kindName):
                continue
            if depth == 0:
                # Top-level category: parent id 0, level 1.
                self._add_one_into_out_list(kindName, 0, 1)
            else:
                # The parent is the previous name of the path; it is either
                # already stored or was added by the previous iteration.
                parentid = self._get_id_form_out_list(kind_list[depth - 1])
                self._add_one_into_out_list(kindName, parentid, depth + 1)

    def _get_mouser_kindid_by_kindName(self, kindName, kinds_list):
        '''
        Return the ``id`` of the first record in ``kinds_list`` whose
        ``nameCn`` equals ``kindName``, or ``None`` if there is none.
        '''
        for kind in kinds_list:
            if kindName == kind["nameCn"]:
                return kind["id"]
        return None

    def _get_uu_kindid_by_kindName(self, kindName, kindsmouser,
                                   translateRelation):
        '''
        Translate a category name into a ``uuid`` via ``translateRelation``.

        For every record in ``kindsmouser`` named ``kindName`` its ``id`` is
        looked up as ``mouserid`` in ``translateRelation``; the ``uuid`` of
        the first matching relation is returned.  Returns ``None`` when no
        relation matches.
        '''
        for kind in kindsmouser:
            if kindName != kind["nameCn"]:
                continue
            mouserid = kind["id"]
            for relation in translateRelation:
                if mouserid == relation["mouserid"]:
                    return relation["uuid"]
        return None

    def _get_parent_by_parentid(self, parentid):
        '''
        Return the first record whose ``id`` equals ``parentid``, or ``None``.
        '''
        for kind in self.out_list:
            if kind["id"] == parentid:
                return kind
        return None

    def _get_parents_by_leafid(self, leafid):
        '''
        Rebuild the whole path of a record from its id.

        :return: list of records ordered from the record itself up to its
            top-level ancestor, or ``None`` when no record has id ``leafid``.
        '''
        leaf = None
        for kind in self.out_list:
            if kind["id"] == leafid:
                # No early exit: as before, the last matching record wins.
                leaf = kind
        if leaf is None:
            # Unknown id: previously this crashed on ``None["parentid"]``.
            return None

        path = [leaf]
        parent = self._get_parent_by_parentid(leaf["parentid"])
        while parent:
            if any(parent is seen for seen in path):
                # Parent links form a cycle; stop instead of looping forever.
                break
            path.append(parent)
            parent = self._get_parent_by_parentid(parent["parentid"])
        return path

    def _set_isLeaf(self):
        '''
        Set ``isLeaf`` and ``uuidForCmp`` on every record.

        A record is a leaf when no record uses its id as ``parentid``.
        ``uuidForCmp`` is the concatenation of the ``uuidForKind`` values
        along the path from the top-level ancestor down to the record,
        right-padded with "0" to 9 characters.

        ``_set_detno_and_uuidForCmp`` must have been called before, because
        it is the method that sets ``uuidForKind``.
        '''
        # 1. Collect every id that is used as a parent.
        parent_ids = set(kind["parentid"] for kind in self.out_list)

        # 2. Walk the result list and set both fields.
        for kind in self.out_list:
            kind["isLeaf"] = kind["id"] not in parent_ids

            # Assemble the comparison code from the root down to this record.
            path = self._get_parents_by_leafid(kind["id"])
            code = "".join(node["uuidForKind"] for node in reversed(path))
            # Pad the missing digits with zeros.
            kind["uuidForCmp"] = code.ljust(9, "0")

    def _set_detno_and_uuidForCmp(self):
        '''
        Set ``detno`` and ``uuidForKind`` on every record.

        Despite its name this method does not set ``uuidForCmp``; that field
        is set by ``_set_isLeaf``.

        ``detno`` is the 1-based position of the record among the records
        sharing its ``parentid``, in list order.  ``uuidForKind`` encodes
        that position: level 1 uses ``str(position + 10)`` (so the first is
        "11"), level 2 is zero-filled to 3 digits, and every other level is
        zero-filled to 4 digits.
        '''
        # Number of records already numbered for each parent id.
        sibling_counts = {}
        for kind in self.out_list:
            parentid = kind["parentid"]
            position = sibling_counts.get(parentid, 0)

            kind["detno"] = position + 1
            level = kind["level"]
            if level == 1:
                kind["uuidForKind"] = str(position + 11)
            elif level == 2:
                kind["uuidForKind"] = str(position + 1).zfill(3)
            else:
                kind["uuidForKind"] = str(position + 1).zfill(4)

            sibling_counts[parentid] = position + 1

    def craw(self, db):
        '''
        Read every detail document and add its category path to the result.

        :param db: object exposing ``detail_json.find()``, which must return
            an iterable of mappings that each contain a ``'kinds'`` list.
            NOTE(review): this looks like a MongoDB database handle - confirm.
        :return: ``self.out_list`` (the same list object, not a copy).
        '''
        for detail in db.detail_json.find():
            self._add_into_out_list(detail['kinds'])
        return self.out_list


if __name__ == '__main__':
    # The original entry point called DetailAnalysisKind() and craw() without
    # their required arguments and therefore always failed with a TypeError.
    # No database handle is created in this file, so explain what is needed
    # instead of crashing.
    raise SystemExit(
        "DetailAnalysisKind.craw(db) needs a database handle exposing "
        "'detail_json.find()'; import this module and call craw(db)."
    )