| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162 |
- # coding=utf-8
- '''
- Created on 2016年2月26日
- 对类目进行分析
- @author: ChenHao
- '''
class DetailAnalysisKind(object):
    """Build a flat list of category ("kind") records from category paths.

    Each record in ``out_list`` is a dict with the keys ``id``, ``nameCn``,
    ``parentid`` and ``level``.  The ``_set_*`` helpers later add ``detno``,
    ``uuidForKind``, ``isLeaf`` and ``uuidForCmp`` to the same dicts.

    Records are identified by ``nameCn`` only, so the same name appearing
    under two different parents is treated as one record.
    """

    # Total width of the zero-padded comparison code built in _set_isLeaf.
    _CMP_WIDTH = 9

    def __init__(self, out_list_input=None):
        """Start from ``out_list_input``, or from an empty list when it is None.

        The given list is stored as-is (not copied), so records added later
        are visible to the caller through the same list object.
        """
        self.out_list = out_list_input if out_list_input is not None else []

    def _is_in_out_list(self, kindName):
        """Return True when a record whose ``nameCn`` equals ``kindName`` exists."""
        return any(obj_kind["nameCn"] == kindName for obj_kind in self.out_list)

    def _get_id_form_out_list(self, kindName):
        """Return the ``id`` of the first record named ``kindName``.

        Returns False (not None) when no record matches; callers that store
        the result as a parent id rely on that value.
        """
        for obj_kind in self.out_list:
            if obj_kind["nameCn"] == kindName:
                return obj_kind["id"]
        return False

    def _add_one_into_out_list(self, kindName, parentid, level):
        """Append one new record; its id is the current list length plus one."""
        self.out_list.append({
            "id": len(self.out_list) + 1,
            "nameCn": kindName,
            "parentid": parentid,
            "level": level,
        })

    def _add_into_out_list(self, kind_list):
        """Register every name of one category path, ordered root first.

        Names already present are skipped.  The first element is stored as a
        top-level record (parent id 0, level 1); every other element is
        attached to the element preceding it in ``kind_list``.
        """
        for i, kindName in enumerate(kind_list):
            if self._is_in_out_list(kindName):
                continue
            if i == 0:
                # Top-level category.
                self._add_one_into_out_list(kindName, 0, 1)
            else:
                # The parent is the previous path element, which is already
                # in out_list at this point (found or added one step earlier).
                parentid = self._get_id_form_out_list(kind_list[i - 1])
                self._add_one_into_out_list(kindName, parentid, i + 1)

    def _get_mouser_kindid_by_kindName(self, kindName, kinds_list):
        """Return the ``id`` of the first entry of ``kinds_list`` named ``kindName``.

        Returns None when nothing matches.
        """
        for obj_kind in kinds_list:
            if kindName == obj_kind["nameCn"]:
                return obj_kind["id"]
        return None

    def _get_uu_kindid_by_kindName(self, kindName, kindsmouser, translateRelation):
        """Map a category name to a ``uuid`` through ``translateRelation``.

        For each entry of ``kindsmouser`` named ``kindName``, its ``id`` is
        looked up among the ``mouserid`` values of ``translateRelation`` and
        the ``uuid`` of the first matching relation is returned.  Returns
        None when no entry/relation pair matches.
        """
        for obj_kind in kindsmouser:
            if kindName != obj_kind["nameCn"]:
                continue
            mouserid = obj_kind["id"]
            for relation in translateRelation:
                if mouserid == relation["mouserid"]:
                    return relation["uuid"]
        return None

    def _get_parent_by_parentid(self, parentid):
        """Return the first record whose ``id`` equals ``parentid``, else None."""
        for kind in self.out_list:
            if kind["id"] == parentid:
                return kind
        return None

    def _get_parents_by_leafid(self, leafid):
        """Rebuild the path from the record ``leafid`` up to its root.

        Returns a list starting with the record itself followed by its
        ancestors (nearest first), or None when no record has that id.
        """
        kind_leaf = None
        for kind in self.out_list:
            if kind["id"] == leafid:
                kind_leaf = kind

        # Unknown id: report "no path" instead of failing on a None subscript.
        if kind_leaf is None:
            return None

        kind_list = [kind_leaf]
        kind_parent = self._get_parent_by_parentid(kind_leaf["parentid"])
        while kind_parent:
            kind_list.append(kind_parent)
            kind_parent = self._get_parent_by_parentid(kind_parent["parentid"])
        return kind_list

    def _set_isLeaf(self):
        """Set ``isLeaf`` and ``uuidForCmp`` on every record.

        A record is a leaf when no record names it as parent.  ``uuidForCmp``
        is the concatenation of the ``uuidForKind`` values along the path from
        the root down to the record, right-padded with "0" to 9 characters.

        Every record on a path must already carry ``uuidForKind`` (set by
        ``_set_detno_and_uuidForCmp``); otherwise a KeyError is raised.

        NOTE(review): uuidForCmp is assigned to every record, leaf or not;
        confirm that this is the intended scope.
        """
        # 1. Collect every id that is used as a parent.
        parentid_set = set(kind["parentid"] for kind in self.out_list)

        # 2. Flag each record and assemble its comparison code.
        for kind in self.out_list:
            kind["isLeaf"] = kind["id"] not in parentid_set

            path = self._get_parents_by_leafid(kind["id"])
            path.reverse()  # root first
            code = "".join(kd["uuidForKind"] for kd in path)
            # Pad codes of shallow paths up to the fixed width.
            kind["uuidForCmp"] = code.ljust(self._CMP_WIDTH, "0")

    def _set_detno_and_uuidForCmp(self):
        """Set ``detno`` and ``uuidForKind`` on every record.

        ``detno`` is the 1-based position of a record among the records that
        share its ``parentid``, in ``out_list`` order.  ``uuidForKind`` is
        derived from the same position: level 1 uses position + 10 as a plain
        string, level 2 the position zero-filled to 3 digits, and any other
        level the position zero-filled to 4 digits.
        """
        # Number of records already numbered under each parent id.
        seen_per_parent = {}
        for kind in self.out_list:
            parentid = kind["parentid"]
            same_parentid_count = seen_per_parent.get(parentid, 0)

            kind["detno"] = same_parentid_count + 1
            if kind["level"] == 1:
                kind["uuidForKind"] = str(same_parentid_count + 11)
            elif kind["level"] == 2:
                kind["uuidForKind"] = str(same_parentid_count + 1).zfill(3)
            else:
                kind["uuidForKind"] = str(same_parentid_count + 1).zfill(4)

            seen_per_parent[parentid] = same_parentid_count + 1

    def craw(self, db):
        """Register the ``kinds`` path of every document and return ``out_list``.

        Documents are read with ``db.detail_json.find()``; each one must have
        a ``kinds`` entry holding a category path ordered root first.
        """
        for detail in db.detail_json.find():
            self._add_into_out_list(detail['kinds'])
        return self.out_list
-
if __name__ == '__main__':
    # Script entry point: create an analyser and run the crawl.
    # NOTE(review): craw() is declared with a required `db` argument that is
    # not supplied here, so this call cannot succeed as written. Pass the
    # database handle, and confirm the constructor arguments as well.
    obj_spider = DetailAnalysisKind()
    out_list = obj_spider.craw()
|