# coding=utf-8
# File: detailAnalysis_kind.py
'''
Created on 2016-02-26
Analyzes categories ("kinds").
@author: ChenHao
'''
  7. class DetailAnalysisKind(object):
  8. def __init__(self, out_list_input):
  9. if out_list_input is not None:
  10. self.out_list = out_list_input
  11. else:
  12. self.out_list = []
  13. def _is_in_out_list(self, kindName):
  14. # print self.out_list
  15. if len(self.out_list) == 0:
  16. print("none")
  17. return False
  18. else:
  19. for obj_kind in self.out_list:
  20. if obj_kind["nameCn"] == kindName:
  21. return True
  22. return False
  23. def _get_id_form_out_list(self, kindName):
  24. for obj_kind in self.out_list:
  25. if obj_kind["nameCn"] == kindName:
  26. return obj_kind["id"]
  27. return False
  28. def _add_one_into_out_list(self, kindName, parentid, level):
  29. id_kind = len(self.out_list) + 1
  30. d = dict()
  31. d["id"] = id_kind
  32. d["nameCn"] = kindName
  33. d["parentid"] = parentid
  34. d["level"] = level
  35. self.out_list.append(d)
  36. def _add_into_out_list(self, kind_list):
  37. # print "type:", type(kind_list)
  38. # print kind_list
  39. # print kind_list[0]
  40. for i, kindName in enumerate(kind_list):
  41. # 先检查自己在不在数据集合里面
  42. # print kindName
  43. if not self._is_in_out_list(kindName):
  44. # 检查自己的层级
  45. if i == 0:
  46. # 如果是顶级
  47. self._add_one_into_out_list(kindName, 0, 1)
  48. else:
  49. # 先找父级
  50. parentName = kind_list[i - 1]
  51. parentid = self._get_id_form_out_list(parentName)
  52. self._add_one_into_out_list(kindName, parentid, i + 1)
  53. def _get_mouser_kindid_by_kindName(self, kindName, kinds_list):
  54. for obj_kind in kinds_list:
  55. if kindName == obj_kind["nameCn"]:
  56. return obj_kind["id"]
  57. def _get_uu_kindid_by_kindName(self, kindName, kindsmouser, translateRelation):
  58. for obj_kind in kindsmouser:
  59. if kindName == obj_kind["nameCn"]:
  60. mouserid = obj_kind["id"]
  61. for o in translateRelation:
  62. if mouserid == o["mouserid"]:
  63. return o["uuid"]
  64. # 获得父级类目
  65. def _get_parent_by_parentid(self, parentid):
  66. for kind in self.out_list:
  67. if kind["id"] == parentid:
  68. return kind
  69. return None
  70. # 根据叶子节点id还原整条路径
  71. def _get_parents_by_leafid(self, leafid):
  72. kind_list = list()
  73. kind_leaf = None
  74. for kind in self.out_list:
  75. if kind["id"] == leafid:
  76. kind_leaf = kind
  77. kind_list.append(kind_leaf)
  78. kind_parent = self._get_parent_by_parentid(kind_leaf["parentid"])
  79. while(kind_parent):
  80. kind_list.append(kind_parent)
  81. kind_parent = self._get_parent_by_parentid(kind_parent["parentid"])
  82. if len(kind_list) == 0:
  83. return None
  84. else:
  85. return kind_list
  86. # 最后为结果设置isLeaf字段,并设置
  87. def _set_isLeaf(self):
  88. #1、得到所有的parentid
  89. parentid_set = set()
  90. for kind in self.out_list:
  91. parentid_set.add(kind["parentid"])
  92. #2、遍历结果集并设置状态
  93. for kind in self.out_list:
  94. if kind["id"] in parentid_set:
  95. kind["isLeaf"] = False
  96. else:
  97. kind["isLeaf"] = True
  98. # 组装
  99. kind_list = self._get_parents_by_leafid(kind["id"])
  100. kind_list.reverse()
  101. uuidForCmp = ""
  102. for kd in kind_list:
  103. uuidForCmp += kd["uuidForKind"]
  104. # 补全不足的位数
  105. uuidForCmp += "0" * (9 - len(uuidForCmp))
  106. kind["uuidForCmp"] = uuidForCmp
  107. # 最后为结果设置detno, uuidForCmp字段
  108. def _set_detno_and_uuidForCmp(self):
  109. #1、得到所有的parentid
  110. parentid_set = set()
  111. for kind in self.out_list:
  112. parentid_set.add(kind["parentid"])
  113. #2、遍历结果集并设置状态
  114. for parentid in parentid_set:
  115. same_parentid_count = 0
  116. for kind in self.out_list:
  117. if kind["parentid"] == parentid:
  118. kind["detno"] = same_parentid_count + 1
  119. if kind["level"] == 1:
  120. kind["uuidForKind"] = str(same_parentid_count + 11)
  121. elif kind["level"] == 2:
  122. kind["uuidForKind"] = (str(same_parentid_count + 1)).zfill(3)
  123. else:
  124. kind["uuidForKind"] = (str(same_parentid_count + 1)).zfill(4)
  125. same_parentid_count += 1
  126. def craw(self, db):
  127. detail_list = db.detail_json.find()
  128. for detail in detail_list:
  129. self._add_into_out_list(detail['kinds'])
  130. return self.out_list
if __name__ == '__main__':
    # Script entry point: build an analyzer and collect the category list.
    # NOTE(review): craw() is declared as craw(self, db) in this file, so the
    # call below raises TypeError until a database handle is passed to it.
    obj_spider = DetailAnalysisKind()
    out_list = obj_spider.craw()