testNewTable.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. #coding=utf-8
  2. from pymongo.mongo_client import MongoClient
  3. from util_common import Constant
'''
Created on 2016-06-06
@author: uas
'''
  8. class CreateComponent_temp(object):
  9. def __init__(self,db):
  10. self.component_temp=dict()
  11. self.db=db
  12. def _get_brandid_by_brandName(self,brandName,brand):
  13. for row in brand:
  14. if row['nameCn']==brandName:
  15. return row['id']
  16. def _get_uu_kindid_by_kindName(self, kindName, kindsmouser, translateRelation):
  17. for obj_kind in kindsmouser:
  18. if kindName == obj_kind["nameCn"]:
  19. mouserid = obj_kind["id"]
  20. for o in translateRelation:
  21. if mouserid == o["mouserid"]:
  22. return o["uuid"]
  23. def _get_packaging(self,row):
  24. for propertyValue in row['properties']:
  25. if propertyValue['lable']=='封装':
  26. brandName=propertyValue['value']
  27. def craw(self):
  28. error_info=self.db.error_img_url_uu.find()
  29. error_list=list()
  30. for row in error_info:
  31. error_list.append(row["url"])
  32. rows=self.db.component_original.find().limit(100)
  33. rows_in_kind=self.db.kind_0607.find()
  34. rows_in_translation=self.db.kind_translate.find()
  35. rows_in_brand=self.db.brand_0607.find()
  36. print(rows.count())
  37. for r,index in enumerate(rows):
  38. self.component_temp=dict()
  39. self.component_temp['cmp_id']=index+1
  40. self.component_temp['cmp_code']=r['code']
  41. self.component_temp['cmp_compeny']=r['company']
  42. self.component_temp['cmp_company_url']=r['compamny_url']
  43. self.component_temp['description']=r['description']
  44. try:
  45. if r['img_url_uu'] in error_list:
  46. self.component_temp['cmp_img']=''
  47. else:
  48. self.component_temp['cmp_img']=r['img_url_uu']
  49. self.component_temp['cmp_attach']=r['attachurl_uu']
  50. except KeyError as e:
  51. self.component_temp['cmp_img']=''
  52. try:
  53. if r['attachTask']!=2:
  54. self.component_temp['cmp_attach']=''
  55. else:
  56. self.component_temp['cmp_attach']=r['attachUrl']
  57. except KeyError as e :
  58. self.component_temp['cmp_attach']=''
  59. for propertyValue in r['properties']:
  60. if propertyValue['lable']=='商标':
  61. brandName=propertyValue['value']
  62. break
  63. self.component_temp['cmp_brid']=self._get_brandid_by_brandName(brandName,rows_in_brand)
  64. self.component_temp['cmp_kiid']=self._get_uu_kindid_by_kindName(r['lastkind'], rows_in_kind, rows_in_translation)
  65. self.component_temp['cmp_packaging']=self._get_packaging(r)
  66. self.db.component_temp_0607.insert_one(self.component_temp)
  67. if __name__=="__main__":
  68. cli=MongoClient(Constant.MONGODB_URL)
  69. db=cli.spider
  70. creatComponent_temp_main=CreateComponent_temp(db)
  71. CreateComponent_temp.craw()
  72. # Cmp_id
  73. # Cmp_attach
  74. # Cmp_img
  75. # Cmp_brid
  76. # Cmp_code
  77. # Cmp_kiid
  78. # Cmp_uuid
  79. # Cmp_company
  80. # Cmp_company_url
  81. # Cmp_create_time
  82. # Cmp_description
  83. # Cmp_packaging