| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118 |
- #coding=utf-8
- from pymongo.mongo_client import MongoClient
- from util_common import Constant
- '''
- Created on 2016年6月6日
- @author: uas
- '''
class CreateComponent_temp(object):
    """Build ``component_temp_0607`` documents from ``component_original`` rows.

    ``craw`` reads up to 100 source rows, resolves each row's brand id,
    kind id and packaging, and inserts one flattened document per row into
    ``component_temp_0607``.
    """

    def __init__(self, db):
        """Keep the database handle used by ``craw``.

        :param db: database object exposing the collections accessed in
            ``craw`` as attributes (e.g. a PyMongo ``Database``).
        """
        # Holds the most recently built output document.
        self.component_temp = dict()
        self.db = db

    def _get_brandid_by_brandName(self, brandName, brand):
        """Return the ``id`` of the first brand row named *brandName*.

        :param brandName: value compared against each row's ``nameCn``.
        :param brand: iterable of brand rows with ``nameCn`` and ``id`` keys.
        :return: the matching ``id``, or ``None`` when nothing matches.
        """
        for row in brand:
            if row['nameCn'] == brandName:
                return row['id']
        return None

    def _get_uu_kindid_by_kindName(self, kindName, kindsmouser, translateRelation):
        """Translate a kind name into the ``uuid`` of its translation row.

        Every kind row whose ``nameCn`` equals *kindName* is tried in order;
        the first translation row whose ``mouserid`` equals that kind's
        ``id`` wins.

        :param kindName: kind name taken from the source row.
        :param kindsmouser: iterable of kind rows (``nameCn``, ``id``).
        :param translateRelation: re-iterable collection of translation rows
            (``mouserid``, ``uuid``); it is scanned once per matching kind,
            so a one-shot iterator would miss later matches.
        :return: the ``uuid``, or ``None`` when no translation is found.
        """
        for obj_kind in kindsmouser:
            if kindName != obj_kind["nameCn"]:
                continue
            mouserid = obj_kind["id"]
            for o in translateRelation:
                if mouserid == o["mouserid"]:
                    return o["uuid"]
        return None

    def _get_property_value(self, row, lable):
        """Return the ``value`` of the first property labelled *lable*.

        :param row: source row with a ``properties`` list of dicts that
            carry ``lable`` (spelling as stored in the data) and ``value``.
        :param lable: label text to look for.
        :return: the property value, or ``None`` when the label is absent.
        """
        for propertyValue in row['properties']:
            if propertyValue['lable'] == lable:
                return propertyValue['value']
        return None

    def _get_packaging(self, row):
        """Return the row's packaging (``封装``) property, or ``None``."""
        return self._get_property_value(row, '封装')

    def _build_component(self, index, r, error_urls, kinds, translations, brands):
        """Flatten one source row into an output document.

        :param index: 1-based position of the row, stored as ``cmp_id``.
        :param r: source row (dict-like).
        :param error_urls: set of image URLs that must not be stored.
        :param kinds: list of kind rows.
        :param translations: list of kind translation rows.
        :param brands: list of brand rows.
        :return: the new document (a plain ``dict``).
        :raises KeyError: when a mandatory source key is missing.
        """
        component = dict()
        component['cmp_id'] = index
        component['cmp_code'] = r['code']
        # NOTE(review): 'cmp_compeny' and 'compamny_url' look like typos, but
        # they are data keys and are kept as-is; confirm against the real
        # schema before renaming.
        component['cmp_compeny'] = r['company']
        component['cmp_company_url'] = r['compamny_url']
        component['description'] = r['description']

        # A missing image URL, or one recorded as broken, is stored as ''.
        img_url = r.get('img_url_uu', '')
        component['cmp_img'] = '' if img_url in error_urls else img_url

        # The attachment is only kept when attachTask equals 2; any missing
        # key yields ''.
        if r.get('attachTask') == 2:
            component['cmp_attach'] = r.get('attachUrl', '')
        else:
            component['cmp_attach'] = ''

        # Resolved per row, so a row without a brand (``商标``) property gets
        # None instead of reusing the previous row's brand.
        brandName = self._get_property_value(r, '商标')
        component['cmp_brid'] = self._get_brandid_by_brandName(brandName, brands)
        component['cmp_kiid'] = self._get_uu_kindid_by_kindName(
            r['lastkind'], kinds, translations)
        component['cmp_packaging'] = self._get_packaging(r)
        return component

    def craw(self):
        """Convert up to 100 ``component_original`` rows and insert them.

        Prints the number of rows being processed, then inserts one document
        per row into ``component_temp_0607``. ``self.component_temp`` is left
        holding the last inserted document.
        """
        error_urls = set(row["url"] for row in self.db.error_img_url_uu.find())

        # Cursors are consumed by iteration, so everything that is scanned
        # once per row is materialised into a list up front.
        rows = list(self.db.component_original.find().limit(100))
        kinds = list(self.db.kind_0607.find())
        translations = list(self.db.kind_translate.find())
        brands = list(self.db.brand_0607.find())
        print(len(rows))

        # enumerate yields (index, item); cmp_id is 1-based.
        for index, r in enumerate(rows, 1):
            self.component_temp = self._build_component(
                index, r, error_urls, kinds, translations, brands)
            self.db.component_temp_0607.insert_one(self.component_temp)
-
if __name__ == "__main__":
    # Connect to the MongoDB server configured in Constant and use its
    # ``spider`` database.
    cli = MongoClient(Constant.MONGODB_URL)
    db = cli.spider
    creatComponent_temp_main = CreateComponent_temp(db)
    # craw() is an instance method: it must be called on the instance that
    # holds the database handle, not on the class itself.
    creatComponent_temp_main.craw()
-
-
- # Cmp_id
- # Cmp_attach
- # Cmp_img
- # Cmp_brid
- # Cmp_code
- # Cmp_kiid
- # Cmp_uuid
- # Cmp_company
- # Cmp_company_url
- # Cmp_create_time
- # Cmp_description
- # Cmp_packaging
|