#coding=utf-8
'''
Copy crawled component documents into the ``component_temp_0607`` collection,
resolving brand and kind ids along the way.

Created on 2016-06-06

@author: uas
'''

# Values of the 'lable' key (spelling as stored in the data) that are looked up
# in a component's property list.
_LABEL_BRAND = '商标'       # "trademark" / brand
_LABEL_PACKAGING = '封装'   # "packaging"


class CreateComponent_temp(object):
    """Build ``component_temp_0607`` documents from ``component_original``.

    The class only needs a database handle exposing the collections it reads
    (``error_img_url_uu``, ``component_original``, ``kind_0607``,
    ``kind_translate``, ``brand_0607``) and the one it writes
    (``component_temp_0607``).
    """

    def __init__(self, db):
        """Store the database handle.

        Args:
            db: database object whose attributes are the collections above.
        """
        # Most recently built output document; replaced for every row in craw().
        self.component_temp = dict()
        self.db = db

    def _get_property(self, row, label):
        """Return the 'value' of the first property of *row* whose 'lable' is *label*.

        Returns None when the row has no 'properties' or no property matches.
        """
        for prop in row.get('properties') or ():
            if prop.get('lable') == label:
                return prop['value']
        return None

    def _get_brandid_by_brandName(self, brandName, brand):
        """Return the 'id' of the first brand whose 'nameCn' equals *brandName*.

        Args:
            brandName: brand name to look for.
            brand: iterable of brand documents with 'nameCn' and 'id' keys.

        Returns:
            The matching id, or None when no brand matches.
        """
        for row in brand:
            if row['nameCn'] == brandName:
                return row['id']
        return None

    def _get_uu_kindid_by_kindName(self, kindName, kindsmouser, translateRelation):
        """Translate a kind name into its 'uuid' via the translation table.

        Args:
            kindName: value compared against each kind's 'nameCn'.
            kindsmouser: iterable of kind documents with 'nameCn' and 'id'.
            translateRelation: re-iterable collection (e.g. a list) of
                documents with 'mouserid' and 'uuid'; it is scanned once per
                matching kind, so a one-shot cursor must not be passed.

        Returns:
            The 'uuid' of the first translation whose 'mouserid' equals the id
            of a matching kind, or None when nothing matches.
        """
        for obj_kind in kindsmouser:
            if kindName != obj_kind["nameCn"]:
                continue
            mouserid = obj_kind["id"]
            for relation in translateRelation:
                if mouserid == relation["mouserid"]:
                    return relation["uuid"]
        return None

    def _get_packaging(self, row):
        """Return the packaging property value of *row*, or None if absent."""
        return self._get_property(row, _LABEL_PACKAGING)

    def _resolve_image(self, row, error_urls):
        """Return the image URL to store for *row* ('' when unusable).

        The result is '' when the row has no 'img_url_uu', when that URL is in
        *error_urls*, or when the row has no 'attachurl_uu'.
        """
        if 'img_url_uu' not in row:
            return ''
        image_url = row['img_url_uu']
        if image_url in error_urls:
            return ''
        # NOTE(review): the original code read row['attachurl_uu'] in the same
        # try-block as the image, so a row without that key ended up with an
        # empty image. That outcome is kept here; confirm it is intended.
        if 'attachurl_uu' not in row:
            return ''
        return image_url

    def _resolve_attachment(self, row):
        """Return row['attachUrl'] when row['attachTask'] == 2, otherwise ''.

        A missing 'attachTask' or 'attachUrl' key also yields ''.
        """
        try:
            if row['attachTask'] != 2:
                return ''
            return row['attachUrl']
        except KeyError:
            return ''

    def craw(self):
        """Convert up to 100 source components and insert them one by one.

        Reads the lookup collections once, prints the number of source rows
        fetched, then inserts one document per row into
        ``component_temp_0607``. ``cmp_id`` is the 1-based position of the row.
        """
        error_urls = {row["url"] for row in self.db.error_img_url_uu.find()}

        # Cursors can only be iterated once, but the lookup tables are scanned
        # for every component, so materialise them up front.
        rows = list(self.db.component_original.find().limit(100))
        kinds = list(self.db.kind_0607.find())
        translations = list(self.db.kind_translate.find())
        brands = list(self.db.brand_0607.find())

        # Number of rows actually fetched (at most 100).
        print(len(rows))

        for index, r in enumerate(rows):
            record = dict()
            self.component_temp = record

            record['cmp_id'] = index + 1
            record['cmp_code'] = r['code']
            # NOTE(review): 'cmp_compeny' and 'compamny_url' look like
            # misspellings of "company"; they are left unchanged because stored
            # documents and downstream consumers may rely on these exact keys.
            record['cmp_compeny'] = r['company']
            record['cmp_company_url'] = r['compamny_url']
            record['description'] = r['description']
            record['cmp_img'] = self._resolve_image(r, error_urls)
            record['cmp_attach'] = self._resolve_attachment(r)

            # Resolve the brand per row so a component without a brand property
            # gets None instead of the previous row's brand.
            brand_name = self._get_property(r, _LABEL_BRAND)
            if brand_name is None:
                record['cmp_brid'] = None
            else:
                record['cmp_brid'] = self._get_brandid_by_brandName(brand_name, brands)

            record['cmp_kiid'] = self._get_uu_kindid_by_kindName(
                r['lastkind'], kinds, translations)
            record['cmp_packaging'] = self._get_packaging(r)

            self.db.component_temp_0607.insert_one(record)


def main():
    """Connect to the 'spider' database and run the conversion."""
    # The driver and the project configuration are only needed by the script
    # entry point; importing them here keeps the class above importable with
    # nothing more than a database handle.
    from pymongo.mongo_client import MongoClient
    from util_common import Constant

    cli = MongoClient(Constant.MONGODB_URL)
    db = cli.spider
    creatComponent_temp_main = CreateComponent_temp(db)
    creatComponent_temp_main.craw()


if __name__ == "__main__":
    main()

# Field list kept from the original file (presumably the target column set;
# confirm against the consumer of component_temp_0607):
#   Cmp_id
#   Cmp_attach
#   Cmp_img
#   Cmp_brid
#   Cmp_code
#   Cmp_kiid
#   Cmp_uuid
#   Cmp_company
#   Cmp_company_url
#   Cmp_create_time
#   Cmp_description
#   Cmp_packaging