#coding=utf-8 ''' Created on 2016年6月7日 @author: uas ''' import os import time os.environ['NLS_LANG']='SIMPLIFIED CHINESE_CHINA.UTF8' from pymongo.mongo_client import MongoClient import cx_Oracle from util_common import Constant class OracleManager(object): def __init__(self): self.conn=cx_Oracle.connect(Constant.ORACLE_URL) def _insert_component_temp(self,component_list): cursor=self.conn.cursor() # sql="create table product$component_temp1 (cmp_id int primary key,cmp_attach varchar(255) ,cmp_img varchar(255),cmp_brid int ,cmp_code varchar(255),cmp_kiid int,cmp_company varchar(255) ,cmp_company_url varchar(255) ,cmp_description varchar(4000),cmp_packaging varchar(255))" # # # sql="drop table PRODUCT$COMPONENT_TEMP1" # cursor.execute(sql) # self.conn.commit() cursor.prepare("INSERT INTO product$component (cmp_id,cmp_attach,cmp_img,cmp_brid,cmp_code,cmp_kiid,cmp_company,cmp_company_url,cmp_description,cmp_packaging) values(:1,:2,:3,:4,:5,:6,:7,:8,:9,:10)") param_oracle=list() error_component_list=list() for index,component in enumerate(component_list): try: param_oracle.append((component['cmp_id'],component['cmp_attach'],component['cmp_img'],component['cmp_brid'],component['cmp_code'],int(component['cmp_kiid']),component['cmp_company'],component['cmp_company_url'],component['description'],component['cmp_packaging'])) except Exception as e: error_component_list.append(component) if index %20000 ==0: cursor.executemany(None,param_oracle) self.conn.commit() param_oracle.clear() print(index) print( len(error_component_list)) cursor.executemany(None,param_oracle) self.conn.commit() cursor.close() def _insert_property(self,property_list): cursor=self.conn.cursor() cursor.prepare('insert into product$property (pt_id,pt_label) values(:1,:2)' ) param_oracle=list() for index,pt in enumerate(property_list): try: param_oracle.append((pt['id'],pt['lableCn'])) except: pass print(index) cursor.executemany(None,param_oracle) self.conn.commit() cursor.close() def craw(self,component_list,propertyvalue_list,index,property_list=None): # self._insert_component_temp(component_list) self._insert_property_value(propertyvalue_list,index) # self._insert_property(property_list) def _insert_property_value(self,propertyvalue_list,inde): cursor=self.conn.cursor() # sql="create table product$propertyvalue_temp1 (pv_id int primary key,pv_componentid int ,pv_detno int ,pv_propertyid int ,pv_value varchar(255))" # # # sql="drop table PRODUCT$COMPONENT_TEMP1" # cursor.execute(sql) # self.conn.commit() # cursor.prepare('insert into product$propertyvalue (pv_id,pv_componentid,pv_detno,pv_propertyid,pv_value) values(:1,:2,:3,:4,:5)') param_oracle=list() for index,pv in enumerate(propertyvalue_list): try: if len(pv["value"])>80: continue param_oracle.append((inde+index+1,pv['componentid'],pv['detno'],pv['propertyid'],pv['value'])) except: param_oracle.append((inde+index+1,pv['componentid'],pv['detno'],pv['propertyid'],None)) # if index%20000==0: # cursor.executemany(None,param_oracle) # self.conn.commit() # param_oracle.clear() if len(param_oracle)!=0: print(1) cursor.executemany(None,param_oracle) self.conn.commit() cursor.close() if __name__=='__main__': Oracle=OracleManager() cli=MongoClient(Constant.MONGODB_URL) db=cli.spider # property_list=db.property_0614.find() # property_list_temp=list() # for index,row in enumerate(property_list): # property_list_temp.append(row) # print(index) # Oracle.craw(None, None, None,None) # component_list=db.component_temp_0614.find() # component_list_temp=list() # for index,row in enumerate(component_list): # component_list_temp.append(row) # print(index) # Oracle.craw(component_list_temp, None, None) simple_check=db.propertyvalue_0614.find({},{'_id':True}) rows_count=simple_check.count() for i in range(1,rows_count): if i%50000==0: propertyvalue_list=db.propertyvalue_0614.find().skip(i-50000).limit(50000) propertyvalue_list_temp=list() for index,row in enumerate(propertyvalue_list): propertyvalue_list_temp.append(row) Oracle.craw(None,propertyvalue_list_temp,i-50000) if rows_count-i<=50000: propertyvalue_list=db.propertyvalue_0614.find().skip(i).limit(rows_count-i) propertyvalue_list_temp=list() for index,row in enumerate(propertyvalue_list): propertyvalue_list_temp.append(row) print(index) Oracle.craw(None,propertyvalue_list_temp,i) break print('remaining missions are ',rows_count-i) print(i)