123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- #coding=utf-8
- '''
- Created on 2016年6月7日
- @author: uas
- '''
- import os
- import time
- os.environ['NLS_LANG']='SIMPLIFIED CHINESE_CHINA.UTF8'
- from pymongo.mongo_client import MongoClient
- import cx_Oracle
- from util_common import Constant
- class OracleManager(object):
- def __init__(self):
- self.conn=cx_Oracle.connect(Constant.ORACLE_URL)
-
-
- def _insert_component_temp(self,component_list):
-
- cursor=self.conn.cursor()
- # sql="create table product$component_temp1 (cmp_id int primary key,cmp_attach varchar(255) ,cmp_img varchar(255),cmp_brid int ,cmp_code varchar(255),cmp_kiid int,cmp_company varchar(255) ,cmp_company_url varchar(255) ,cmp_description varchar(4000),cmp_packaging varchar(255))"
- #
- # # sql="drop table PRODUCT$COMPONENT_TEMP1"
- # cursor.execute(sql)
- # self.conn.commit()
- cursor.prepare("INSERT INTO product$component (cmp_id,cmp_attach,cmp_img,cmp_brid,cmp_code,cmp_kiid,cmp_company,cmp_company_url,cmp_description,cmp_packaging) values(:1,:2,:3,:4,:5,:6,:7,:8,:9,:10)")
- param_oracle=list()
- error_component_list=list()
- for index,component in enumerate(component_list):
- try:
- param_oracle.append((component['cmp_id'],component['cmp_attach'],component['cmp_img'],component['cmp_brid'],component['cmp_code'],int(component['cmp_kiid']),component['cmp_company'],component['cmp_company_url'],component['description'],component['cmp_packaging']))
- except Exception as e:
- error_component_list.append(component)
-
-
- if index %20000 ==0:
- cursor.executemany(None,param_oracle)
- self.conn.commit()
- param_oracle.clear()
- print(index)
-
- print( len(error_component_list))
- cursor.executemany(None,param_oracle)
- self.conn.commit()
- cursor.close()
-
- def _insert_property(self,property_list):
-
- cursor=self.conn.cursor()
- cursor.prepare('insert into product$property (pt_id,pt_label) values(:1,:2)' )
- param_oracle=list()
-
- for index,pt in enumerate(property_list):
- try:
- param_oracle.append((pt['id'],pt['lableCn']))
- except:
- pass
- print(index)
- cursor.executemany(None,param_oracle)
- self.conn.commit()
- cursor.close()
-
-
- def craw(self,component_list,propertyvalue_list,index,property_list=None):
- # self._insert_component_temp(component_list)
- self._insert_property_value(propertyvalue_list,index)
- # self._insert_property(property_list)
-
-
-
-
- def _insert_property_value(self,propertyvalue_list,inde):
- cursor=self.conn.cursor()
- # sql="create table product$propertyvalue_temp1 (pv_id int primary key,pv_componentid int ,pv_detno int ,pv_propertyid int ,pv_value varchar(255))"
- #
- # # sql="drop table PRODUCT$COMPONENT_TEMP1"
- # cursor.execute(sql)
- # self.conn.commit()
- #
- cursor.prepare('insert into product$propertyvalue (pv_id,pv_componentid,pv_detno,pv_propertyid,pv_value) values(:1,:2,:3,:4,:5)')
- param_oracle=list()
- for index,pv in enumerate(propertyvalue_list):
- try:
- if len(pv["value"])>80:
- continue
- param_oracle.append((inde+index+1,pv['componentid'],pv['detno'],pv['propertyid'],pv['value']))
- except:
- param_oracle.append((inde+index+1,pv['componentid'],pv['detno'],pv['propertyid'],None))
- # if index%20000==0:
- # cursor.executemany(None,param_oracle)
- # self.conn.commit()
- # param_oracle.clear()
-
-
-
- if len(param_oracle)!=0:
- print(1)
- cursor.executemany(None,param_oracle)
- self.conn.commit()
- cursor.close()
-
- if __name__=='__main__':
- Oracle=OracleManager()
- cli=MongoClient(Constant.MONGODB_URL)
- db=cli.spider
- # property_list=db.property_0614.find()
- # property_list_temp=list()
- # for index,row in enumerate(property_list):
- # property_list_temp.append(row)
- # print(index)
- # Oracle.craw(None, None, None,None)
-
-
- # component_list=db.component_temp_0614.find()
- # component_list_temp=list()
- # for index,row in enumerate(component_list):
- # component_list_temp.append(row)
- # print(index)
- # Oracle.craw(component_list_temp, None, None)
-
- simple_check=db.propertyvalue_0614.find({},{'_id':True})
- rows_count=simple_check.count()
- for i in range(1,rows_count):
- if i%50000==0:
- propertyvalue_list=db.propertyvalue_0614.find().skip(i-50000).limit(50000)
- propertyvalue_list_temp=list()
- for index,row in enumerate(propertyvalue_list):
- propertyvalue_list_temp.append(row)
- Oracle.craw(None,propertyvalue_list_temp,i-50000)
- if rows_count-i<=50000:
- propertyvalue_list=db.propertyvalue_0614.find().skip(i).limit(rows_count-i)
- propertyvalue_list_temp=list()
- for index,row in enumerate(propertyvalue_list):
- propertyvalue_list_temp.append(row)
- print(index)
- Oracle.craw(None,propertyvalue_list_temp,i)
- break
- print('remaining missions are ',rows_count-i)
- print(i)
|