testInsertcomponent.py.svn-base 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. #coding=utf-8
  2. '''
  3. Created on 2016年6月7日
  4. @author: uas
  5. '''
  6. import os
  7. import time
  8. os.environ['NLS_LANG']='SIMPLIFIED CHINESE_CHINA.UTF8'
  9. from pymongo.mongo_client import MongoClient
  10. import cx_Oracle
  11. from util_common import Constant
  12. class OracleManager(object):
  13. def __init__(self):
  14. self.conn=cx_Oracle.connect(Constant.ORACLE_URL)
  15. def _insert_component_temp(self,component_list):
  16. cursor=self.conn.cursor()
  17. # sql="create table product$component_temp1 (cmp_id int primary key,cmp_attach varchar(255) ,cmp_img varchar(255),cmp_brid int ,cmp_code varchar(255),cmp_kiid int,cmp_company varchar(255) ,cmp_company_url varchar(255) ,cmp_description varchar(4000),cmp_packaging varchar(255))"
  18. #
  19. # # sql="drop table PRODUCT$COMPONENT_TEMP1"
  20. # cursor.execute(sql)
  21. # self.conn.commit()
  22. cursor.prepare("INSERT INTO product$component (cmp_id,cmp_attach,cmp_img,cmp_brid,cmp_code,cmp_kiid,cmp_company,cmp_company_url,cmp_description,cmp_packaging) values(:1,:2,:3,:4,:5,:6,:7,:8,:9,:10)")
  23. param_oracle=list()
  24. error_component_list=list()
  25. for index,component in enumerate(component_list):
  26. try:
  27. param_oracle.append((component['cmp_id'],component['cmp_attach'],component['cmp_img'],component['cmp_brid'],component['cmp_code'],int(component['cmp_kiid']),component['cmp_company'],component['cmp_company_url'],component['description'],component['cmp_packaging']))
  28. except Exception as e:
  29. error_component_list.append(component)
  30. if index %20000 ==0:
  31. cursor.executemany(None,param_oracle)
  32. self.conn.commit()
  33. param_oracle.clear()
  34. print(index)
  35. print( len(error_component_list))
  36. cursor.executemany(None,param_oracle)
  37. self.conn.commit()
  38. cursor.close()
  39. def _insert_property(self,property_list):
  40. cursor=self.conn.cursor()
  41. cursor.prepare('insert into product$property (pt_id,pt_label) values(:1,:2)' )
  42. param_oracle=list()
  43. for index,pt in enumerate(property_list):
  44. try:
  45. param_oracle.append((pt['id'],pt['lableCn']))
  46. except:
  47. pass
  48. print(index)
  49. cursor.executemany(None,param_oracle)
  50. self.conn.commit()
  51. cursor.close()
  52. def craw(self,component_list,propertyvalue_list,index,property_list=None):
  53. # self._insert_component_temp(component_list)
  54. self._insert_property_value(propertyvalue_list,index)
  55. # self._insert_property(property_list)
  56. def _insert_property_value(self,propertyvalue_list,inde):
  57. cursor=self.conn.cursor()
  58. # sql="create table product$propertyvalue_temp1 (pv_id int primary key,pv_componentid int ,pv_detno int ,pv_propertyid int ,pv_value varchar(255))"
  59. #
  60. # # sql="drop table PRODUCT$COMPONENT_TEMP1"
  61. # cursor.execute(sql)
  62. # self.conn.commit()
  63. #
  64. cursor.prepare('insert into product$propertyvalue (pv_id,pv_componentid,pv_detno,pv_propertyid,pv_value) values(:1,:2,:3,:4,:5)')
  65. param_oracle=list()
  66. for index,pv in enumerate(propertyvalue_list):
  67. try:
  68. if len(pv["value"])>80:
  69. continue
  70. param_oracle.append((inde+index+1,pv['componentid'],pv['detno'],pv['propertyid'],pv['value']))
  71. except:
  72. param_oracle.append((inde+index+1,pv['componentid'],pv['detno'],pv['propertyid'],None))
  73. # if index%20000==0:
  74. # cursor.executemany(None,param_oracle)
  75. # self.conn.commit()
  76. # param_oracle.clear()
  77. if len(param_oracle)!=0:
  78. print(1)
  79. cursor.executemany(None,param_oracle)
  80. self.conn.commit()
  81. cursor.close()
  82. if __name__=='__main__':
  83. Oracle=OracleManager()
  84. cli=MongoClient(Constant.MONGODB_URL)
  85. db=cli.spider
  86. # property_list=db.property_0614.find()
  87. # property_list_temp=list()
  88. # for index,row in enumerate(property_list):
  89. # property_list_temp.append(row)
  90. # print(index)
  91. # Oracle.craw(None, None, None,None)
  92. # component_list=db.component_temp_0614.find()
  93. # component_list_temp=list()
  94. # for index,row in enumerate(component_list):
  95. # component_list_temp.append(row)
  96. # print(index)
  97. # Oracle.craw(component_list_temp, None, None)
  98. simple_check=db.propertyvalue_0614.find({},{'_id':True})
  99. rows_count=simple_check.count()
  100. for i in range(1,rows_count):
  101. if i%50000==0:
  102. propertyvalue_list=db.propertyvalue_0614.find().skip(i-50000).limit(50000)
  103. propertyvalue_list_temp=list()
  104. for index,row in enumerate(propertyvalue_list):
  105. propertyvalue_list_temp.append(row)
  106. Oracle.craw(None,propertyvalue_list_temp,i-50000)
  107. if rows_count-i<=50000:
  108. propertyvalue_list=db.propertyvalue_0614.find().skip(i).limit(rows_count-i)
  109. propertyvalue_list_temp=list()
  110. for index,row in enumerate(propertyvalue_list):
  111. propertyvalue_list_temp.append(row)
  112. print(index)
  113. Oracle.craw(None,propertyvalue_list_temp,i)
  114. break
  115. print('remaining missions are ',rows_count-i)
  116. print(i)