dbTest.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. # coding=utf-8
'''
Created on 2016-03-04
@author: ChenHao
'''
  6. from pymongo.mongo_client import MongoClient
  7. from util_common import Constant, html_downloader
  8. from math import ceil
  9. from util_common.html_downloader import HtmlDownloader
  10. import urllib
  11. import pymongo
  12. cli = MongoClient(Constant.MONGODB_URL)
  13. # cli = MongoClient("mongodb://localhost:27017/")
  14. # cli = MongoClient("mongodb://113.105.74.140:27017/")
  15. db = cli.spider
'''
Inspect propertyvalue
'''
  19. # rs = db.propertyvalue.find({"propertyid" : 49})
  20. # for r in rs:
  21. # print r
  22. #
  23. # user = [{"name":"chenhao", "age":12}, {"name":"xiaohaohao", "age":14}]
  24. #
  25. # results = db.restaurant.insert_many(user)
  26. #
  27. # results.inserted_ids()
  28. #
  29. # for i in range(100):
  30. # cursor = db.restaurant.find_one({"name":"chenhao"})
  31. # print cursor
  32. # db.user.insert({"name": "test", "starttime": 0})
  33. # rs = db.user.find({"name": "chen"})
  34. # print rs
  35. #
  36. # rs_one = db.user.find_one({"name": "chenh"})
  37. # print rs_one
  38. # rs_user = db.user.find()
  39. # for r in rs_user:
  40. # print r
  41. # rs = db.kindlist_todo.find_one({"url": "http://www.mouser.cn/Semiconductors/RF-Semiconductors/RF-Integrated-Circuits/_/N-az8go/"})
  42. # db.kindlist_todo.delete_one({"url": "http://www.mouser.cn/Semiconductors/Discrete-Semiconductors/Diodes-Rectifiers/_/N-ax1ma/?No=1900"})
  43. # rs = db.kindlist_todo.find({"status": Constant.DISTINCT})
'''
Check the deduplication result of component_original
'''
  47. # rs = db.component_original.find({})
  48. # print (rs.count())
  49. #
  50. # rs = db.component_original.find({"status": Constant.DONE})
  51. # print (rs.count())
  52. #
  53. # rs = db.component_original.find({"status": Constant.DISTINCT})
  54. # print (rs.count())
  55. # rs = db.brand_temp.find({})
  56. # brand_set = set()
  57. # print (rs.count())
  58. # for r in rs:
  59. # brand_set.add(r["nameCn"])
  60. # print (len(brand_set))
  61. # rs = db.component_original.find({"status": Constant.DONE,"imgTask": Constant.TODO})
  62. # print ("图片下载未完成", rs.count())
  63. #
  64. # rs = db.component_original.find({"status": Constant.DONE,"imgTask": Constant.DONE})
  65. # print ("图片下载已完成", rs.count())
  66. #
  67. # rs = db.component_original.find({"imgTask": Constant.DONE})
  68. # print ("图片下载已完成", rs.count())
  69. #
  70. # rs = db.component_original.find({"status": Constant.DONE, "imgTask": None})
  71. # print ("图片下载失败" ,rs.count())
  72. # db.user.create_index([("starttime", pymongo.ASCENDING)])
  73. # print (db.user.find_one())
  74. # rs = db.component_original.find({"imgTask": Constant.DONE}, {"img_url_uu": True}).limit(10000)
  75. # ss = set()
  76. # for r in rs:
  77. # ss.add(r["img_url_uu"])
  78. # print (len(ss))
  79. # temp_list = list()
  80. # rs = db.propertyvalue_temp.find({})
  81. # for index, r in enumerate(rs):
  82. # if index < 10000:
  83. # temp_list.append(r)
  84. # else:
  85. # break
  86. #
  87. # print (temp_list)
  88. # print(rs["str_html"])
  89. # # for i in rs:
  90. # # print i
  91. # for ind, i in enumerate(rs):
  92. # if (cou - ind) > 10:
  93. # # if ind < 30:
  94. # i["status"] = Constant.DONE
  95. # db.kindlist_todo.save(i)
  96. # print ind
  97. # componentid_list = list(i for i in range(1, 10000000))
  98. # print (len(componentid_list))
  99. # rs = db.propertyvalue.find({"componentid": {"$in": componentid_list}}, {"_id": False}, no_cursor_timeout=True)
  100. # for r in rs:
  101. # pass
  102. # url_set = set()
  103. # rs = db.component_temp.find({"kindid_mouser": 568})
  104. # rs = db.component_temp.find({"kindid_uu": 214})
  105. # print (rs.count())
  106. # for r in rs:
  107. # print (r)
  108. # print (rs.count())
  109. rs = db.kindlist_todo.find({})
  110. print (rs.count())
  111. rs = db.kindlist_todo.find({"status": Constant.TODO})
  112. print (rs.count())
  113. rs = db.kindlist_todo.find({"status": Constant.DONE})
  114. print (rs.count())
  115. '''
  116. # 检查速度
  117. # 0.0139999389648
  118. # 0.297000169754
  119. t1 = time.time()
  120. for i in range(1000):
  121. db.kindlist_todo.find()
  122. t2 = time.time()
  123. print t2 - t1
  124. for i in range(1000):
  125. db.kindlist_todo.find_one()
  126. t3 = time.time()
  127. print t3 - t2
  128. '''
  129. cli.close()