| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
# coding=utf-8
"""
Created on 2016-03-30

Components can carry two related properties, ``brand`` (trademark) and
``brandName`` (trademark name). This script counts how often each
combination of the two occurs across the stored components.

@author: ChenHao
"""
# Result of a previous run:
#     2696 2541 0 155 0
# Of the 2696 components that have data, every one has the brand property
# and only a few of them also carry the brandName property.
from util_common import Constant
from pymongo.mongo_client import MongoClient

# propertyid values that mark the two properties being compared.
BRAND_PROPERTY_ID = 40
BRAND_NAME_PROPERTY_ID = 25
# Component ids are scanned from 1 up to and including this value.
LAST_COMPONENT_ID = 2696

# cli = MongoClient(Constant.MONGODB_URL)
cli = MongoClient("mongodb://localhost:27017/")
try:
    db = cli.spider
    count_ava = 0           # components with at least one propertyvalue row
    count_hasBrand = 0      # brand only
    count_hasBrandName = 0  # brandName only
    count_hasBoth = 0       # both properties present
    count_hasNone = 0       # rows exist, but neither property
    for i in range(1, LAST_COMPONENT_ID + 1):
        # Collect the ids up front instead of calling Cursor.count(), which
        # was removed in PyMongo 4; an empty list means "no rows".
        property_ids = [
            r["propertyid"] for r in db.propertyvalue.find({"componentid": i})
        ]
        if not property_ids:
            continue
        count_ava += 1
        hasBrand = BRAND_PROPERTY_ID in property_ids
        hasBrandName = BRAND_NAME_PROPERTY_ID in property_ids
        if hasBrand and hasBrandName:
            count_hasBoth += 1
        elif hasBrand:
            count_hasBrand += 1
        elif hasBrandName:
            count_hasBrandName += 1
        else:
            count_hasNone += 1

    print(count_ava, count_hasBrand, count_hasBrandName, count_hasBoth, count_hasNone)
finally:
    # Release the connection even when a query fails part-way through.
    cli.close()
|