from pymongo.mongo_client import MongoClient
from util_common import Constant

'''
Find invalid image links.
'''
# Connect to MongoDB and grab the two databases this script reads from.
cli = MongoClient(Constant.MONGODB_URL)
db = cli.spider
fsdb = cli.pictures

# Gather every stored filename from the `fs.files` collection
# (presumably the GridFS file-metadata collection -- confirm).
files = fsdb.fs.files.find({}, {"filename": 1, "_id": 0})
filenames = {entry["filename"] for entry in files}
print('all files', len(filenames))

# Inspect components whose image task is marked DONE; only the first 100
# matches are examined because of the cursor limit.
cmps = db.component_original.find(
    {"imgTask": Constant.DONE},
    {"img_url_uu": 1},
).limit(100)

# Ids of components whose referenced image is not among the stored files.
err_cmps = set()
for component in cmps:
    # Drop the first 29 characters of the URL to get the stored filename.
    # NOTE(review): 29 is presumably the length of a fixed URL prefix -- confirm.
    stored_name = str(component["img_url_uu"])[29:]
    if stored_name in filenames:
        continue
    print(stored_name)
    err_cmps.add(component["_id"])
print(len(err_cmps))
# cmps = db.component_original.find({"imgTask" : Constant.TODO, "img_url_mouser": {"$exists": True}}, {"img_url_mouser":1})
# all_urls = set()
# for cmp in cmps:
#     all_urls.add(cmp["img_url_mouser"])        
# print(len(all_urls))