#coding=utf-8
'''
Created on 2016-06-27

@author: uas

Back-fill missing component images: download each image referenced by a
``blank_img`` record, store it in GridFS and write the resulting image URL
back onto the record.
'''
import os
import random
import tempfile
import threading
import uuid
import urllib.request
from os.path import getsize

import gridfs
from pymongo.mongo_client import MongoClient

from util_common import Constant


class ImgFile_missed(object):
    '''Download images for ``blank_img`` records and store them in GridFS.

    Records are read from the ``blank_img`` collection of the ``spider``
    database; image bytes are written to GridFS in the ``pictures`` database.
    '''

    # Highest cursor index that is still processed by run().
    MAX_RECORDS = 1000
    # A downloaded file smaller than this many bytes is treated as a failed
    # download and fetched again.
    MIN_IMAGE_BYTES = 100
    # Upper bounds on retries so a permanently broken URL or a failing
    # GridFS write cannot stall the whole run.
    MAX_DOWNLOAD_ATTEMPTS = 5
    MAX_STORE_ATTEMPTS = 3

    def __init__(self, userName=None, maxThread=100):
        '''Connect to MongoDB and initialise the bookkeeping attributes.

        :param userName: stored as ``self.user``; not read by any method
            in this file.
        :param maxThread: stored as ``self.maxThread``; not read by any
            method in this file.
        '''
        self.cli = MongoClient(Constant.MONGODB_URL)
        self.db = self.cli.spider
        self.fs = gridfs.GridFS(self.cli.pictures)
        self.user = userName
        # The counters and flags below are initialised here but never
        # updated by the methods in this file.
        self.activeThread = 0
        self.maxThread = maxThread
        self.successsed = 0
        self.failured = 0
        self.total = 0
        self.isLast = False

    def _find_uu_mouser_url(self, url):
        '''Return the ``img_url_uu`` already recorded for ``url``, if any.

        Looks in ``component_original`` for a document whose
        ``img_url_mouser`` equals ``url`` and that has an ``img_url_uu``
        field.

        :returns: the stored ``img_url_uu`` value, or ``None`` when no such
            document exists.
        '''
        result = self.db.component_original.find_one(
            {'img_url_mouser': url, 'img_url_uu': {'$exists': True}})
        if result is not None:
            return result['img_url_uu']
        return None

    def _get_imgName_and_url_uu(self):
        '''Generate a fresh image file name and its URL.

        :returns: ``(imgName, img_url_uu)`` where ``imgName`` is a
            ``uuid1`` value without dashes plus ``.jpg`` and ``img_url_uu``
            is ``Constant.IMG_URL_HEADER`` followed by that name.
        '''
        uuid_str = str(uuid.uuid1())
        imgName = uuid_str.replace('-', '') + '.jpg'
        img_url_uu = Constant.IMG_URL_HEADER + imgName
        return imgName, img_url_uu

    def _get_Imginfo_in_gridfs(self, filename):
        '''Return True when GridFS holds a file named ``filename``.

        :param filename: the GridFS file name to look for.
        '''
        fsdb = self.cli.pictures
        # find() returns a cursor, which is never None; find_one() returns
        # None when nothing matches, so the existence test is meaningful.
        return fsdb.fs.files.find_one({'filename': filename}) is not None

    def _download_image(self, url, filename):
        '''Download ``url`` into the local file ``filename``.

        The download is repeated while the resulting file is smaller than
        ``MIN_IMAGE_BYTES``, up to ``MAX_DOWNLOAD_ATTEMPTS`` times.

        :raises RuntimeError: when every attempt produced a too-small file.
        '''
        for _ in range(self.MAX_DOWNLOAD_ATTEMPTS):
            urllib.request.urlretrieve(url, filename)
            if getsize(filename) >= self.MIN_IMAGE_BYTES:
                return
        raise RuntimeError(
            'image at %s is still smaller than %d bytes after %d attempts'
            % (url, self.MIN_IMAGE_BYTES, self.MAX_DOWNLOAD_ATTEMPTS))

    def _store_image(self, data, imgName):
        '''Write ``data`` to GridFS as ``imgName`` and confirm it is there.

        The write is repeated, up to ``MAX_STORE_ATTEMPTS`` times, until a
        file with that name can be found in GridFS.

        :raises RuntimeError: when the file is not found after all attempts.
        '''
        for _ in range(self.MAX_STORE_ATTEMPTS):
            self.fs.put(data, content_type="image/jpeg", filename=imgName)
            # Verify under the GridFS name, not the local temp file name.
            if self._get_Imginfo_in_gridfs(imgName):
                return
        raise RuntimeError(
            'image %s not found in GridFS after %d attempts'
            % (imgName, self.MAX_STORE_ATTEMPTS))

    def _process_record(self, r):
        '''Download, store and link the image of one ``blank_img`` record.

        :param r: a ``blank_img`` document; its ``img_url_mouser`` and
            ``_id`` fields are read.
        '''
        # A private temp file avoids clobbering files in the working
        # directory; the descriptor is closed so urlretrieve can reopen it.
        fd, filename = tempfile.mkstemp(suffix='.jpg')
        os.close(fd)
        try:
            self._download_image(r['img_url_mouser'], filename)
            print(getsize(filename))
            imgName, img_url_uu = self._get_imgName_and_url_uu()
            with open(filename, 'rb') as file:
                data = file.read()
            self._store_image(data, imgName)
            # update_one replaces Collection.update, which newer PyMongo
            # releases no longer provide.
            self.db.blank_img.update_one(
                {'_id': r['_id']},
                {'$set': {'img_url_uu': img_url_uu}})
        finally:
            if os.path.exists(filename):
                os.remove(filename)

    def run(self):
        '''Process ``blank_img`` records that have a non-empty image URL.

        Each record's index and ``cmp_id`` are printed. Processing stops
        once the index exceeds ``MAX_RECORDS``. A failure on one record is
        printed and does not stop the remaining records.
        '''
        # NOTE: reusing an existing img_url_uu via _find_uu_mouser_url is
        # currently disabled; every record is downloaded afresh.
        rs = self.db.blank_img.find({'img_url_mouser': {'$ne': ''}})
        for index, r in enumerate(rs):
            print(index, '-->', r['cmp_id'])
            if index > self.MAX_RECORDS:
                break
            try:
                self._process_record(r)
            except Exception as e:
                # Best effort per record: report and move on to the next.
                print(e)


if __name__ == '__main__':
    img_missed = ImgFile_missed()
    img_missed.run()