| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 |
# coding=utf-8
'''
Created on 2016-06-27
@author: uas
'''
import os
import random
import tempfile
import threading
import urllib.request
import uuid
from os.path import getsize

import gridfs
from pymongo.mongo_client import MongoClient

from util_common import Constant
class ImgFile_missed(object):
    """Download images for ``blank_img`` records and store them in GridFS.

    For each document in ``spider.blank_img`` with a non-empty
    ``img_url_mouser``, the image is downloaded, saved into the GridFS
    bucket of the ``pictures`` database under a freshly generated name, and
    the document's ``img_url_uu`` field is set to the new URL.
    """

    # A download smaller than this many bytes is treated as failed and retried.
    MIN_IMAGE_BYTES = 100
    # Upper bounds on retries so one bad record cannot stall the whole run.
    MAX_DOWNLOAD_ATTEMPTS = 5
    MAX_STORE_ATTEMPTS = 3
    # run() stops once the cursor index exceeds this value.
    MAX_INDEX = 1000

    def __init__(self, userName=None, maxThread=100):
        """Open the MongoDB connection and initialise bookkeeping fields.

        Args:
            userName: Stored as ``self.user``; not read by this class.
            maxThread: Stored as ``self.maxThread``; not read by this class.
        """
        self.cli = MongoClient(Constant.MONGODB_URL)
        self.db = self.cli.spider
        self.fs = gridfs.GridFS(self.cli.pictures)
        self.user = userName
        self.activeThread = 0
        self.maxThread = maxThread
        # Counters/flags below are only initialised here; nothing in this
        # class updates them.
        self.successsed = 0
        self.failured = 0
        self.total = 0
        self.isLast = False

    def _find_uu_mouser_url(self, url):
        """Return the ``img_url_uu`` already recorded for *url*, or None.

        Looks in ``component_original`` for a document whose
        ``img_url_mouser`` equals *url* and that has an ``img_url_uu`` field.
        Not called by ``run`` in this class.
        """
        result = self.db.component_original.find_one(
            {'img_url_mouser': url, "img_url_uu": {'$exists': True}})
        if result is not None:
            return result['img_url_uu']
        return None

    def _get_imgName_and_url_uu(self):
        """Generate a new image file name and its URL.

        Returns:
            Tuple ``(imgName, img_url_uu)`` where ``imgName`` is a dash-less
            UUID1 plus ``.jpg`` and ``img_url_uu`` is that name prefixed with
            ``Constant.IMG_URL_HEADER``.
        """
        imgName = uuid.uuid1().hex + '.jpg'
        img_url_uu = Constant.IMG_URL_HEADER + imgName
        return imgName, img_url_uu

    def _get_Imginfo_in_gridfs(self, filename):
        """Return True if a GridFS file named *filename* exists.

        ``find`` returns a cursor, which is never None, so the lookup uses
        ``find_one`` to get a real presence check.
        """
        fsdb = self.cli.pictures
        return fsdb.fs.files.find_one({'filename': filename}) is not None

    def _download_image(self, url, path):
        """Download *url* to *path*, retrying while the result is too small.

        Raises:
            OSError: If no attempt produced at least ``MIN_IMAGE_BYTES``.
        """
        for _ in range(self.MAX_DOWNLOAD_ATTEMPTS):
            urllib.request.urlretrieve(url, path)
            size = getsize(path)
            if size >= self.MIN_IMAGE_BYTES:
                return
            print(size)
        raise OSError('image download too small after %d attempts: %s'
                      % (self.MAX_DOWNLOAD_ATTEMPTS, url))

    def _store_image(self, data):
        """Store *data* in GridFS under a new name and return its URL.

        The presence check uses the GridFS name, not the local temp file
        name, so it verifies the file that was actually written.

        Raises:
            RuntimeError: If the file is still absent after
                ``MAX_STORE_ATTEMPTS`` puts.
        """
        imgName, img_url_uu = self._get_imgName_and_url_uu()
        for _ in range(self.MAX_STORE_ATTEMPTS):
            self.fs.put(data, content_type="image/jpeg", filename=imgName)
            if self._get_Imginfo_in_gridfs(imgName):
                return img_url_uu
        raise RuntimeError('image not found in GridFS after %d attempts: %s'
                           % (self.MAX_STORE_ATTEMPTS, imgName))

    def _process_record(self, record):
        """Download, store and link the image for one ``blank_img`` record."""
        # A unique temp file avoids name collisions; the descriptor is closed
        # at once because urlretrieve opens the path by name.
        fd, tmp_path = tempfile.mkstemp(suffix='.jpg')
        os.close(fd)
        try:
            self._download_image(record['img_url_mouser'], tmp_path)
            with open(tmp_path, 'rb') as file:
                data = file.read()
        finally:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        img_url_uu = self._store_image(data)
        self.db.blank_img.update_one(
            {'_id': record['_id']}, {'$set': {'img_url_uu': img_url_uu}})

    def run(self):
        """Process ``blank_img`` records that have a non-empty source URL.

        Iteration stops once the cursor index exceeds ``MAX_INDEX``. A failure
        on one record is printed and the loop moves on to the next record.
        """
        rs = self.db.blank_img.find({'img_url_mouser': {'$ne': ''}})
        for index, r in enumerate(rs):
            print(index, '-->', r['cmp_id'])
            if index > self.MAX_INDEX:
                break
            try:
                self._process_record(r)
            except Exception as e:
                # Best effort: report the failure and continue.
                print(e)
-
-
if __name__ == '__main__':
    # Script entry point: build the job with default arguments and run it.
    ImgFile_missed().run()
|