123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
'''
Remove the last page from every downloaded PDF attachment and re-upload the trimmed file.
'''
import os
import random
import urllib
import urllib.request

import requests
from pymongo.mongo_client import MongoClient
from PyPDF2.pdf import PdfFileReader, PdfFileWriter

from util_common import Constant
cli = MongoClient(Constant.MONGODB_URL)
db = cli.spider
# Select attachments whose task is DONE and that have not yet been rewritten
# by this script (rewritten records are tagged with user 'admin' below).
# Only the fields the loop needs are projected.
attachs = db.component_original.find(
    {"attachTask": Constant.DONE, "attach_download_user": {'$ne': 'admin'}},
    {"_id": True, "attachUrl_uu": True},
)
fs_api_delete = "http://10.10.100.200:9999/file/delete?path=%s"


def _strip_last_page(src_path, dst_path):
    """Write a copy of the PDF at src_path, minus its last page, to dst_path."""
    with open(src_path, 'rb') as input_stream:
        pdf_input = PdfFileReader(input_stream)
        pdf_output = PdfFileWriter()
        # NOTE(review): a one-page PDF yields an output with no pages, exactly
        # as before -- confirm that is the intended result for such files.
        for page in range(pdf_input.getNumPages() - 1):
            pdf_output.addPage(pdf_input.getPage(page))
        # The input stays open until the output has been written, as in the
        # original flow, since the writer still references the reader's pages.
        with open(dst_path, 'wb') as output_stream:
            pdf_output.write(output_stream)


def _remove_if_exists(path):
    """Delete a local scratch file if it was created."""
    if os.path.exists(path):
        os.remove(path)


try:
    for attach in attachs:
        # Scratch files in the working directory; the random names are kept
        # because the upload's multipart filename is derived from them.
        old_filename = str(random.random()) + ".pdf"
        new_filename = str(random.random()) + ".pdf"

        try:
            try:
                # Fetch the file straight from the internal file server
                # instead of going through the public dfs.ubtoc.com host.
                urllib.request.urlretrieve(
                    attach['attachUrl_uu'].replace(
                        'dfs.ubtoc.com/',
                        '10.10.100.200:9999/file/download?path=',
                    ),
                    old_filename,
                )
            except Exception as e:
                # The attachment cannot be fetched: report why and reset the
                # record so it is queued for download again. Unlike a bare
                # ``except:``, this lets Ctrl-C / SystemExit stop the script.
                print(attach.get('attachUrl_uu'), e)
                db.component_original.update_one(
                    {'_id': attach["_id"]},
                    {'$set': {'attachTask': Constant.TODO, 'attachUrl_uu': None, 'attach_download_user': None}},
                )
                continue

            _strip_last_page(old_filename, new_filename)

            with open(new_filename, 'rb') as file:
                res = requests.post(Constant.FS_API_UPLOAD, files={'file': file})
            # Fail loudly on an HTTP error instead of reading a missing 'path'.
            res.raise_for_status()
            res_j = res.json()
            db.component_original.update_one(
                {'_id': attach["_id"]},
                {'$set': {'attachUrl_uu': res_j['path'], 'attach_download_user': 'admin'}},
            )
            # Delete the old remote file only after the replacement has been
            # uploaded and recorded, so a failed upload cannot lose the file.
            requests.get(fs_api_delete % attach['attachUrl_uu'])
        except Exception as e:
            # Best effort: report the failing attachment and move on.
            print(attach.get('attachUrl_uu'), e)
            continue
        finally:
            _remove_if_exists(old_filename)
            _remove_if_exists(new_filename)
finally:
    # Release the MongoDB connection even if iterating the cursor fails.
    cli.close()
|