pdffile_delete.py 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. # coding=utf-8
'''
Delete the PDF files of records with attachTask=4 (status: upload error).
Mongo shell snippet that marks such records:
db.component_original.find({attachTask:2}).forEach(function(item){if(item['attachUrl']!=null&&item['attachUrl'].indexOf('.pdf')==-1){item['attachTask']=4;db.component_original.save(item);}})
Delete PDFs that cannot be read and reset their status to "not uploaded".
Query selecting those records:
{attachTask:2, attach_download_user:'deprecated'}
'''
  8. from pymongo.mongo_client import MongoClient
  9. from util_common import Constant
  10. import requests
  11. cli = MongoClient(Constant.MONGODB_URL)
  12. db = cli.spider
  13. fs_api_delete = "http://10.10.100.200:9999/file/delete?path=%s"
  14. # deprecated
  15. attachs = db.component_original.find({"attachTask" : Constant.DONE, "attach_download_user": 'deprecated'}, {"_id": True, "attachUrl_uu": True})
  16. for attach in attachs:
  17. try:
  18. requests.get(fs_api_delete % attach['attachUrl_uu'])
  19. db.component_original.update_one({'_id': attach["_id"]}, {'$set': {'attachUrl_uu': None, 'attach_download_user': None, 'attachTask': Constant.TODO}})
  20. except Exception as e:
  21. print(attach['attachUrl_uu'], e)
  22. continue
  23. # error
  24. attachs = db.component_original.find({"attachTask" : Constant.ERROR}, {"_id": True, "attachUrl_uu": True})
  25. for attach in attachs:
  26. try:
  27. requests.get(fs_api_delete % attach['attachUrl_uu'])
  28. db.component_original.update_one({'_id': attach["_id"]}, {'$set': {'attachUrl_uu': None, 'attach_download_user': None}})
  29. except Exception as e:
  30. print(attach['attachUrl_uu'], e)
  31. continue
  32. cli.close()