# pdffile_client_linux.py (1.4 KB)

  1. import sys
  2. import getopt
  3. from file_spider import pdffile_to_fdfs
  4. default_args = {'userName':'someone', 'maxThread':10, 'tempDir': '/tmp/'}
  5. def Usage():
  6. print('usage:')
  7. print('-h,--help: print help message.')
  8. print('--user-name: downloader name, default', default_args['userName'])
  9. print('--max-thread: max threads, default', default_args['maxThread'])
  10. print('--temp-dir: file temp dir, default', default_args['tempDir'])
  11. def parse_args(argv):
  12. _args = default_args
  13. try:
  14. opts, args = getopt.getopt(argv[1:], 'h:', ['user-name=', 'max-thread=', 'temp-dir='])
  15. except getopt.GetoptError as err:
  16. Usage()
  17. sys.exit(2)
  18. for o, v in opts:
  19. if o in ('-h', '--help'):
  20. Usage()
  21. sys.exit(1)
  22. elif o in ('--user-name',):
  23. _args['userName'] = v
  24. elif o in ('--max-thread',):
  25. _args['maxThread'] = v
  26. elif o in ('--temp-dir',):
  27. _args['tempDir'] = v
  28. else:
  29. print('unhandled option')
  30. sys.exit(3)
  31. return _args
  32. if __name__ == '__main__':
  33. args = parse_args(sys.argv)
  34. task = pdffile_to_fdfs.FileMain(userName=args['userName'], maxThread=args['maxThread'], tempDir=args['tempDir'])
  35. while task.hasNext():
  36. task.craw()
  37. succeed, failured, active, total = task.statistic()
  38. print("成功 %s,失败 %s,正在爬取 %s" % (succeed, failured, active))
  39. task.close()