1234567891011121314151617181920212223242526272829303132333435363738394041424344 |
- import sys
- import getopt
- from file_spider import pdffile_to_fdfs
# Fallback settings; each key can be overridden by the matching
# command-line option (--user-name, --max-thread, --temp-dir).
default_args = {
    'userName': 'someone',
    'maxThread': 10,
    'tempDir': '/tmp/',
}
def Usage():
    """Print the command-line help text to standard output.

    Each configurable option is listed together with its current value in
    the module-level ``default_args`` dict.
    """
    print('usage:')
    print('-h,--help: print help message.')
    # (description, key in default_args) for every option that has a default.
    option_help = (
        ('--user-name: downloader name, default', 'userName'),
        ('--max-thread: max threads, default', 'maxThread'),
        ('--temp-dir: file temp dir, default', 'tempDir'),
    )
    for description, key in option_help:
        print(description, default_args[key])
-
def parse_args(argv):
    """Parse command-line options into a settings dict.

    Args:
        argv: Full argument vector including the program name at index 0
            (for example ``sys.argv``); only ``argv[1:]`` is parsed.

    Returns:
        A new dict seeded from ``default_args`` with ``userName``,
        ``maxThread`` and ``tempDir`` overridden by any options supplied.
        ``maxThread`` is converted to ``int`` when given on the command line
        so it has the same type as the default.

    Raises:
        SystemExit: with status 1 after printing help for ``-h``/``--help``,
            status 2 for an unrecognised option or a non-integer
            ``--max-thread`` value, and status 3 for an option that getopt
            accepted but this function does not handle.
    """
    # Work on a copy so parsing never mutates the module-level defaults.
    settings = dict(default_args)
    try:
        # 'h' without a colon makes -h a plain flag, and 'help' registers the
        # long form that the usage text advertises.
        opts, _positional = getopt.getopt(
            argv[1:], 'h', ['help', 'user-name=', 'max-thread=', 'temp-dir='])
    except getopt.GetoptError as err:
        print(err)
        Usage()
        sys.exit(2)
    for opt, value in opts:
        if opt in ('-h', '--help'):
            Usage()
            sys.exit(1)
        elif opt == '--user-name':
            settings['userName'] = value
        elif opt == '--max-thread':
            try:
                settings['maxThread'] = int(value)
            except ValueError:
                print('--max-thread expects an integer, got %r' % (value,))
                Usage()
                sys.exit(2)
        elif opt == '--temp-dir':
            settings['tempDir'] = value
        else:
            print('unhandled option')
            sys.exit(3)
    return settings
if __name__ == '__main__':
    # Script entry point: build the crawl task from the parsed options and
    # drive it until it reports no further work.
    args = parse_args(sys.argv)
    task = pdffile_to_fdfs.FileMain(
        userName=args['userName'],
        maxThread=args['maxThread'],
        tempDir=args['tempDir'],
    )
    try:
        while task.hasNext():
            task.craw()
            # statistic() yields four values; the last one is not reported.
            succeed, failured, active, _total = task.statistic()
            print("成功 %s,失败 %s,正在爬取 %s" % (succeed, failured, active))
    finally:
        # Always close the task, even if a crawl step raises.
        task.close()
|