test_init_kindlist.py 774 B

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. # coding=utf-8
  2. '''
  3. Created on 2016年3月29日14:40:10
  4. 检查之前重复的listpage
  5. @author: ChenHao
  6. '''
  7. fin = open("F:/Users/XIONGCY/Desktop/KindUrlStart.csv")
  8. lines = fin.readlines()
  9. fin.close()
  10. '''
  11. for index, line in enumerate(lines):
  12. dirpath = "../spider_download/listPage/" + str(index+1)
  13. os.mkdir(dirpath)
  14. '''
  15. kindlist_name = list()
  16. kindlist_url = list()
  17. for line in lines:
  18. l = line.split(",")
  19. if l[2] == str(1):
  20. # 读取对应的html并加入
  21. nameCn = l[1]
  22. if nameCn in kindlist_name:
  23. print (nameCn)
  24. else:
  25. kindlist_name.append(nameCn)
  26. url = l[3]
  27. if url in kindlist_url:
  28. print (url)
  29. else:
  30. kindlist_url.append(url)