# coding=utf-8
'''
Created on 2016-03-29 14:40:10
Check the start-URL CSV for list pages that were entered more than once.
@author: ChenHao
'''

# Hard-coded location of the CSV that is checked when this file is run.
CSV_PATH = "F:/Users/XIONGCY/Desktop/KindUrlStart.csv"

# 0-based CSV columns read by this script.
_NAME_COL = 1
_FLAG_COL = 2
_URL_COL = 3
# Only rows whose flag column holds exactly this text are checked.
_ACTIVE_FLAG = "1"

# NOTE: an earlier, disabled step of this script created one directory per
# CSV row under "../spider_download/listPage/" (named 1, 2, 3, ...).  It was
# already switched off and is intentionally not reproduced here.


def find_duplicates(lines):
    '''Return every repeated name and URL found in *lines*, in report order.

    Each item of *lines* is one comma-separated row (with or without its
    line ending).  Rows whose flag column is not "1" are ignored, as are
    rows too short to contain a URL column (for example a blank trailing
    line), so malformed input no longer aborts the whole check.

    For each remaining row the name is examined first, then the URL.  A
    value is reported every time it shows up again after its first
    occurrence, so a value present three times is reported twice.

    :param lines: iterable of row strings, e.g. an open text file.
    :return: list of duplicated values in the order they were detected.
    '''
    seen_names = set()
    seen_urls = set()
    duplicates = []

    for line in lines:
        # Drop the line ending first: otherwise a URL in the last column
        # keeps its "\n" and will not equal the same URL on a final line
        # that has no newline.
        fields = line.rstrip("\r\n").split(",")
        if len(fields) <= _URL_COL:
            continue
        if fields[_FLAG_COL] != _ACTIVE_FLAG:
            continue

        checks = (
            (fields[_NAME_COL], seen_names),
            (fields[_URL_COL], seen_urls),
        )
        for value, seen in checks:
            if value in seen:
                duplicates.append(value)
            else:
                seen.add(value)

    return duplicates


def main():
    '''Print each duplicated name/URL from CSV_PATH on its own line.'''
    # The default text encoding is kept as in the original script.
    # NOTE(review): the CSV's real encoding is not visible here - confirm
    # before pinning an explicit one.
    with open(CSV_PATH) as fin:
        duplicates = find_duplicates(fin)
    for value in duplicates:
        print(value)


if __name__ == "__main__":
    main()