| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- # coding=utf-8
- '''
- @author: ch
- '''
- from http import cookiejar
- import urllib.request
class HtmlDownloader(object):
    """Download HTML pages and binary files through a cookie-aware opener.

    Each request builds a fresh ``urllib`` opener with its own cookie jar,
    a fixed set of browser-like request headers and, optionally, an HTTP
    proxy.
    """

    # Request headers sent when fetching HTML pages (default for
    # ``makeMyOpener``). Treated as read-only; never mutated.
    _PAGE_HEADERS = {
        'Connection': 'Keep-Alive',
        'Accept': 'text/html, application/xhtml+xml, */*',
        'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'Referer': 'http://www.mouser.cn/Electronic-Components/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
    }

    # Request headers sent by ``download_file``. Treated as read-only.
    _FILE_HEADERS = {
        'Connection': 'Keep-Alive',
        'Accept': 'image/*',
        'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'Referer': 'http://www.mouser.cn/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
    }

    def makeMyOpener(self, head=None, proxy=None):
        """Build an opener with cookie support, custom headers and optional proxy.

        Args:
            head: Mapping of header name to header value. When ``None`` the
                default page headers are used.
            proxy: ``host:port`` string of an HTTP proxy, or ``None`` to
                leave proxy handling to urllib's defaults.

        Returns:
            A ``urllib.request.OpenerDirector`` whose ``addheaders`` holds
            the ``(name, value)`` pairs from ``head``.
        """
        # A None sentinel replaces the former dict-literal default so no
        # mutable object is shared through the function signature.
        if head is None:
            head = self._PAGE_HEADERS

        handlers = [urllib.request.HTTPCookieProcessor(cookiejar.CookieJar())]
        if proxy is not None:
            # Only the "http" scheme is routed through the proxy.
            handlers.append(urllib.request.ProxyHandler({'http': 'http://%s/' % proxy}))

        opener = urllib.request.build_opener(*handlers)
        opener.addheaders = list(head.items())
        return opener

    def _fetch(self, url, head, proxy):
        """Return the raw response body for ``url``, or ``None``.

        ``None`` is returned when ``url`` is ``None`` or the response status
        is not 200. Errors raised by ``open`` propagate to the caller.
        """
        if url is None:
            return None
        opener = self.makeMyOpener(head=head, proxy=proxy)
        # The context manager closes the response (and its underlying
        # connection) on every exit path, including the non-200 return.
        with opener.open(url, timeout=30) as response:
            if response.getcode() != 200:
                return None
            return response.read()

    def download(self, url, proxy=None):
        """Fetch ``url`` with the page headers and return the body as text.

        Args:
            url: Address to fetch; ``None`` yields ``None``.
            proxy: Optional ``host:port`` HTTP proxy.

        Returns:
            The body decoded as UTF-8, or ``None`` if ``url`` is ``None`` or
            the status code is not 200.

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        body = self._fetch(url, self._PAGE_HEADERS, proxy)
        if body is None:
            return None
        return body.decode("utf8")

    def download_file(self, url, proxy=None):
        """Fetch ``url`` with the image headers and return the raw bytes.

        Args:
            url: Address to fetch; ``None`` yields ``None``.
            proxy: Optional ``host:port`` HTTP proxy.

        Returns:
            The response body as ``bytes``, or ``None`` if ``url`` is
            ``None`` or the status code is not 200.
        """
        return self._fetch(url, self._FILE_HEADERS, proxy)
-
|