|
楼主 |
发表于 2017-11-15 21:58:33
|
显示全部楼层
尝试不用 requests 模块，老玩家回归 urllib.request（然而效果与上面一样，不尽如人意）：
import urllib.request
import random
from bs4 import BeautifulSoup
# Fetch the journal's list-of-issues page through an HTTP proxy and parse it.
url = 'http://pubsonline.informs.org/loi/mnsc'

# Candidate proxies as "host:port"; one is picked at random for this run.
iplist = ['59.40.51.125:8010']
proxy_support = urllib.request.ProxyHandler({'http': random.choice(iplist)})
opener = urllib.request.build_opener(proxy_support)

# Browser-like headers attached to every request made through this opener.
opener.addheaders = [
    ('User-Agent', 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36'),
    ('Referer', 'http://pubsonline.informs.org/toc/mnsc/63/11'),
    ('Host', 'pubsonline.informs.org'),
    ('Accept', '*/*'),
]

# Make the proxied opener the default used by urllib.request.urlopen().
urllib.request.install_opener(opener)

# Without a timeout a dead or slow proxy blocks forever; the `with` block
# closes the connection even if reading or decoding raises.
with urllib.request.urlopen(url, timeout=30) as response:
    html = response.read().decode('utf-8')

# Rebind `html` from the raw markup string to the parsed document tree.
html = BeautifulSoup(html, "lxml")
|