|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
楼主想爬取京东金融众筹项目页面的网页源代码,代码写得有些粗糙(代码附上):
# Set the timeout and the sleep time.
# The default timeout (in seconds) applies to sockets created after this call.
socket.setdefaulttimeout(20)
# NOTE(review): this is a top-level statement, so it pauses once when the
# script starts; it does not throttle the individual page requests.
time.sleep(5)
def url_open(url, *, user_agent="Mozilla/5.0"):
    """Fetch *url* and return the raw response body.

    The request carries a ``User-Agent`` header and is opened through an
    opener configured with one HTTP proxy picked at random from a fixed list.

    Args:
        url: Address of the page to download.
        user_agent: Value sent as the ``User-Agent`` header. The value in the
            original snippet was redacted by its author, so a generic default
            is used here; pass a falsy value to send no custom header.

    Returns:
        The response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request cannot be completed (HTTP
            error statuses are reported as its ``HTTPError`` subclass).
    """
    request = urllib.request.Request(url)
    if user_agent:
        # add_header() requires both a header name and a value.
        request.add_header("User-Agent", user_agent)

    # Proxy setup.
    proxies = ['http://123.119.202.254:8118','http://113.67.164.198:8118','http://175.8.227.242:8118']
    proxy = random.choice(proxies)
    # NOTE(review): only the 'http' scheme is mapped, so https:// URLs are not
    # routed through the proxy -- confirm whether that is intended.
    proxy_support = urllib.request.ProxyHandler({'http':proxy})
    opener = urllib.request.build_opener(proxy_support)
    # Kept from the original: also make this opener the process-wide default.
    urllib.request.install_opener(opener)

    # Open the prepared request only after the header and proxy are in place,
    # and close the response even if reading fails.
    with opener.open(request) as response:
        return response.read()
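# Step through consecutive project pages and save each page's HTML source.
# Pages are named by project id, e.g. "85337.html".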
def run(start_id=85337, count=20000):
    """Download consecutive project detail pages and save each one to disk.

    For every offset ``p`` in ``range(count)`` the page
    ``https://z.jd.com/project/details/<start_id + p>.html`` is fetched with
    ``url_open`` and its bytes are written to ``<p>.html`` in the current
    working directory. Progress markers are printed before and after each
    page.

    Args:
        start_id: Project id of the first page to fetch.
        count: Number of consecutive ids to fetch.

    Note:
        Output files are named after the offset ``p``, not the project id,
        matching the original behavior. Any exception raised by ``url_open``
        propagates and stops the run.
    """
    base_url = "https://z.jd.com/project/details/"
    for p in range(count):
        page_num = start_id + p
        page_url = base_url + str(page_num) + ".html"
        print(' =='+str(page_num)+'==start==')
        result = url_open(page_url)
        # The context manager closes the file even if the write fails.
        with open(str(p) + '.html', 'wb') as f:
            f.write(result)
        print('=='+str(p)+'====end==')
|
|