|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import re
- import urllib.request
- from bs4 import BeautifulSoup
- import os
- import socket
- import pymysql
class GetProxy:
    """Scrape candidate proxies from the listing page at ``self.url``.

    Constructing an instance immediately downloads the page and fills
    ``self.IPlist`` with ``[cell1, cell2, cell5]`` text triples, one per
    table row (the script's caller unpacks them as ip, port, type).

    Relies on names provided by the surrounding module: ``urllib.request``,
    ``BeautifulSoup`` and a module-level ``headers`` dict of request headers.
    """

    def __init__(self):
        self.IPlist = []
        self.url = "http://www.xicidaili.com/nn"
        self.proxy_from_xici()

    def open_page(self):
        """Fetch ``self.url`` and return the raw response body.

        Raises whatever ``urllib.request.urlopen`` raises on network errors.
        """
        req = urllib.request.Request(self.url, None, headers)
        # Use the response as a context manager so the socket is released
        # as soon as the body has been read.
        with urllib.request.urlopen(req) as response:
            return response.read()

    def proxy_from_xici(self):
        """Parse the downloaded page and append one entry per usable row."""
        soup = BeautifulSoup(self.open_page(), "html.parser")
        rows = soup.findAll("tr")
        # The first <tr> is skipped, as in the original index loop that
        # started at 1 (presumably the table header row).
        for row in rows[1:]:
            cells = row.findAll("td")
            # Rows with fewer than six cells cannot supply columns 1, 2
            # and 5; skip them explicitly instead of using a bare except
            # that would also swallow KeyboardInterrupt and real bugs.
            if len(cells) < 6:
                continue
            self.IPlist.append([cells[1].text, cells[2].text, cells[5].text])
class TestProxy:
    """Probe one proxy and record it in ``Proxy_list`` when it responds.

    The proxy under test is not passed in. ``test`` reads the module-level
    names ``ip``, ``port`` and ``ip_type`` (all strings), the request
    ``headers`` dict, and appends to the module-level ``Proxy_list``.
    Constructing an instance runs the probe immediately.
    """

    def __init__(self):
        self.test()

    def test(self):
        """Send one request through the proxy and report the outcome.

        On success the ``{scheme: proxy_url}`` mapping is appended to
        ``Proxy_list``; on any request failure a message is printed and
        nothing is recorded.
        """
        if ip_type == "HTTP":
            proxy_host = "http://" + ip + ":" + port
            proxy_temp = {"http": proxy_host}
        else:
            proxy_host = "https://" + ip + ":" + port
            proxy_temp = {"https": proxy_host}

        # NOTE(review): the probe URL below is http://. ProxyHandler picks a
        # proxy by the request's scheme, so an entry registered only under
        # "https" may never be exercised by this request -- confirm.
        proxy_support = urllib.request.ProxyHandler(proxy_temp)
        opener = urllib.request.build_opener(proxy_support)
        # Installs the opener process-wide; later urlopen() calls use it.
        urllib.request.install_opener(opener)

        try:
            req = urllib.request.Request(
                "http://www.symmz.com/xiaoqingxin.html", None, headers
            )
            # Only reachability matters; close the response right away
            # rather than leaking the connection.
            with urllib.request.urlopen(req):
                pass
        except Exception:
            # Best-effort probe: any request error means "unusable".
            # Unlike a bare except, this lets Ctrl-C / SystemExit through.
            print(proxy_host + "无法登陆")
            return

        print(proxy_host + "可以登陆")
        Proxy_list.append(proxy_temp)
-
-
class save_to_mysql:
    """Thin wrapper around a PyMySQL connection for storing proxies.

    Opens a connection to the local MySQL server on construction and
    switches to the ``scraping`` database. Call ``close`` when finished.
    """

    def __init__(self):
        self.conn = pymysql.connect(
            host="127.0.0.1", user="root", passwd="123456", db="mysql"
        )
        try:
            self.cur = self.conn.cursor()
            self.cur.execute("USE scraping")
        except Exception:
            # Do not leak the open connection when setup fails part-way.
            self.conn.close()
            raise

    def save(self, proxy=None):
        """Insert one proxy into ``proxys_list`` and commit.

        Args:
            proxy: The value to store; it is stringified with ``str``.
                When omitted, the module-level loop variable ``each`` is
                used, which keeps existing ``save()`` callers working.
        """
        if proxy is None:
            proxy = each
        # Pass the parameter as a one-element tuple, the DB-API sequence
        # form, so the driver does the quoting.
        self.cur.execute(
            "INSERT INTO proxys_list (PROXY) VALUES (%s)", (str(proxy),)
        )
        self.conn.commit()

    def close(self):
        """Close the cursor and then the connection."""
        self.cur.close()
        self.conn.close()
-
if __name__ == "__main__":
    # The classes above read several of these module-level names directly
    # (headers, Proxy_list, ip, port, ip_type, each), so the names used
    # here must not be changed.
    headers = {}
    headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"
    # Applies to every socket created afterwards, so a dead proxy cannot
    # stall the scan for longer than 3 seconds per connection attempt.
    socket.setdefaulttimeout(3)

    Proxy_list = []
    one = GetProxy()
    for ip, port, ip_type in one.IPlist:
        # Constructing TestProxy runs the probe; the instance is not needed.
        TestProxy()

    three = save_to_mysql()
    try:
        for each in Proxy_list:
            three.save()
    finally:
        # Release the cursor and connection even if an insert fails.
        three.close()
    print("存入数据库完成")
复制代码
第一次发这种帖子，如果代码有什么可以完善的地方，欢迎告诉小弟，我会加以改进。
|
评分
-
查看全部评分
|