|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
程序的功能是爬妹子图,可以手动设置想爬页数,系统内要有d盘哦。
搞了一天终于搞出来了 ~
#author: Lian2014051414
#date : 2018.4.16
import requests
from bs4 import BeautifulSoup
import os
def gethtml(url, code='utf-8'):
    """Fetch *url* and return the decoded page text, or None on failure.

    Parameters:
        url: the page URL to download.
        code: encoding applied to the response body (default 'utf-8').

    Returns:
        The page text on success, None when the request fails.
    """
    head = {"user-agent": "Mozilla/5.0"}
    try:
        # timeout keeps the crawler from hanging forever on a dead host.
        r = requests.get(url, headers=head, timeout=30)
        r.raise_for_status()
        r.encoding = code
        print('获取页面完成,请等待...')
        return r.text
    except requests.RequestException:
        # Narrowed from a bare except: only network/HTTP errors are
        # swallowed; programming errors and Ctrl-C still surface.
        print('获取页面失败!')
        return None
def getphotolist(lst, html):
    """Scan *html* for src="...jpg" image links and append them to *lst*.

    Ad-hoc parser: each hit is the text between the 5 characters after a
    'src=' marker (i.e. past the opening quote) and the end of the first
    '.jpg' found within 255 characters of that marker.
    """
    marker = 'src='
    suffix = '.jpg'
    pos = html.find(marker)
    while pos != -1:
        end = html.find(suffix, pos, pos + 255)
        if end == -1:
            # No nearby .jpg: resume scanning just past this marker.
            resume = pos + 5
        else:
            lst.append(html[pos + 5:end + 4])
            resume = end
        pos = html.find(marker, resume)
def savephoto(lst, fpath):
    """Download every image URL in *lst* into the directory *fpath*.

    Parameters:
        lst: list of image URLs; the last path segment is used as filename.
        fpath: destination directory, created (with parents) if missing.
    """
    # makedirs + exist_ok: the original os.mkdir crashed when the parent
    # directory was missing or when the directory already existed.
    os.makedirs(fpath, exist_ok=True)
    for url in lst:
        name = url.split('/')[-1]
        path = os.path.join(fpath, name)
        r = requests.get(url, headers={"user-agent": "Mozilla/5.0"},
                         timeout=30)
        # 'with' closes the file; the original's extra f.close() inside
        # the with-block was redundant.
        with open(path, 'wb') as f:
            f.write(r.content)
def main():
    """Prompt for a start/end page, then crawl each gallery page into
    d:/photo/<page>/ using gethtml, getphotolist and savephoto."""
    num1 = int(input('请输入你想从第几页开始~:'))
    num2 = int(input('请输入你想到第几页结束~:'))
    print('爬取开始~[手动滑稽]')
    # exist_ok avoids the FileExistsError that bare os.mkdir raised
    # whenever the script was run a second time.
    os.makedirs('d://photo', exist_ok=True)
    # +1: the prompt asks for an *inclusive* end page, but range()
    # excludes its stop value — the original skipped the last page.
    for i in range(num1, num2 + 1):
        root = 'd://photo//' + str(i) + '//'
        url = 'http://www.meizitu.com/a/' + str(i) + '.html'
        html = gethtml(url)
        if html is None:
            # gethtml returns None on failure; without this guard the
            # None was passed on and crashed the string search.
            print('第%d页爬取失败,跳过!' % i)
            continue
        lst = []
        getphotolist(lst, html)
        savephoto(lst, root)
        print('第%d页爬取完成!' % i)
# Script entry point: run the crawler only when executed directly, then
# wait for a key press so the console window stays open.
if __name__ == '__main__':
    main()
    print('全部爬取完成!')
    input()
|
|