|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import os
- import urllib.request
def get_html(url):
    """Fetch ``url`` and return the raw, undecoded response body.

    A desktop Firefox ``User-Agent`` header is attached to the request.

    Args:
        url: Address to fetch; anything ``urllib.request.Request`` accepts.

    Returns:
        bytes: The complete response body.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    req = urllib.request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0',
    )
    # The context manager closes the response even when read() raises, so
    # repeated downloads do not leak open connections.
    with urllib.request.urlopen(req) as response:
        return response.read()
-
-
def get_img_page_list(url):
    """Collect the ``.html`` link targets found on the index page at ``url``.

    The page is fetched with ``get_html``, decoded as GBK and scanned for
    anchors that start with ``<a target='_blank' href=``.

    Args:
        url: Address of the index page to scan.

    Returns:
        list[str]: Link targets in document order, each ending in ``.html``.
        Repeated links are kept as they appear.
    """
    marker = "<a target='_blank' href="
    suffix = '.html'
    # +1 also skips the quote character that opens the attribute value.
    value_offset = len(marker) + 1

    # errors='replace': one stray byte outside GBK must not abort the scan.
    html = get_html(url).decode('gbk', errors='replace')

    img_page_list = []
    start = html.find(marker)
    while start != -1:
        # Look only a short distance ahead so that an anchor without an
        # ``.html`` target cannot swallow the text up to a later link.
        end = html.find(suffix, start, start + 255)
        if end != -1:
            img_page_list.append(html[start + value_offset:end + len(suffix)])
            resume = end
        else:
            resume = start + value_offset
        start = html.find(marker, resume)
    print('找到%d组网页'%len(img_page_list))
    return img_page_list
-
def get_img_list(img_addrs):
    """Extract the ``.jpg`` image addresses from one gallery page.

    The page is fetched with ``get_html`` and decoded as GBK. Only the text
    between the first occurrence of ``picture`` and the next ``</div>`` is
    scanned for ``src=`` attributes.

    Args:
        img_addrs: Address of the gallery page.

    Returns:
        list[str]: The text following each ``src=`` (minus the opening
        quote) up to and including the next ``.jpg``, in document order.
        Empty when the ``picture`` marker is not present.
    """
    # errors='replace': one stray byte outside GBK must not abort the scan.
    html = get_html(img_addrs).decode('gbk', errors='replace')

    region_start = html.find('picture')
    if region_start == -1:
        # Handle the missing marker explicitly instead of letting -1 leak
        # into the following searches as a "last character" index.
        return []
    region_end = html.find('</div>', region_start)
    if region_end == -1:
        # No closing tag: scan to the end of the document rather than
        # passing -1 as an end bound (which drops the last character).
        region_end = len(html)

    img_list = []
    pos = html.find('src=', region_start, region_end)
    while pos != -1:
        jpg_at = html.find('.jpg', pos, region_end)
        if jpg_at != -1:
            # +5 skips ``src=`` and the opening quote; +4 keeps ``.jpg``.
            img_list.append(html[pos + 5:jpg_at + 4])
            resume = jpg_at
        else:
            resume = pos + 5
        pos = html.find('src=', resume, region_end)
    return img_list
-
-
def save_img(img_list):
    """Download every address in ``img_list`` into the current directory.

    Each file is named after the last four ``/``-separated parts of its
    address joined with ``-``; an address with fewer parts uses all of them.

    Args:
        img_list: Iterable of image addresses (str).

    Returns:
        None. Files are written as a side effect and one ``save:`` line is
        printed per image.
    """
    for each in img_list:
        parts = each.split('/')
        # Joining a slice gives the same name as indexing [-4]..[-1] for
        # long addresses, but does not raise IndexError for short ones.
        name = '-'.join(parts[-4:])
        data = get_html(each)
        with open(name, 'wb') as f:
            f.write(data)
        print('save:', name)
def download_mm(dirname='XXOO', wantpages=1,
                url='http://www.meizitu.com/a/more_1.html'):
    """Download the images of the first ``wantpages`` gallery pages.

    The process working directory is changed to ``dirname`` (created when
    missing), and the images are written there by ``save_img``.

    Args:
        dirname: Target directory for the downloaded files.
        wantpages: Number of gallery links, taken from the start of the
            index page, to process. Values larger than the number of links
            found are capped; zero or negative values download nothing.
        url: Index page that lists the gallery links.

    Returns:
        None.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dirname, exist_ok=True)
    os.chdir(dirname)

    img_page_list = get_img_page_list(url)
    print('将下载%d页'%(wantpages))

    # Slicing caps the request at the number of links actually found;
    # max() keeps a negative count from selecting pages from the end.
    for page_url in img_page_list[:max(wantpages, 0)]:
        save_img(get_img_list(page_url))
-
-
if __name__ == '__main__':
    # Script entry point: ask how many gallery pages to fetch, then run.
    page_count = int(input('下载页数:'))
    download_mm(wantpages=page_count)
复制代码 |
|