|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import urllib.request
- import re
- import os
- import time
def get(url):
    """Fetch *url* and return the raw response body as bytes.

    Sends a desktop-Chrome User-Agent header because the target site
    rejects the default urllib agent.

    :param url: absolute URL to download.
    :return: response body as ``bytes`` (callers decode, e.g. as GBK).
    :raises urllib.error.URLError: on network/HTTP failure.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent',
                   'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
    # Use a context manager so the connection is closed deterministically
    # (the original leaked the response object on every call).
    with urllib.request.urlopen(req) as response:
        return response.read()
# Scrape the listing page for per-picture page paths; the paths are later
# joined onto the site root by get_url().
def open_url(url):
    """Return the ``/gao...htm`` hrefs found on the listing page at *url*.

    The page is GBK-encoded. Hrefs of length <= 25 are filtered out —
    presumably short category/navigation links rather than photo pages
    (heuristic from the original author; TODO confirm against the site).
    """
    markup = get(url).decode('gbk')
    candidates = re.findall(r'href="(/gao[^"]*\.htm)"', markup)
    return [path for path in candidates if len(path) > 25]
# Turn each relative picture-page path into a full browsing URL for more_url().
def get_url(img_adds, base='https://www.4493.com'):
    """Join each relative path in *img_adds* onto *base*.

    :param img_adds: iterable of site-relative paths (e.g. ``/gao.../1.htm``).
    :param base: site root to prepend; defaults to the original hard-coded
        host so existing callers are unaffected.
    :return: list of absolute URLs, in input order.
    """
    new_urls = []
    for img_add in img_adds:
        full_url = base + img_add
        print('新的图片浏览地址--', full_url)
        new_urls.append(full_url)
    return new_urls
# Expand each gallery's first page into the URLs of all its pages.
def more_url(new_urls):
    """For every gallery URL in *new_urls*, return the URLs of its other pages.

    Each gallery page links to its siblings as ``<a href="N.htm">``; the
    current page's own number is substituted into the URL to build each
    sibling's address. Page ``1.htm`` (the page we already have) is skipped.

    :param new_urls: list of absolute gallery-page URLs ending in ``N.htm``.
    :return: list of absolute URLs for the remaining pages of every gallery.
    """
    new_url = []
    for url in new_urls:
        html = get(url)
        response = html.decode('gbk')
        last_pages = re.findall(r'<a href="(\d+?\.htm)"', response)
        # Pagination links usually appear both above and below the photos,
        # so dedupe while preserving order; the original appended duplicates.
        last_pages = list(dict.fromkeys(last_pages))
        # Membership test instead of the original bare .remove('1.htm'),
        # which raised ValueError whenever the list was non-empty but did
        # not contain '1.htm'.
        if '1.htm' in last_pages:
            last_pages.remove('1.htm')
        print(last_pages)
        for page in last_pages:
            # Replace the trailing "N.htm" of the gallery URL with this page.
            ur = re.sub(r'(\d+?\.htm)', page, url)
            new_url.append(ur)
            time.sleep(0.1)  # be polite to the server
    print('每一位美女的浏览地址———', new_url)
    return new_url
# Pull the actual .jpg download address out of each gallery page; the
# results are handed to down_img() for saving.
def get_new_url(new_url):
    """Return the ``.jpg`` source URLs found on the pages in *new_url*.

    Pages are GBK-encoded; pages with no ``<p><img src="...jpg">`` match
    are skipped silently.
    """
    sources = []
    for page_url in new_url:
        markup = get(page_url).decode('gbk')
        match = re.search(r'<p><img src="([^"]+\.jpg)"', markup)
        if match is None:
            continue  # no image on this page — skip it
        sources.append(match.group(1))
        print('图片列表-', match)
        time.sleep(0.2)  # throttle requests between pages
    return sources
def down_img(la, img_src):
    """Download every image URL in *img_src* into the current directory.

    The file name is the last path segment of each URL. ``la`` is accepted
    for interface compatibility with main() but is not used in the body.
    """
    for src in img_src:
        print('最后的下载地址', src)
        filename = src.split('/')[-1]
        print(filename)
        with open(filename, 'wb') as out:
            out.write(get(src))
        time.sleep(0.1)  # throttle between downloads
def main(la='xiaojiayu'):
    """Scrape the gallery index and download its images into directory *la*.

    Creates *la* (if needed), chdirs into it, then walks:
    listing page -> gallery pages -> all pages per gallery -> jpg URLs
    -> saved files.

    :param la: name of the output directory (also passed to down_img).
    """
    # exist_ok avoids the FileExistsError the original raised on every rerun.
    os.makedirs(la, exist_ok=True)
    os.chdir(la)
    url = 'https://www.4493.com/gaoqingmeinv/index-'
    num = 0
    # range(1) keeps the original single-page behavior; widen the range to
    # crawl more index pages (num accumulates the page offset).
    for i in range(1):
        num += i
        start_url = url + str(num) + '.htm'  # index page to start from
        img_adds = open_url(start_url)       # relative picture-page paths
        new_urls = get_url(img_adds)         # absolute gallery URLs
        new_url = more_url(new_urls)         # every page of every gallery
        img_src = get_new_url(new_url)       # direct .jpg addresses
        down_img(la, img_src)                # save to disk
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
复制代码 |
-
评分
-
查看全部评分
|