|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 Pear1994 于 2018-3-18 18:57 编辑
代码有点烂,凑合着看吧。
- import requests
- import os
- from lxml import etree
- import re
# HTTP headers passed to every requests.get call in this script: a Referer
# pointing at the site's own gallery path and a desktop-Chrome User-Agent.
headers = {
    'Referer': 'http://www.mmjpg.com/mm/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
}

# Directory that receives the downloaded images: an "img" folder inside the
# working directory at import time. os.path.join supplies the platform's own
# separator; a hard-coded '\img' is Windows-only and also contains the
# invalid escape sequence '\i'.
path = os.path.join(os.getcwd(), 'img')
def get_index():
    """Return the range of album numbers to crawl.

    Fetches the site's home page, takes the first ``<li><a>`` link on it and
    reads the integer that ends its ``href`` (presumably the newest album's
    number -- confirm against the live page layout).

    Returns:
        ``range(1, n + 1)`` where ``n`` is that integer.

    Raises:
        requests.RequestException: on a network failure, a timeout or an
            HTTP error status.
        IndexError: if the page has no ``<li><a>`` link, or the link has no
            ``href``.
        ValueError: if the last path segment of the ``href`` is not an
            integer.
    """
    # Without a timeout a stalled connection would block the script forever.
    r = requests.get('http://www.mmjpg.com', headers=headers, timeout=10)
    # Fail on an error status instead of trying to parse an error page.
    r.raise_for_status()
    r.encoding = 'utf-8'
    first_link = etree.HTML(r.text).xpath('//li/a')[0]
    href = first_link.xpath('@href')[0]
    # Drop a trailing slash so ".../mm/123/" parses the same as ".../mm/123".
    newest = int(href.rstrip('/').split('/')[-1])
    return range(1, newest + 1)
def get_page(url):
    """Return the page count shown in an album's pager.

    Args:
        url: Address of the album page to inspect.

    Returns:
        The text of the second-to-last link inside ``<div id="page">``, as a
        string; the caller converts it with ``int()``. (Presumably the last
        link is a "next" control and the one before it is the highest page
        number -- confirm against the live page layout.)

    Raises:
        requests.RequestException: on a network failure, a timeout or an
            HTTP error status.
        IndexError: if the pager holds fewer than two links.
    """
    # Without a timeout a stalled connection would block the script forever.
    r = requests.get(url, headers=headers, timeout=10)
    # Fail on an error status instead of trying to parse an error page.
    r.raise_for_status()
    r.encoding = 'utf-8'
    pager_labels = etree.HTML(r.text).xpath('//div[@id="page"]/a/text()')
    return pager_labels[-2]
def get_img(url):
    """Download the image displayed on a single album page.

    The first ``<img>`` under ``<div id="content"><a>`` supplies the image
    address (``src``) and the name to save it under (``alt``); both are
    handed to ``sav_img``.

    Extraction and saving are best-effort: any failure there is reported
    together with the page URL and then swallowed, so one bad page does not
    stop the crawl. Errors raised while fetching the page itself (including
    a timeout) are not caught here and propagate to the caller.

    Args:
        url: Address of the album page to process.
    """
    # Without a timeout a stalled connection would block the script forever.
    r = requests.get(url, headers=headers, timeout=10)
    r.encoding = 'utf-8'
    selector = etree.HTML(r.text)
    try:
        content = selector.xpath('//div[@id="content"]/a/img')[0]
        img_url = content.xpath('@src')[0]
        title = content.xpath('@alt')[0]
        sav_img(title, img_url)
    except Exception as e:
        # Deliberately broad: keep crawling, but say which page failed and why.
        print('Erro!!!', url, e)
def sav_img(name, url):
    """Download *url* and store it as ``<name>.jpg`` in the image directory.

    The file is written to the module-level ``path`` directory explicitly, so
    the duplicate check and the write always refer to the same location
    regardless of the current working directory.

    Args:
        name: Base file name, without extension.
        url: Address of the image to download.

    Raises:
        requests.RequestException: on a network failure, a timeout or an
            HTTP error status.
        OSError: if the file cannot be written.
    """
    name = name + '.jpg'
    target = os.path.join(path, name)
    if os.path.exists(target):
        print('重复文件')
        return

    # Without a timeout a stalled connection would block the script forever.
    r = requests.get(url, headers=headers, timeout=10)
    # Do not save the body of an error response under a .jpg name.
    r.raise_for_status()
    # Make sure the destination exists even if the caller has not created it.
    os.makedirs(path, exist_ok=True)
    with open(target, 'wb') as f:
        f.write(r.content)
    print(name)
def main(index):
    """Download every page of the album numbered *index*.

    Looks up the album's page count with ``get_page``, makes sure the image
    directory exists, switches the working directory to it, and then calls
    ``get_img`` for pages ``1`` through the page count.

    Args:
        index: Album number; it is appended to the site's ``/mm/`` URL.

    Raises:
        Whatever ``get_page`` or ``get_img`` raise; nothing is caught here.
    """
    main_url = 'http://www.mmjpg.com/mm/%s' % index
    page = get_page(main_url)

    # Create the directory when missing and ALWAYS enter it. Entering it only
    # when it already existed left the first run saving its first album into
    # the launch directory instead of the image directory.
    os.makedirs(path, exist_ok=True)
    os.chdir(path)

    for i in range(1, int(page) + 1):
        get_img('%s/%s' % (main_url, i))
if __name__ == '__main__':
    # Script entry point: process the album numbers returned by get_index()
    # in descending order, one album per main() call.
    for album in reversed(get_index()):
        main(album)
复制代码 |
-
|