|
10鱼币
- import urllib.request as q
- import os,re
- import urllib.error
def readurl(url):
    """Fetch *url* and return the raw response body.

    The request is sent with a browser-like ``User-Agent`` header.

    Args:
        url: URL to fetch.

    Returns:
        The response body as ``bytes`` on success, or the integer HTTP
        status code when the server answers with an HTTP error.

    Raises:
        urllib.error.URLError: The request failed without producing an
            HTTP status (such an error carries a ``reason`` but no
            ``code``, so there is no status code to return).
    """
    head = {
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0)',
    }
    req = q.Request(url, headers=head)
    try:
        # Open the prepared Request (not the bare URL) so the User-Agent
        # header is actually sent; the ``with`` closes the response.
        with q.urlopen(req) as res:
            return res.read()
    except urllib.error.HTTPError as cwu:
        return cwu.code
def _fetch_bytes(url):
    """Return the body of *url* as bytes, or None after printing the failure."""
    try:
        data = readurl(url)
    except urllib.error.URLError as err:
        print('网页错误无法下载%s' % err.reason)
        return None
    if isinstance(data, int):
        # readurl reports an HTTP error by returning its status code.
        print('网页错误无法下载%d' % data)
        return None
    return data


def xiazai(first=1, last=444, root='ugirl'):
    """Download the magazine images of every model page in a number range.

    For each model number the listing page is fetched, the model's name is
    read from the page's ``keywords`` meta tag (falling back to a fixed
    default folder name when the tag does not match), and every image URL
    found on the page is downloaded into ``<root>/<name>/``. A page or
    image that cannot be fetched is reported and skipped.

    Args:
        first: First model number to download (inclusive).
        last: Last model number to download (inclusive).
        root: Directory, relative to the current working directory unless
            absolute, under which the per-model folders are created.
    """
    name_re = re.compile(r'<meta name="keywords" content="[\u4e00-\u9fa5]{2,5}、ugirls ([\u4e00-\u9fa5]{2,3})" />')
    img_re = re.compile(r'http://img.ugirls.tv/uploads/magazine/content/[^"]+_magazine_web_m\.jpg')
    # Number of leading characters of the image file name kept as the local
    # file name (length of a sample identifier).
    id_len = len('05c9b11a01aebf2a2af18a030ffea553')
    # Length of the suffix that is cut off the matched URL before '.jpg'
    # is appended to form the URL that is actually downloaded.
    suffix_len = len('_magazine_web_m.jpg')

    for j in range(first, last + 1):
        # Page numbers are zero-padded to three digits (001 ... 444).
        url = r'http://www.ugirls.com/Content/List/Magazine-' + str(j).zfill(3) + '.html'
        page = _fetch_bytes(url)
        if page is None:
            continue
        html = page.decode('utf-8')

        # Extract the model's name.
        match = name_re.search(html)
        uname = match.group(1) if match else '大集合'
        print('第%d模特是%s' % (j, uname))

        # Write through explicit paths instead of changing the working
        # directory, so a failure cannot leave the process somewhere else.
        target_dir = os.path.join(root, uname)
        os.makedirs(target_dir, exist_ok=True)

        for each in img_re.findall(html):
            img_url = each[:-suffix_len] + '.jpg'
            # Fetch before opening the file so a failed download neither
            # crashes the write nor leaves an empty file behind.
            data = _fetch_bytes(img_url)
            if data is None:
                continue
            filename = each.split('/')[-1][:id_len] + '.jpg'
            with open(os.path.join(target_dir, filename), 'wb') as f:
                f.write(data)
            print(img_url, end='下载完成\n')
        print('******************************************************************')
-
# Run the download only when this file is executed as a script, so that
# importing it does not start fetching pages.
if __name__ == '__main__':
    xiazai()
复制代码
寂寞的兄弟有福利了
|
-
|