|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
楼主写了个爬虫,想把爬出来的图片的进行分类(即总文件下有多个不同的子文件夹),但是我不知道怎样用代码把爬出来的图片进行分类。以及放总文件夹内- import re
- import os
- import os.path
- import urllib.request
- def open_ye(url): #打开
- try:
- req = urllib.request.Request(url)
- req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.108 Safari/537.36 2345Explorer/8.6.1.15524')
-
- html_a = urllib.request.urlopen(req)
- except:
- print('打开网页失败。。。。。。。。。')
- return html_a
-
-
- def other_out(url): #页面
- html_b=open_ye(url).read().decode('utf-8')
- #print(html)
- return html_b
- #other_out(url)
- def other_ast(): #主函数
- raw = int(input('输入页数:'))
- for page in range(raw):
- url = 'http://www.mmjpg.com/home/{0}'
- url= url.format(page)
- print(url)
- reg = re.compile(r'<a href="http://www.mmjpg.com/mm/(\d{4})" target="_blank">') #数字
- item =re.findall(reg,other_out(url))
- item= list(set(item)) #集合
-
- page = 1
- for each in item:
- while page<10:
- url_a = 'http://www.mmjpg.com/mm/{0}/{1}'.format(each,page)
- page+=1
-
- print(url_a)
- req = urllib.request.Request(url_a)
- req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.108 Safari/537.36 2345Explorer/8.6.1.15524')
- html_c = urllib.request.urlopen(req).read()
-
- html_c = html_c.decode('utf-8')
- #print(html_c)
- reg = re.compile(r'<img src="(.*?\.jpg)" alt="(.*?)" /></a></div>')
- item = re.findall(reg,html_c)
- print(item)
- k = item[0][-1] #名字
- print(k)
- z = item[0][0] #图片
- print(z)
- path = 'D:\PYPY,,\妹子图\%s.jpg' % (k)
- urllib.request.urlretrieve(z, path) #图片爬出来了
-
- #if k[:4]==k[:4]: #名字前4个相等
- #if not os.path.exists(k[:10]): #如果没有这个名字文件夹
-
- #os.mkdir(k[:10]) #则创一个
- #怎么把图片放进传的文件夹里去?
- #with open(os.mkdir(k[:10]),'w')as f:
- #f.write(path)
-
- #rllib.request.urlretrieve(z,
-
-
-
-
- if __name__=='__main__':
- other_ast()
-
-
-
-
-
复制代码 |
|