|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
之前做课设时,训练网络需要大量的图片数据,于是自己写了一段下载百度图片搜索结果的代码。代码没有使用多线程,有兴趣的可以在我的代码基础上改成多线程版本。
PS:百度图库的图片质量真的不怎么样……
效果:
源代码:
- import os
- import sys
- import itertools
- import urllib
- from urllib.parse import quote
- import requests
- import re
# Lookup tables used to decode an obfuscated objURL.

# Multi-character tokens and the punctuation each one stands for.
str_table = {
    '_z2C$q': ':',
    '_z&e3B': '.',
    'AzdH3F': '/',
}

# Single-character substitution table in the form str.translate expects
# (code point -> code point). The two strings are aligned by position: the
# n-th character of the first decodes to the n-th character of the second,
# i.e. 'w' -> 'a', 'k' -> 'b', ..., 'o' -> 'w', '8' -> '1', ..., 'a' -> '0'.
char_table = str.maketrans(
    'wkv1ju2it3hs4g5rq6fp7eo8dn9cm0bla',
    'abcdefghijklmnopqrstuvw1234567890',
)
# Build the result-page URLs for a search keyword.
def Build_Urls(keyword):
    """Return an endless generator of search-result URLs for ``keyword``.

    ``keyword`` is percent-encoded and substituted into both the
    ``queryWord`` and ``word`` query parameters. Successive URLs advance the
    ``pn`` parameter by 60 (0, 60, 120, ...), the same value as the fixed
    ``rn=60`` parameter.

    The generator never terminates on its own; the caller decides when to
    stop iterating.
    """
    keyword = quote(keyword)
    # The original query string read "istype=2nc=1"; the separator between
    # the ``istype`` and ``nc`` parameters was missing.
    url = (
        r"http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj"
        r"&ct=201326592&fp=result&queryWord={word}&cl=2&lm=-1&ie=utf-8"
        r"&oe=utf-8&st=-1&ic=0&word={word}&face=0&istype=2&nc=1"
        r"&pn={pn}&rn=60"
    )
    urls = (url.format(word=keyword, pn=i) for i in itertools.count(start=0, step=60))
    return urls
# Decode an obfuscated image URL.
def decode_objURL(url):
    """Return the plain URL for an encoded ``objURL`` value.

    Every multi-character token listed in ``str_table`` is substituted
    first; the result is then mapped character by character through
    ``char_table``.
    """
    # Pass 1: multi-character tokens.
    for token in str_table:
        url = url.replace(token, str_table[token])
    # Pass 2: single-character substitution.
    decoded = url.translate(char_table)
    return decoded
# Extract the image URLs from a result page.
def Get_ImgUrl(html):
    """Return the decoded image URLs found in ``html``.

    Each ``"objURL":"..."`` value in the text is captured (non-greedy, up to
    the next double quote) and passed through ``decode_objURL``. The list
    keeps the order in which the values appear and is empty when there are
    no matches.
    """
    return [
        decode_objURL(found.group(1))
        for found in re.finditer(r'"objURL":"(.*?)"', html)
    ]
# Download one image into the given directory.
def downImage(imgUrl, dirpath, imgName):
    """Download ``imgUrl`` and save it as ``imgName`` inside ``dirpath``.

    Args:
        imgUrl: URL of the image to fetch.
        dirpath: Directory the file is written to.
        imgName: File name to use inside ``dirpath``.

    Returns:
        True when the image was fetched and written to disk. False when the
        request failed (any ``requests.RequestException``, including an
        HTTP error status); in that case the URL is printed and no file is
        written.
    """
    filename = os.path.join(dirpath, imgName)
    try:
        res = requests.get(imgUrl, timeout=15)
        # Treat 4xx/5xx responses as failures so that an error page is not
        # saved to disk and counted as a downloaded image.
        res.raise_for_status()
    except requests.RequestException:
        print("[异常:]", imgUrl)
        return False
    with open(filename, 'wb') as f:
        f.write(res.content)
    return True
def main():
    """Prompt for a keyword and a count, then download that many images.

    Images are saved into a ``pictures`` directory (created if missing)
    relative to the current working directory, named
    ``<keyword><index>.jpg`` with ``index`` starting at 0. Downloading stops
    when the requested number of images has been saved, when a result page
    yields no image URLs, or when a result page cannot be fetched.
    """
    print("*" * 55)
    print('代码功能:<自动下载百度图片搜索结果>')
    print('下载结果保存在代码所在目录下的pictures文件夹中')
    print('目前只支持单个关键词搜索,输入后按回车即可自动下载')
    print("*" * 55)
    keyword = input("请输入你要下载的图片关键词:\n")
    try:
        img_num = int(input('请输入你要下载的图片数量:\n'))
    except ValueError:
        # Non-numeric input falls back to 0, i.e. nothing is downloaded.
        img_num = 0

    # Create the output directory if it does not exist yet.
    dirpath = './pictures'
    os.makedirs(dirpath, exist_ok=True)

    index = 0  # number of images saved so far; also the next file suffix
    for url in Build_Urls(keyword):
        if index >= img_num:
            break
        print("[GET_URL]:", url)
        try:
            page = requests.get(url, timeout=10)
        except requests.RequestException:
            # Without the result page there is nothing more to download.
            print("[异常:]", url)
            break
        html = page.content.decode('utf-8', 'replace')
        imgUrls = Get_ImgUrl(html)
        # A page without image URLs ends the run.
        if not imgUrls:
            break
        for imgUrl in imgUrls:
            pic_name = "%s%s.jpg" % (keyword, index)
            # Failed downloads do not consume an index, so the saved files
            # stay consecutively numbered.
            if downImage(imgUrl, dirpath, pic_name):
                index += 1
                print("已下载%s张" % index)
                if index >= img_num:
                    break


if __name__ == '__main__':
    main()
-
复制代码 |
|