|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 ~风介~ 于 2016-7-27 23:29 编辑
写入 Excel 数据前要保证:name、agre_num 和 comment_num 三个列表的长度一致,否则各列是分别写入的,同一行的作者、点赞数和评论数会错位。
欢迎大家指正
- # -*- coding: utf-8 -*-
- """
- Created on Sun Jul 24 12:36:25 2016
- @author: mozzielx
- """
- import re
- from urllib.error import URLError
- from urllib.request import Request,urlopen
- from openpyxl import Workbook
- from openpyxl.compat import range
def get_content(url, timeout=30):
    """Fetch *url* and return the response body decoded as text.

    A desktop-browser ``User-Agent`` header is sent with the request.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the script forever.

    Returns:
        The response body as ``str``. The charset announced in the response
        headers is used when present; otherwise UTF-8 is assumed.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid in the chosen charset.
    """
    req = Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36',
    )
    # The context manager closes the connection even if read/decode raises.
    with urlopen(req, timeout=timeout) as resp:
        charset = resp.headers.get_content_charset() or 'utf-8'
        return resp.read().decode(charset)
# Captures are bounded by the closing delimiter ([^"] / [^<]) instead of a
# greedy ".+", so a second `">` or `</i>` on the same line cannot be swallowed
# into the captured value.
_NAME_RE = re.compile(
    r'<a href="/users/[^"]+/" target="_blank" title="([^"]+)">')
_VOTE_RE = re.compile(
    r'<span class="stats-vote"><i class="number">([^<]+)</i> 好笑</span>')
_COMMENT_RE = re.compile(
    r'<a href="/article/[^"]+" data-share="/article/[^"]+" id="[^"]+" '
    r'class="qiushi_comments" target="_blank">\n'
    r'<i class="number">([^<]+)</i> 评论\n</a>')


def get_target(tar, sheet=None):
    """Extract author, vote count and comment count from *tar* into a sheet.

    Values are written as strings starting at row 2 (row 1 is left for the
    header): authors in column 1, vote counts in column 2 and comment counts
    in column 3.

    Args:
        tar: HTML text of a listing page.
        sheet: Object exposing ``cell(column=..., row=..., value=...)``.
            When omitted, the module-level ``ws`` worksheet is used.

    Returns:
        None. The sheet is modified in place.
    """
    if sheet is None:
        sheet = ws  # worksheet created by the script entry point

    name = _NAME_RE.findall(tar)
    agre_num = _VOTE_RE.findall(tar)
    comment_num = _COMMENT_RE.findall(tar)

    # Each column is filled independently, so unequal lengths mean the values
    # on one row may belong to different posts. Keep writing (best effort),
    # but make the problem visible instead of silently misaligning data.
    if not (len(name) == len(agre_num) == len(comment_num)):
        import warnings
        warnings.warn(
            "column lengths differ (name=%d, agre_num=%d, comment_num=%d); "
            "rows may be misaligned"
            % (len(name), len(agre_num), len(comment_num)))

    for col, values in enumerate((name, agre_num, comment_num), start=1):
        for row, value in enumerate(values, start=2):
            sheet.cell(column=col, row=row, value=value)
if __name__ == "__main__":
    # Scrape one listing page and store author / votes / comments in an
    # Excel workbook.
    url = 'http://www.qiushibaike.com/8hr/page/4/?s=4897779'
    wb = Workbook()  # create the Excel workbook
    ws = wb.active  # get_target() writes into this module-level worksheet
    ws['A1'] = '作者'
    ws['B1'] = '点赞数'
    ws['C1'] = '评论数'
    try:
        html = get_content(url)
    except URLError as err:
        # Exit with a readable message instead of a raw traceback when the
        # page cannot be downloaded; nothing is saved in that case.
        raise SystemExit("Failed to fetch %s: %s" % (url, err))
    get_target(html)
    wb.save("糗事百科段子数据.xlsx")
复制代码
|
评分
-
查看全部评分
|