|
楼主 |
发表于 2016-11-27 21:59:17
|
显示全部楼层
本帖最后由 jerryxjr1220 于 2016-11-28 20:07 编辑
提示:会用到的库有:numpy、sklearn、matplotlib
其实只是利用线性回归拟合了近1个月的数据,由于样本量比较少,预测结果多半是不准确的。
这里只是提供一个思路,对于大量样本的话,预测是有一定可信度的。
代码:
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 27 22:01:32 2016
@author: Administrator

Fit one straight line per TV show through four weeks of viewing figures,
plot the observations together with the fitted lines, and print a
one-week-ahead forecast for every show.
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model as ln

# One row per (week, show): [week, show, channel, value_a, value_b].
# NOTE(review): value_a / value_b look like rating (%) and audience
# share (%) -- confirm against the data source.
# Rows are listed newest week first.
viewer = [
    ['11/14-11/20', '蒙面唱将', '江苏卫视', 2.010, 6.826],
    ['11/14-11/20', '今夜百乐门', '东方卫视', 1.377, 3.934],
    ['11/14-11/20', '喜剧总动员', '浙江卫视', 1.987, 5.728],
    ['11/14-11/20', '快乐大本营', '湖南卫视', 1.299, 3.735],
    ['11/07-11/13', '蒙面唱将', '江苏卫视', 1.840, 6.294],
    ['11/07-11/13', '今夜百乐门', '东方卫视', 1.336, 4.012],
    ['11/07-11/13', '喜剧总动员', '浙江卫视', 2.049, 6.013],
    ['11/07-11/13', '快乐大本营', '湖南卫视', 1.606, 4.710],
    ['10/31-11/06', '蒙面唱将', '江苏卫视', 1.388, 5.334],
    ['10/31-11/06', '今夜百乐门', '东方卫视', 1.246, 3.759],
    ['10/31-11/06', '喜剧总动员', '浙江卫视', 1.972, 5.905],
    ['10/31-11/06', '快乐大本营', '湖南卫视', 1.489, 4.426],
    ['10/24-10/30', '蒙面唱将', '江苏卫视', 1.830, 6.271],
    ['10/24-10/30', '今夜百乐门', '东方卫视', 0.868, 2.573],
    ['10/24-10/30', '喜剧总动员', '浙江卫视', 1.860, 5.504],
    ['10/24-10/30', '快乐大本营', '湖南卫视', 1.464, 4.315],
]

# Append the product of the two numeric columns to every row; this product
# is the quantity the regression is fitted on.
for row in viewer:
    row.append(row[-1] * row[-2])

# Week indices in the same newest-first order as each show's rows (5 is the
# latest observed week, 2 the oldest).
X = np.array([5, 4, 3, 2]).reshape(-1, 1)
# Indices the fitted line is drawn over; 6 is the week being forecast.
px = np.array([6, 5, 4, 3, 2]).reshape(-1, 1)
# scikit-learn expects a 2-D (n_samples, n_features) array, not a scalar.
next_week = np.array([[6]])

# (show name, plot colour, legend label)
shows = [
    ('蒙面唱将', 'blue', 'mask'),
    ('今夜百乐门', 'red', 'tonight'),
    ('喜剧总动员', 'green', 'comedy'),
    ('快乐大本营', 'black', 'happy'),
]

for name, color, label in shows:
    rows = [e for e in viewer if e[1] == name]
    target = [e[-1] for e in rows]

    plt.scatter(X.ravel(), target, color=color)

    # Each show gets its own model, and that same model produces the
    # forecast for the show (the original reused the first show's model
    # for every forecast).
    reg = ln.LinearRegression()
    reg.fit(X, target)
    plt.plot(px, reg.predict(px), color=color, label=label)

    # Divide the predicted product by the show's mean value_b (sum / count)
    # to bring the forecast back to the scale of value_a.
    share_total = sum(e[-2] for e in rows)
    forecast = reg.predict(next_week)[0] / share_total * len(rows)
    print('预测《%s》将在11/21-11/28的收视率达到%.3f percent' % (name, forecast))

plt.legend(loc='upper left')
plt.xticks([0, 2, 3, 4, 5, 6],
           ['', '10/24\n-10/30', '10/31\n-11/06', '11/07\n-11/13',
            '11/14\n-11/20', '11/21\n-11/28'])
plt.show()
复制代码 |
|