# 读取文件 (Read the file)

import re
import numpy
from sklearn import linear_model
from matplotlib import pyplot as plt

# Read every raw line of the data file. The file is opened in binary mode, so
# each element of all_data is a bytes object. The context manager guarantees
# the handle is closed even if reading fails.
with open('data.txt', 'rb') as fn:
    all_data = fn.readlines()
# 数据写入 (Load the data into arrays)
# Earlier hand-rolled parser, disabled in the original and kept for reference:
# x = []
# y = []
# for single_data in all_data:
#     print(single_data)
#     tmp_data = re.split('\t|\n', single_data)
#     print(tmp_data)
#     x.append(float(tmp_data[0]))
#     print(tmp_data)
#     y.append(float(tmp_data[1]))
#     print(tmp_data)
# x = numpy.array(x).reshape([100, 1])
# y = numpy.array(y).reshape([100, 1])

import pandas as pd

# NOTE(review): the statement that created `df` is missing from this source;
# only a bare `import` and the printed preview survive. It is reconstructed
# here on the assumption that data.txt holds two tab-separated numeric columns
# (the disabled parser above splits on '\t') -- confirm against the real file.
#
# header=None is deliberate: the recorded preview below shows the first data
# row (28192.0, 68980.0) being used as column labels, which means one sample
# was silently dropped from the fit.
df = pd.read_csv('data.txt', sep='\t', header=None)

# Preview recorded in the original notebook run (first row taken as header):
#    28192.0  68980.0
# 0  39275.0  78875.0
# 1  34512.0  81400.0
# 2  24430.0  80624.0
# 3  23811.0  65562.0
# 4  34856.0  94603.0

# scikit-learn expects 2-D inputs, so turn each column into an (n, 1) array.
x = df.iloc[:, 0].values.reshape(-1, 1)
y = df.iloc[:, 1].values.reshape(-1, 1)
# 数据分析展示 (Data analysis and visualisation)
# Scatter plot of the raw (x, y) samples to eyeball the relationship before fitting.
plt.scatter(x,y)

# 数据建模 (Data modelling)
# Linear regression: fit y = a * x + b by ordinary least squares.
model = linear_model.LinearRegression()
model.fit(x, y)
# Estimator repr recorded in the original notebook run:
#   LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

# Collect the evaluation results.
model_coef = model.coef_
model_intercept = model.intercept_
# score() returns the coefficient of determination (R^2) on the given data,
# not a correlation coefficient.
r2 = model.score(x, y)

print(r2)
# Fitted slope a.
print(model_coef)
# Fitted intercept b.
print(model_intercept)
# Values recorded in the original notebook run:
#   0.7873031585790161
#   [[2.09300087]]
#   [13260.03313486]

# Sales forecast for a single new input value.
import numpy as np

new_x = 84610
# predict() needs a 2-D array of shape (n_samples, n_features).
new_x = np.array(new_x).reshape(1, -1)
pre_y = model.predict(new_x)
print(pre_y)
# Value recorded in the original notebook run:
#   [[190348.83636364]]

# Evaluate the fitted line at the training inputs so it can be drawn.
x_train = x
y_pre_train = model.predict(x_train)

# Plot the samples together with the fitted line.
plt.scatter(x, y)
plt.plot(x_train, y_pre_train, color="red", linewidth=2)