A first complete data-analysis case in Python — sales forecasting

Data overview: the first column is promotion expenses; the second column is the sales volume of goods.

import re

import numpy
from matplotlib import pyplot as plt
from sklearn import linear_model

# Read every line of the raw data file. The context manager closes the file
# even if reading fails. Binary mode is kept from the original, so each
# element of `all_data` is a `bytes` line.
with open('data.txt', 'rb') as fn:
    all_data = fn.readlines()
<> Data write
# Legacy manual parser, kept commented out as in the original article.
# x=[]
# y=[]
# for single_data in all_data:
#     print(single_data)
#     tmp_data = re.split('\t|\n', single_data)
#     print(tmp_data)
#     x.append(float(tmp_data[0]))
#     print(tmp_data)
#     y.append(float(tmp_data[1]))
#     print(tmp_data)
# x = numpy.array(x).reshape([100, 1])
# y = numpy.array(y).reshape([100, 1])

# NOTE(review): the statements that created `df` were lost when the article
# was extracted; only a dangling `import` survived. The pandas import and the
# read call below are a reconstruction (the file is tab-separated according to
# the legacy parser above) -- confirm against the original post.
import pandas as pd

df = pd.read_csv('data.txt', sep='\t')

# Preview of `df` as shown in the article:
#    28192.0  68980.0
# 0  39275.0  78875.0
# 1  34512.0  81400.0
# 2  24430.0  80624.0
# 3  23811.0  65562.0
# 4  34856.0  94603.0
# NOTE(review): in this preview the first data row appears as the column
# labels, i.e. it is consumed as a header. Passing `header=None` would keep
# that row as data -- verify which behaviour is intended.

# Column 0 is the feature, column 1 the target; both are reshaped to
# (n_samples, 1) column vectors.
x = df.iloc[:, 0].values.reshape(-1, 1)
y = df.iloc[:, 1].values.reshape(-1, 1)
<> Data analysis display
# Scatter plot of the raw (x, y) observations.
plt.scatter(x, y)

<> Data modeling
# Linear regression analysis: fit y = a * x + b by ordinary least squares.
model = linear_model.LinearRegression()
model.fit(x, y)
# Output shown in the article:
# LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

# Get the evaluation results
model_coef = model.coef_            # fitted slope a
model_intercept = model.intercept_  # fitted intercept b
# `score` returns R^2, the coefficient of determination (not the correlation
# coefficient).
r2 = model.score(x, y)
print(r2)
print(model_coef)
print(model_intercept)
# Output shown in the article:
# 0.7873031585790161
# [[2.09300087]]
# [13260.03313486]

# Sales forecast
import numpy as np

new_x = 84610
# `predict` expects a 2-D array of shape (n_samples, n_features).
new_x = np.array(new_x).reshape(1, -1)
pre_y = model.predict(new_x)
print(pre_y)
# Output shown in the article:
# [[190348.83636364]]

# Fitting line parameters: predictions on the training inputs.
x_train = x
y_pre_train = model.predict(x_train)

# Data analysis display
plt.scatter(x, y)  # Scatter diagram
plt.plot(x_train, y_pre_train, color="red", linewidth=2)  # Fit line

Technology
Daily Recommendation