# sklearn实践_普通线性回归 (scikit-learn practice: ordinary linear regression)

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

# Load the advertising dataset. The path is hard-coded to the original
# author's machine and must be adjusted to run elsewhere.
data = pd.read_csv(r"C:\Users\Oscar\Downloads\Advertising.csv")

# The three predictor columns and the Sales target column.
x = data[["TV", "Radio", "Newspaper"]]
y = data["Sales"]

# Overlay every predictor against Sales on one scatter plot.
# The keyword has to be lowercase ``label``: matplotlib has no ``Label``
# property, so the capitalised spelling is rejected and the legend would
# have nothing to show.
plt.plot(data["TV"], y, "ro", label="TV")
plt.plot(data["Radio"], y, "g^", label="Radio")
plt.plot(data["Newspaper"], y, "bo", label="Newspaper")
plt.legend(loc="lower right")
plt.grid()

# ``plt.show`` without parentheses only references the function; it has to
# be called for the figure to be displayed.
plt.show()

# Draw each predictor against Sales in its own panel of a 3-row, 1-column
# grid (``subplot(311)`` is shorthand for ``subplot(3, 1, 1)``).
plt.figure(figsize=(10, 10))

# The keyword is lowercase ``label``; matplotlib rejects ``Label``.
plt.subplot(311)
plt.plot(data["TV"], y, "ro", label="TV")
plt.title("TV")

plt.subplot(312)
plt.plot(data["Newspaper"], y, "g^", label="Newspaper")
plt.title("Newspaper")

plt.subplot(313)
plt.plot(data["Radio"], y, "bo", label="Radio")
plt.title("Radio")

# Modelling
feature_cols = ["TV", "Radio", "Newspaper"]
X = data[feature_cols]
y = data["Sales"]

# ``sklearn.cross_validation`` was removed from scikit-learn; the function
# now lives in ``sklearn.model_selection``. Fall back to the old location
# only for very old installations that predate the new module.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Split with train_test_split's default proportions. No ``random_state`` is
# passed, so the partition (and every number printed below) varies per run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

from sklearn.linear_model import LinearRegression

# Fit ordinary least squares on the training portion.
model = LinearRegression()
model.fit(X_train, y_train)

# Show the estimator, then the fitted coefficients (one per entry of
# ``feature_cols``, in that order) and the intercept.
print(model)
print(model.coef_)
print(model.intercept_)

# Predictions for the held-out rows; used by the evaluation and plot below.
y_rep = model.predict(X_test)

# Evaluation
from sklearn import metrics
import numpy as np

# Root-mean-squared error between the predictions and the held-out targets.
# ``y_test.values`` drops the pandas index so the subtraction is purely
# positional, matching the element-by-element pairing of ``y_rep``.
# The sum of squared errors is computed in one vectorized expression
# instead of an index loop; ``sum_mean`` keeps its original meaning.
sum_mean = np.sum((y_rep - y_test.values) ** 2)
print("RMSE:", np.sqrt(sum_mean / len(y_rep)))

# Plotting
# Compare predicted and actual values for the held-out rows, using the
# row position within the test set as the x coordinate.
plt.figure()
sample_positions = range(len(y_rep))

# The keyword is lowercase ``label``; matplotlib rejects ``Label`` and the
# legend below needs these labels.
plt.plot(sample_positions, y_rep, "b", label="pre")
plt.plot(sample_positions, y_test, "r", label="test")
plt.legend(loc="upper right")
plt.xlabel("the number of sales")
plt.ylabel("values of sales")

# Without this call the figure is never displayed when the file is run as
# a plain script; it also shows any other figure that is still open.
plt.show()
# 秒客网

# sklearn实践_普通线性回归

# 相关文章