In this blog post we will learn how to build a predictive model with ridge regression (a regularized form of linear regression) in machine learning, using the "US Housing" data.
# import libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
Assign Column Names
# Labels applied to the columns of the header-less data file when it is read.
column_names = [
    "CRIM",
    "ZN",
    "INDUS",
    "CHAS",
    "NOX",
    "RM",
    "AGE",
    "DIS",
    "RAD",
    "TAX",
    "PTRATIO",
    "B",
    "LSTAT",
    "MEDV",
]
Read US housing data
# Load the whitespace-delimited, header-less data file and label its columns.
datafile = "housing.data"
dataFrame = pd.read_csv(
    datafile,
    header=None,
    # sep=r"\s+" is the supported spelling of delim_whitespace=True,
    # which pandas deprecates from version 2.2 onwards.
    sep=r"\s+",
    names=column_names,
)

# Separate the target column (MEDV) from the remaining predictor columns.
prices = dataFrame["MEDV"]
features = dataFrame.drop("MEDV", axis=1)
# Mean of the MEDV column (prices is the MEDV series taken from dataFrame)
mean = prices.mean()
mean

# Median of the same column
median = np.median(prices)
median

# First few entries of the target series
prices.head()
Split the features and the target (price) into training and test sets
# Shuffle and split the data into training and testing subsets:
# 20% of the rows are held out for testing, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    prices,
    test_size=0.2,
    random_state=10,
)
Implement a linear regression model with ridge regression that predicts median house prices from the other variables
# initialize
from sklearn.linear_model import Ridge
from sklearn import metrics

## training the model
# Ridge(normalize=True) was removed in scikit-learn 1.2 and raises TypeError
# there. normalize=True divided every centred feature column by its L2 norm,
# i.e. by sqrt(n_samples) * std. Scaling each column by its std and multiplying
# alpha by the number of training samples therefore yields the same fit.
# NOTE: ridgeReg is now a Pipeline; fit / predict / score behave as before, but
# the fitted Ridge step itself is reached through ridgeReg[-1].
ridgeReg = make_pipeline(
    StandardScaler(with_mean=False),
    Ridge(alpha=0.05 * len(X_train)),
)
ridgeReg.fit(X_train, y_train)

# Predicted prices for the held-out test split
pred_X = ridgeReg.predict(X_test)
pred_X
Output:
array([29.85464657, 31.58991004, 30.39384671, 23.8339747 , 19.20002968, 16.52364348, 35.47082139, 14.99344304, 24.85825037, 36.27645 , 21.24937724, 30.72977103, 27.03687178, 32.94870149, 33.52586804, 39.60795326, 24.54263511, 22.51146391, 25.18108709, 22.42702071, 32.17258486, 17.76590695, 25.28566841, 25.02621386, 32.75486912, 20.49126046, 19.74149725, 17.13948061, 38.19820331, 0.89647704, 32.28355326, 31.34462247, 26.36649416, 23.9634044 , 20.02479017, 19.73378455, 4.37382689, 33.82345754, 26.56580249, 27.30744146, 33.71708924, 28.99342938, 18.04451886, 31.16760619, 18.04608315, 28.36386332, 19.33119703, 21.30694095, 37.07740862, 16.79227521, 24.25671423, 19.10989473, 23.9144611 , 34.10410138, 26.49793341, 33.7790573 , 21.07749309, 19.72107809, 18.40396221, 24.54608723, 20.21132159, 23.5480848 , 39.77432893, 41.63314535, 29.86399045, 17.1161445 , 23.95275281, 3.5447882 , 30.47208809, 29.69934889, 18.41409878, 27.22029943, 19.36106254, 25.1757635 , 25.01064422, 10.25740163, 38.08187479, 8.21777277, 18.55666167, 30.60934051, 22.96269918, 22.42861937, 20.51809974, 28.14932011, 30.54180549, 27.8891282 , 26.33386253, 31.63993603, 22.56578847, -4.46560683, 21.97766633, 19.89380594, 24.96153737, 23.72440162, 19.1832791 , 19.19404134, 27.09906737, 22.63972762, 26.10936452, 23.21721482, 23.98122563, 19.77329451])
# R^2 score of the fitted model on the held-out test split
test_score = ridgeReg.score(X_test, y_test)
test_score
# Predict on the training split. X_train / y_train are the training subsets
# produced by train_test_split; the names X_train1 / y_train1 are not defined
# in the visible code and would raise NameError.
predict_train = ridgeReg.predict(X_train)

# find the r2-score
from sklearn.metrics import r2_score


def performance_metric(y_train1, predict_train):
    """Return the R^2 score between true and predicted target values.

    Args:
        y_train1: Ground-truth target values.
        predict_train: Predicted target values, aligned with ``y_train1``.

    Returns:
        The value computed by ``sklearn.metrics.r2_score``.
    """
    return r2_score(y_train1, predict_train)


performance_metric(y_train, predict_train)
Thanks for reading the realcode4you blog. If you liked this post, please leave a comment in the section below. If you face any issue or need help with an assignment, you can send us your request for a quote directly and we will help you as soon as we can.
You can send your request for a quote directly to the following email address:
"realcode4you@gmail.com"
or
Submit your requirement details here:
Comments