Introduction to Linear Regression
Code Linear Regression from Scratch
In the homework, you learned how to implement linear regression using scikit-learn. Here, we will see how to implement linear regression from scratch using NumPy and PyTorch.
# Baseline: ordinary least squares via scikit-learn, to compare against
# the from-scratch implementations below.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the Boston housing data; 'medv' is the regression target.
data = pd.read_csv('../datasets/Boston.csv')
X, y = data.drop('medv', axis=1), data['medv']

# Hold out 30% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit the reference model and report its test-set MSE.
lin_reg = LinearRegression().fit(X_train, y_train)
y_pred = lin_reg.predict(X_test)
print(mean_squared_error(y_test, y_pred))
When you print the mean squared error, you will get 21.538929180643528. Our goal is to implement the gradient descent algorithm ourselves to find the best parameters for the linear regression model.
from sklearn.preprocessing import StandardScaler
import numpy as np
class linear_regression:
    """Linear regression fitted with full-batch gradient descent.

    Features are standardized internally (zero mean, unit variance) so a
    single learning rate works for every coefficient.
    """

    def __init__(self, N, n_samples=None):
        # N = number of features
        # n_samples is accepted (and ignored) for backward compatibility
        # with callers that also pass X.shape[0].
        self.intercept_ = 0
        self.coef_ = np.zeros((N,), dtype=float)
        self.scaler = StandardScaler()

    def forward(self, X):
        # Linear model: y_hat = X @ coef_ + intercept_
        return np.dot(X, self.coef_) + self.intercept_

    def fit(self, X, y, lr=0.001, epochs=20000):
        """Fit by gradient descent on the MSE cost.

        X: array-like of shape (n_samples, n_features)
        y: array-like of shape (n_samples,)
        Returns self, so calls can be chained.
        """
        self.scaler.fit(X)
        X = self.scaler.transform(X)
        y = np.asarray(y, dtype=float)
        n_samples, n_features = X.shape
        for epoch in range(epochs):
            error = self.forward(X) - y  # residuals, shape (n_samples,)
            # Gradients of the MSE cost. The average must be taken over
            # the SAMPLES (n_samples), not over the features — dividing by
            # the feature count would silently rescale the step size.
            grad_coef = np.dot(X.T, error) / n_samples
            grad_intercept = np.sum(error) / n_samples
            self.coef_ -= lr * grad_coef
            self.intercept_ -= lr * grad_intercept
        return self

    def predict(self, X):
        # Apply the training-time standardization before predicting.
        X = self.scaler.transform(X)
        return self.forward(X)
# Train the from-scratch model and evaluate it on the held-out split.
# The constructor takes only the number of features (X_train.shape[1]);
# passing the sample count as a second argument was a bug.
my_lin_reg = linear_regression(X_train.shape[1])
my_lin_reg.fit(X_train, y_train)
y_pred = my_lin_reg.predict(X_test)

from sklearn.metrics import mean_squared_error
print(mean_squared_error(y_test, y_pred))
To improve the performance of the model, we can adjust the learning rate lr and the number of epochs epochs.
# Retrain with a larger learning rate and far fewer epochs.
my_lin_reg.fit(X_train, y_train, lr = 0.01, epochs = 100)
Now, let’s move to the PyTorch implementation. First, we need to install PyTorch with pip3 install torch torchvision torchaudio.
import torch
class linear_regression_pytorch:
    """Linear regression trained by gradient descent, with the gradients
    computed by autograd instead of hand-derived formulas."""

    def __init__(self, X_shape, lr=0.01, epochs=10000):
        # X_shape = number of input features
        self.lr = lr
        self.epochs = epochs
        # you could use nn.Linear instead of defining the coefficients and
        # intercept; the parameters are kept explicit here to expose every
        # step of the gradient descent algorithm
        self.coef_ = torch.zeros(X_shape, 1, requires_grad=True)
        self.intercept_ = torch.zeros(1, requires_grad=True)

    def forward(self, X):
        # X: (n_samples, n_features) -> predictions of shape (n_samples, 1)
        return X @ self.coef_ + self.intercept_

    def fit(self, X, y):
        """Run full-batch gradient descent on the MSE loss.

        X: float tensor of shape (n_samples, n_features)
        y: float tensor of shape (n_samples,) or (n_samples, 1)
        Returns self.
        """
        # Reshape the target to (n_samples, 1) to match the predictions.
        # Subtracting a (n,) tensor from a (n, 1) prediction would broadcast
        # to an (n, n) matrix and silently compute the wrong loss.
        y = y.reshape(-1, 1)
        for epoch in range(self.epochs):
            y_hat = self.forward(X)
            loss = ((y_hat - y) ** 2).mean()
            loss.backward()  # autograd fills .grad on coef_ and intercept_
            with torch.no_grad():
                self.coef_ -= self.lr * self.coef_.grad
                self.intercept_ -= self.lr * self.intercept_.grad
                self.coef_.grad.zero_()  # zero the gradients after updating
                self.intercept_.grad.zero_()
        return self
# Instantiate the PyTorch model (the class is named linear_regression_pytorch)
# and train it on the raw, unscaled features.
my_lin_reg = linear_regression_pytorch(X_train.shape[1])
# DataFrames must be converted via .values before building tensors, exactly
# as is already done for the target Series.
my_lin_reg.fit(
    torch.tensor(X_train.values, dtype=torch.float32),
    torch.tensor(y_train.values, dtype=torch.float32),
)
y_pred = my_lin_reg.forward(
    torch.tensor(X_test.values, dtype=torch.float32)
).detach().numpy()
print(mean_squared_error(y_test, y_pred))
By running the code, you will get a mean squared error of about 71.09. Why is the error so high? Unlike the NumPy implementation, this PyTorch version does not standardize the features before training, so gradient descent with a single learning rate converges poorly on the raw data.