In this notebook, we implement ADALINE (the ADAptive LInear NEuron) "by hand": the gradients are derived and computed analytically rather than with PyTorch's autograd capabilities.
Import Necessary Packages
import pandas as pd
import matplotlib.pyplot as plt
import torch
%matplotlib inline
Load & Prepare a Toy Dataset
df = pd.read_csv('./datasets/iris.data', index_col=None, header=None)
df.columns = ['x1', 'x2', 'x3', 'x4', 'y']

# Keep only the 100 samples for Iris-versicolor and Iris-virginica
df = df.iloc[50:150]

# Encode the class labels as 0 (versicolor) and 1 (virginica)
df['y'] = df['y'].apply(lambda x: 0 if x == 'Iris-versicolor' else 1)
df.tail()
Output: the last five rows of the prepared DataFrame.
# Assign features and target; we use two of the four features,
# sepal width (x2) and petal length (x3)
X = torch.tensor(df[['x2', 'x3']].values, dtype=torch.float)
y = torch.tensor(df['y'].values, dtype=torch.int)
# Shuffling & train/test split (70% train, 30% test)
torch.manual_seed(123)
shuffle_idx = torch.randperm(y.size(0), dtype=torch.long)
X, y = X[shuffle_idx], y[shuffle_idx]

# Note: indexing the already-shuffled tensors by shuffle_idx again
# simply applies a second random permutation before splitting
percent70 = int(shuffle_idx.size(0)*0.7)
X_train, X_test = X[shuffle_idx[:percent70]], X[shuffle_idx[percent70:]]
y_train, y_test = y[shuffle_idx[:percent70]], y[shuffle_idx[percent70:]]
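A quick sanity check that the split has the expected sizes:

# 70 training samples and 30 test samples out of 100
print(X_train.size(0), X_test.size(0))  # expected: 70 30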
# Normalize (mean zero, unit variance);
# the test set is scaled with the *training* statistics
mu, sigma = X_train.mean(dim=0), X_train.std(dim=0)
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma
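After scaling, the training features should have (approximately) zero mean and unit variance; the test set will be close but not exact, since it was standardized with the training-set statistics:

# Verify the standardization
print(X_train.mean(dim=0), X_train.std(dim=0))
print(X_test.mean(dim=0), X_test.std(dim=0))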
plt.scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1], label='class 0')
plt.scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1], label='class 1')
plt.legend()
plt.show()
Output: scatter plot of the training set, class 0 vs. class 1.
plt.scatter(X_test[y_test == 0, 0], X_test[y_test == 0, 1], label='class 0')
plt.scatter(X_test[y_test == 1, 0], X_test[y_test == 1, 1], label='class 1')
plt.legend()
plt.show()
Output: scatter plot of the test set, class 0 vs. class 1.
Implement ADALINE Model
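ADALINE computes a linear net input and applies an identity activation, and training minimizes the mean squared error. For reference, the gradients implemented in backward() below follow from the chain rule:

$$\hat{y} = Xw + b, \qquad \mathcal{L} = \frac{1}{n}\sum_{i=1}^{n}(\hat{y}_i - y_i)^2$$

$$\frac{\partial \mathcal{L}}{\partial w} = \frac{2}{n}X^\top(\hat{y} - y), \qquad \frac{\partial \mathcal{L}}{\partial b} = \frac{2}{n}\sum_{i=1}^{n}(\hat{y}_i - y_i)$$

Each update then steps in the direction of the negative gradient, scaled by the learning rate.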
class Adaline1():
    def __init__(self, num_features):
        self.num_features = num_features
        self.weights = torch.zeros(num_features, 1, dtype=torch.float)
        self.bias = torch.zeros(1, dtype=torch.float)

    def forward(self, x):
        # ADALINE uses an identity (linear) activation function
        netinputs = torch.add(torch.mm(x, self.weights), self.bias)
        activations = netinputs
        return activations.view(-1)

    def backward(self, x, yhat, y):
        # Derivative of the MSE loss w.r.t. the predictions
        grad_loss_yhat = 2*(yhat - y)

        grad_yhat_weights = x
        grad_yhat_bias = 1.

        # Chain rule: inner times outer
        grad_loss_weights = torch.mm(grad_yhat_weights.t(),
                                     grad_loss_yhat.view(-1, 1)) / y.size(0)
        grad_loss_bias = torch.sum(grad_yhat_bias*grad_loss_yhat) / y.size(0)

        # Return negative gradients for the gradient *descent* updates
        return (-1)*grad_loss_weights, (-1)*grad_loss_bias
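As an optional sanity check (a minimal sketch, assuming the Adaline1 class and the prepared X_train/y_train from above), the hand-derived gradients can be compared against PyTorch's autograd:

# Optional: verify the manual gradients against autograd
model_check = Adaline1(num_features=X_train.size(1))

w = model_check.weights.clone().requires_grad_(True)
b = model_check.bias.clone().requires_grad_(True)
mse = torch.mean(((torch.mm(X_train, w) + b).view(-1) - y_train.float())**2)
mse.backward()

# backward() returns *negative* gradients, hence the sign flips
neg_grad_w, neg_grad_b = model_check.backward(
    X_train, model_check.forward(X_train), y_train.float())
print(torch.allclose(w.grad, -neg_grad_w))          # expected: True
print(torch.allclose(b.grad, -neg_grad_b.view(1)))  # expected: True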
Define Training and Evaluation Functions
####################################################
##### Training and evaluation wrappers
####################################################

def loss(yhat, y):
    return torch.mean((yhat - y)**2)


def train(model, x, y, num_epochs,
          learning_rate=0.01, seed=123, minibatch_size=10):
    cost = []
    torch.manual_seed(seed)
    for e in range(num_epochs):

        #### Shuffle epoch
        shuffle_idx = torch.randperm(y.size(0), dtype=torch.long)
        minibatches = torch.split(shuffle_idx, minibatch_size)

        for minibatch_idx in minibatches:

            #### Compute outputs ####
            yhat = model.forward(x[minibatch_idx])

            #### Compute gradients ####
            negative_grad_w, negative_grad_b = \
                model.backward(x[minibatch_idx], yhat, y[minibatch_idx])

            #### Update weights ####
            model.weights += learning_rate * negative_grad_w
            model.bias += learning_rate * negative_grad_b

            #### Logging ####
            minibatch_loss = loss(yhat, y[minibatch_idx])
            print('    Minibatch MSE: %.3f' % minibatch_loss)

        #### Logging ####
        yhat = model.forward(x)
        curr_loss = loss(yhat, y)
        print('Epoch: %03d' % (e+1), end="")
        print(' | MSE: %.5f' % curr_loss)
        cost.append(curr_loss)

    return cost
Train Model
model = Adaline1(num_features=X_train.size(1))

cost = train(model,
             X_train, y_train.float(),
             num_epochs=20,
             learning_rate=0.1,
             seed=123,
             minibatch_size=10)
Output:
    Minibatch MSE: 0.500
    Minibatch MSE: 0.341
    Minibatch MSE: 0.220
    Minibatch MSE: 0.245
    Minibatch MSE: 0.157
    Minibatch MSE: 0.133
    Minibatch MSE: 0.144
Epoch: 001 | MSE: 0.12142
    Minibatch MSE: 0.107
    Minibatch MSE: 0.147
    Minibatch MSE: 0.064
    Minibatch MSE: 0.079
    Minibatch MSE: 0.185
    Minibatch MSE: 0.063
    Minibatch MSE: 0.135
Epoch: 002 | MSE: 0.09932
    Minibatch MSE: 0.093
    Minibatch MSE: 0.064
    ...
Evaluate ADALINE Model
Plot Loss (MSE)
plt.plot(range(len(cost)), cost)
plt.ylabel('Mean Squared Error')
plt.xlabel('Epoch')
plt.show()
Output: plot of the training MSE per epoch.
Compare with Analytical Solution
print('Weights', model.weights)
print('Bias', model.bias)
Output:
Weights tensor([[-0.0763],
[ 0.4181]])
Bias tensor([0.4888])
def analytical_solution(x, y):
    # Closed-form ordinary least squares via the normal equations:
    # params = (Xb^T Xb)^{-1} Xb^T y, where Xb has a leading column
    # of ones so that the first parameter is the bias
    Xb = torch.cat((torch.ones((x.size(0), 1)), x), dim=1)
    z = torch.inverse(torch.matmul(Xb.t(), Xb))
    params = torch.matmul(z, torch.matmul(Xb.t(), y))
    b, w = torch.tensor([params[0]]), params[1:].view(x.size(1), 1)
    return w, b
w, b = analytical_solution(X_train, y_train.float())
print('Analytical weights', w)
print('Analytical bias', b)
Output:
Analytical weights tensor([[-0.0703],
        [ 0.4219]])
Analytical bias tensor([0.4857])
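The minibatch SGD estimates closely track the closed-form least-squares parameters. This is expected: with an identity activation and MSE loss, ADALINE solves exactly this linear regression problem, and training for more epochs would bring the iterative solution even closer.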
Evaluate Prediction Accuracy
ones = torch.ones(y_train.size())
zeros = torch.zeros(y_train.size())
train_pred = model.forward(X_train)
train_acc = torch.mean(
    (torch.where(train_pred > 0.5,
                 ones,
                 zeros).int() == y_train).float())

ones = torch.ones(y_test.size())
zeros = torch.zeros(y_test.size())
test_pred = model.forward(X_test)
test_acc = torch.mean(
    (torch.where(test_pred > 0.5,
                 ones,
                 zeros).int() == y_test).float())

print('Training Accuracy: %.2f' % (train_acc*100))
print('Test Accuracy: %.2f' % (test_acc*100))
Output:
Training Accuracy: 90.00
Test Accuracy: 96.67
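Equivalently, the thresholding can be written more compactly with a boolean comparison (a minor variation, not in the code above):

# Same accuracies via a boolean comparison instead of torch.where
train_acc = ((model.forward(X_train) > 0.5).int() == y_train).float().mean()
test_acc = ((model.forward(X_test) > 0.5).int() == y_test).float().mean()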
Decision Boundary
##########################
### 2D Decision Boundary
##########################

# The boundary is where the net input equals the 0.5 classification
# threshold: w_1*x_1 + w_2*x_2 + bias = 0.5. Folding the threshold
# into the bias and solving for x_2 gives the line's endpoints below.
w, b = model.weights, model.bias - 0.5

x_min = -3
y_min = ((-(w[0] * x_min) - b[0])
         / w[1])

x_max = 3
y_max = ((-(w[0] * x_max) - b[0])
         / w[1])
fig, ax = plt.subplots(1, 2, sharex=True, figsize=(7, 3))
ax[0].plot([x_min, x_max], [y_min, y_max])
ax[1].plot([x_min, x_max], [y_min, y_max])
ax[0].scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], label='class 0', marker='o')
ax[0].scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], label='class 1', marker='s')
ax[1].scatter(X_test[y_test==0, 0], X_test[y_test==0, 1], label='class 0', marker='o')
ax[1].scatter(X_test[y_test==1, 0], X_test[y_test==1, 1], label='class 1', marker='s')
ax[1].legend(loc='upper left')
plt.show()
Output: decision boundary over the training set (left) and test set (right).
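Because ADALINE computes a linear function of its inputs, the decision boundary is a straight line; it separates the two classes well on both the training and test data, consistent with the accuracies reported above.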