Import Necessary Packages
from DL_utilities import*
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.datasets import fetch_openml
%matplotlib inline
Data Preparation
# Download the MNIST digits dataset from OpenML.
# as_frame=False asks fetch_openml for plain NumPy arrays: the code further
# down indexes X positionally (X[i, :]) and calls y.reshape(...), neither of
# which works on the pandas objects newer scikit-learn versions may return.
# version=1 pins the dataset revision so reruns fetch the same data.
mnist = fetch_openml("mnist_784", version=1, as_frame=False)
X, y = mnist["data"], mnist["target"]
print("Shape of X", X.shape)
print("Shape of Y", y.shape)
# Preview the first `sample_size` raw images side by side in a single row,
# with each image's label drawn above it.
count = 0
sample_size = 20
plt.figure(figsize=(20, 20))
for i in range(sample_size):
    plt.subplot(1, sample_size, i + 1)
    # NOTE(review): an empty string is passed as the line position here;
    # presumably a trick to suppress axis decorations -- confirm the intent.
    plt.axhline('')
    plt.axvline('')
    plt.text(x=10, y=-5, s=y[i], fontsize=20)
    # Each row of X is reshaped into a 28x28 image before display.
    plt.imshow(X[i, :].reshape([28, 28]), cmap='Greys')
plt.show()
# Scale the X values by 1/255 (presumably maps 8-bit pixel intensities
# into [0, 1] -- confirm against the dataset).
X = X / 255

# One-hot encode the labels: after the transpose/reshape, Y_new has one row
# per digit class and one column per example.
digits = 10
examples = y.shape[0]
y = y.reshape(1, examples)
Y_new = np.eye(digits)[y.astype('int32')].T.reshape(digits, examples)

# Seed NumPy's global RNG so the shuffle below is repeatable.
np.random.seed(30)

# Split: the first m examples form the training set, the remainder the test
# set. X is transposed so that every column is one example, matching Y_new.
m = 60000
m_test = X.shape[0] - m
X_train = X[:m].T
X_test = X[m:].T
Y_train = Y_new[:, :m]
Y_test = Y_new[:, m:]

# Shuffle the training columns, applying the same permutation to inputs and
# labels so they stay aligned.
shuffle_index = np.random.permutation(m)
X_train = X_train[:, shuffle_index]
Y_train = Y_train[:, shuffle_index]

print("X_train shape", X_train.shape)
print("Y_train shape", Y_train.shape)
print("X_test shape", X_test.shape)
print("Y_test shape", Y_test.shape)
# Preview the first `sample_size` shuffled training examples on a 4x5 grid,
# printing each example's one-hot label column as text on its subplot.
count = 0
sample_size = 20
plt.figure(figsize=(20, 20))
for i in range(sample_size):
    count += 1
    plt.subplot(4, 5, i + 1)
    # NOTE(review): an empty string is passed as the line position here;
    # presumably a trick to suppress axis decorations -- confirm the intent.
    plt.axhline('')
    plt.axvline('')
    plt.text(x=0, y=30, s=Y_train[:, i], fontsize=15)
    # Each column of X_train is reshaped into a 28x28 image before display.
    plt.imshow(X_train[:, i].reshape(28, 28), cmap='Greys')
plt.show()
Neural Network Model
def softmax(Z):
    """Apply a numerically stable softmax along axis 0.

    Parameters
    ----------
    Z : numpy.ndarray
        Pre-activation values. Normalisation runs over axis 0, so for a 2-D
        input every column of the result sums to 1.

    Returns
    -------
    A : numpy.ndarray
        Softmax of ``Z``, same shape as ``Z``.
    Z : numpy.ndarray
        The unmodified input, returned as the activation cache.
    """
    # Shift by the maximum of each column rather than the global maximum.
    # With a global shift, a column whose entries all sit far below the
    # global max underflows to exp(...) == 0 everywhere and the
    # normalisation becomes 0 / 0 = NaN.
    shifted = Z - Z.max(axis=0, keepdims=True)
    exps = np.exp(shifted)
    A = exps / np.sum(exps, axis=0, keepdims=True)
    return A, Z
def softmax_gradient(dA, Z):
    """Back-propagate ``dA`` through the softmax output layer.

    Parameters
    ----------
    dA : numpy.ndarray
        Gradient of the cost with respect to the softmax output.
    Z : numpy.ndarray
        Pre-activation values cached during the forward pass.

    Returns
    -------
    numpy.ndarray
        ``dA * A * (1 - A)`` where ``A = softmax(Z)``.

    Notes
    -----
    Only the element-wise term ``A * (1 - A)`` is applied, not the full
    softmax Jacobian. ``L_model_backward_new`` builds ``dA`` as
    ``(AL - y) / (AL * (1 - AL))``, so the product reduces to ``AL - y``.
    """
    probs, _ = softmax(Z)
    return dA * probs * (1 - probs)
def linear_activation_forward(A_prev, W, b, activation_fn):
    """Run one layer forward: linear step followed by an activation.

    Parameters
    ----------
    A_prev : numpy.ndarray
        Activations from the previous layer (or the input data).
    W, b : numpy.ndarray
        Weights and bias of this layer, passed straight to ``linear_forward``.
    activation_fn : str
        One of ``"sigmoid"``, ``"tanh"``, ``"relu"`` or ``"softmax"``.

    Returns
    -------
    A : numpy.ndarray
        Output of the activation, of shape ``(W.shape[0], A_prev.shape[1])``.
    cache : tuple
        ``(linear_cache, activation_cache)`` for use in the backward pass.

    Raises
    ------
    ValueError
        If ``activation_fn`` is not one of the supported names.
    """
    # Resolve the activation before doing any work so that an unsupported
    # name fails with a clear error instead of an UnboundLocalError later.
    if activation_fn == "sigmoid":
        activation = sigmoid
    elif activation_fn == "tanh":
        activation = tanh
    elif activation_fn == "relu":
        activation = relu
    elif activation_fn == "softmax":
        activation = softmax
    else:
        raise ValueError(
            "Unsupported activation_fn: {!r}; expected 'sigmoid', 'tanh', "
            "'relu' or 'softmax'".format(activation_fn))

    # The linear step is identical for every activation, so it is done once.
    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation(Z)

    # Internal sanity check on the output shape.
    assert A.shape == (W.shape[0], A_prev.shape[1])
    cache = (linear_cache, activation_cache)
    return A, cache
def linear_activation_backward(dA, cache, activation_fn):
    """Run one layer backward: activation gradient, then the linear step.

    Parameters
    ----------
    dA : numpy.ndarray
        Gradient of the cost with respect to this layer's activation output.
    cache : tuple
        ``(linear_cache, activation_cache)`` as produced by
        ``linear_activation_forward``.
    activation_fn : str
        One of ``"sigmoid"``, ``"tanh"``, ``"relu"`` or ``"softmax"``.

    Returns
    -------
    dA_prev, dW, db
        The three values returned by ``linear_backward`` for this layer.

    Raises
    ------
    ValueError
        If ``activation_fn`` is not one of the supported names.
    """
    linear_cache, activation_cache = cache

    # Resolve the gradient function first so that an unsupported name fails
    # with a clear error instead of an UnboundLocalError at the return.
    if activation_fn == "sigmoid":
        gradient_fn = sigmoid_gradient
    elif activation_fn == "tanh":
        gradient_fn = tanh_gradient
    elif activation_fn == "relu":
        gradient_fn = relu_gradient
    elif activation_fn == "softmax":
        gradient_fn = softmax_gradient
    else:
        raise ValueError(
            "Unsupported activation_fn: {!r}; expected 'sigmoid', 'tanh', "
            "'relu' or 'softmax'".format(activation_fn))

    # The linear backward step is identical for every activation.
    dZ = gradient_fn(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def L_model_forward_new(X, parameters, hidden_layers_activation_fn="relu", multiclass=True):
    """Forward-propagate ``X`` through every layer of the network.

    Parameters
    ----------
    X : numpy.ndarray
        Input data fed to the first layer.
    parameters : dict
        Holds ``"W1", "b1", ..., "WL", "bL"``; the layer count is taken as
        ``len(parameters) // 2``.
    hidden_layers_activation_fn : str
        Activation name used for layers ``1 .. L-1``.
    multiclass : bool
        When true the output layer uses ``"softmax"``, otherwise
        ``"sigmoid"``.

    Returns
    -------
    AL : numpy.ndarray
        Activation of the output layer.
    caches : list
        One cache per layer, in forward order, for the backward pass.
    """
    num_layers = len(parameters) // 2
    caches = []
    activation = X

    # Hidden layers 1 .. L-1 all share the same activation function.
    for layer in range(1, num_layers):
        activation, layer_cache = linear_activation_forward(
            activation,
            parameters["W" + str(layer)],
            parameters["b" + str(layer)],
            activation_fn=hidden_layers_activation_fn)
        caches.append(layer_cache)

    # The output layer's activation depends on the kind of task.
    output_layer = "softmax" if multiclass else "sigmoid"
    AL, output_cache = linear_activation_forward(
        activation,
        parameters["W" + str(num_layers)],
        parameters["b" + str(num_layers)],
        activation_fn=output_layer)
    caches.append(output_cache)

    return AL, caches
def L_model_backward_new(AL, y, caches, hidden_layers_activation_fn="relu", multiclass=False):
    """Back-propagate through every layer and collect the gradients.

    Parameters
    ----------
    AL : numpy.ndarray
        Output-layer activations from the forward pass (floating point).
    y : numpy.ndarray
        Target values; reshaped to ``AL.shape``.
    caches : list
        Per-layer caches from the forward pass, in forward order.
    hidden_layers_activation_fn : str
        Activation name used by the hidden layers.
    multiclass : bool
        When true the output layer is treated as ``"softmax"``, otherwise
        ``"sigmoid"``. Note that the default here is ``False`` whereas
        ``L_model_forward_new`` defaults to ``True``, so pass it explicitly.

    Returns
    -------
    dict
        Gradients keyed ``"dA{l-1}"``, ``"dW{l}"`` and ``"db{l}"`` for every
        layer ``l`` from ``L`` down to ``1``.
    """
    y = y.reshape(AL.shape)
    L = len(caches)
    grads = {}

    output_layer = "softmax" if multiclass else "sigmoid"

    # AL * (1 - AL) is exactly zero when an output saturates at 0 or 1, which
    # would produce inf/NaN gradients. Flooring the denominator at the
    # smallest positive normal float keeps every quotient finite and leaves
    # all non-saturated entries bit-for-bit unchanged.
    denominator = np.maximum(np.multiply(AL, 1 - AL), np.finfo(AL.dtype).tiny)
    dAL = np.divide(AL - y, denominator)

    # Output layer.
    (grads["dA" + str(L - 1)],
     grads["dW" + str(L)],
     grads["db" + str(L)]) = linear_activation_backward(
        dAL, caches[L - 1], output_layer)

    # Hidden layers, walking from L-1 back to 1.
    for layer in range(L - 1, 0, -1):
        current_cache = caches[layer - 1]
        (grads["dA" + str(layer - 1)],
         grads["dW" + str(layer)],
         grads["db" + str(layer)]) = linear_activation_backward(
            grads["dA" + str(layer)], current_cache,
            hidden_layers_activation_fn)

    return grads
# For multiclass classification
def CrossEntropy(AL, y, eps=1e-15):
    """Compute the mean categorical cross-entropy cost.

    Parameters
    ----------
    AL : numpy.ndarray
        Predicted probabilities.
    y : numpy.ndarray
        Targets with the same layout as ``AL``; ``y.shape[1]`` is used as the
        number of examples to average over.
    eps : float, optional
        Lower bound applied to ``AL`` before taking the logarithm.

    Returns
    -------
    float
        ``-(1 / m) * sum(y * log(AL))`` with ``m = y.shape[1]``.
    """
    # Without the lower bound, a probability of exactly 0 gives log(0) = -inf
    # and, wherever y is 0, 0 * -inf = NaN, which poisons the whole sum.
    safe_AL = np.clip(AL, eps, None)
    cost_sum = np.sum(np.multiply(y, np.log(safe_AL)))
    m = y.shape[1]
    cost = -(1. / m) * cost_sum
    return cost
# For multiclass classification
def evaluate_cost_acc(X, y, params, activation_fn):
    """Evaluate the network on ``(X, y)``.

    Parameters
    ----------
    X : numpy.ndarray
        Inputs passed to ``L_model_forward_new``.
    y : numpy.ndarray
        One-hot targets; the true class is taken as the argmax over axis 0.
    params : dict
        Network parameters.
    activation_fn : str
        Activation name for the hidden layers.

    Returns
    -------
    cost : float
        Cross-entropy cost of the predictions.
    accuracy : float
        Classification accuracy as a percentage.
    """
    # multiclass is left at L_model_forward_new's default.
    probabilities, _ = L_model_forward_new(X, params, activation_fn)

    # Predicted and true classes are the row indices of the per-column maxima.
    predicted = np.argmax(probabilities, axis=0)
    expected = np.argmax(y, axis=0)

    # NOTE(review): accuracy_score is documented as (y_true, y_pred); the
    # order here is swapped, which should not matter for plain accuracy.
    accuracy_pct = metrics.accuracy_score(predicted, expected) * 100
    return CrossEntropy(probabilities, y), accuracy_pct
Neural Network Training
# Seed NumPy's global RNG before the parameters are created (presumably
# consumed by initialize_parameters -- confirm).
np.random.seed(48)

# Neural Network Model and Initialize parameters
# 784 --> 20 --> 15 --> 10
layers_dims = [X_train.shape[0], 20, 15, 10]
params = initialize_parameters(layers_dims)
hn_activation = "sigmoid"

# Hyper-parameters
learning_rate = 0.5
number_of_epoch = 100

# Per-epoch history of cost and accuracy on both splits.
train_costs, train_accs, test_costs, test_accs = [], [], [], []

for epoch in range(number_of_epoch):
    # One update step using the whole training set at once.
    AL, caches = L_model_forward_new(
        X_train, params, hn_activation, multiclass=True)
    grads = L_model_backward_new(
        AL, Y_train, caches, hn_activation, multiclass=True)
    params = update_parameters(params, grads, learning_rate)

    # Measure cost/accuracy with the freshly updated parameters.
    train_cost, train_acc = evaluate_cost_acc(
        X_train, Y_train, params, hn_activation)
    test_cost, test_acc = evaluate_cost_acc(
        X_test, Y_test, params, hn_activation)

    train_costs.append(train_cost)
    test_costs.append(test_cost)
    train_accs.append(train_acc)
    test_accs.append(test_acc)

    print(
        "Epoch {}: \ntraining cost = {:.3f}, training accuracy = {:.2f}, "
        "\ntest cost = {:.3f}, test accuracy = {:.2f}".format(
            epoch + 1, train_cost, train_acc, test_cost, test_acc))

print("Done.")
Contact us or send your requirement details at:
Comments