Neural Network from Scratch using NumPy
We will replicate core PyTorch functionality using only NumPy:
- Multi Class classification
- Libraries used
- Loading dataset
- Train Test split
- Neural Network Layers
- Relu Activation
- Loss function
- Model Class
- Score function/metric
- Training Loop
- Evaluation on test set
Multi Class classification
Dataset
The dataset contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other two; the latter are not linearly separable from each other.
Attribute Information:
- sepal length in cm
- sepal width in cm
- petal length in cm
- petal width in cm
- class (encoded as 0, 1, 2 in the loaded target array):
  1. Iris Setosa
  2. Iris Versicolour
  3. Iris Virginica
Libraries used
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from functools import reduce
import numpy as np
Loading dataset
# load the iris feature matrix X and the integer class labels y as numpy arrays
X, y = datasets.load_iris(return_X_y=True, as_frame=False)
X[:10], y[:10]
Train Test split
# hold out 30% of the data for testing, keeping the class proportions (stratify=y)
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, stratify=y)
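As a quick sanity check (added here for illustration), np.bincount confirms that stratify=y keeps the class proportions the same in both splits:

# each of the 3 classes should appear in roughly a 70/30 ratio across the splits
print("train class counts:", np.bincount(train_y))
print("test class counts:", np.bincount(test_y))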
Neural Network Layers
We define the layers required by our model. Each layer has a forward pass and a backward pass. In the forward pass we compute the layer's output given its input, and in the backward pass we compute the gradients of the loss with respect to the layer's parameters and its input.
Linear Layer
Forward Pass $$ Y = X \cdot W^{T} + b $$
Backward Pass
With $L$ denoting the loss and $Y$ the layer output: gradient w.r.t. $W$: $$ \frac{\partial L}{\partial W}=\frac{\partial L}{\partial Y}^{T} \cdot X $$ gradient w.r.t. $b$ (summed over the samples in the batch): $$ \frac{\partial L}{\partial b}=\sum_{i}\left(\frac{\partial L}{\partial Y}\right)_{i} $$ gradient w.r.t. $X$: $$ \frac{\partial L}{\partial X}=\frac{\partial L}{\partial Y} \cdot W $$
class Linear:
    def __init__(self, in_features, out_features, bias=True):
        self.weight = {"value": None, "grad": None}
        self.bias = {"value": None, "grad": None}
        self.inp = None
        # initialize weights uniformly in [-1, 1); gradients start at zero
        self.weight["value"] = np.random.uniform(-1, 1, (out_features, in_features))
        self.weight["grad"] = np.zeros((out_features, in_features))
        if bias:
            self.bias["value"] = np.random.uniform(-1, 1, out_features)
        else:
            self.bias["value"] = np.zeros(out_features)
        self.bias["grad"] = np.zeros(out_features)

    def forward(self, inp):
        """
        inp: [#_samples, in_features]
        """
        self.inp = inp
        return np.dot(self.inp, self.weight["value"].T) + self.bias["value"]

    def backward(self, grad):
        """
        grad: shape (#, out_features) [same shape as the output of forward]
        """
        self.weight["grad"] = np.dot(grad.T, self.inp)    # (out,in) = (out,#)(#,in)
        self.bias["grad"] = grad.sum(0)                   # (out,) summed over the batch
        inp_grad = np.dot(grad, self.weight["value"])     # (#,in) = (#,out)(out,in)
        return inp_grad
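To gain confidence in the hand-derived gradients, a small numerical gradient check can be run (an illustrative addition, not part of the original notebook). It uses sum(Y * upstream) as a scalar surrogate loss, so the analytic weight gradient should match a central finite-difference estimate:

# numerical gradient check for one weight entry of the Linear layer
np.random.seed(0)
layer = Linear(4, 3)
x = np.random.randn(5, 4)
upstream = np.random.randn(5, 3)              # pretend gradient coming from the loss

layer.forward(x)
layer.backward(upstream)                      # fills layer.weight["grad"]

eps = 1e-6
i, j = 1, 2                                   # the weight entry to check
w = layer.weight["value"]
w[i, j] += eps
plus = (layer.forward(x) * upstream).sum()    # surrogate loss with w[i,j] + eps
w[i, j] -= 2 * eps
minus = (layer.forward(x) * upstream).sum()   # surrogate loss with w[i,j] - eps
w[i, j] += eps                                # restore the original weight

numeric = (plus - minus) / (2 * eps)
print(numeric, layer.weight["grad"][i, j])    # the two numbers should agree closely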
Relu Activation
Forward Pass $$ \mathrm{ReLU}(x)=\max(0, x) $$
Backward Pass: the gradient flows through only where the input was positive, $$ \frac{\partial L}{\partial x}=\frac{\partial L}{\partial y}\odot\mathbb{1}[x>0] $$
class Relu:
    def __init__(self):
        self.inp = None

    def forward(self, inp):
        self.inp = inp
        # keep positive values, clamp negatives to zero
        return np.clip(self.inp, 0., np.inf)

    def backward(self, grad):
        """
        grad: [same shape as inp]
        """
        # pass the gradient through only where the input was positive
        inp_grad = (self.inp > 0) * grad
        return inp_grad
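Loss function
The class below implements softmax cross-entropy: a softmax over the logits followed by the negative log-likelihood of the true class. Written out for clarity (this derivation is added here and matches the code below), with logits $z$, softmax probabilities $p$, integer labels $y$ and a batch of $m$ samples:
$$ p_{ij}=\frac{e^{z_{ij}}}{\sum_{k} e^{z_{ik}}}, \qquad L=-\frac{1}{m}\sum_{i=1}^{m}\log p_{i,y_{i}} $$
The gradient with respect to the logits, which backward returns, is
$$ \frac{\partial L}{\partial z_{ij}}=\frac{1}{m}\left(p_{ij}-\mathbb{1}[j=y_{i}]\right) $$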
class CrossEntropy:
    def __init__(self):
        pass

    @staticmethod
    def Softmax(x):
        # subtract the row-wise max before exponentiating for numerical stability
        exps = np.exp(x - x.max(1, keepdims=True))
        return exps / exps.sum(1, keepdims=True)

    def forward(self, pred, target):
        """
        pred: (#, num_classes) raw logits
        target: (#,) integer class labels
        """
        m = target.shape[0]
        # softmax along the class dimension
        self.pred = self.Softmax(pred)
        self.target = target
        # negative log-probability of the true class, averaged over the batch
        log_likelihood = -np.log(self.pred[range(m), self.target])
        loss = log_likelihood.sum() / m
        return loss

    def backward(self, grad=1):
        """
        grad: upstream gradient of the loss (a scalar, 1 by default)
        """
        m = self.target.shape[0]
        # gradient of softmax cross-entropy w.r.t. the logits: (softmax - one_hot) / m
        self.grad = self.pred.copy()
        self.grad[range(m), self.target] -= grad
        self.grad = self.grad / m
        return self.grad
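As a quick sanity check (an illustrative addition, not part of the original notebook): with all logits equal the softmax is uniform, so the loss should equal $\ln(3)\approx 1.0986$ for 3 classes.

ce = CrossEntropy()
logits = np.zeros((4, 3))            # 4 samples, 3 classes, all logits equal
labels = np.array([0, 1, 2, 0])
print(ce.forward(logits, labels))    # ≈ 1.0986 = ln(3)
print(ce.backward().shape)           # (4, 3): gradient w.r.t. the logits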
Model Class
class Model:
    def __init__(self, model, loss_func, learning_rate):
        self.model = model
        self.loss = loss_func
        self.lr = learning_rate

    def fit(self, x, y):
        # forward pass: chain the layers' forward methods
        pred_logits = reduce(lambda acc, curr: curr.forward(acc), self.model, x)
        # calculate loss
        loss = self.loss.forward(pred_logits, y)
        g = self.loss.backward()
        # backward pass: propagate the gradient through the layers in reverse order
        g = reduce(lambda acc, curr: curr.backward(acc), self.model[::-1], g)
        # update weights with plain gradient descent
        for l in self.model:
            if isinstance(l, Linear):
                l.weight["value"] -= self.lr * l.weight["grad"]
                l.bias["value"] -= self.lr * l.bias["grad"]
        return loss

    def evaluate(self, x):
        # forward pass only; returns the raw logits
        pred_logits = reduce(lambda acc, curr: curr.forward(acc), self.model, x)
        return pred_logits
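The reduce calls above simply thread the activations through the layers; spelled out as plain loops (illustrative helper names, not defined in the original), the forward and backward chains look like this:

# what reduce(lambda acc, curr: curr.forward(acc), self.model, x) does, spelled out
def forward_chain(layers, x):
    out = x
    for layer in layers:
        out = layer.forward(out)   # output of one layer is the input of the next
    return out

# the backward reduce is the same idea, but over the reversed layer list
def backward_chain(layers, g):
    for layer in layers[::-1]:
        g = layer.backward(g)      # gradient flows from the loss back to the input
    return g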
Score function/metric
def accuracy(pred, target):
    # convert logits to class predictions and compare with the integer targets
    pred = CrossEntropy.Softmax(pred)
    pred = np.argmax(pred, 1)
    return (pred == target).mean()
Training Loop
model = [
    Linear(4, 100),
    Relu(),
    Linear(100, 3)]

loss_function = CrossEntropy()                # Loss function
lr = 1e-3                                     # Learning Rate
my_model = Model(model, loss_function, lr)    # initialize model
epochs = 500                                  # epochs to train

for i in range(1, epochs + 1):
    loss = my_model.fit(train_X, train_y)         # train
    pred_logits = my_model.evaluate(train_X)      # get predictions
    # print loss and accuracy every 50 epochs
    if i % 50 == 0:
        print(f"Loss at epoch {i}: {loss}, accuracy: {accuracy(pred_logits, train_y)}")
Evaluation on test set
pred = my_model.evaluate(test_X)
print(f"Accuracy for the test set: {accuracy(pred, test_y)}")