
How to modify the display loss function for a vectorised feedforward network?


I was practicing vectorising backprop for a basic NN, and I tried modifying code for binary classification that was originally written for multi-class classification. Code:

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, log_loss
from tqdm import tqdm_notebook 
import seaborn as sns
import imageio
import time
from IPython.display import HTML


from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import make_blobs

data, labels_orig = make_blobs(n_samples=1000, centers=4, n_features=2, random_state=0)
labels = np.mod(labels_orig, 2) # collapse the 4 blob labels into binary {0, 1}
X_train, X_val, Y_train, Y_val = train_test_split(data, labels, stratify=labels, random_state=0)
Y_train = Y_train.reshape(-1,1)
W1 = np.random.randn(2,2)
W2 = np.random.randn(2,1)
class FF_MultiClass_InputWeightVectorisedEx:

  def __init__(self, W1, W2):
    self.W1 = W1.copy() #(2,2)
    self.W2 = W2.copy() #(2,1)
    self.B1 = np.zeros((1,2))
    self.B2 = np.zeros((1,1))

  def sigmoid(self, X):
    return 1.0/(1.0 + np.exp(-X))

  def softmax(self, X):
    exps = np.exp(X)
    return exps / np.sum(exps, axis=1).reshape(-1,1)

  def forward_pass(self, X):
    self.A1 = np.matmul(X,self.W1) + self.B1 # (N, 2) * (2, 2) -> (N, 2)
    self.H1 = self.sigmoid(self.A1) # (N, 2)
    self.A2 = np.matmul(self.H1, self.W2) + self.B2 # (N, 2) * (2, 1) -> (N, 1)
    self.H2 = self.softmax(self.A2) # (N, 1)
    return self.H2

  def grad_sigmoid(self, X):
    return X*(1-X) 

  def grad(self, X, Y):
    self.forward_pass(X)
    m = X.shape[0]

    self.dA2 = self.H2 - Y # (N, 1) - (N, 1) -> (N, 1)
    self.dW2 = np.matmul(self.H1.T, self.dA2) # (2, N) * (N, 1) -> (2, 1)
    self.dB2 = np.sum(self.dA2, axis=0).reshape(1, -1) # (N, 1) -> (1, 1)
    self.dH1 = np.matmul(self.dA2, self.W2.T) # (N, 1) * (1, 2) -> (N, 2)
    self.dA1 = np.multiply(self.dH1, self.grad_sigmoid(self.H1)) # (N, 2) .* (N, 2) -> (N, 2)

    self.dW1 = np.matmul(X.T, self.dA1) # (2, N) * (N, 2) -> (2, 2)
    self.dB1 = np.sum(self.dA1, axis=0).reshape(1, -1) # (N, 2) -> (1, 2)


  def fit(self, X, Y, epochs=1, learning_rate=1, display_loss=False):

    if display_loss:
      loss = {}

    for i in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
      self.grad(X, Y) # X -> (N, 2), Y -> (N, 1)

      m = X.shape[0]
      self.W2 -= learning_rate * (self.dW2/m)
      self.B2 -= learning_rate * (self.dB2/m)
      self.W1 -= learning_rate * (self.dW1/m)
      self.B1 -= learning_rate * (self.dB1/m)

      if display_loss:
        Y_pred = self.predict(X)
        loss[i] = log_loss(Y, Y_pred)


    if display_loss:
      plt.plot(loss.values())
      plt.xlabel('Epochs')
      plt.ylabel('Log Loss')
      plt.show()


  def predict(self, X):
    Y_pred = self.forward_pass(X)
    return np.array(Y_pred).squeeze()
models_init = [FF_MultiClass_InputWeightVectorisedEx(W1, W2)]
models = []
for idx, model in enumerate(models_init, start=1):
  tic = time.time()
  ffsn_multi_specific = FF_MultiClass_InputWeightVectorisedEx(W1, W2)
  ffsn_multi_specific.fit(X_train,Y_train,epochs=2000,learning_rate=.5,display_loss=True)
  models.append(ffsn_multi_specific)
  toc = time.time()
  print("Time taken by model {}: {}".format(idx, toc-tic))

The error I'm getting comes from the display-loss code inside fit in the class above:

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
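
A minimal sketch of how I probed what log_loss actually receives (using the model and data defined above; probe is just an illustrative name):

import numpy as np

probe = FF_MultiClass_InputWeightVectorisedEx(W1, W2)  # fresh, untrained copy
Y_pred = probe.predict(X_train)

print(Y_pred.shape)                                    # (N,) after squeeze
print(np.isnan(Y_pred).any(), np.isinf(Y_pred).any())  # any non-finite values?
print(np.unique(Y_pred))                               # distinct output values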

The original multi-class classification display-loss code was:

if display_loss:
  Y_pred = self.predict(X)
  loss[i] = log_loss(np.argmax(Y, axis=1), Y_pred)
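
(For context: in the multi-class version, Y was one-hot encoded with shape (N, 4), so np.argmax recovered integer class labels for log_loss. A toy illustration:)

import numpy as np

Y_onehot = np.array([[0, 0, 1, 0],   # class 2
                     [1, 0, 0, 0]])  # class 0
print(np.argmax(Y_onehot, axis=1))   # [2 0] -- integer labels, as log_loss expects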

My display-loss code (based on the multi-class one) was:

if display_loss:
  Y_pred = self.predict(X)
  loss[i] = log_loss(Y, Y_pred)
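
For comparison, a minimal standalone example of what sklearn's log_loss accepts in the binary case (toy values, not from my data):

import numpy as np
from sklearn.metrics import log_loss

y_true = np.array([0, 1, 1, 0])          # binary labels
y_prob = np.array([0.1, 0.8, 0.7, 0.3])  # finite probabilities of class 1
print(log_loss(y_true, y_prob))          # works: inputs finite and strictly in (0, 1)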

As I am new to NNs in general, can anyone tell me what caused this error and how it can be fixed?

Edit: The loop at the bottom may look concerning. Originally this code compared the running times of several NN implementations; since I was only interested in the one written above, I modified that loop with as few changes as possible.

