Module Source.frame

Expand source code
import numpy as np
from Source.Loss import *
from Source.activations import *
from Source.evaluation import *
from Source.optmizers import *
from Source.data import *




class MultiLayer:
    def __init__(self, number_of_neurons=0, cost_func=cross_entropy):
        """ init of the class multilayer and needed variables
        variables:
            w,b lists for weights
            parameters dic for weights in the form of parameters['W1']
            layers_size for size of each layer
            number_of_input_neurons
            act_func list for activations of each layer
            derivative_act_func list for backward activations derivative functions
            cost_func the choosen cost functions

        parmeters:
            (method) : the cost function of model

        returns:
            (None)

        """
        self.w, self.b = [], []
        self.parameters = {}
        self.layer_size = []

        self.number_of_input_neurons = number_of_neurons
        self.number_of_outputs = 0

        self.act_func = []
        self.derivative_act_func = []

        self.cost_func = cost_func
        self.cost_func_der = determine_der_cost_func(self.cost_func)

        self.cache = {}
        self.prev = []

    def addLayerInput(self, size):
        """ add the input layer of the model

        parmeters:
            size (int) : size of input layer

        retruns:
            (None)

        """
        self.number_of_input_neurons = size
        self.layer_size.append(size)

    def addHidenLayer(self, size, act_func=sigmoid):
        """ add a hidden layer of the model

        parmeters:
            size (int) : size of input layer
            act_func (function) : the activation function of the layer
        
        retruns:
            (None)
        """
        self.layer_size.append(size)
        self.act_func.append(act_func)
        self.derivative_act_func.append(determine_der_act_func(act_func))

    def addOutputLayer(self, size, act_func=sigmoid):
        """ add the output layer of the model

        parmeters:
            size (int) : size of input layer
            act_func (function) : the activation function of the layer
        
        retruns:
            (None)

        """
        self.number_of_outputs = size
        self.layer_size.append(size)
        self.act_func.append(act_func)
        self.derivative_act_func.append(determine_der_act_func(act_func))

    def initialize_parameters(self, seed=2): #,init_func=random_init_zero_bias):
        """ initialize_weights of the model at the start with xavier init

        parmeters:
            seed (int) : seed for random function

        retruns:
            paramters

        """

        # todo very important check later

        np.random.seed(seed)  # fix the seed so the random initialization is reproducible

        L = len(self.layer_size)  # number of layers in the network

        for l in range(1, L):
            # He initialization: scale by sqrt(2 / fan_in)
            self.w.append(np.random.randn(self.layer_size[l], self.layer_size[l - 1])
                          * np.sqrt(2 / self.layer_size[l - 1]))
            self.b.append(np.zeros((self.layer_size[l], 1)))
            # seed += 1
            # np.random.seed(seed)

        for i in range(len(self.layer_size) - 1):
            self.parameters["W" + str(i + 1)] = self.w[i]
            self.parameters["b" + str(i + 1)] = self.b[i]

        return self.parameters

    def forward_propagation(self, X, drop=0):
        """ forward propagation through the layers

        parmeters:
            X (np.array) : input feature vector
            drop (float) : propablity to keep neurons or shut down
       
        retruns:
            cashe (dic) : the output of each layer in the form of cashe['Z1']
            Alast (np.array) : last layer activations


        """

        self.prev = []
        self.prev.append((1, X))
        for i in range(len(self.layer_size) - 1):
            Zi = np.dot(self.w[i], self.prev[i][1]) + self.b[i]
            Ai = self.act_func[i](Zi)
            # inverted dropout on hidden layers only (never the output layer):
            # keep each unit with probability `drop`, then rescale so the
            # expected activation is unchanged
            if drop > 0 and i != len(self.layer_size) - 2:
                D = np.random.rand(Ai.shape[0], Ai.shape[1]) < drop
                Ai = Ai * D
                Ai = Ai / drop

            self.prev.append((Zi, Ai))

        A_last = self.prev[-1][1]

        for i in range(len(self.layer_size) - 1):
            self.cache["Z" + str(i + 1)] = self.prev[i + 1][0]
            self.cache["A" + str(i + 1)] = self.prev[i + 1][1]

        # todo: should the cost be computed here?

        return A_last, self.cache

    def set_cost(self, cost_func):
        """ cahnge the initial cost function

        parmeters:
            cost_funct (function) : the new function
        
        retruns:
            cashe (dic) : the output of each layer in the form of cashe['Z1']
            Alast (np.array) : last layer activations

        """
        self.cost_func = cost_func
        self.cost_func_der = determine_der_cost_func(cost_func)

    def compute_cost(self, Alast, Y):
        """ compute cost of the given examples

        parmeters:
            Alast (np.array) : model predictions
            Y (np.array) : True labels
        
        retruns:
            cost (float) : cost output

        """
        m = Alast.shape[1]
        return self.cost_func(m, Alast, Y)

    def backward_propagation(self, X, Y):
        """ compute cost of the given examples

        parmeters:
            Alast (np.array) : model predictions
            Y (np.array) : True labels
        
        retruns:
            grads (dic) : all gridients of wieghts and biasses

        """

        m = X.shape[1]

        # todo: this all depends on the type of cost function and activation function used
        grad_list1_w = []
        grad_list1_b = []

        Alast = self.prev[-1][1]
        final_act = self.derivative_act_func[-1]
        dzi = self.cost_func_der(m, Alast, Y) * final_act(Alast)

        # note: the sigmoid output + cross-entropy pair simplifies to dZ = Alast - Y;
        # that special case is not applied here yet
        if self.cost_func == cross_entropy and self.act_func[-1] == sigmoid:
            pass

        for i in range(len(self.w), 0, -1):
            A_prev = self.prev[i - 1][1]
            dwi = (1 / m) * np.dot(dzi, A_prev.T)
            dbi = (1 / m) * np.sum(dzi, axis=1, keepdims=True)
            if i != 1:
                # propagate the error signal to the previous layer
                der_func = self.derivative_act_func[i - 2]
                dzi = np.multiply(np.dot(self.w[i - 1].T, dzi), der_func(A_prev))

            grad_list1_w.append(dwi)
            grad_list1_b.append(dbi)

        # reverse the grad lists so gradients run from the first layer to the last
        grad_list_w = grad_list1_w[::-1]
        grad_list_b = grad_list1_b[::-1]

        grads = {}

        for i in range(len(grad_list_w)):
            grads['dW' + str(i + 1)] = grad_list_w[i]
            grads['db' + str(i + 1)] = grad_list_b[i]

        return grads

    def set_cashe(self, cache, X):
        """ set an external cache

        parmeters:
            X (np.array) : input feature vector
            cache (dic) :  output of each layer
        
        retruns:
            (None)

        """
        self.cache = cache
        self.prev = []
        self.prev.append((1, X))
        for i in range(int(len(cache.keys()) / 2)):
            A, Z = cache["A" + str(i + 1)], cache["Z" + str(i + 1)]
            self.prev.append((Z, A))

    def set_parameters(self, para):
        """ set an external parmeters

        parmeters:
            para (dic) :  the weights and biasses
        
        retruns:
            (None)

        """
        self.parameters = para
        self.w = []
        self.b = []
        for i in range(int(len(para.keys()) / 2)):
            W, b = para["W" + str(i + 1)], para["b" + str(i + 1)]
            self.w.append(W)
            self.b.append(b)

    def set_parameters_internal(self):
        """ set an internal parmeters this is used by model during training

        parmeters:
            (None)
        
        retruns:
            (None)

        """
        self.parameters = {}
        for i in range(len(self.w)):
            self.parameters["W" + str(i + 1)] = self.w[i]
            self.parameters["b" + str(i + 1)] = self.b[i]

    def update_parameters(self, grads, learning_rate=1.2, reg_term=0, m=1):
        """ update parameters with a gradient descent step (with L2 weight decay)

        parameters:
            grads (dict) : the gradients of the weights and biases
            learning_rate (float) : the learning rate hyperparameter
            reg_term (float) : the regularization term (applied as the weight-decay factor 1 - reg_term / m)
            m (int) : number of training examples, used to scale the regularization term

        returns:
            dictionary containing the updated parameters

        """

        for i in range(len(self.w)):
            self.w[i] = (1-reg_term/m) * self.w[i] - learning_rate * grads["dW" + str(i + 1)]
            self.b[i] = (1-reg_term/m) * self.b[i] - learning_rate * grads["db" + str(i + 1)]

        self.set_parameters_internal()

        return self.parameters

    def update_parameters_adagrad(self, grads, adagrads, learning_rate=1.2, reg_term=0, m=1):
        """ update parameters using Adagrad

        parameters:
            grads (dict) : the gradients of the weights and biases
            adagrads (dict) : the accumulated squared gradients
            learning_rate (float) : the learning rate hyperparameter
            reg_term (float) : the regularization term (applied as the weight-decay factor 1 - reg_term / m)
            m (int) : number of training examples, used to scale the regularization term

        returns:
            dictionary containing the updated parameters

        """

        for i in range(len(self.w)):
            self.w[i] = (1 - reg_term / m) * self.w[i] - (learning_rate / (np.sqrt(adagrads["dW" + str(i + 1)]) + 1e-9)) * grads["dW" + str(i + 1)]
            self.b[i] = (1 - reg_term / m) * self.b[i] - (learning_rate / (np.sqrt(adagrads["db" + str(i + 1)]) + 1e-9)) * grads["db" + str(i + 1)]
        self.set_parameters_internal()

        return self.parameters

    def upadte_patameters_RMS(self, grads, rmsgrads, learning_rate=1.2, reg_term=0, m=1, eps=None):
        """ update parameters using the RMSprop rule

        parameters:
            grads (dict) : the gradients of the weights and biases
            rmsgrads (dict) : running average of the squared gradients (rho * previous average + (1 - rho) * current squared gradients)
            learning_rate (float) : the learning rate hyperparameter
            reg_term (float) : the regularization term (applied as the weight-decay factor 1 - reg_term / m)
            m (int) : number of training examples, used to scale the regularization term
            eps (float) : small value added to rmsgrads to avoid division by zero

        returns:
            dictionary containing the updated parameters

        """

        for i in range(len(self.w)):

            self.w[i] = (1-reg_term/m) * self.w[i] - (learning_rate / (np.sqrt(rmsgrads["dW" + str(i + 1)]) + eps)) * grads["dW" + str(i + 1)]
            self.b[i] = (1-reg_term/m) * self.b[i] - (learning_rate / (np.sqrt(rmsgrads["db"+str(i+1)]) + eps)) * grads["db" + str(i + 1)]
        self.set_parameters_internal()

        return self.parameters

    def upadte_patameters_adadelta(self, grads, delta, learning_rate=1.2, reg_term=0, m=1):
        """ update parameters using Adadelta

        parameters:
            grads (dict) : the gradients of the weights and biases (not used in this function)
            delta (dict) : the values to subtract from the current parameters
            learning_rate (float) : the learning rate hyperparameter (not used in this function)
            reg_term (float) : the regularization term (applied as the weight-decay factor 1 - reg_term / m)
            m (int) : number of training examples, used to scale the regularization term

        returns:
            dictionary containing the updated parameters

        """


        for i in range(len(self.w)):

            self.w[i] = (1-reg_term/m) * self.w[i] - delta["dW" + str(i + 1)]
            self.b[i] = (1-reg_term/m) * self.b[i] - delta["db" + str(i + 1)]
        self.set_parameters_internal()

        return self.parameters

    def update_parameters_adam(self, grads, adamgrads, Fgrads, learning_rate=1.2, reg_term=0, m=1, eps=None):
        """ update parameters using Adam

        parameters:
            grads (dict) : the gradients of the weights and biases (not used in this function)
            adamgrads (dict) : running average of the squared gradients (rho * previous average + (1 - rho) * current squared gradients)
            Fgrads (dict) : running average of the gradients (rhof * previous average + (1 - rhof) * current gradients)
            learning_rate (float) : the learning rate hyperparameter (the bias-corrected alpha_t, not alpha)
            reg_term (float) : the regularization term (applied as the weight-decay factor 1 - reg_term / m)
            m (int) : number of training examples, used to scale the regularization term
            eps (float) : small value added to adamgrads to avoid division by zero

        returns:
            dictionary containing the updated parameters

        """

        for i in range(len(self.w)):

            self.w[i] = (1-reg_term/m) * self.w[i] - (learning_rate/np.sqrt(adamgrads["dW"+str(i+1)] + eps)) * Fgrads["dW" + str(i + 1)]
            self.b[i] = (1-reg_term/m) * self.b[i] - (learning_rate /np.sqrt(adamgrads["db"+str(i+1)] + eps)) * Fgrads["db" + str(i + 1)]
        self.set_parameters_internal()

        return self.parameters

    def train(self, X, Y, num_iterations=10000, print_cost=False, print_cost_each=100, cont=0, learning_rate=1, reg_term=0, batch_size=0, opt_func=gd_optm, param_dic=None, drop=0):
        """ train the model given the data, hyperparameters, and optimizer

        parameters:
            X (np.array) : input feature vector
            Y (np.array) : the true labels
            num_iterations (int) : number of epochs
            print_cost (bool) : whether to print the cost
            print_cost_each (int) : print the cost every this many iterations
            cont (int) : passed through to the optimizer function
            learning_rate (float) : the learning rate hyperparameter
            reg_term (float) : the regularization term
            batch_size (int) : size of the mini batch, 0 for full-batch gradient descent
            opt_func (function) : the optimizer function to call
            param_dic (dict) : optimizer-specific parameters, passed through to the optimizer function
            drop (float) : keep probability for dropout, 0 disables dropout

        returns:
            parameters (dict) : weights and biases after training
            costs : the costs recorded during training
        """

        parameters, costs = opt_func(self, X, Y, num_iterations, print_cost, print_cost_each, cont,
                                     learning_rate, reg_term, batch_size, param_dic, drop)
        return parameters, costs

    def predict(self, X):
        """ perdict classes or output

        parmeters:
            X (np.array) :  input feature vector
        
        retruns:
            Alast (np.array) : output of last layer
        """

        Alast, cache = self.forward_propagation(X)
        #predictions = (Alast > thres) * 1

        return Alast

    def test(self, X, Y,eval_func=accuracy_score):
        """ evalute model

        parmeters:
            X (np.array) :  input feature vector
            Y (np.array) :  the true label
            eval_func (function) : the method of evalution
        
        retruns:
            Alast (np.array) : output of last layer
        """


        Alast, cache = self.forward_propagation(X)

        acc = eval_func(Alast,Y)

        return acc
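
The class is normally driven end to end: add layers, initialize, train with an optimizer from Source.optmizers, then predict or test. The sketch below is illustrative only; it assumes the default sigmoid activation, cross_entropy cost, gd_optm optimizer and accuracy_score metric exported by the imported Source modules, uses the features-by-examples layout (X.shape == (n_features, n_examples)) implied by the forward pass above, and the toy data and hyperparameter values are made up.

import numpy as np
from Source.frame import MultiLayer
from Source.activations import sigmoid   # default activation used by the layer methods
from Source.optmizers import gd_optm     # default optimizer used by train()

# toy binary-classification data: 4 features, 200 examples (columns are examples)
X = np.random.randn(4, 200)
Y = (X.sum(axis=0, keepdims=True) > 0) * 1

model = MultiLayer()
model.addLayerInput(4)
model.addHidenLayer(8, act_func=sigmoid)
model.addOutputLayer(1, act_func=sigmoid)
model.initialize_parameters(seed=2)

parameters, costs = model.train(X, Y, num_iterations=1000, print_cost=True,
                                print_cost_each=100, learning_rate=0.5,
                                opt_func=gd_optm)

Alast = model.predict(X)   # raw activations of the output layer
acc = model.test(X, Y)     # accuracy_score(Alast, Y) by default (see Source.evaluation)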

Classes

class MultiLayer (number_of_neurons=0, cost_func=<function cross_entropy>)

init the MultiLayer class and the variables it needs. variables: w, b lists of weights and biases; parameters dict of weights and biases in the form parameters['W1']; layer_size for the size of each layer; number_of_input_neurons; act_func list of activation functions per layer; derivative_act_func list of activation derivatives for the backward pass; cost_func the chosen cost function

parameters: number_of_neurons (int) : number of input neurons; cost_func (function) : the cost function of the model

returns: (None)

Methods

def addHidenLayer(self, size, act_func=<function sigmoid>)

add a hidden layer to the model

parameters: size (int) : size of the hidden layer; act_func (function) : the activation function of the layer

returns: (None)

def addLayerInput(self, size)

add the input layer of the model

parameters: size (int) : size of the input layer

returns: (None)

def addOutputLayer(self, size, act_func=<function sigmoid>)

add the output layer of the model

parameters: size (int) : size of the output layer; act_func (function) : the activation function of the layer

returns: (None)

def backward_propagation(self, X, Y)

back propagate through the layers to compute the gradients

parameters: X (np.array) : input feature vector; Y (np.array) : true labels

returns: grads (dict) : gradients of all weights and biases
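
backward_propagation relies on the activations cached by the most recent forward_propagation call (self.prev), so a manual training step runs the two in sequence before updating the weights. A minimal, illustrative sketch, reusing the model, X and Y from the module-level example above:

# one hand-rolled training iteration (a simplified version of what the
# optimizer functions in Source.optmizers do in a loop)
Alast, cache = model.forward_propagation(X)   # caches Z/A for every layer
cost = model.compute_cost(Alast, Y)
grads = model.backward_propagation(X, Y)      # uses the cached activations
model.update_parameters(grads, learning_rate=0.5, reg_term=0, m=X.shape[1])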

def compute_cost(self, Alast, Y)

compute the cost of the given examples

parameters: Alast (np.array) : model predictions; Y (np.array) : true labels

returns: cost (float) : the cost value

def forward_propagation(self, X, drop=0)

forward propagation through the layers

parameters: X (np.array) : input feature vector; drop (float) : keep probability for inverted dropout (0 disables dropout)

returns: Alast (np.array) : last layer activations; cache (dict) : the output of each layer in the form cache['Z1']
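
The drop argument is a keep probability used for inverted dropout: hidden-layer activations are masked and then rescaled by the keep probability so their expected value is unchanged, and the output layer is never dropped. A minimal, illustrative sketch of the masking step performed inside the loop (the shapes and keep probability are made up):

import numpy as np

keep_prob = 0.8                               # corresponds to the `drop` argument
A = np.random.rand(8, 200)                    # a hidden layer's activations (units x examples)

mask = np.random.rand(*A.shape) < keep_prob   # keep each unit with probability keep_prob
A = (A * mask) / keep_prob                    # rescale so the expected activation is unchanged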

def initialize_parameters(self, seed=2)

initialize the model weights with He initialization (scaled by sqrt(2 / fan_in))

parameters: seed (int) : seed for the random number generator

returns: parameters (dict) : the initialized weights and biases

def predict(self, X)

predict classes or outputs

parameters: X (np.array) : input feature vector

returns: Alast (np.array) : output of the last layer

def set_cashe(self, cache, X)

set an external cache

parameters: cache (dict) : the output of each layer; X (np.array) : input feature vector

returns: (None)

def set_cost(self, cost_func)

change the initial cost function

parameters: cost_func (function) : the new cost function

returns: (None)

def set_parameters(self, para)

set external parameters

parameters: para (dict) : the weights and biases

returns: (None)

def set_parameters_internal(self)

rebuild the parameters dictionary from the internal weight and bias lists; used by the model during training

parameters: (None)

returns: (None)

def test(self, X, Y, eval_func=<function accuracy_score>)

evaluate the model

parameters: X (np.array) : input feature vector; Y (np.array) : the true labels; eval_func (function) : the evaluation metric

returns: acc (float) : the evaluation score

def train(self, X, Y, num_iterations=10000, print_cost=False, print_cost_each=100, cont=0, learning_rate=1, reg_term=0, batch_size=0, opt_func=<function gd_optm>, param_dic=None, drop=0)

train the model given the data, hyperparameters, and optimizer

parameters: X (np.array) : input feature vector; Y (np.array) : the true labels; num_iterations (int) : number of epochs; print_cost (bool) : whether to print the cost; print_cost_each (int) : print the cost every this many iterations; cont (int) : passed through to the optimizer function; learning_rate (float) : the learning rate hyperparameter; reg_term (float) : the regularization term; batch_size (int) : size of the mini batch, 0 for full-batch gradient descent; opt_func (function) : the optimizer function to call; param_dic (dict) : optimizer-specific parameters, passed through to the optimizer function; drop (float) : keep probability for dropout, 0 disables dropout

returns: parameters (dict) : weights and biases after training; costs : the costs recorded during training
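
train itself only forwards everything to opt_func, which runs the actual optimization loop. An illustrative call, reusing the model, X and Y from the module-level example and assuming gd_optm honours the regularization, mini-batch and dropout arguments; the hyperparameter values are made up:

from Source.optmizers import gd_optm

parameters, costs = model.train(X, Y,
                                num_iterations=2000,
                                print_cost=True, print_cost_each=200,
                                learning_rate=0.1,
                                reg_term=0.01,    # regularization strength
                                batch_size=32,    # 0 would mean full-batch gradient descent
                                opt_func=gd_optm,
                                drop=0.8)         # keep 80% of hidden units during training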

def upadte_patameters_RMS(self, grads, rmsgrads, learning_rate=1.2, reg_term=0, m=1, eps=None)

update parameters using the RMSprop rule

parameters: grads (dict) : the gradients of the weights and biases; rmsgrads (dict) : running average of the squared gradients (rho * previous average + (1 - rho) * current squared gradients); learning_rate (float) : the learning rate hyperparameter; reg_term (float) : the regularization term; eps (float) : small value added to rmsgrads to avoid division by zero

returns: dictionary containing the updated parameters
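
This method only applies the update; rmsgrads must be maintained by the caller (in this codebase, an optimizer in Source.optmizers). A minimal sketch of standard RMSprop bookkeeping, reusing model, grads and X from the earlier sketches; rho, eps and the learning rate are illustrative values, not necessarily those used by the repository's optimizer:

import numpy as np

rho, eps = 0.9, 1e-8
rmsgrads = {k: np.zeros_like(v) for k, v in grads.items()}   # one accumulator per dW/db

# each iteration, after computing grads:
for k in grads:
    rmsgrads[k] = rho * rmsgrads[k] + (1 - rho) * grads[k] ** 2

model.upadte_patameters_RMS(grads, rmsgrads, learning_rate=0.01,
                            m=X.shape[1], eps=eps)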

def upadte_patameters_adadelta(self, grads, delta, learning_rate=1.2, reg_term=0, m=1)

update parameters using Adadelta

parameters: grads (dict) : the gradients of the weights and biases (not used in this function); delta (dict) : the values to subtract from the current parameters; learning_rate (float) : the learning rate hyperparameter (not used in this function); reg_term (float) : the regularization term

returns: dictionary containing the updated parameters
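
Here delta is computed by the caller; the method only subtracts it (with weight decay). For reference, the textbook Adadelta rule for producing such a delta looks like the sketch below; this is the standard formulation, not necessarily the exact code in Source.optmizers, and it reuses model and grads from the earlier sketches:

import numpy as np

rho, eps = 0.95, 1e-6
Eg2 = {k: np.zeros_like(v) for k, v in grads.items()}    # running average of squared gradients
Edx2 = {k: np.zeros_like(v) for k, v in grads.items()}   # running average of squared updates
delta = {}

# each iteration, after computing grads:
for k in grads:
    Eg2[k] = rho * Eg2[k] + (1 - rho) * grads[k] ** 2
    delta[k] = np.sqrt(Edx2[k] + eps) / np.sqrt(Eg2[k] + eps) * grads[k]
    Edx2[k] = rho * Edx2[k] + (1 - rho) * delta[k] ** 2

model.upadte_patameters_adadelta(grads, delta)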

def update_parameters(self, grads, learning_rate=1.2, reg_term=0, m=1)

update parameters with a gradient descent step (with L2 weight decay)

parameters: grads (dict) : the gradients of the weights and biases; learning_rate (float) : the learning rate hyperparameter; reg_term (float) : the regularization term (applied as the weight-decay factor 1 - reg_term / m)

returns: dictionary containing the updated parameters
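
Per layer i the call applies W_i <- (1 - reg_term / m) * W_i - learning_rate * dW_i (and the same for b_i): an L2 weight-decay factor followed by the gradient step. An illustrative call, reusing model, grads and X from the earlier training-step sketch (the hyperparameter values are made up):

parameters = model.update_parameters(grads, learning_rate=0.5,
                                     reg_term=0.01, m=X.shape[1])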

def update_parameters_adagrad(self, grads, adagrads, learning_rate=1.2, reg_term=0, m=1)

update parameters using Adagrad

parameters: grads (dict) : the gradients of the weights and biases; adagrads (dict) : the accumulated squared gradients; learning_rate (float) : the learning rate hyperparameter; reg_term (float) : the regularization term

returns: dictionary containing the updated parameters
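
As with the other adaptive updates, the accumulator is maintained by the caller: adagrads should hold the running sum of squared gradients. A minimal sketch of standard Adagrad bookkeeping, reusing model, grads and X from the earlier sketches (the learning rate is illustrative):

import numpy as np

adagrads = {k: np.zeros_like(v) for k, v in grads.items()}

# each iteration, after computing grads:
for k in grads:
    adagrads[k] += grads[k] ** 2          # accumulate squared gradients

model.update_parameters_adagrad(grads, adagrads, learning_rate=0.01,
                                m=X.shape[1])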

def update_parameters_adam(self, grads, adamgrads, Fgrads, learning_rate=1.2, reg_term=0, m=1, eps=None)

update parameters using Adam

parameters: grads (dict) : the gradients of the weights and biases (not used in this function); adamgrads (dict) : running average of the squared gradients (rho * previous average + (1 - rho) * current squared gradients); Fgrads (dict) : running average of the gradients (rhof * previous average + (1 - rhof) * current gradients); learning_rate (float) : the learning rate hyperparameter (the bias-corrected alpha_t, not alpha); reg_term (float) : the regularization term; eps (float) : small value added to adamgrads to avoid division by zero

returns: dictionary containing the updated parameters
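
Here adamgrads is the second-moment (squared-gradient) average, Fgrads is the first-moment (gradient) average, and the bias correction is folded into the learning rate that is passed in (alpha_t rather than alpha). A minimal sketch of standard Adam bookkeeping, reusing model, grads and X from the earlier sketches; alpha, rho, rhof and eps are illustrative values, not necessarily those used in Source.optmizers:

import numpy as np

alpha, rho, rhof, eps = 0.001, 0.999, 0.9, 1e-8
adamgrads = {k: np.zeros_like(v) for k, v in grads.items()}   # second moment (squared grads)
Fgrads = {k: np.zeros_like(v) for k, v in grads.items()}      # first moment (grads)

t = 1  # iteration counter, starting at 1
for k in grads:
    Fgrads[k] = rhof * Fgrads[k] + (1 - rhof) * grads[k]
    adamgrads[k] = rho * adamgrads[k] + (1 - rho) * grads[k] ** 2

# fold the bias corrections into the step size: pass alpha_t instead of alpha
alpha_t = alpha * np.sqrt(1 - rho ** t) / (1 - rhof ** t)
model.update_parameters_adam(grads, adamgrads, Fgrads,
                             learning_rate=alpha_t, m=X.shape[1], eps=eps)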
