Layer
```python
from abc import ABC, abstractmethod
from typing import Optional

import numpy as np

# Activation, ReLU, Optimizer, SGD, LossFunction, MeanSquaredError,
# Initializer, HeNormal and Zero are defined elsewhere in the project.


class Layer(ABC):
    def __init__(
        self,
        n_neurons: Optional[int] = None,
        n_inputs: Optional[int] = None,
        activation: Optional[Activation] = ReLU,
        optimizer: Optional[Optimizer] = SGD,
        loss_fn: Optional[LossFunction] = MeanSquaredError,
        initializer: Optional[Initializer] = HeNormal(),
        weights_initializer: Optional[Initializer] = HeNormal(),
        bias_initializer: Optional[Initializer] = Zero(),
        name: str = ""
    ):
        if activation is None:
            raise ValueError(
                "Missing activation function. Cannot be empty. Example: activation=ReLU"
            )
        self.n_inputs = n_inputs
        self.n_neurons = n_neurons
        self.activation = activation
        self.optimizer = optimizer
        self.neurons = None
        self.loss_fn = loss_fn
        self.name = name
        # fall back to the shared initializer when a dedicated one is not provided
        self.weights_initializer = weights_initializer if weights_initializer else initializer
        self.bias_initializer = bias_initializer if bias_initializer else initializer
        # inputs are the same for every neuron, so store them once as n_inputs x 1
        self.inputs = None
        # weights, size: n_neurons x n_inputs
        self.W = None
        self.old_W = None
        # bias and weighted sums, size: n_neurons x 1
        self.b = None
        self.z = None
        # gradients, size: n_neurons x 1
        self.gradients = None
        # activated outputs (stored for the output layer only), size: n_neurons x 1
        self.activated = None
```
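The constructor defaults `activation` to the `ReLU` class itself, and the only activation call the base class makes is `activation.derivative(...)` in `calc_gradient_wrt_z` further down. As a point of reference, here is a minimal sketch of what such an activation could look like; the `activate` method name and the use of static methods are assumptions, not part of the class above.

```python
import numpy as np

# Hypothetical Activation sketch. Only `derivative` is required by Layer;
# `activate` is an assumed forward-pass hook. Static methods are used so the
# class itself can be passed, matching the `activation=ReLU` default above.
class ReLU:
    @staticmethod
    def activate(z: np.ndarray) -> np.ndarray:
        # element-wise max(0, z)
        return np.maximum(0, z)

    @staticmethod
    def derivative(z: np.ndarray) -> np.ndarray:
        # 1 where z > 0, 0 elsewhere
        return (z > 0).astype(z.dtype)
```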
```python
    # class Layer, continued
    def initialize_params(self, inputs: np.ndarray):
        # inputs are refreshed on every call
        self.inputs = inputs.reshape((self.n_inputs, 1))
        # initialize weights and biases only once; afterwards reuse the previously learned values
        if self.W is None or self.b is None:
            self.W = self.weights_initializer.gen_W(self.n_inputs, self.n_neurons)
            self.b = self.bias_initializer.gen_b(self.n_neurons)
```
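`initialize_params` delegates to the initializers through `gen_W(n_inputs, n_neurons)` and `gen_b(n_neurons)`. Only those two signatures are taken from the calls above; the He-normal formula and the returned shapes in the following sketch (`n_neurons x n_inputs` for weights, `n_neurons x 1` for biases, following the size comments in `__init__`) are illustrative assumptions rather than the project's actual `HeNormal`/`Zero` code.

```python
import numpy as np

# Hypothetical Initializer sketches; only the gen_W / gen_b signatures are
# taken from initialize_params above, the rest is assumed.
class HeNormal:
    def gen_W(self, n_inputs: int, n_neurons: int) -> np.ndarray:
        # He-normal: zero-mean Gaussian with std sqrt(2 / fan_in)
        return np.random.randn(n_neurons, n_inputs) * np.sqrt(2.0 / n_inputs)

    def gen_b(self, n_neurons: int) -> np.ndarray:
        return np.zeros((n_neurons, 1))


class Zero:
    def gen_W(self, n_inputs: int, n_neurons: int) -> np.ndarray:
        return np.zeros((n_neurons, n_inputs))

    def gen_b(self, n_neurons: int) -> np.ndarray:
        return np.zeros((n_neurons, 1))
```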
```python
    # class Layer, continued
    @abstractmethod
    def forward_propagation(self, inputs: np.ndarray, no_save: Optional[bool] = False) -> np.ndarray:
        pass

    @abstractmethod
    def back_propagation(self, y_orig, y_pred):
        pass
```
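Subclasses implement `forward_propagation` and `back_propagation`. The following is a minimal, hypothetical subclass showing how the helpers defined on `Layer` could be wired together; the `activate` call, the returned gradient tuple, and the name `DenseOutputLayer` are assumptions, and the weight update is omitted because the base class does not show how `optimizer` is applied.

```python
# Hypothetical concrete layer, for illustration only.
class DenseOutputLayer(Layer):
    def forward_propagation(self, inputs: np.ndarray, no_save: Optional[bool] = False) -> np.ndarray:
        self.initialize_params(inputs)
        # weighted sum z = W·x + b, shape n_neurons x 1
        z = self.W @ self.inputs + self.b
        activated = self.activation.activate(z)  # `activate` is an assumed method
        if not no_save:
            self.z = z
            self.activated = activated
        return activated

    def back_propagation(self, y_orig, y_pred):
        # ∂L/∂z via the chain rule, then ∂L/∂W and ∂L/∂b
        dl_dz = self.calc_gradient_wrt_z(self.z, y_pred, y_orig)
        # broadcasting (n_neurons x 1) against (1 x n_inputs) yields the W-shaped gradient
        dl_dw = self.calc_gradient_wrt_w(dl_dz, self.inputs.T)
        dl_db = self.calc_gradient_wrt_b(dl_dz)
        self.gradients = dl_dz
        return dl_dw, dl_db
```

A layer built this way would be constructed as, for example, `DenseOutputLayer(n_neurons=1, n_inputs=4)`, relying on the defaults for the activation, optimizer, loss function and initializers.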
```python
    # class Layer, continued
    def calc_gradient_wrt_b(self, dl_dz):
        # ∂L/∂b = ∂L/∂z
        return dl_dz

    def calc_gradient_wrt_w(self, dl_dz, inputs):
        # ∂L/∂W = ∂L/∂z · inputs
        return dl_dz * inputs

    def calc_gradient_wrt_z(self, weighted_sum, y_pred, y_orig):
        # ∂L/∂y_pred
        dl_dy = self.calc_gradient_wrt_y_pred(y_pred, y_orig)
        # ∂a/∂z
        da_dz = self.activation.derivative(weighted_sum)
        # chain rule: ∂L/∂z = ∂a/∂z · ∂L/∂y_pred
        dl_dz = da_dz * dl_dy
        return dl_dz

    def calc_gradient_wrt_y_pred(self, y_pred, y_orig):
        # derivative of mean squared error, normalized by n_inputs
        return 2 * (y_pred - y_orig) / self.n_inputs
```
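Written out, the four helpers implement the chain rule for a mean-squared-error loss, with the normalization by `n_inputs` exactly as in the code; the elementwise product in `calc_gradient_wrt_w` produces the outer product with the inputs, assuming the inputs are passed so that broadcasting yields the `n_neurons x n_inputs` shape of `W`.

```latex
% chain rule implemented by the gradient helpers, with n = n_inputs as in the code
\frac{\partial L}{\partial \hat{y}} = \frac{2\,(\hat{y} - y)}{n}, \qquad
\frac{\partial L}{\partial z} = \frac{\partial a}{\partial z}\cdot\frac{\partial L}{\partial \hat{y}}, \qquad
\frac{\partial L}{\partial W} = \frac{\partial L}{\partial z}\, x^{\top}, \qquad
\frac{\partial L}{\partial b} = \frac{\partial L}{\partial z}
```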