Adam
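
The apply method below implements the standard Adam update rule with bias correction. Writing g_t for the gradient of the loss with respect to a parameter (dl_dw or dl_db in the code), η for the learning rate, and t for the timestep, each call performs:

$$
\begin{aligned}
m_t &= \beta_1\, m_{t-1} + (1 - \beta_1)\, g_t \\
v_t &= \beta_2\, v_{t-1} + (1 - \beta_2)\, g_t^2 \\
\hat{m}_t &= \frac{m_t}{1 - \beta_1^{\,t}}, \qquad \hat{v}_t = \frac{v_t}{1 - \beta_2^{\,t}} \\
\theta_t &= \theta_{t-1} - \frac{\eta}{\sqrt{\hat{v}_t} + \epsilon}\, \hat{m}_t
\end{aligned}
$$

The same update is applied element-wise and independently to the weights W and the biases b. The symbols θ, g_t, and η are used only for exposition here and do not appear in the library's code.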

import numpy as np

# Optimizer is ncxlib's optimizer base class.
class Adam(Optimizer):
    """
    Adam Optimizer.

    Attributes:
    learning_rate : float
        The learning rate for parameter updates.
    beta_1: float
        The decay_rate for first momentum.
    beta_2: float
        The decay_rate for second momentum.
    epsilon:
        A small constant to avoid division by zero

    """

    def __init__(self, learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-07):
        super().__init__(learning_rate)
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.m_w = 0  # first moment estimate for the weights
        self.v_w = 0  # second moment estimate for the weights
        self.m_b = 0  # first moment estimate for the biases
        self.v_b = 0  # second moment estimate for the biases
        self.timestep = 0

    def apply(self, W: np.ndarray, dl_dw: np.ndarray, b: np.ndarray, dl_db: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        self.timestep += 1

        # Update the biased first moment estimates (exponential moving average of the gradients).
        self.m_w = (self.beta_1 * self.m_w) + ((1 - self.beta_1) * dl_dw)
        self.m_b = (self.beta_1 * self.m_b) + ((1 - self.beta_1) * dl_db)

        # Update the biased second moment estimates (exponential moving average of the squared gradients).
        self.v_w = (self.beta_2 * self.v_w) + ((1 - self.beta_2) * np.square(dl_dw))
        self.v_b = (self.beta_2 * self.v_b) + ((1 - self.beta_2) * np.square(dl_db))

        # Bias-correct both estimates to compensate for their initialization at zero.
        m_w_hat = self.m_w / (1 - (self.beta_1 ** self.timestep))
        m_b_hat = self.m_b / (1 - (self.beta_1 ** self.timestep))
        v_w_hat = self.v_w / (1 - (self.beta_2 ** self.timestep))
        v_b_hat = self.v_b / (1 - (self.beta_2 ** self.timestep))

        # Scale each parameter's step by the inverse square root of its second moment estimate.
        W -= (self.learning_rate / (np.sqrt(v_w_hat) + self.epsilon)) * m_w_hat
        b -= (self.learning_rate / (np.sqrt(v_b_hat) + self.epsilon)) * m_b_hat

        return W, b
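
A minimal usage sketch: the array shapes below are made up purely for illustration, and it calls apply directly just to show the inputs and outputs documented above (during real training the network's back_propagation / train loop would presumably drive these calls). The method returns the updated W and b, and also updates the arrays in place.

import numpy as np

# Hypothetical parameter shapes: a layer with 3 inputs and 2 neurons.
W = np.random.randn(2, 3)
b = np.zeros((2, 1))
dl_dw = np.random.randn(2, 3)  # gradient of the loss w.r.t. W
dl_db = np.random.randn(2, 1)  # gradient of the loss w.r.t. b

optimizer = Adam()  # documented defaults: learning_rate=0.01, beta_1=0.9, beta_2=0.999

# One Adam step per call; the instance keeps its moment estimates and timestep
# between calls, so the same optimizer should be reused for successive updates
# of the same parameters.
for _ in range(3):
    W, b = optimizer.apply(W, dl_dw, b, dl_db)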