SGD
```python
import numpy as np

class SGD(Optimizer):
    """
    Stochastic Gradient Descent (SGD) optimizer.

    Update rule: θ = θ − η · ∂L/∂θ

    Attributes:
        learning_rate : float
            The learning rate η for parameter updates.
    """

    def __init__(self, learning_rate=0.01):
        super().__init__(learning_rate)

    def apply(self, W: np.ndarray, dl_dw: np.ndarray, b: np.ndarray, dl_db: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        # Step each parameter against its gradient, scaled by the learning rate.
        W -= self.learning_rate * dl_dw
        b -= self.learning_rate * dl_db
        return W, b
```
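
Below is a minimal usage sketch. The `Optimizer` base class shown here is a hypothetical stand-in that only records the learning rate; the library's actual base class may carry more state.

```python
import numpy as np

# Hypothetical stand-in for the library's Optimizer base class; assumed here
# to do nothing beyond storing the learning rate.
class Optimizer:
    def __init__(self, learning_rate):
        self.learning_rate = learning_rate

# Toy weights, biases, and gradients for a single dense layer.
W = np.array([[0.5, -0.2], [0.1, 0.4]])
b = np.array([0.0, 0.0])
dl_dw = np.array([[0.1, 0.1], [-0.2, 0.0]])
dl_db = np.array([0.05, -0.05])

opt = SGD(learning_rate=0.1)          # SGD as defined above
W, b = opt.apply(W, dl_dw, b, dl_db)  # one gradient step
# W is now [[0.49, -0.21], [0.12, 0.4]] and b is [-0.005, 0.005]
```

Note that `apply` subtracts with `-=`, so the arrays passed in are updated in place as well as returned.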