k_fold_cross_validation
def k_fold_cross_validation(X, y, k=5, random_seed=None, *, score_fn=None):
    """Split a dataset into K shuffled folds and score each train/test split.

    The samples are shuffled once, partitioned into ``k`` nearly equal folds
    (via ``np.array_split``), and every fold is used exactly once as the test
    set while the remaining folds form the training set.

    Args:
        X (array-like): Inputs; indexed along the first axis.
        y (array-like): Target labels; must have the same length as ``X``.
        k (int): Number of folds. Must satisfy ``2 <= k <= len(X)``.
        random_seed (int): Optional seed for the shuffle. When given, a local
            ``RandomState`` is used, so the global NumPy RNG state is left
            untouched while the permutation stays the same as with
            ``np.random.seed(random_seed)``.
        score_fn (callable): Optional keyword-only scorer called as
            ``score_fn(X_train, y_train, X_test, y_test)``; its return value
            is stored as the fold's score. When omitted, a placeholder score
            is used (see Returns).

    Returns:
        scores (list): One score per fold. Without ``score_fn`` this is the
            fraction of test labels equal to the training labels at the same
            leading positions -- a label-agreement placeholder, NOT a model
            evaluation.
        folds (list): List of (X_train, y_train, X_test, y_test) tuples for
            each fold.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, if ``k < 2``, or if
            ``k`` exceeds the number of samples.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(X)

    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same length, got {n_samples} and {len(y)}"
        )
    if k < 2:
        raise ValueError(f"k must be at least 2, got {k}")
    if k > n_samples:
        raise ValueError(
            f"k={k} cannot exceed the number of samples ({n_samples})"
        )

    if random_seed is None:
        indices = np.random.permutation(n_samples)
    else:
        # A local RandomState yields the same permutation as seeding the
        # global generator, without the process-wide side effect.
        indices = np.random.RandomState(random_seed).permutation(n_samples)
    folds_indices = np.array_split(indices, k)

    scores = []
    folds = []
    for i, test_indices in enumerate(folds_indices):
        train_indices = np.concatenate(
            [fold for j, fold in enumerate(folds_indices) if j != i]
        )
        X_train, X_test = X[train_indices], X[test_indices]
        y_train, y_test = y[train_indices], y[test_indices]
        folds.append((X_train, y_train, X_test, y_test))

        if score_fn is not None:
            score = score_fn(X_train, y_train, X_test, y_test)
        else:
            # With k == 2 and an odd sample count the test fold can be one
            # element longer than the training fold; compare only the common
            # prefix so the shapes always agree.
            common = min(len(y_test), len(y_train))
            score = np.mean(y_test[:common] == y_train[:common])
        scores.append(score)
    return scores, folds
Last updated