k_fold_cross_validation

def k_fold_cross_validation(X, y, k=5, random_seed=None):
    '''
    Performs a K-Fold Cross Validation on the given dataset. 
    Used for evaluating a model's performance on different subsets of the data.
    
    Args:
        X (np.ndarray): inputs .
        y (np.ndarray): Target labels.
        k (int): Number of folds.
        random_seed (int): Optional random seed.

    Returns:
        scores (list): List of scores for each fold.
        folds (list): List of (X_train, y_train, X_test, y_test) tuples for each fold.
    '''
    if random_seed is not None:
        np.random.seed(random_seed)

    indices = np.random.permutation(len(X))
    folds_indices = np.array_split(indices, k)

    scores = []
    folds = []

    for i in range(k):
        test_indices = folds_indices[i]
        
        train_indices = []
        for j in range(k):
            if j != i:
                train_indices.append(folds_indices[j])

        train_indices = np.concatenate(train_indices)

        X_train, X_test = X[train_indices], X[test_indices]
        y_train, y_test = y[train_indices], y[test_indices]

        folds.append((X_train, y_train, X_test, y_test))
        score = np.mean(y_test == y_train[:len(y_test)])  
        scores.append(score)
    
    return scores, folds

Last updated