train_test_split
def train_test_split(X, y, test_size=0.2, random_state=None):
    """Randomly split features and targets into training and testing sets.

    The samples are shuffled with a random permutation of their indices; the
    first ``int(test_size * len(X))`` shuffled samples become the test set and
    the remainder become the training set.

    Args:
        X (np.ndarray): Feature matrix, indexed by sample along the first axis.
        y (np.ndarray): Target variable array with one entry per sample in X.
        test_size (float): Proportion of data to use for testing, between 0
            and 1 inclusive (default 0.2).
        random_state (int): Optional random seed for reproducibility. When
            given, a private generator is seeded with it, so NumPy's global
            random state is left untouched. When None, the global NumPy
            generator is used.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)`` where
            X_train (np.ndarray): Training feature matrix.
            X_test (np.ndarray): Testing feature matrix.
            y_train (np.ndarray): Training target variable array.
            y_test (np.ndarray): Testing target variable array.

    Raises:
        ValueError: If test_size is outside [0, 1] or X and y have different
            numbers of samples.
    """
    # A negative proportion would otherwise slice from the end and silently
    # invert the split; reject anything outside [0, 1] (including NaN).
    if not 0 <= test_size <= 1:
        raise ValueError(
            f"test_size must be between 0 and 1, got {test_size!r}"
        )

    num_samples = len(X)
    if len(y) != num_samples:
        raise ValueError(
            "X and y must have the same number of samples, "
            f"got {num_samples} and {len(y)}"
        )

    # RandomState(seed) yields the same stream as np.random.seed(seed) would,
    # but without reseeding the global generator the caller may rely on.
    if random_state is None:
        rng = np.random
    else:
        rng = np.random.RandomState(random_state)

    num_test_samples = int(test_size * num_samples)
    indices = rng.permutation(num_samples)
    test_indices = indices[:num_test_samples]
    train_indices = indices[num_test_samples:]

    # Split the data
    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]
    return X_train, X_test, y_train, y_test
Last updated