Breaking Data into a Train and Test Set

June 01, 2019

Imports

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

Load Data

# Load the iris dataset, then wrap its feature matrix (X) and its
# target array (y) in pandas DataFrames.
iris = load_iris()
X, y = pd.DataFrame(iris.data), pd.DataFrame(iris.target)

Split Data into Train and Test Set

# Hold out 20% of the rows as the test set; the remaining 80% form the
# training set. Rows are shuffled before splitting, and the fixed
# random_state makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=123,
)

# Report the shape of each resulting partition, one per line.
print(
    "X_train shape: {}".format(X_train.shape),
    "X_test shape: {}".format(X_test.shape),
    "y_train shape: {}".format(y_train.shape),
    "y_test shape: {}".format(y_test.shape),
    sep="\n",
)

X_train shape: (120, 4)

X_test shape: (30, 4)

y_train shape: (120, 1)

y_test shape: (30, 1)