# 17. Cross Validation

# basic libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# model classes
from sklearn import tree
from sklearn import cluster
from sklearn import svm
# datasets
from sklearn import datasets
# model selection tools
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn import metrics
# Load the iris data as a (features, targets) pair rather than a Bunch object.
iris_X, iris_y = datasets.load_iris(
    return_X_y=True,
)
# Show the container type of each returned object.
tuple(map(type, (iris_X, iris_y)))
(numpy.ndarray, numpy.ndarray)
# Show the dimensions of the feature matrix and of the target vector.
tuple(arr.shape for arr in (iris_X, iris_y))
((150, 4), (150,))
# Hold out part of the data as a test set; only the training part is
# cross validated below.
iris_X_train, iris_X_test, iris_y_train, iris_y_test = train_test_split(
    iris_X,
    iris_y,
)

# An untuned decision tree with all default hyperparameters.
dt = tree.DecisionTreeClassifier()

# Score the tree on each cross-validation fold of the training data
# (default number of folds); displays one score per fold.
cross_val_score(estimator=dt, X=iris_X_train, y=iris_y_train)
array([0.95652174, 0.91304348, 0.95454545, 0.90909091, 0.95454545])
# Average the per-fold scores into a single summary number.
cross_val_score(dt, iris_X_train, iris_y_train).mean()
0.9557312252964426
# Same summary, but splitting the training data into 10 folds instead of
# the default.
cross_val_score(dt, iris_X_train, iris_y_train, cv=10).mean()
0.9462121212121211
# Number of samples (rows) that ended up in the training split.
iris_X_train.shape[0]
112
# K-means with three clusters, restarted from 10 initializations.
km = cluster.KMeans(
    n_clusters=3,
    n_init=10,
)
# Cross validate the clustering; the named scorer compares the cluster
# assignments with the true labels, so the targets are passed as well.
cross_val_score(
    estimator=km,
    X=iris_X_train,
    y=iris_y_train,
    scoring='adjusted_mutual_info_score',
)
array([0.782306  , 0.86510452, 0.7988133 , 0.58578645, 0.74089703])
# Same experiment with only two clusters, for comparison with the
# three-cluster model.
km2 = cluster.KMeans(
    n_clusters=2,
    n_init=10,
)
cross_val_score(
    estimator=km2,
    X=iris_X_train,
    y=iris_y_train,
    scoring='adjusted_mutual_info_score',
)
array([0.67130847, 0.77264264, 0.62622801, 0.74281748, 0.73244825])
# Candidate hyperparameter values; the search tries every combination.
param_grid = dict(
    n_clusters=[2, 3, 4],
    n_init=[5, 10, 15],
)
# No scoring argument is given, so candidates are ranked with the
# estimator's own score method (see the GridSearchCV documentation).
km_opt = GridSearchCV(estimator=km, param_grid=param_grid)
# Fit on the features only; no targets are passed to the search.
km_opt.fit(iris_X_train)
GridSearchCV(estimator=KMeans(n_clusters=3, n_init=10),
             param_grid={'n_clusters': [2, 3, 4], 'n_init': [5, 10, 15]})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.