Class 29: Choosing a Model

  1. Log onto Prismia

  2. Share your favorite restaurant on/near campus in the Zoom chat

Portfolio PR

# %load http://drsmb.co/310
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from sklearn import datasets
from sklearn import cluster
from sklearn import svm
from sklearn import tree
from sklearn import model_selection
# Load the iris features and labels as plain arrays.
iris_X, iris_y = datasets.load_iris(return_X_y=True)

# Hold out a test set; no random_state is passed, so the split (and every
# score below) can differ from run to run.
(iris_X_train, iris_X_test,
 iris_y_train, iris_y_test) = model_selection.train_test_split(iris_X, iris_y)

# Candidate decision-tree settings: every combination of these is tried.
params_dt = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [2, 3, 4],
    'min_samples_leaf': list(range(2, 20, 2)),
}

# Search the grid with cross validation on the training data only.
dt = tree.DecisionTreeClassifier()
dt_opt = model_selection.GridSearchCV(dt, params_dt)
dt_opt.fit(iris_X_train, iris_y_train)
GridSearchCV(estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [2, 3, 4],
                         'min_samples_leaf': [2, 4, 6, 8, 10, 12, 14, 16, 18]})
# Predict labels for the held-out test samples through the fitted search object.
dt_opt.predict(iris_X_test)
array([1, 0, 0, 1, 2, 2, 0, 2, 0, 2, 0, 0, 2, 1, 2, 0, 2, 2, 2, 2, 0, 2,
       2, 0, 2, 1, 0, 1, 1, 0, 0, 2, 1, 2, 2, 1, 2, 2])
# Predict with the best estimator the search found; the output is the same
# as calling predict on dt_opt directly.
dt_opt.best_estimator_.predict(iris_X_test)
array([1, 0, 0, 1, 2, 2, 0, 2, 0, 2, 0, 0, 2, 1, 2, 0, 2, 2, 2, 2, 0, 2,
       2, 0, 2, 1, 0, 1, 1, 0, 0, 2, 1, 2, 2, 1, 2, 2])
# Show the cross-validation results as a table: one row per parameter
# combination, with timing columns, per-split scores, and the mean/std/rank.
pd.DataFrame(dt_opt.cv_results_)
mean_fit_time std_fit_time mean_score_time std_score_time param_criterion param_max_depth param_min_samples_leaf params split0_test_score split1_test_score split2_test_score split3_test_score split4_test_score mean_test_score std_test_score rank_test_score
0 0.000561 0.000138 0.000276 0.000041 gini 2 2 {'criterion': 'gini', 'max_depth': 2, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
1 0.000443 0.000023 0.000238 0.000013 gini 2 4 {'criterion': 'gini', 'max_depth': 2, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
2 0.000484 0.000022 0.000266 0.000011 gini 2 6 {'criterion': 'gini', 'max_depth': 2, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
3 0.000447 0.000027 0.000239 0.000015 gini 2 8 {'criterion': 'gini', 'max_depth': 2, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
4 0.000489 0.000022 0.000255 0.000008 gini 2 10 {'criterion': 'gini', 'max_depth': 2, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
5 0.000473 0.000031 0.000244 0.000019 gini 2 12 {'criterion': 'gini', 'max_depth': 2, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
6 0.000460 0.000031 0.000250 0.000013 gini 2 14 {'criterion': 'gini', 'max_depth': 2, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
7 0.000447 0.000031 0.000249 0.000019 gini 2 16 {'criterion': 'gini', 'max_depth': 2, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
8 0.000477 0.000021 0.000266 0.000032 gini 2 18 {'criterion': 'gini', 'max_depth': 2, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
9 0.000475 0.000038 0.000248 0.000021 gini 3 2 {'criterion': 'gini', 'max_depth': 3, 'min_sam... 0.869565 0.913043 1.000000 1.000000 0.954545 0.947431 0.050642 1
10 0.000502 0.000014 0.000261 0.000023 gini 3 4 {'criterion': 'gini', 'max_depth': 3, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
11 0.000470 0.000040 0.000243 0.000024 gini 3 6 {'criterion': 'gini', 'max_depth': 3, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
12 0.000438 0.000026 0.000234 0.000016 gini 3 8 {'criterion': 'gini', 'max_depth': 3, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
13 0.000474 0.000046 0.000252 0.000026 gini 3 10 {'criterion': 'gini', 'max_depth': 3, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
14 0.000499 0.000018 0.000263 0.000010 gini 3 12 {'criterion': 'gini', 'max_depth': 3, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
15 0.000500 0.000020 0.000258 0.000008 gini 3 14 {'criterion': 'gini', 'max_depth': 3, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
16 0.000495 0.000023 0.000259 0.000009 gini 3 16 {'criterion': 'gini', 'max_depth': 3, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
17 0.000481 0.000022 0.000249 0.000015 gini 3 18 {'criterion': 'gini', 'max_depth': 3, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
18 0.000486 0.000025 0.000246 0.000021 gini 4 2 {'criterion': 'gini', 'max_depth': 4, 'min_sam... 0.869565 0.913043 1.000000 1.000000 0.954545 0.947431 0.050642 1
19 0.000499 0.000019 0.000257 0.000008 gini 4 4 {'criterion': 'gini', 'max_depth': 4, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
20 0.000481 0.000033 0.000250 0.000025 gini 4 6 {'criterion': 'gini', 'max_depth': 4, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
21 0.000476 0.000029 0.000251 0.000017 gini 4 8 {'criterion': 'gini', 'max_depth': 4, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
22 0.000480 0.000043 0.000248 0.000020 gini 4 10 {'criterion': 'gini', 'max_depth': 4, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
23 0.000473 0.000036 0.000247 0.000023 gini 4 12 {'criterion': 'gini', 'max_depth': 4, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
24 0.000497 0.000020 0.000255 0.000016 gini 4 14 {'criterion': 'gini', 'max_depth': 4, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
25 0.000425 0.000049 0.000234 0.000027 gini 4 16 {'criterion': 'gini', 'max_depth': 4, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
26 0.000433 0.000034 0.000225 0.000008 gini 4 18 {'criterion': 'gini', 'max_depth': 4, 'min_sam... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
27 0.000472 0.000049 0.000247 0.000022 entropy 2 2 {'criterion': 'entropy', 'max_depth': 2, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
28 0.000465 0.000039 0.000248 0.000017 entropy 2 4 {'criterion': 'entropy', 'max_depth': 2, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
29 0.000451 0.000057 0.000229 0.000030 entropy 2 6 {'criterion': 'entropy', 'max_depth': 2, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
30 0.000438 0.000037 0.000226 0.000025 entropy 2 8 {'criterion': 'entropy', 'max_depth': 2, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
31 0.000412 0.000060 0.000224 0.000040 entropy 2 10 {'criterion': 'entropy', 'max_depth': 2, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
32 0.000480 0.000034 0.000240 0.000025 entropy 2 12 {'criterion': 'entropy', 'max_depth': 2, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
33 0.000465 0.000035 0.000246 0.000023 entropy 2 14 {'criterion': 'entropy', 'max_depth': 2, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
34 0.000470 0.000032 0.000236 0.000024 entropy 2 16 {'criterion': 'entropy', 'max_depth': 2, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
35 0.000443 0.000062 0.000224 0.000025 entropy 2 18 {'criterion': 'entropy', 'max_depth': 2, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
36 0.000450 0.000054 0.000237 0.000034 entropy 3 2 {'criterion': 'entropy', 'max_depth': 3, 'min_... 0.869565 0.913043 0.909091 1.000000 0.954545 0.929249 0.044445 4
37 0.000466 0.000049 0.000223 0.000029 entropy 3 4 {'criterion': 'entropy', 'max_depth': 3, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
38 0.000419 0.000030 0.000211 0.000014 entropy 3 6 {'criterion': 'entropy', 'max_depth': 3, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
39 0.000478 0.000057 0.000237 0.000036 entropy 3 8 {'criterion': 'entropy', 'max_depth': 3, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
40 0.000463 0.000032 0.000237 0.000017 entropy 3 10 {'criterion': 'entropy', 'max_depth': 3, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
41 0.000427 0.000049 0.000225 0.000033 entropy 3 12 {'criterion': 'entropy', 'max_depth': 3, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
42 0.000453 0.000041 0.000223 0.000026 entropy 3 14 {'criterion': 'entropy', 'max_depth': 3, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
43 0.000422 0.000049 0.000224 0.000032 entropy 3 16 {'criterion': 'entropy', 'max_depth': 3, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
44 0.000393 0.000039 0.000203 0.000021 entropy 3 18 {'criterion': 'entropy', 'max_depth': 3, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
45 0.000459 0.000069 0.000225 0.000041 entropy 4 2 {'criterion': 'entropy', 'max_depth': 4, 'min_... 0.869565 0.956522 0.954545 1.000000 0.954545 0.947036 0.042449 3
46 0.000407 0.000035 0.000200 0.000014 entropy 4 4 {'criterion': 'entropy', 'max_depth': 4, 'min_... 0.869565 0.956522 0.954545 0.909091 0.954545 0.928854 0.034616 5
47 0.000489 0.000054 0.000256 0.000019 entropy 4 6 {'criterion': 'entropy', 'max_depth': 4, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
48 0.000438 0.000053 0.000226 0.000027 entropy 4 8 {'criterion': 'entropy', 'max_depth': 4, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
49 0.000413 0.000072 0.000209 0.000032 entropy 4 10 {'criterion': 'entropy', 'max_depth': 4, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
50 0.000400 0.000035 0.000218 0.000026 entropy 4 12 {'criterion': 'entropy', 'max_depth': 4, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
51 0.000445 0.000060 0.000241 0.000029 entropy 4 14 {'criterion': 'entropy', 'max_depth': 4, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
52 0.000440 0.000031 0.000230 0.000024 entropy 4 16 {'criterion': 'entropy', 'max_depth': 4, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
53 0.000470 0.000039 0.000255 0.000019 entropy 4 18 {'criterion': 'entropy', 'max_depth': 4, 'min_... 0.869565 0.913043 0.909091 0.909091 0.954545 0.911067 0.026924 6
# Keep the decision-tree search results in a DataFrame so they can be plotted.
df = pd.DataFrame(dt_opt.cv_results_)
# Default (line) plot of mean test score against mean scoring time.
df.plot(x='mean_score_time', y='mean_test_score')
<AxesSubplot:xlabel='mean_score_time'>
../_images/2020-11-16_7_1.png
%load http://drsmb.co
# Candidate SVM settings: two kernels crossed with three values of C.
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.5, 1, 10],
}

# The kernel given here is only the starting value; the search tries each
# kernel listed in param_grid.
svm_clf = svm.SVC(kernel='linear')
svm_opt = model_selection.GridSearchCV(estimator=svm_clf, param_grid=param_grid)
svm_opt.fit(iris_X_train, iris_y_train)
GridSearchCV(estimator=SVC(kernel='linear'),
             param_grid={'C': [0.5, 1, 10], 'kernel': ['linear', 'rbf']})
# Collect the SVM search results, one row per parameter combination.
df_svm = pd.DataFrame(svm_opt.cv_results_)
# Scatter of mean test score against mean scoring time.
df_svm.plot.scatter(x='mean_score_time', y='mean_test_score')
<AxesSubplot:xlabel='mean_score_time', ylabel='mean_test_score'>
../_images/2020-11-16_10_1.png
# Scatter of the spread (std) of test scores across folds against mean scoring time.
df_svm.plot.scatter(x='mean_score_time', y='std_test_score')
<AxesSubplot:xlabel='mean_score_time', ylabel='std_test_score'>
../_images/2020-11-16_11_1.png