26. Neural Networks (NN)
Note
Explanations for the code in this section will be filled in later.
from scipy.special import expit
from sklearn.datasets import make_classification
from sklearn.neural_network import MLPClassifier
from sklearn import svm
import pandas as pd
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.model_selection import train_test_split
import seaborn as sns
# use the colorblind palette for the seaborn plots in this notebook
sns.set_theme(palette='colorblind')

# handwritten digits: feature matrix and the matching labels
digits = datasets.load_digits()
digits_X, digits_y = digits.data, digits.target

# no random_state is passed, so the split differs from run to run
(X_train, X_test,
 y_train, y_test) = model_selection.train_test_split(digits_X, digits_y)
# first sample of the dataset, shown as an 8x8 grid of pixel values
digits.images[0]
array([[ 0., 0., 5., 13., 9., 1., 0., 0.],
[ 0., 0., 13., 15., 10., 15., 5., 0.],
[ 0., 3., 15., 2., 0., 11., 8., 0.],
[ 0., 4., 12., 0., 0., 8., 8., 0.],
[ 0., 5., 8., 0., 0., 9., 8., 0.],
[ 0., 4., 11., 0., 1., 12., 7., 0.],
[ 0., 2., 14., 5., 10., 12., 0., 0.],
[ 0., 0., 6., 13., 10., 0., 0., 0.]])
# Multi-layer perceptron on the digits data.
mlp = MLPClassifier(
    # trailing comma makes this a real 1-tuple: one hidden layer of 16 neurons
    # (a bare `(16)` is just the integer 16)
    hidden_layer_sizes=(16,),
    # the recorded run stops at this iteration limit before converging
    max_iter=300,
    solver="lbfgs",
    verbose=10,
    random_state=1,
    # learning_rate_init is only used by the 'sgd' and 'adam' solvers, so it
    # is not passed when training with 'lbfgs'
)
# fit on the training split and report accuracy on the held-out split
mlp.fit(X_train, y_train).score(X_test, y_test)
RUNNING THE L-BFGS-B CODE
* * *
Machine precision = 2.220D-16
N = 1210 M = 10
At X0 0 variables are exactly at the bounds
At iterate 0 f= 9.35831D+00 |proj g|= 7.12293D+00
At iterate 1 f= 8.15013D+00 |proj g|= 7.59455D+00
At iterate 2 f= 3.20682D+00 |proj g|= 1.83482D+00
At iterate 3 f= 2.41038D+00 |proj g|= 4.12523D-01
At iterate 4 f= 2.30057D+00 |proj g|= 2.29302D-01
At iterate 5 f= 2.16287D+00 |proj g|= 2.57795D-01
At iterate 6 f= 2.04046D+00 |proj g|= 4.41193D-01
At iterate 7 f= 1.84985D+00 |proj g|= 2.61403D-01
At iterate 8 f= 1.66808D+00 |proj g|= 5.12138D-01
At iterate 9 f= 1.53432D+00 |proj g|= 3.21255D-01
At iterate 10 f= 1.37893D+00 |proj g|= 3.90688D-01
At iterate 11 f= 1.22924D+00 |proj g|= 2.77987D-01
At iterate 12 f= 1.14122D+00 |proj g|= 2.44465D-01
At iterate 13 f= 1.04751D+00 |proj g|= 3.70967D-01
At iterate 14 f= 1.02304D+00 |proj g|= 3.23129D-01
At iterate 15 f= 9.94126D-01 |proj g|= 3.98128D-01
At iterate 16 f= 9.02139D-01 |proj g|= 2.36300D-01
At iterate 17 f= 8.33677D-01 |proj g|= 5.13923D-01
At iterate 18 f= 7.62518D-01 |proj g|= 7.34707D-01
At iterate 19 f= 7.22659D-01 |proj g|= 2.25413D-01
At iterate 20 f= 6.95681D-01 |proj g|= 1.63914D-01
At iterate 21 f= 6.57612D-01 |proj g|= 3.57316D-01
At iterate 22 f= 6.30374D-01 |proj g|= 3.89055D-01
At iterate 23 f= 6.03817D-01 |proj g|= 1.99450D-01
At iterate 24 f= 5.77477D-01 |proj g|= 1.80616D-01
At iterate 25 f= 5.63983D-01 |proj g|= 1.96488D-01
At iterate 26 f= 5.45666D-01 |proj g|= 1.96897D-01
At iterate 27 f= 5.19711D-01 |proj g|= 3.91726D-01
At iterate 28 f= 4.96922D-01 |proj g|= 3.80623D-01
At iterate 29 f= 4.78255D-01 |proj g|= 2.16015D-01
At iterate 30 f= 4.68132D-01 |proj g|= 1.45047D-01
At iterate 31 f= 4.53844D-01 |proj g|= 9.42813D-02
At iterate 32 f= 4.45225D-01 |proj g|= 1.78964D-01
At iterate 33 f= 4.34743D-01 |proj g|= 4.25966D-01
At iterate 34 f= 4.21882D-01 |proj g|= 1.68104D-01
At iterate 35 f= 4.12909D-01 |proj g|= 1.46940D-01
At iterate 36 f= 3.99635D-01 |proj g|= 2.47144D-01
At iterate 37 f= 3.87965D-01 |proj g|= 1.22954D-01
At iterate 38 f= 3.73581D-01 |proj g|= 1.54711D-01
At iterate 39 f= 3.62995D-01 |proj g|= 2.05183D-01
At iterate 40 f= 3.50116D-01 |proj g|= 9.82863D-02
At iterate 41 f= 3.39618D-01 |proj g|= 7.50563D-02
At iterate 42 f= 3.26793D-01 |proj g|= 9.18201D-02
At iterate 43 f= 3.13768D-01 |proj g|= 2.97056D-01
At iterate 44 f= 3.02831D-01 |proj g|= 7.45832D-02
At iterate 45 f= 2.95652D-01 |proj g|= 6.15051D-02
At iterate 46 f= 2.83265D-01 |proj g|= 8.96812D-02
At iterate 47 f= 2.79039D-01 |proj g|= 2.07743D-01
At iterate 48 f= 2.74258D-01 |proj g|= 1.21564D-01
At iterate 49 f= 2.68924D-01 |proj g|= 6.44993D-02
At iterate 50 f= 2.65323D-01 |proj g|= 1.38837D-01
At iterate 51 f= 2.62027D-01 |proj g|= 7.56399D-02
At iterate 52 f= 2.58262D-01 |proj g|= 6.93631D-02
At iterate 53 f= 2.54429D-01 |proj g|= 8.70241D-02
At iterate 54 f= 2.46635D-01 |proj g|= 1.82898D-01
At iterate 55 f= 2.41939D-01 |proj g|= 1.06885D-01
At iterate 56 f= 2.39236D-01 |proj g|= 5.76935D-02
At iterate 57 f= 2.33485D-01 |proj g|= 8.14278D-02
At iterate 58 f= 2.26573D-01 |proj g|= 8.51492D-02
At iterate 59 f= 2.20305D-01 |proj g|= 1.31398D-01
At iterate 60 f= 2.14734D-01 |proj g|= 1.07692D-01
At iterate 61 f= 2.09751D-01 |proj g|= 8.18299D-02
At iterate 62 f= 2.05517D-01 |proj g|= 7.04871D-02
At iterate 63 f= 2.00317D-01 |proj g|= 1.16804D-01
At iterate 64 f= 1.99464D-01 |proj g|= 1.69445D-01
At iterate 65 f= 1.95988D-01 |proj g|= 7.13988D-02
At iterate 66 f= 1.93487D-01 |proj g|= 6.56907D-02
At iterate 67 f= 1.89885D-01 |proj g|= 9.21167D-02
At iterate 68 f= 1.86596D-01 |proj g|= 1.47701D-01
At iterate 69 f= 1.82628D-01 |proj g|= 5.38514D-02
At iterate 70 f= 1.80223D-01 |proj g|= 5.22242D-02
At iterate 71 f= 1.77074D-01 |proj g|= 5.12428D-02
At iterate 72 f= 1.73020D-01 |proj g|= 1.14181D-01
At iterate 73 f= 1.68555D-01 |proj g|= 4.35014D-02
At iterate 74 f= 1.66338D-01 |proj g|= 3.38488D-02
At iterate 75 f= 1.63206D-01 |proj g|= 7.81970D-02
At iterate 76 f= 1.60421D-01 |proj g|= 9.00734D-02
At iterate 77 f= 1.57441D-01 |proj g|= 4.79432D-02
At iterate 78 f= 1.56093D-01 |proj g|= 2.45010D-01
At iterate 79 f= 1.53409D-01 |proj g|= 8.68864D-02
At iterate 80 f= 1.51875D-01 |proj g|= 3.65493D-02
At iterate 81 f= 1.49893D-01 |proj g|= 6.38136D-02
At iterate 82 f= 1.46609D-01 |proj g|= 9.97108D-02
At iterate 83 f= 1.41660D-01 |proj g|= 8.17617D-02
At iterate 84 f= 1.39284D-01 |proj g|= 1.31591D-01
At iterate 85 f= 1.35614D-01 |proj g|= 5.34248D-02
At iterate 86 f= 1.32289D-01 |proj g|= 5.94853D-02
At iterate 87 f= 1.30774D-01 |proj g|= 6.62761D-02
At iterate 88 f= 1.29164D-01 |proj g|= 9.67610D-02
At iterate 89 f= 1.27361D-01 |proj g|= 5.30402D-02
At iterate 90 f= 1.25258D-01 |proj g|= 4.52685D-02
At iterate 91 f= 1.24144D-01 |proj g|= 3.50661D-02
At iterate 92 f= 1.22970D-01 |proj g|= 1.34427D-01
At iterate 93 f= 1.21192D-01 |proj g|= 4.31738D-02
At iterate 94 f= 1.20275D-01 |proj g|= 5.87830D-02
At iterate 95 f= 1.18313D-01 |proj g|= 4.27942D-02
At iterate 96 f= 1.17279D-01 |proj g|= 6.13713D-02
At iterate 97 f= 1.16206D-01 |proj g|= 1.97619D-02
At iterate 98 f= 1.15222D-01 |proj g|= 4.48254D-02
At iterate 99 f= 1.14327D-01 |proj g|= 4.60775D-02
At iterate 100 f= 1.12672D-01 |proj g|= 2.88318D-02
At iterate 101 f= 1.11002D-01 |proj g|= 5.78257D-02
At iterate 102 f= 1.09915D-01 |proj g|= 1.23144D-01
At iterate 103 f= 1.08614D-01 |proj g|= 5.08242D-02
At iterate 104 f= 1.07305D-01 |proj g|= 2.05078D-02
At iterate 105 f= 1.06055D-01 |proj g|= 8.27541D-02
At iterate 106 f= 1.04922D-01 |proj g|= 9.24675D-02
At iterate 107 f= 1.02835D-01 |proj g|= 1.08462D-01
At iterate 108 f= 1.01792D-01 |proj g|= 8.04543D-02
At iterate 109 f= 9.97770D-02 |proj g|= 5.66051D-02
At iterate 110 f= 9.86550D-02 |proj g|= 3.49343D-02
At iterate 111 f= 9.74108D-02 |proj g|= 6.56690D-02
This problem is unconstrained.
At iterate 112 f= 9.57478D-02 |proj g|= 6.15999D-02
At iterate 113 f= 9.43457D-02 |proj g|= 6.21794D-02
At iterate 114 f= 9.30552D-02 |proj g|= 4.32226D-02
At iterate 115 f= 9.22095D-02 |proj g|= 8.02477D-02
At iterate 116 f= 9.12478D-02 |proj g|= 2.45968D-02
At iterate 117 f= 9.04983D-02 |proj g|= 1.69231D-02
At iterate 118 f= 8.99069D-02 |proj g|= 2.17111D-02
At iterate 119 f= 8.83240D-02 |proj g|= 3.39243D-02
At iterate 120 f= 8.77997D-02 |proj g|= 7.31901D-02
At iterate 121 f= 8.66947D-02 |proj g|= 4.13160D-02
At iterate 122 f= 8.64320D-02 |proj g|= 6.24183D-02
At iterate 123 f= 8.45723D-02 |proj g|= 3.85627D-02
At iterate 124 f= 8.35820D-02 |proj g|= 2.46852D-02
At iterate 125 f= 8.25781D-02 |proj g|= 3.38878D-02
At iterate 126 f= 8.17539D-02 |proj g|= 1.05004D-01
At iterate 127 f= 8.10321D-02 |proj g|= 3.49014D-02
At iterate 128 f= 8.03722D-02 |proj g|= 4.26776D-02
At iterate 129 f= 7.92385D-02 |proj g|= 4.55410D-02
At iterate 130 f= 7.84270D-02 |proj g|= 1.86211D-02
At iterate 131 f= 7.76502D-02 |proj g|= 6.93974D-02
At iterate 132 f= 7.67317D-02 |proj g|= 3.78811D-02
At iterate 133 f= 7.62113D-02 |proj g|= 2.53625D-02
At iterate 134 f= 7.52942D-02 |proj g|= 1.73423D-02
At iterate 135 f= 7.44466D-02 |proj g|= 2.50020D-02
At iterate 136 f= 7.37412D-02 |proj g|= 5.32529D-02
At iterate 137 f= 7.25499D-02 |proj g|= 2.64392D-02
At iterate 138 f= 7.17744D-02 |proj g|= 4.02137D-02
At iterate 139 f= 7.06307D-02 |proj g|= 3.14219D-02
At iterate 140 f= 6.97917D-02 |proj g|= 2.02864D-02
At iterate 141 f= 6.92348D-02 |proj g|= 4.27977D-02
At iterate 142 f= 6.84450D-02 |proj g|= 3.07515D-02
At iterate 143 f= 6.75559D-02 |proj g|= 2.90920D-02
At iterate 144 f= 6.70882D-02 |proj g|= 4.75226D-02
At iterate 145 f= 6.65371D-02 |proj g|= 1.25793D-02
At iterate 146 f= 6.55496D-02 |proj g|= 3.16842D-02
At iterate 147 f= 6.46789D-02 |proj g|= 1.10573D-02
At iterate 148 f= 6.34207D-02 |proj g|= 2.38280D-02
At iterate 149 f= 6.27034D-02 |proj g|= 2.54676D-02
At iterate 150 f= 6.17427D-02 |proj g|= 1.98849D-02
At iterate 151 f= 6.03847D-02 |proj g|= 2.82350D-02
At iterate 152 f= 5.99897D-02 |proj g|= 5.79761D-02
At iterate 153 f= 5.86208D-02 |proj g|= 2.98454D-02
At iterate 154 f= 5.80866D-02 |proj g|= 2.01855D-02
At iterate 155 f= 5.76161D-02 |proj g|= 2.44564D-02
At iterate 156 f= 5.71389D-02 |proj g|= 2.76857D-02
At iterate 157 f= 5.63019D-02 |proj g|= 1.20104D-02
At iterate 158 f= 5.52805D-02 |proj g|= 1.63104D-02
At iterate 159 f= 5.47733D-02 |proj g|= 2.97665D-02
At iterate 160 f= 5.40071D-02 |proj g|= 2.19831D-02
At iterate 161 f= 5.29850D-02 |proj g|= 1.42537D-02
At iterate 162 f= 5.24862D-02 |proj g|= 2.19271D-02
At iterate 163 f= 5.20069D-02 |proj g|= 1.65811D-02
At iterate 164 f= 5.14724D-02 |proj g|= 1.25521D-02
At iterate 165 f= 5.08133D-02 |proj g|= 2.41548D-02
At iterate 166 f= 5.04733D-02 |proj g|= 4.90624D-02
At iterate 167 f= 5.00313D-02 |proj g|= 1.47562D-02
At iterate 168 f= 4.94588D-02 |proj g|= 1.66000D-02
At iterate 169 f= 4.88443D-02 |proj g|= 2.08531D-02
At iterate 170 f= 4.87290D-02 |proj g|= 9.40194D-02
At iterate 171 f= 4.81921D-02 |proj g|= 1.80671D-02
At iterate 172 f= 4.80092D-02 |proj g|= 1.45467D-02
At iterate 173 f= 4.77802D-02 |proj g|= 3.79327D-02
At iterate 174 f= 4.75571D-02 |proj g|= 1.96245D-02
At iterate 175 f= 4.72381D-02 |proj g|= 1.64485D-02
At iterate 176 f= 4.67943D-02 |proj g|= 3.05639D-02
At iterate 177 f= 4.64934D-02 |proj g|= 2.28106D-02
At iterate 178 f= 4.61043D-02 |proj g|= 2.00784D-02
At iterate 179 f= 4.58149D-02 |proj g|= 2.85614D-02
At iterate 180 f= 4.55344D-02 |proj g|= 3.75414D-02
At iterate 181 f= 4.51028D-02 |proj g|= 2.76256D-02
At iterate 182 f= 4.45617D-02 |proj g|= 2.67532D-02
At iterate 183 f= 4.38315D-02 |proj g|= 1.94959D-02
At iterate 184 f= 4.34150D-02 |proj g|= 3.07730D-02
At iterate 185 f= 4.29582D-02 |proj g|= 4.28945D-02
At iterate 186 f= 4.27103D-02 |proj g|= 1.84679D-02
At iterate 187 f= 4.23981D-02 |proj g|= 1.30490D-02
At iterate 188 f= 4.19125D-02 |proj g|= 2.83346D-02
At iterate 189 f= 4.14923D-02 |proj g|= 6.55316D-02
At iterate 190 f= 4.10461D-02 |proj g|= 1.11180D-02
At iterate 191 f= 4.07231D-02 |proj g|= 1.74527D-02
At iterate 192 f= 4.03680D-02 |proj g|= 4.20997D-02
At iterate 193 f= 4.00223D-02 |proj g|= 1.32781D-02
At iterate 194 f= 3.97272D-02 |proj g|= 9.83931D-03
At iterate 195 f= 3.94303D-02 |proj g|= 3.07980D-02
At iterate 196 f= 3.92323D-02 |proj g|= 1.15312D-02
At iterate 197 f= 3.90881D-02 |proj g|= 9.07594D-03
At iterate 198 f= 3.87756D-02 |proj g|= 9.97909D-03
At iterate 199 f= 3.84267D-02 |proj g|= 1.81131D-02
At iterate 200 f= 3.81506D-02 |proj g|= 1.75355D-02
At iterate 201 f= 3.80006D-02 |proj g|= 2.05284D-02
At iterate 202 f= 3.78014D-02 |proj g|= 1.60391D-02
At iterate 203 f= 3.74180D-02 |proj g|= 2.27207D-02
At iterate 204 f= 3.71439D-02 |proj g|= 2.70844D-02
At iterate 205 f= 3.69627D-02 |proj g|= 2.25535D-02
At iterate 206 f= 3.68144D-02 |proj g|= 2.16604D-02
At iterate 207 f= 3.64461D-02 |proj g|= 1.52186D-02
At iterate 208 f= 3.63070D-02 |proj g|= 2.92725D-02
At iterate 209 f= 3.60881D-02 |proj g|= 9.68479D-03
At iterate 210 f= 3.59395D-02 |proj g|= 2.31011D-02
At iterate 211 f= 3.57236D-02 |proj g|= 1.71459D-02
At iterate 212 f= 3.54213D-02 |proj g|= 1.28479D-02
At iterate 213 f= 3.50640D-02 |proj g|= 1.77736D-02
At iterate 214 f= 3.46045D-02 |proj g|= 2.15982D-02
At iterate 215 f= 3.42928D-02 |proj g|= 3.47302D-02
At iterate 216 f= 3.41972D-02 |proj g|= 6.54365D-02
At iterate 217 f= 3.39900D-02 |proj g|= 1.56775D-02
At iterate 218 f= 3.39101D-02 |proj g|= 6.13517D-03
At iterate 219 f= 3.37147D-02 |proj g|= 1.29595D-02
At iterate 220 f= 3.34977D-02 |proj g|= 1.52622D-02
At iterate 221 f= 3.31736D-02 |proj g|= 2.08028D-02
At iterate 222 f= 3.27932D-02 |proj g|= 2.49874D-02
At iterate 223 f= 3.23837D-02 |proj g|= 1.82605D-02
At iterate 224 f= 3.19443D-02 |proj g|= 6.98864D-03
At iterate 225 f= 3.17975D-02 |proj g|= 1.06969D-02
At iterate 226 f= 3.16672D-02 |proj g|= 6.85478D-03
At iterate 227 f= 3.15292D-02 |proj g|= 1.03325D-02
At iterate 228 f= 3.12905D-02 |proj g|= 1.41523D-02
At iterate 229 f= 3.09728D-02 |proj g|= 2.33888D-02
At iterate 230 f= 3.08731D-02 |proj g|= 8.59869D-02
At iterate 231 f= 3.04807D-02 |proj g|= 1.93564D-02
At iterate 232 f= 3.03003D-02 |proj g|= 7.05291D-03
At iterate 233 f= 3.01126D-02 |proj g|= 1.24852D-02
At iterate 234 f= 2.99175D-02 |proj g|= 2.34076D-02
At iterate 235 f= 2.96675D-02 |proj g|= 1.19884D-02
At iterate 236 f= 2.94127D-02 |proj g|= 1.27925D-02
At iterate 237 f= 2.91545D-02 |proj g|= 1.61861D-02
At iterate 238 f= 2.88381D-02 |proj g|= 4.67661D-02
At iterate 239 f= 2.83894D-02 |proj g|= 1.93794D-02
At iterate 240 f= 2.81017D-02 |proj g|= 1.14703D-02
At iterate 241 f= 2.78396D-02 |proj g|= 1.57111D-02
At iterate 242 f= 2.75865D-02 |proj g|= 2.75115D-02
At iterate 243 f= 2.74016D-02 |proj g|= 8.12202D-03
At iterate 244 f= 2.73938D-02 |proj g|= 1.71218D-02
At iterate 245 f= 2.72719D-02 |proj g|= 1.80550D-02
At iterate 246 f= 2.70325D-02 |proj g|= 1.60546D-02
At iterate 247 f= 2.69033D-02 |proj g|= 1.08572D-02
At iterate 248 f= 2.67047D-02 |proj g|= 1.28841D-02
At iterate 249 f= 2.63543D-02 |proj g|= 1.47745D-02
At iterate 250 f= 2.60738D-02 |proj g|= 2.03653D-02
At iterate 251 f= 2.58620D-02 |proj g|= 1.17686D-02
At iterate 252 f= 2.56969D-02 |proj g|= 7.10876D-03
At iterate 253 f= 2.54206D-02 |proj g|= 4.75033D-02
At iterate 254 f= 2.52962D-02 |proj g|= 2.00599D-02
At iterate 255 f= 2.52142D-02 |proj g|= 1.16215D-02
At iterate 256 f= 2.50360D-02 |proj g|= 2.00516D-02
At iterate 257 f= 2.48222D-02 |proj g|= 2.31768D-02
At iterate 258 f= 2.45375D-02 |proj g|= 1.89803D-02
At iterate 259 f= 2.42703D-02 |proj g|= 1.46606D-02
At iterate 260 f= 2.40366D-02 |proj g|= 1.04127D-02
At iterate 261 f= 2.37156D-02 |proj g|= 1.73880D-02
At iterate 262 f= 2.34200D-02 |proj g|= 1.47072D-02
At iterate 263 f= 2.34053D-02 |proj g|= 4.29482D-02
At iterate 264 f= 2.30057D-02 |proj g|= 2.12672D-02
At iterate 265 f= 2.26558D-02 |proj g|= 4.98735D-02
At iterate 266 f= 2.23187D-02 |proj g|= 2.29467D-02
At iterate 267 f= 2.20852D-02 |proj g|= 1.60464D-02
At iterate 268 f= 2.19519D-02 |proj g|= 6.98730D-03
At iterate 269 f= 2.18305D-02 |proj g|= 1.05114D-02
At iterate 270 f= 2.17444D-02 |proj g|= 1.66336D-02
At iterate 271 f= 2.16670D-02 |proj g|= 5.23074D-03
At iterate 272 f= 2.15453D-02 |proj g|= 1.16448D-02
At iterate 273 f= 2.14350D-02 |proj g|= 1.14494D-02
At iterate 274 f= 2.11745D-02 |proj g|= 7.24822D-03
At iterate 275 f= 2.10700D-02 |proj g|= 1.64575D-02
At iterate 276 f= 2.09172D-02 |proj g|= 4.24551D-03
At iterate 277 f= 2.08258D-02 |proj g|= 3.98174D-03
At iterate 278 f= 2.06930D-02 |proj g|= 9.39219D-03
At iterate 279 f= 2.06379D-02 |proj g|= 7.26194D-03
At iterate 280 f= 2.05530D-02 |proj g|= 4.74145D-03
At iterate 281 f= 2.05115D-02 |proj g|= 5.63932D-03
At iterate 282 f= 2.04980D-02 |proj g|= 2.16040D-02
At iterate 283 f= 2.03582D-02 |proj g|= 1.00960D-02
At iterate 284 f= 2.03102D-02 |proj g|= 5.67222D-03
At iterate 285 f= 2.02340D-02 |proj g|= 1.04374D-02
At iterate 286 f= 2.02145D-02 |proj g|= 3.95484D-02
At iterate 287 f= 2.01488D-02 |proj g|= 1.98335D-02
At iterate 288 f= 2.01220D-02 |proj g|= 1.76772D-02
At iterate 289 f= 2.01075D-02 |proj g|= 9.77515D-03
At iterate 290 f= 1.95805D-02 |proj g|= 5.39720D-02
At iterate 291 f= 1.95286D-02 |proj g|= 5.47522D-02
At iterate 292 f= 1.90592D-02 |proj g|= 2.28322D-02
At iterate 293 f= 1.89455D-02 |proj g|= 2.70978D-02
At iterate 294 f= 1.85725D-02 |proj g|= 2.18053D-02
At iterate 295 f= 1.83387D-02 |proj g|= 1.89447D-02
At iterate 296 f= 1.82259D-02 |proj g|= 2.24565D-02
At iterate 297 f= 1.79753D-02 |proj g|= 1.67408D-02
At iterate 298 f= 1.76353D-02 |proj g|= 1.35714D-02
At iterate 299 f= 1.74957D-02 |proj g|= 1.52985D-02
At iterate 300 f= 1.71813D-02 |proj g|= 1.45763D-02
* * *
Tit = total number of iterations
Tnf = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip = number of BFGS updates skipped
Nact = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F = final function value
* * *
N Tit Tnf Tnint Skip Nact Projg F
1210 300 327 1 0 0 1.458D-02 1.718D-02
F = 1.7181262323955667E-002
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/sklearn/neural_network/_multilayer_perceptron.py:546: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
0.9111111111111111
# support vector classifier on the same split, for comparison with the MLP
svm_clf = svm.SVC(gamma=0.001).fit(X_train, y_train)
svm_clf.score(X_test, y_test)
0.9911111111111112
# (number of support vectors, number of features) kept by the fitted SVM
svm_clf.support_vectors_.shape
(660, 64)
# total number of values stored in the support-vector matrix (rows * columns)
svm_clf.support_vectors_.size
42240
# number of connection weights in the MLP; the 16 + 10 intercepts are NOT
# counted here (the optimizer log reports N = 1210 variables in total)
sum(c.size for c in mlp.coefs_)
1184
# shape of each layer's weight matrix, in layer order
[list(c.shape) for c in mlp.coefs_]
[[64, 16], [16, 10]]
# small synthetic classification problem with two features
X, y = make_classification(
    n_samples=100,
    n_features=2,
    n_redundant=0,
    random_state=1,
)

# stratified, seeded train/test split
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, random_state=1, stratify=y
)

# the two features plotted against each other, colored by class label
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y)
<Axes: >

# Smallest possible network: a single hidden neuron with identity activation.
clf = MLPClassifier(
    hidden_layer_sizes=(1,),  # 1 hidden layer, 1 artificial neuron (note the 1-tuple)
    max_iter=100,  # maximum 100 iterations in optimization
    alpha=1e-4,  # regularization
    solver="lbfgs",  # optimization algorithm
    verbose=10,  # how much detail to print
    activation="identity",  # how to transform the hidden layer before passing it to the next layer
)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
RUNNING THE L-BFGS-B CODE
* * *
Machine precision = 2.220D-16
N = 5 M = 10
At X0 0 variables are exactly at the bounds
At iterate 0 f= 2.72755D+00 |proj g|= 1.41805D+00
At iterate 1 f= 1.31673D+00 |proj g|= 6.92389D-01
At iterate 2 f= 7.54946D-01 |proj g|= 1.81684D-01
At iterate 3 f= 6.86828D-01 |proj g|= 8.66096D-02
At iterate 4 f= 3.98036D-01 |proj g|= 4.15445D-01
At iterate 5 f= 1.26842D-01 |proj g|= 2.05662D-01
At iterate 6 f= 5.23046D-02 |proj g|= 5.29974D-02
At iterate 7 f= 5.00689D-02 |proj g|= 6.65182D-02
At iterate 8 f= 4.79825D-02 |proj g|= 7.02581D-03
At iterate 9 f= 4.79268D-02 |proj g|= 1.21452D-03
At iterate 10 f= 4.79254D-02 |proj g|= 8.12433D-04
At iterate 11 f= 4.79233D-02 |proj g|= 4.93493D-04
At iterate 12 f= 4.79171D-02 |proj g|= 4.72924D-04
At iterate 13 f= 4.79120D-02 |proj g|= 6.78848D-04
At iterate 14 f= 4.79097D-02 |proj g|= 1.83022D-04
At iterate 15 f= 4.79096D-02 |proj g|= 4.06348D-05
* * *
Tit = total number of iterations
Tnf = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip = number of BFGS updates skipped
Nact = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F = final function value
* * *
N Tit Tnf Tnint Skip Nact Projg F
5 15 18 1 0 0 4.063D-05 4.791D-02
F = 4.7909609226466032E-002
CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
This problem is unconstrained.
1.0
# hidden-layer activation, as passed to the constructor
clf.activation
'identity'
# output-layer activation reported by the fitted model (it was not set in the
# constructor call)
clf.out_activation_
'logistic'
# evaluate the logistic function on 100 evenly spaced points in [-10, 10]
x_logistic = np.linspace(start=-10, stop=10, num=100)
y_logistic = expit(x_logistic)

# draw the resulting curve
plt.plot(x_logistic, y_logistic)
[<matplotlib.lines.Line2D at 0x7eff32086ca0>]

# fitted weights, one array per layer: input -> hidden, then hidden -> output
clf.coefs_
[array([[-2.40416157],
[ 0.0691716 ]]),
array([[-5.46678735]])]
# fitted bias terms, one array per layer (hidden layer first, then output)
clf.intercepts_
[array([-0.39615732]), array([3.53086327])]
# a single 2-feature sample to push through the network by hand
pt = np.array([-1, 2])

# hidden-layer value: weighted sum of the inputs plus the hidden bias
pt @ clf.coefs_[0] + clf.intercepts_[0]
array([2.14634745])
# Manual forward pass through both layers for `pt`. The elementwise `*` stands
# in for a matrix product only because the hidden layer has a single neuron
# (clf.coefs_[1] is 1x1). The value matches the second column of
# clf.predict_proba for the same point.
expit((np.matmul(pt,clf.coefs_[0]) + clf.intercepts_[0])*clf.coefs_[1] + clf.intercepts_[1])
array([[0.00027382]])
# the library's class probabilities for the same point; the second column
# agrees with the hand-computed forward pass
clf.predict_proba([pt])
array([[9.99726179e-01, 2.73821076e-04]])
def aritificial_neuron_template(activation, weights, bias, inputs):
    """
    Simple artificial neuron: weighted sum of the inputs, plus a bias,
    passed through an activation function.

    Parameters
    ----------
    activation : function
        activation function of the neuron, applied to the biased weighted sum
    weights : numpy array
        weights for summing the inputs, one per input
    bias : numpy array
        bias term added to the weighted sum
    inputs : numpy array
        input to the neuron, must be the same size as weights

    Returns
    -------
    The value of ``activation(np.matmul(inputs, weights) + bias)``.
    """
    # weighted sum first, then shift by the bias, then apply the non-linearity
    weighted_sum = np.matmul(inputs, weights)
    return activation(weighted_sum + bias)
# two common activation functions
def identity_activation(x):
    """Identity activation: return the input unchanged."""
    return x


def logistic_activation(x):
    """Logistic activation, computed with scipy's ``expit``."""
    return expit(x)


def hidden_neuron(x):
    """Hidden neuron of ``clf``: identity activation on the first-layer weights and bias."""
    return aritificial_neuron_template(
        identity_activation, clf.coefs_[0], clf.intercepts_[0], x
    )


def output_neuron(h):
    """Output neuron of ``clf``: logistic activation on the second-layer weights and bias."""
    return aritificial_neuron_template(
        logistic_activation, clf.coefs_[1], clf.intercepts_[1], h
    )


# chain the two neurons to reproduce the network's output for `pt`
output_neuron(hidden_neuron(pt))
array([0.00027382])
# synthetic classification problem with four informative features
X, y = make_classification(
    n_samples=200, random_state=1, n_features=4, n_redundant=0, n_informative=4
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=5
)

# two 4-feature sample points to evaluate by hand later
pt_4d = np.asarray([[-1, -2, 2, -1], [1.5, 0, .5, 1]])

clf_4d = MLPClassifier(
    hidden_layer_sizes=(1,),  # 1-tuple: one hidden layer with one neuron
    max_iter=5000,
    alpha=1e-4,
    solver="lbfgs",
    verbose=10,
    activation="identity",
)
clf_4d.fit(X_train, y_train)
clf_4d.score(X_test, y_test)
RUNNING THE L-BFGS-B CODE
* * *
Machine precision = 2.220D-16
N = 7 M = 10
At X0 0 variables are exactly at the bounds
At iterate 0 f= 7.06082D-01 |proj g|= 2.13807D-01
At iterate 1 f= 4.78890D-01 |proj g|= 9.78564D-02
At iterate 2 f= 4.42781D-01 |proj g|= 3.88770D-02
At iterate 3 f= 4.37379D-01 |proj g|= 2.13806D-02
At iterate 4 f= 4.34824D-01 |proj g|= 9.17808D-03
At iterate 5 f= 4.34743D-01 |proj g|= 6.72591D-03
At iterate 6 f= 4.34657D-01 |proj g|= 7.36787D-04
At iterate 7 f= 4.34656D-01 |proj g|= 1.19546D-04
This problem is unconstrained.
0.84
At iterate 8 f= 4.34656D-01 |proj g|= 2.25059D-05
* * *
Tit = total number of iterations
Tnf = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip = number of BFGS updates skipped
Nact = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F = final function value
* * *
N Tit Tnf Tnint Skip Nact Projg F
7 8 10 1 0 0 2.251D-05 4.347D-01
F = 0.43465595346183000
CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
def hidden_neuron_4d(x):
    """Hidden neuron of ``clf_4d``: identity activation on the first-layer weights and bias."""
    return aritificial_neuron_template(
        identity_activation, clf_4d.coefs_[0], clf_4d.intercepts_[0], x
    )


def output_neuron_4d(x):
    """Output neuron of ``clf_4d``: logistic activation on the second-layer weights and bias."""
    return aritificial_neuron_template(
        logistic_activation, clf_4d.coefs_[1], clf_4d.intercepts_[1], x
    )


# both sample points at once: one output row per input row
output_neuron_4d(hidden_neuron_4d(pt_4d))
array([[0.95357467],
[0.85332897]])
# the library's class probabilities for the same two points; the second column
# agrees with the hand-built neurons' output
clf_4d.predict_proba(pt_4d)
array([[0.04642533, 0.95357467],
[0.14667103, 0.85332897]])
# tabulate the four features alongside the class label
df = pd.DataFrame(data=X, columns=['x0', 'x1', 'x2', 'x3'])
df['y'] = y

# grid of pairwise plots over the columns, colored by class
sns.pairplot(data=df, hue='y')
<seaborn.axisgrid.PairGrid at 0x7eff31dfe4f0>
