26. NN#

Note

Will fill in explanation later

from scipy.special import expit
from sklearn.datasets import make_classification
from sklearn.neural_network import MLPClassifier
from sklearn import svm
import pandas as pd
import numpy as np


from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn import model_selection

from sklearn.model_selection import train_test_split

import seaborn as sns
sns.set_theme(palette='colorblind')
# Load the handwritten-digit dataset: `images` holds 8x8 pixel grids and
# `data` holds the same samples as 64-feature rows for the classifiers.
digits = datasets.load_digits()
digits_X = digits.data
digits_y = digits.target
# NOTE(review): no random_state is passed here, so the train/test split (and
# therefore every score reported for the digits models) changes between runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(digits_X,digits_y)
digits.images[0]
array([[ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.],
       [ 0.,  0., 13., 15., 10., 15.,  5.,  0.],
       [ 0.,  3., 15.,  2.,  0., 11.,  8.,  0.],
       [ 0.,  4., 12.,  0.,  0.,  8.,  8.,  0.],
       [ 0.,  5.,  8.,  0.,  0.,  9.,  8.,  0.],
       [ 0.,  4., 11.,  0.,  1., 12.,  7.,  0.],
       [ 0.,  2., 14.,  5., 10., 12.,  0.,  0.],
       [ 0.,  0.,  6., 13., 10.,  0.,  0.,  0.]])
# Multi-layer perceptron with one hidden layer of 16 units, trained with
# L-BFGS for at most 300 iterations.
mlp = MLPClassifier(
  hidden_layer_sizes=(16,),  # trailing comma makes this a tuple; `(16)` is just the int 16
  max_iter=300,
  solver="lbfgs",
  verbose=10,  # print optimizer progress
  random_state=1,
  # learning_rate_init is intentionally not set: scikit-learn only uses it
  # with the "sgd" and "adam" solvers, so it has no effect under "lbfgs".
)
mlp.fit(X_train, y_train).score(X_test,y_test)
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =         1210     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  9.35831D+00    |proj g|=  7.12293D+00

At iterate    1    f=  8.15013D+00    |proj g|=  7.59455D+00

At iterate    2    f=  3.20682D+00    |proj g|=  1.83482D+00

At iterate    3    f=  2.41038D+00    |proj g|=  4.12523D-01

At iterate    4    f=  2.30057D+00    |proj g|=  2.29302D-01

At iterate    5    f=  2.16287D+00    |proj g|=  2.57795D-01

At iterate    6    f=  2.04046D+00    |proj g|=  4.41193D-01

At iterate    7    f=  1.84985D+00    |proj g|=  2.61403D-01

At iterate    8    f=  1.66808D+00    |proj g|=  5.12138D-01

At iterate    9    f=  1.53432D+00    |proj g|=  3.21255D-01

At iterate   10    f=  1.37893D+00    |proj g|=  3.90688D-01

At iterate   11    f=  1.22924D+00    |proj g|=  2.77987D-01

At iterate   12    f=  1.14122D+00    |proj g|=  2.44465D-01

At iterate   13    f=  1.04751D+00    |proj g|=  3.70967D-01

At iterate   14    f=  1.02304D+00    |proj g|=  3.23129D-01

At iterate   15    f=  9.94126D-01    |proj g|=  3.98128D-01

At iterate   16    f=  9.02139D-01    |proj g|=  2.36300D-01

At iterate   17    f=  8.33677D-01    |proj g|=  5.13923D-01

At iterate   18    f=  7.62518D-01    |proj g|=  7.34707D-01

At iterate   19    f=  7.22659D-01    |proj g|=  2.25413D-01

At iterate   20    f=  6.95681D-01    |proj g|=  1.63914D-01

At iterate   21    f=  6.57612D-01    |proj g|=  3.57316D-01

At iterate   22    f=  6.30374D-01    |proj g|=  3.89055D-01

At iterate   23    f=  6.03817D-01    |proj g|=  1.99450D-01

At iterate   24    f=  5.77477D-01    |proj g|=  1.80616D-01

At iterate   25    f=  5.63983D-01    |proj g|=  1.96488D-01

At iterate   26    f=  5.45666D-01    |proj g|=  1.96897D-01

At iterate   27    f=  5.19711D-01    |proj g|=  3.91726D-01

At iterate   28    f=  4.96922D-01    |proj g|=  3.80623D-01

At iterate   29    f=  4.78255D-01    |proj g|=  2.16015D-01

At iterate   30    f=  4.68132D-01    |proj g|=  1.45047D-01

At iterate   31    f=  4.53844D-01    |proj g|=  9.42813D-02

At iterate   32    f=  4.45225D-01    |proj g|=  1.78964D-01

At iterate   33    f=  4.34743D-01    |proj g|=  4.25966D-01

At iterate   34    f=  4.21882D-01    |proj g|=  1.68104D-01

At iterate   35    f=  4.12909D-01    |proj g|=  1.46940D-01

At iterate   36    f=  3.99635D-01    |proj g|=  2.47144D-01

At iterate   37    f=  3.87965D-01    |proj g|=  1.22954D-01

At iterate   38    f=  3.73581D-01    |proj g|=  1.54711D-01

At iterate   39    f=  3.62995D-01    |proj g|=  2.05183D-01

At iterate   40    f=  3.50116D-01    |proj g|=  9.82863D-02

At iterate   41    f=  3.39618D-01    |proj g|=  7.50563D-02

At iterate   42    f=  3.26793D-01    |proj g|=  9.18201D-02

At iterate   43    f=  3.13768D-01    |proj g|=  2.97056D-01

At iterate   44    f=  3.02831D-01    |proj g|=  7.45832D-02

At iterate   45    f=  2.95652D-01    |proj g|=  6.15051D-02

At iterate   46    f=  2.83265D-01    |proj g|=  8.96812D-02

At iterate   47    f=  2.79039D-01    |proj g|=  2.07743D-01

At iterate   48    f=  2.74258D-01    |proj g|=  1.21564D-01

At iterate   49    f=  2.68924D-01    |proj g|=  6.44993D-02

At iterate   50    f=  2.65323D-01    |proj g|=  1.38837D-01

At iterate   51    f=  2.62027D-01    |proj g|=  7.56399D-02

At iterate   52    f=  2.58262D-01    |proj g|=  6.93631D-02

At iterate   53    f=  2.54429D-01    |proj g|=  8.70241D-02

At iterate   54    f=  2.46635D-01    |proj g|=  1.82898D-01

At iterate   55    f=  2.41939D-01    |proj g|=  1.06885D-01

At iterate   56    f=  2.39236D-01    |proj g|=  5.76935D-02

At iterate   57    f=  2.33485D-01    |proj g|=  8.14278D-02

At iterate   58    f=  2.26573D-01    |proj g|=  8.51492D-02

At iterate   59    f=  2.20305D-01    |proj g|=  1.31398D-01

At iterate   60    f=  2.14734D-01    |proj g|=  1.07692D-01

At iterate   61    f=  2.09751D-01    |proj g|=  8.18299D-02

At iterate   62    f=  2.05517D-01    |proj g|=  7.04871D-02

At iterate   63    f=  2.00317D-01    |proj g|=  1.16804D-01

At iterate   64    f=  1.99464D-01    |proj g|=  1.69445D-01

At iterate   65    f=  1.95988D-01    |proj g|=  7.13988D-02

At iterate   66    f=  1.93487D-01    |proj g|=  6.56907D-02

At iterate   67    f=  1.89885D-01    |proj g|=  9.21167D-02

At iterate   68    f=  1.86596D-01    |proj g|=  1.47701D-01

At iterate   69    f=  1.82628D-01    |proj g|=  5.38514D-02

At iterate   70    f=  1.80223D-01    |proj g|=  5.22242D-02

At iterate   71    f=  1.77074D-01    |proj g|=  5.12428D-02

At iterate   72    f=  1.73020D-01    |proj g|=  1.14181D-01

At iterate   73    f=  1.68555D-01    |proj g|=  4.35014D-02

At iterate   74    f=  1.66338D-01    |proj g|=  3.38488D-02

At iterate   75    f=  1.63206D-01    |proj g|=  7.81970D-02

At iterate   76    f=  1.60421D-01    |proj g|=  9.00734D-02

At iterate   77    f=  1.57441D-01    |proj g|=  4.79432D-02

At iterate   78    f=  1.56093D-01    |proj g|=  2.45010D-01

At iterate   79    f=  1.53409D-01    |proj g|=  8.68864D-02

At iterate   80    f=  1.51875D-01    |proj g|=  3.65493D-02

At iterate   81    f=  1.49893D-01    |proj g|=  6.38136D-02

At iterate   82    f=  1.46609D-01    |proj g|=  9.97108D-02

At iterate   83    f=  1.41660D-01    |proj g|=  8.17617D-02

At iterate   84    f=  1.39284D-01    |proj g|=  1.31591D-01

At iterate   85    f=  1.35614D-01    |proj g|=  5.34248D-02

At iterate   86    f=  1.32289D-01    |proj g|=  5.94853D-02

At iterate   87    f=  1.30774D-01    |proj g|=  6.62761D-02

At iterate   88    f=  1.29164D-01    |proj g|=  9.67610D-02

At iterate   89    f=  1.27361D-01    |proj g|=  5.30402D-02

At iterate   90    f=  1.25258D-01    |proj g|=  4.52685D-02

At iterate   91    f=  1.24144D-01    |proj g|=  3.50661D-02

At iterate   92    f=  1.22970D-01    |proj g|=  1.34427D-01

At iterate   93    f=  1.21192D-01    |proj g|=  4.31738D-02

At iterate   94    f=  1.20275D-01    |proj g|=  5.87830D-02

At iterate   95    f=  1.18313D-01    |proj g|=  4.27942D-02

At iterate   96    f=  1.17279D-01    |proj g|=  6.13713D-02

At iterate   97    f=  1.16206D-01    |proj g|=  1.97619D-02

At iterate   98    f=  1.15222D-01    |proj g|=  4.48254D-02

At iterate   99    f=  1.14327D-01    |proj g|=  4.60775D-02

At iterate  100    f=  1.12672D-01    |proj g|=  2.88318D-02

At iterate  101    f=  1.11002D-01    |proj g|=  5.78257D-02

At iterate  102    f=  1.09915D-01    |proj g|=  1.23144D-01

At iterate  103    f=  1.08614D-01    |proj g|=  5.08242D-02

At iterate  104    f=  1.07305D-01    |proj g|=  2.05078D-02

At iterate  105    f=  1.06055D-01    |proj g|=  8.27541D-02

At iterate  106    f=  1.04922D-01    |proj g|=  9.24675D-02

At iterate  107    f=  1.02835D-01    |proj g|=  1.08462D-01

At iterate  108    f=  1.01792D-01    |proj g|=  8.04543D-02

At iterate  109    f=  9.97770D-02    |proj g|=  5.66051D-02

At iterate  110    f=  9.86550D-02    |proj g|=  3.49343D-02

At iterate  111    f=  9.74108D-02    |proj g|=  6.56690D-02
 This problem is unconstrained.
At iterate  112    f=  9.57478D-02    |proj g|=  6.15999D-02

At iterate  113    f=  9.43457D-02    |proj g|=  6.21794D-02

At iterate  114    f=  9.30552D-02    |proj g|=  4.32226D-02

At iterate  115    f=  9.22095D-02    |proj g|=  8.02477D-02

At iterate  116    f=  9.12478D-02    |proj g|=  2.45968D-02

At iterate  117    f=  9.04983D-02    |proj g|=  1.69231D-02

At iterate  118    f=  8.99069D-02    |proj g|=  2.17111D-02

At iterate  119    f=  8.83240D-02    |proj g|=  3.39243D-02

At iterate  120    f=  8.77997D-02    |proj g|=  7.31901D-02

At iterate  121    f=  8.66947D-02    |proj g|=  4.13160D-02

At iterate  122    f=  8.64320D-02    |proj g|=  6.24183D-02

At iterate  123    f=  8.45723D-02    |proj g|=  3.85627D-02

At iterate  124    f=  8.35820D-02    |proj g|=  2.46852D-02

At iterate  125    f=  8.25781D-02    |proj g|=  3.38878D-02

At iterate  126    f=  8.17539D-02    |proj g|=  1.05004D-01

At iterate  127    f=  8.10321D-02    |proj g|=  3.49014D-02

At iterate  128    f=  8.03722D-02    |proj g|=  4.26776D-02

At iterate  129    f=  7.92385D-02    |proj g|=  4.55410D-02

At iterate  130    f=  7.84270D-02    |proj g|=  1.86211D-02

At iterate  131    f=  7.76502D-02    |proj g|=  6.93974D-02

At iterate  132    f=  7.67317D-02    |proj g|=  3.78811D-02

At iterate  133    f=  7.62113D-02    |proj g|=  2.53625D-02

At iterate  134    f=  7.52942D-02    |proj g|=  1.73423D-02

At iterate  135    f=  7.44466D-02    |proj g|=  2.50020D-02

At iterate  136    f=  7.37412D-02    |proj g|=  5.32529D-02

At iterate  137    f=  7.25499D-02    |proj g|=  2.64392D-02

At iterate  138    f=  7.17744D-02    |proj g|=  4.02137D-02

At iterate  139    f=  7.06307D-02    |proj g|=  3.14219D-02

At iterate  140    f=  6.97917D-02    |proj g|=  2.02864D-02

At iterate  141    f=  6.92348D-02    |proj g|=  4.27977D-02

At iterate  142    f=  6.84450D-02    |proj g|=  3.07515D-02

At iterate  143    f=  6.75559D-02    |proj g|=  2.90920D-02

At iterate  144    f=  6.70882D-02    |proj g|=  4.75226D-02

At iterate  145    f=  6.65371D-02    |proj g|=  1.25793D-02

At iterate  146    f=  6.55496D-02    |proj g|=  3.16842D-02

At iterate  147    f=  6.46789D-02    |proj g|=  1.10573D-02

At iterate  148    f=  6.34207D-02    |proj g|=  2.38280D-02

At iterate  149    f=  6.27034D-02    |proj g|=  2.54676D-02

At iterate  150    f=  6.17427D-02    |proj g|=  1.98849D-02

At iterate  151    f=  6.03847D-02    |proj g|=  2.82350D-02

At iterate  152    f=  5.99897D-02    |proj g|=  5.79761D-02

At iterate  153    f=  5.86208D-02    |proj g|=  2.98454D-02

At iterate  154    f=  5.80866D-02    |proj g|=  2.01855D-02

At iterate  155    f=  5.76161D-02    |proj g|=  2.44564D-02

At iterate  156    f=  5.71389D-02    |proj g|=  2.76857D-02

At iterate  157    f=  5.63019D-02    |proj g|=  1.20104D-02

At iterate  158    f=  5.52805D-02    |proj g|=  1.63104D-02

At iterate  159    f=  5.47733D-02    |proj g|=  2.97665D-02

At iterate  160    f=  5.40071D-02    |proj g|=  2.19831D-02

At iterate  161    f=  5.29850D-02    |proj g|=  1.42537D-02

At iterate  162    f=  5.24862D-02    |proj g|=  2.19271D-02

At iterate  163    f=  5.20069D-02    |proj g|=  1.65811D-02

At iterate  164    f=  5.14724D-02    |proj g|=  1.25521D-02

At iterate  165    f=  5.08133D-02    |proj g|=  2.41548D-02

At iterate  166    f=  5.04733D-02    |proj g|=  4.90624D-02

At iterate  167    f=  5.00313D-02    |proj g|=  1.47562D-02

At iterate  168    f=  4.94588D-02    |proj g|=  1.66000D-02

At iterate  169    f=  4.88443D-02    |proj g|=  2.08531D-02

At iterate  170    f=  4.87290D-02    |proj g|=  9.40194D-02

At iterate  171    f=  4.81921D-02    |proj g|=  1.80671D-02

At iterate  172    f=  4.80092D-02    |proj g|=  1.45467D-02

At iterate  173    f=  4.77802D-02    |proj g|=  3.79327D-02

At iterate  174    f=  4.75571D-02    |proj g|=  1.96245D-02

At iterate  175    f=  4.72381D-02    |proj g|=  1.64485D-02

At iterate  176    f=  4.67943D-02    |proj g|=  3.05639D-02

At iterate  177    f=  4.64934D-02    |proj g|=  2.28106D-02

At iterate  178    f=  4.61043D-02    |proj g|=  2.00784D-02

At iterate  179    f=  4.58149D-02    |proj g|=  2.85614D-02

At iterate  180    f=  4.55344D-02    |proj g|=  3.75414D-02

At iterate  181    f=  4.51028D-02    |proj g|=  2.76256D-02

At iterate  182    f=  4.45617D-02    |proj g|=  2.67532D-02

At iterate  183    f=  4.38315D-02    |proj g|=  1.94959D-02

At iterate  184    f=  4.34150D-02    |proj g|=  3.07730D-02

At iterate  185    f=  4.29582D-02    |proj g|=  4.28945D-02

At iterate  186    f=  4.27103D-02    |proj g|=  1.84679D-02

At iterate  187    f=  4.23981D-02    |proj g|=  1.30490D-02

At iterate  188    f=  4.19125D-02    |proj g|=  2.83346D-02

At iterate  189    f=  4.14923D-02    |proj g|=  6.55316D-02

At iterate  190    f=  4.10461D-02    |proj g|=  1.11180D-02

At iterate  191    f=  4.07231D-02    |proj g|=  1.74527D-02

At iterate  192    f=  4.03680D-02    |proj g|=  4.20997D-02

At iterate  193    f=  4.00223D-02    |proj g|=  1.32781D-02

At iterate  194    f=  3.97272D-02    |proj g|=  9.83931D-03

At iterate  195    f=  3.94303D-02    |proj g|=  3.07980D-02

At iterate  196    f=  3.92323D-02    |proj g|=  1.15312D-02

At iterate  197    f=  3.90881D-02    |proj g|=  9.07594D-03

At iterate  198    f=  3.87756D-02    |proj g|=  9.97909D-03

At iterate  199    f=  3.84267D-02    |proj g|=  1.81131D-02

At iterate  200    f=  3.81506D-02    |proj g|=  1.75355D-02

At iterate  201    f=  3.80006D-02    |proj g|=  2.05284D-02

At iterate  202    f=  3.78014D-02    |proj g|=  1.60391D-02

At iterate  203    f=  3.74180D-02    |proj g|=  2.27207D-02

At iterate  204    f=  3.71439D-02    |proj g|=  2.70844D-02

At iterate  205    f=  3.69627D-02    |proj g|=  2.25535D-02

At iterate  206    f=  3.68144D-02    |proj g|=  2.16604D-02

At iterate  207    f=  3.64461D-02    |proj g|=  1.52186D-02

At iterate  208    f=  3.63070D-02    |proj g|=  2.92725D-02

At iterate  209    f=  3.60881D-02    |proj g|=  9.68479D-03

At iterate  210    f=  3.59395D-02    |proj g|=  2.31011D-02

At iterate  211    f=  3.57236D-02    |proj g|=  1.71459D-02

At iterate  212    f=  3.54213D-02    |proj g|=  1.28479D-02

At iterate  213    f=  3.50640D-02    |proj g|=  1.77736D-02

At iterate  214    f=  3.46045D-02    |proj g|=  2.15982D-02

At iterate  215    f=  3.42928D-02    |proj g|=  3.47302D-02

At iterate  216    f=  3.41972D-02    |proj g|=  6.54365D-02

At iterate  217    f=  3.39900D-02    |proj g|=  1.56775D-02

At iterate  218    f=  3.39101D-02    |proj g|=  6.13517D-03

At iterate  219    f=  3.37147D-02    |proj g|=  1.29595D-02

At iterate  220    f=  3.34977D-02    |proj g|=  1.52622D-02

At iterate  221    f=  3.31736D-02    |proj g|=  2.08028D-02

At iterate  222    f=  3.27932D-02    |proj g|=  2.49874D-02

At iterate  223    f=  3.23837D-02    |proj g|=  1.82605D-02

At iterate  224    f=  3.19443D-02    |proj g|=  6.98864D-03

At iterate  225    f=  3.17975D-02    |proj g|=  1.06969D-02

At iterate  226    f=  3.16672D-02    |proj g|=  6.85478D-03

At iterate  227    f=  3.15292D-02    |proj g|=  1.03325D-02

At iterate  228    f=  3.12905D-02    |proj g|=  1.41523D-02

At iterate  229    f=  3.09728D-02    |proj g|=  2.33888D-02

At iterate  230    f=  3.08731D-02    |proj g|=  8.59869D-02

At iterate  231    f=  3.04807D-02    |proj g|=  1.93564D-02

At iterate  232    f=  3.03003D-02    |proj g|=  7.05291D-03

At iterate  233    f=  3.01126D-02    |proj g|=  1.24852D-02

At iterate  234    f=  2.99175D-02    |proj g|=  2.34076D-02
At iterate  235    f=  2.96675D-02    |proj g|=  1.19884D-02

At iterate  236    f=  2.94127D-02    |proj g|=  1.27925D-02

At iterate  237    f=  2.91545D-02    |proj g|=  1.61861D-02

At iterate  238    f=  2.88381D-02    |proj g|=  4.67661D-02

At iterate  239    f=  2.83894D-02    |proj g|=  1.93794D-02

At iterate  240    f=  2.81017D-02    |proj g|=  1.14703D-02

At iterate  241    f=  2.78396D-02    |proj g|=  1.57111D-02

At iterate  242    f=  2.75865D-02    |proj g|=  2.75115D-02

At iterate  243    f=  2.74016D-02    |proj g|=  8.12202D-03

At iterate  244    f=  2.73938D-02    |proj g|=  1.71218D-02

At iterate  245    f=  2.72719D-02    |proj g|=  1.80550D-02

At iterate  246    f=  2.70325D-02    |proj g|=  1.60546D-02

At iterate  247    f=  2.69033D-02    |proj g|=  1.08572D-02

At iterate  248    f=  2.67047D-02    |proj g|=  1.28841D-02

At iterate  249    f=  2.63543D-02    |proj g|=  1.47745D-02

At iterate  250    f=  2.60738D-02    |proj g|=  2.03653D-02

At iterate  251    f=  2.58620D-02    |proj g|=  1.17686D-02

At iterate  252    f=  2.56969D-02    |proj g|=  7.10876D-03

At iterate  253    f=  2.54206D-02    |proj g|=  4.75033D-02

At iterate  254    f=  2.52962D-02    |proj g|=  2.00599D-02

At iterate  255    f=  2.52142D-02    |proj g|=  1.16215D-02

At iterate  256    f=  2.50360D-02    |proj g|=  2.00516D-02

At iterate  257    f=  2.48222D-02    |proj g|=  2.31768D-02

At iterate  258    f=  2.45375D-02    |proj g|=  1.89803D-02

At iterate  259    f=  2.42703D-02    |proj g|=  1.46606D-02

At iterate  260    f=  2.40366D-02    |proj g|=  1.04127D-02

At iterate  261    f=  2.37156D-02    |proj g|=  1.73880D-02

At iterate  262    f=  2.34200D-02    |proj g|=  1.47072D-02

At iterate  263    f=  2.34053D-02    |proj g|=  4.29482D-02

At iterate  264    f=  2.30057D-02    |proj g|=  2.12672D-02

At iterate  265    f=  2.26558D-02    |proj g|=  4.98735D-02

At iterate  266    f=  2.23187D-02    |proj g|=  2.29467D-02

At iterate  267    f=  2.20852D-02    |proj g|=  1.60464D-02

At iterate  268    f=  2.19519D-02    |proj g|=  6.98730D-03

At iterate  269    f=  2.18305D-02    |proj g|=  1.05114D-02

At iterate  270    f=  2.17444D-02    |proj g|=  1.66336D-02

At iterate  271    f=  2.16670D-02    |proj g|=  5.23074D-03

At iterate  272    f=  2.15453D-02    |proj g|=  1.16448D-02

At iterate  273    f=  2.14350D-02    |proj g|=  1.14494D-02

At iterate  274    f=  2.11745D-02    |proj g|=  7.24822D-03

At iterate  275    f=  2.10700D-02    |proj g|=  1.64575D-02

At iterate  276    f=  2.09172D-02    |proj g|=  4.24551D-03

At iterate  277    f=  2.08258D-02    |proj g|=  3.98174D-03

At iterate  278    f=  2.06930D-02    |proj g|=  9.39219D-03

At iterate  279    f=  2.06379D-02    |proj g|=  7.26194D-03

At iterate  280    f=  2.05530D-02    |proj g|=  4.74145D-03

At iterate  281    f=  2.05115D-02    |proj g|=  5.63932D-03

At iterate  282    f=  2.04980D-02    |proj g|=  2.16040D-02

At iterate  283    f=  2.03582D-02    |proj g|=  1.00960D-02

At iterate  284    f=  2.03102D-02    |proj g|=  5.67222D-03

At iterate  285    f=  2.02340D-02    |proj g|=  1.04374D-02

At iterate  286    f=  2.02145D-02    |proj g|=  3.95484D-02

At iterate  287    f=  2.01488D-02    |proj g|=  1.98335D-02

At iterate  288    f=  2.01220D-02    |proj g|=  1.76772D-02

At iterate  289    f=  2.01075D-02    |proj g|=  9.77515D-03

At iterate  290    f=  1.95805D-02    |proj g|=  5.39720D-02

At iterate  291    f=  1.95286D-02    |proj g|=  5.47522D-02

At iterate  292    f=  1.90592D-02    |proj g|=  2.28322D-02

At iterate  293    f=  1.89455D-02    |proj g|=  2.70978D-02

At iterate  294    f=  1.85725D-02    |proj g|=  2.18053D-02

At iterate  295    f=  1.83387D-02    |proj g|=  1.89447D-02

At iterate  296    f=  1.82259D-02    |proj g|=  2.24565D-02

At iterate  297    f=  1.79753D-02    |proj g|=  1.67408D-02

At iterate  298    f=  1.76353D-02    |proj g|=  1.35714D-02

At iterate  299    f=  1.74957D-02    |proj g|=  1.52985D-02

At iterate  300    f=  1.71813D-02    |proj g|=  1.45763D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
 1210    300    327      1     0     0   1.458D-02   1.718D-02
  F =   1.7181262323955667E-002

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT                 
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/sklearn/neural_network/_multilayer_perceptron.py:546: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
0.9111111111111111
svm_clf = svm.SVC(gamma=0.001)
svm_clf.fit(X_train, y_train)
svm_clf.score(X_test,y_test)
0.9911111111111112
svm_clf.support_vectors_.shape
(660, 64)
# Total number of stored values in the support vectors (rows * columns).
svm_clf.support_vectors_.size
42240
# Number of learned weights: the sum of every layer's weight-matrix size.
# Bias terms (mlp.intercepts_) are not counted here; adding the 16 + 10 biases
# gives the N = 1210 variables that L-BFGS reported while fitting.
sum(c.size for c in mlp.coefs_)
1184
[list(c.shape) for c in mlp.coefs_]
[[64, 16], [16, 10]]
X, y = make_classification(n_samples=100, random_state=1,n_features=2,n_redundant=0)
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, stratify=y,
             random_state=1)
sns.scatterplot(x=X[:,0],y=X[:,1],hue=y)
<Axes: >
../_images/6f34c0f504b5a332e2b40fa6ae94c47626c56731e62b2569378404225de3e737.png
# Smallest possible network: one hidden neuron with an identity activation.
clf = MLPClassifier(
 hidden_layer_sizes=(1,),  # 1 hidden layer, 1 artificial neuron (explicit one-element tuple)
 max_iter=100,  # maximum 100 iterations in optimization
 alpha=1e-4,  # regularization
 solver="lbfgs",  # optimization algorithm
 verbose=10,  # print optimizer progress
 activation='identity',  # how to transform the hidden layer before passing it to the next layer
)
clf.fit(X_train, y_train)

clf.score(X_test, y_test)
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  2.72755D+00    |proj g|=  1.41805D+00

At iterate    1    f=  1.31673D+00    |proj g|=  6.92389D-01

At iterate    2    f=  7.54946D-01    |proj g|=  1.81684D-01

At iterate    3    f=  6.86828D-01    |proj g|=  8.66096D-02

At iterate    4    f=  3.98036D-01    |proj g|=  4.15445D-01

At iterate    5    f=  1.26842D-01    |proj g|=  2.05662D-01

At iterate    6    f=  5.23046D-02    |proj g|=  5.29974D-02

At iterate    7    f=  5.00689D-02    |proj g|=  6.65182D-02

At iterate    8    f=  4.79825D-02    |proj g|=  7.02581D-03

At iterate    9    f=  4.79268D-02    |proj g|=  1.21452D-03

At iterate   10    f=  4.79254D-02    |proj g|=  8.12433D-04

At iterate   11    f=  4.79233D-02    |proj g|=  4.93493D-04

At iterate   12    f=  4.79171D-02    |proj g|=  4.72924D-04

At iterate   13    f=  4.79120D-02    |proj g|=  6.78848D-04

At iterate   14    f=  4.79097D-02    |proj g|=  1.83022D-04

At iterate   15    f=  4.79096D-02    |proj g|=  4.06348D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5     15     18      1     0     0   4.063D-05   4.791D-02
  F =   4.7909609226466032E-002

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
 This problem is unconstrained.
1.0
clf.activation
'identity'
clf.out_activation_
'logistic'
x_logistic = np.linspace(-10,10,100)
y_logistic = expit(x_logistic)
plt.plot(x_logistic,y_logistic)
[<matplotlib.lines.Line2D at 0x7eff32086ca0>]
../_images/2ed38b0e877e76ddccd806ea0f74b68fd59651ca5609044257c5deeaddc618ab.png
clf.coefs_
[array([[-2.40416157],
        [ 0.0691716 ]]),
 array([[-5.46678735]])]
clf.intercepts_
[array([-0.39615732]), array([3.53086327])]
pt = np.array([-1,2])
np.matmul(pt,clf.coefs_[0]) + clf.intercepts_[0]
array([2.14634745])
# Manual forward pass for `pt`: hidden-layer value (identity activation), then
# the output weight and bias, then the logistic function.
# The elementwise `*` stands in for a matrix product only because there is a
# single hidden unit, so clf.coefs_[1] is a 1x1 array.
expit((np.matmul(pt,clf.coefs_[0]) + clf.intercepts_[0])*clf.coefs_[1] + clf.intercepts_[1])
array([[0.00027382]])
clf.predict_proba([pt])
array([[9.99726179e-01, 2.73821076e-04]])
def aritificial_neuron_template(activation,weights,bias,inputs):
    '''
    Evaluate a simple artificial neuron (or a layer of them) on some inputs.

    Parameters
    ----------
    activation : callable
        Activation function applied to the biased weighted sum.
    weights : numpy array
        Weights for summing the inputs, one per input.
    bias : numpy array
        Bias term added to the weighted sum.
    inputs : numpy array
        Input to the neuron; its last dimension must match `weights` so that
        ``np.matmul(inputs, weights)`` is defined.

    Returns
    -------
    The value of ``activation(np.matmul(inputs, weights) + bias)``.
    '''
    weighted_sum = np.matmul(inputs, weights)
    pre_activation = weighted_sum + bias
    return activation(pre_activation)

# two common activation functions
def identity_activation(x):
    """Return the input unchanged."""
    return x


def logistic_activation(x):
    """Apply the logistic sigmoid (scipy's expit) to the input."""
    return expit(x)


def hidden_neuron(x):
    """Apply the hidden layer of the fitted `clf` (identity activation) to x."""
    return aritificial_neuron_template(identity_activation, clf.coefs_[0],
                                       clf.intercepts_[0], x)


def output_neuron(h):
    """Apply the output layer of the fitted `clf` (logistic activation) to h."""
    return aritificial_neuron_template(expit, clf.coefs_[1], clf.intercepts_[1], h)

output_neuron(hidden_neuron(pt))
array([0.00027382])
X, y = make_classification(n_samples=200, random_state=1,n_features=4,n_redundant=0,n_informative=4)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,
                          random_state=5)
pt_4d =np.asarray([[-1,-2,2,-1],[1.5,0,.5,1]])
# Same single-neuron architecture as before, now fit on the 4-feature data;
# max_iter is raised so L-BFGS can run until it converges.
clf_4d = MLPClassifier(
  hidden_layer_sizes=(1,),  # explicit one-element tuple: a single hidden neuron
  max_iter=5000,
  alpha=1e-4,
  solver="lbfgs",
  verbose=10,  # print optimizer progress
  activation='identity',
)

clf_4d.fit(X_train, y_train)
clf_4d.score(X_test, y_test)
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            7     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  7.06082D-01    |proj g|=  2.13807D-01

At iterate    1    f=  4.78890D-01    |proj g|=  9.78564D-02

At iterate    2    f=  4.42781D-01    |proj g|=  3.88770D-02

At iterate    3    f=  4.37379D-01    |proj g|=  2.13806D-02

At iterate    4    f=  4.34824D-01    |proj g|=  9.17808D-03

At iterate    5    f=  4.34743D-01    |proj g|=  6.72591D-03

At iterate    6    f=  4.34657D-01    |proj g|=  7.36787D-04

At iterate    7    f=  4.34656D-01    |proj g|=  1.19546D-04
 This problem is unconstrained.
0.84
At iterate    8    f=  4.34656D-01    |proj g|=  2.25059D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    7      8     10      1     0     0   2.251D-05   4.347D-01
  F =  0.43465595346183000     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
def hidden_neuron_4d(x):
    """Apply the hidden layer of the fitted `clf_4d` (identity activation) to x."""
    return aritificial_neuron_template(identity_activation,
                                       clf_4d.coefs_[0], clf_4d.intercepts_[0], x)


def output_neuron_4d(x):
    """Apply the output layer of the fitted `clf_4d` (logistic activation) to x."""
    return aritificial_neuron_template(logistic_activation,
                                       clf_4d.coefs_[1], clf_4d.intercepts_[1], x)


output_neuron_4d(hidden_neuron_4d(pt_4d))
array([[0.95357467],
       [0.85332897]])
clf_4d.predict_proba(pt_4d)
array([[0.04642533, 0.95357467],
       [0.14667103, 0.85332897]])
df = pd.DataFrame(X,columns=['x0','x1','x2','x3'])
df['y'] = y
sns.pairplot(df,hue='y')
<seaborn.axisgrid.PairGrid at 0x7eff31dfe4f0>
../_images/ec1aefc1a6921883d0564bd9de6c8912b660d60b74a4c4ce5679852e93c30cb3.png