```python
# different learning rate schedules and momentum parameters
params = [{'solver': 'sgd', 'learning_rate': 'constant', 'momentum': 0,
           'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'constant', 'momentum': .9,
           'nesterovs_momentum': False, 'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'constant', 'momentum': .9,
           'nesterovs_momentum': True, 'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'invscaling', 'momentum': 0,
           'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'invscaling', 'momentum': .9,
           'nesterovs_momentum': True, 'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'invscaling', 'momentum': .9,
           'nesterovs_momentum': False, 'learning_rate_init': 0.2},
          {'solver': 'adam', 'learning_rate_init': 0.01}]

labels = ["constant learning-rate", "constant with momentum",
          "constant with Nesterov's momentum",
          "inv-scaling learning-rate", "inv-scaling with momentum",
          "inv-scaling with Nesterov's momentum", "adam"]

plot_args = [{'c': 'red', 'linestyle': '-'},
             {'c': 'green', 'linestyle': '-'},
             {'c': 'blue', 'linestyle': '-'},
             {'c': 'red', 'linestyle': '--'},
             {'c': 'green', 'linestyle': '--'},
             {'c': 'blue', 'linestyle': '--'},
             {'c': 'black', 'linestyle': '-'}]
```
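A quick note on the two SGD schedules being compared: `'constant'` keeps the step size fixed at `learning_rate_init`, while `'invscaling'` shrinks it over time as `learning_rate_init / t**power_t` (scikit-learn's default `power_t` is 0.5). A minimal sketch of that decay, separate from the example itself:

```python
# A sketch of the 'invscaling' schedule used above, following the
# documented formula lr_t = learning_rate_init / t ** power_t.
learning_rate_init = 0.2
power_t = 0.5  # scikit-learn's default for MLPClassifier

for t in [1, 10, 100, 1000]:
    lr_t = learning_rate_init / t ** power_t
    print("step %4d: effective learning rate = %.4f" % (t, lr_t))
```

With `learning_rate_init=0.2` the step size falls to about 0.02 after 100 updates, which is why the inv-scaling curves in the figure flatten out much earlier than the constant-rate ones.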
(3) Plotting the loss curves
```python
def plot_on_dataset(X, y, ax, name):
    # for each dataset, plot learning for each learning strategy
    print("\nlearning on dataset %s" % name)
    ax.set_title(name)
    X = MinMaxScaler().fit_transform(X)
    mlps = []
    if name == "digits":
        # digits is larger but converges fairly quickly
        max_iter = 15
    else:
        max_iter = 400

    for label, param in zip(labels, params):
        print("training: %s" % label)
        mlp = MLPClassifier(verbose=0, random_state=0,
                            max_iter=max_iter, **param)
        mlp.fit(X, y)
        mlps.append(mlp)
        print("Training set score: %f" % mlp.score(X, y))
        print("Training set loss: %f" % mlp.loss_)
    for mlp, label, args in zip(mlps, labels, plot_args):
        ax.plot(mlp.loss_curve_, label=label, **args)


fig, axes = plt.subplots(2, 2, figsize=(15, 10))
# load / generate some toy datasets
iris = datasets.load_iris()
digits = datasets.load_digits()
data_sets = [(iris.data, iris.target),
             (digits.data, digits.target),
             datasets.make_circles(noise=0.2, factor=0.5, random_state=1),
             datasets.make_moons(noise=0.3, random_state=0)]

for ax, data, name in zip(axes.ravel(), data_sets,
                          ['iris', 'digits', 'circles', 'moons']):
    plot_on_dataset(*data, ax=ax, name=name)

fig.legend(ax.get_lines(), labels=labels, ncol=3, loc="upper center")
plt.show()
```
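The two attributes printed above, `loss_` and `loss_curve_`, are available on any fitted `MLPClassifier`, so you can also inspect a single strategy on its own without running the full grid. A minimal sketch on iris, reusing the adam settings from `params`:

```python
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler

# Scale features to [0, 1], as plot_on_dataset does, before training.
X, y = datasets.load_iris(return_X_y=True)
X = MinMaxScaler().fit_transform(X)

mlp = MLPClassifier(solver='adam', learning_rate_init=0.01,
                    max_iter=400, random_state=0)
mlp.fit(X, y)

print("final training loss: %f" % mlp.loss_)
plt.plot(mlp.loss_curve_, label='adam')   # one loss value per iteration
plt.xlabel('iteration')
plt.ylabel('training loss')
plt.legend()
plt.show()
```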
Figure 3: Comparison of how the loss curves decrease on the four datasets under the different learning strategies
(4) Complete code
```python
print(__doc__)

import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets

# different learning rate schedules and momentum parameters
params = [{'solver': 'sgd', 'learning_rate': 'constant', 'momentum': 0,
           'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'constant', 'momentum': .9,
           'nesterovs_momentum': False, 'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'constant', 'momentum': .9,
           'nesterovs_momentum': True, 'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'invscaling', 'momentum': 0,
           'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'invscaling', 'momentum': .9,
           'nesterovs_momentum': True, 'learning_rate_init': 0.2},
          {'solver': 'sgd', 'learning_rate': 'invscaling', 'momentum': .9,
           'nesterovs_momentum': False, 'learning_rate_init': 0.2},
          {'solver': 'adam', 'learning_rate_init': 0.01}]

labels = ["constant learning-rate", "constant with momentum",
          "constant with Nesterov's momentum",
          "inv-scaling learning-rate", "inv-scaling with momentum",
          "inv-scaling with Nesterov's momentum", "adam"]

plot_args = [{'c': 'red', 'linestyle': '-'},
             {'c': 'green', 'linestyle': '-'},
             {'c': 'blue', 'linestyle': '-'},
             {'c': 'red', 'linestyle': '--'},
             {'c': 'green', 'linestyle': '--'},
             {'c': 'blue', 'linestyle': '--'},
             {'c': 'black', 'linestyle': '-'}]


def plot_on_dataset(X, y, ax, name):
    # for each dataset, plot learning for each learning strategy
    print("\nlearning on dataset %s" % name)
    ax.set_title(name)
    X = MinMaxScaler().fit_transform(X)
    mlps = []
    if name == "digits":
        # digits is larger but converges fairly quickly
        max_iter = 15
    else:
        max_iter = 400

    for label, param in zip(labels, params):
        print("training: %s" % label)
        mlp = MLPClassifier(verbose=0, random_state=0,
                            max_iter=max_iter, **param)
        mlp.fit(X, y)
        mlps.append(mlp)
        print("Training set score: %f" % mlp.score(X, y))
        print("Training set loss: %f" % mlp.loss_)
    for mlp, label, args in zip(mlps, labels, plot_args):
        ax.plot(mlp.loss_curve_, label=label, **args)


fig, axes = plt.subplots(2, 2, figsize=(15, 10))
# load / generate some toy datasets
iris = datasets.load_iris()
digits = datasets.load_digits()
data_sets = [(iris.data, iris.target),
             (digits.data, digits.target),
             datasets.make_circles(noise=0.2, factor=0.5, random_state=1),
             datasets.make_moons(noise=0.3, random_state=0)]

for ax, data, name in zip(axes.ravel(), data_sets,
                          ['iris', 'digits', 'circles', 'moons']):
    plot_on_dataset(*data, ax=ax, name=name)

fig.legend(ax.get_lines(), labels=labels, ncol=3, loc="upper center")
plt.show()
```
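One practical note: with `max_iter=15` on digits, and sometimes even with 400 iterations on the other datasets, several of these runs stop before fully converging, so scikit-learn emits `ConvergenceWarning`s during `fit`. If you want a clean console for this comparison, one option is to silence just that warning category around the `mlp.fit(X, y)` call; a sketch, assuming you are happy to ignore those warnings for this demo:

```python
import warnings
from sklearn.exceptions import ConvergenceWarning

# Ignore the expected "maximum iterations reached" warnings for this demo;
# all other warnings are left untouched.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning,
                            module="sklearn")
    mlp.fit(X, y)
```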