# Time budget handed to spot.Spot(max_time=...) further down; also encoded
# in the experiment name with a "min" suffix.
MAX_TIME = 60
# Size of the initial design (design_control["init_size"] further down).
INIT_SIZE = 10
DEVICE = "cpu"  # "cuda:0"

# 11 Hyperparameter Tuning: PyTorch With fashionMNIST Data Using Hold-out Data Sets

import pickle
import socket
from datetime import datetime
from dateutil.tz import tzlocal

start_time = datetime.now(tzlocal())
# Short host name: everything before the first dot.
HOSTNAME = socket.gethostname().split(".")[0]
# Format the timestamp explicitly. str(datetime) drops the fractional part
# when microsecond == 0, so splitting on "." would then leave the UTC offset
# (e.g. "+02:00") in the name. strftime always yields YYYY-MM-DD_HH-MM-SS and
# contains neither spaces nor colons.
experiment_name = (
    f"11-torch_{HOSTNAME}_{MAX_TIME}min_{INIT_SIZE}init_"
    f"{start_time:%Y-%m-%d_%H-%M-%S}"
)
experiment_name  # notebook cell output

# This notebook exemplifies hyperparameter tuning with SPOT (spotPython).
# The hyperparameter tuning software SPOT was developed in R (statistical
# programming language), see the Open Access book "Hyperparameter Tuning for
# Machine and Deep Learning with R - A Practical Guide", available here:
# https://link.springer.com/book/10.1007/978-981-19-5170-1.
# Shell command (terminal, or a notebook "!" line) that lists the installed
# spot packages:
#   pip list | grep "spot[RiverPython]"
# import sys
# !{sys.executable} -m pip install --upgrade build
# !{sys.executable} -m pip install --upgrade --force-reinstall spotPython
from tabulate import tabulate
import copy
import warnings
import numbers
import json
import calendar
import math
import datetime as dt
import numpy as np
from math import inf
import pandas as pd
from scipy.optimize import differential_evolution
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from functools import partial
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
from spotPython.spot import spot
from spotPython.hyperparameters.values import (
add_core_model_to_fun_control,
assign_values,
convert_keys,
get_bound_values,
get_default_hyperparameters_for_core_model,
get_default_values,
get_dict_with_levels_and_types,
get_values_from_dict,
get_var_name,
get_var_type,
iterate_dict_values,
modify_hyper_parameter_levels,
modify_hyper_parameter_bounds,
replace_levels_with_positions,
return_conf_list_from_var_dict,
get_one_core_model_from_X,
transform_hyper_parameter_values,
get_dict_with_levels_and_types,
convert_keys,
iterate_dict_values,
)
from spotPython.torch.traintest import evaluate_cv, evaluate_hold_out
from spotPython.utils.convert import class_for_name
from spotPython.utils.eda import (
get_stars,
gen_design_table)
from spotPython.utils.transform import transform_hyper_parameter_values
from spotPython.utils.convert import get_Xy_from_df
from spotPython.utils.init import fun_control_init
from spotPython.plot.validation import plot_cv_predictions, plot_roc, plot_confusion_matrix
from spotPython.data.torch_hyper_dict import TorchHyperDict
from spotPython.fun.hypertorch import HyperTorch
warnings.filterwarnings("ignore")
# Neural Net specific imports:
from spotPython.torch.netfashionMNIST import Net_fashionMNIST

print(torch.__version__)
# Check that MPS is available and report why not if it is missing.
if not torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")
else:
    # mps_device is only defined on this branch; the tuner itself uses DEVICE.
    mps_device = torch.device("mps")
    print("MPS device: ", mps_device)

# 11.1 Step 1: Initialization of the Empty fun_control Dictionary
# Create the control dictionary that every later step reads and extends.
# NOTE(review): the tensorboard path mentions "sklearn_classification"
# although this experiment tunes a torch model - confirm it is intended.
fun_control = fun_control_init(
    task="classification",
    tensorboard_path="runs/10_spot_hpt_sklearn_classification",
)

# 11.2 Step 2: Load fashionMNIST Data
def load_data(data_dir="./data"):
    """Return the FashionMNIST training and test datasets.

    Both splits are requested with ``download=True`` and converted with
    ``ToTensor()``.

    Args:
        data_dir: Root directory passed to ``datasets.FashionMNIST``.

    Returns:
        Tuple ``(training_data, test_data)``.
    """
    # Download training data from open datasets.
    training_data = datasets.FashionMNIST(
        root=data_dir,
        train=True,
        download=True,
        transform=ToTensor(),
    )
    # Download test data from open datasets.
    test_data = datasets.FashionMNIST(
        root=data_dir,
        train=False,
        download=True,
        transform=ToTensor(),
    )
    return training_data, test_data


train, test = load_data()
train.data.shape, test.data.shape  # notebook cell output

n_samples = len(train)
# add the dataset to the fun_control
fun_control.update({"data": None,
                    "train": train,
                    "test": test,
                    "n_samples": n_samples,
                    "target_column": None})

# 11.3 Step 3: Specification of the Preprocessing Model
# No preprocessing model is used here. The commented lines sketch a
# ColumnTransformer-based alternative.
# categorical_columns = []
# one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
# prep_model = ColumnTransformer(
#     transformers=[
#         ("categorical", one_hot_encoder, categorical_columns),
#     ],
#     remainder=StandardScaler(),
# )
prep_model = None
fun_control.update({"prep_model": prep_model})

# 11.4 Step 4: Select algorithm and core_model_hyper_dict
spotPython implements a class which is similar to the class described in the PyTorch tutorial. The class is called Net_fashionMNIST and is implemented in the file netfashionMNIST.py. The class is imported here.
Note: In addition to the class Net from the PyTorch tutorial, the class Net_fashionMNIST has additional attributes, namely:
- learning rate (lr),
- batchsize (batch_size),
- epochs (epochs), and
- k_folds (k_folds).
Further attributes can be easily added to the class, e.g., optimizer or loss_function.
# Register the network class and its hyperparameter dictionary in fun_control.
core_model = Net_fashionMNIST
fun_control = add_core_model_to_fun_control(
    core_model=core_model,
    fun_control=fun_control,
    hyper_dict=TorchHyperDict,
    filename=None,
)

# 11.5 Step 5: Modify hyper_dict Hyperparameters for the Selected Algorithm aka core_model
11.5.1 Modify hyperparameter of type factor
# fun_control = modify_hyper_parameter_levels(fun_control, "leaf_model", ["LinearRegression"])
# fun_control["core_model_hyper_dict"]

# 11.5.2 Modify hyperparameter of type numeric and integer (boolean)
# fun_control = modify_hyper_parameter_bounds(fun_control, "delta", bounds=[1e-10, 1e-6])
# fun_control = modify_hyper_parameter_bounds(fun_control, "min_samples_split", bounds=[3, 20])
# fun_control = modify_hyper_parameter_bounds(fun_control, "merit_preprune", bounds=[0, 0])
# fun_control["core_model_hyper_dict"]
# Equal lower and upper bounds pin a hyperparameter to a single value.
fun_control = modify_hyper_parameter_bounds(fun_control, "k_folds", bounds=[0, 0])
fun_control = modify_hyper_parameter_bounds(fun_control, "patience", bounds=[2, 2])

# 11.6 Step 6: Selection of the Objective (Loss) Function
from torch.nn import CrossEntropyLoss

loss_function = CrossEntropyLoss()
fun_control.update({"loss_function": loss_function})

# In addition to the loss functions, spotPython provides access to a large
# number of metrics.
- The key "metric_sklearn" is used for metrics that follow the scikit-learn conventions.
- The key "metric_river" is used for the river based evaluation (Montiel et al. 2021) via eval_oml_iter_progressive, and
- the key "metric_torch" is used for the metrics from TorchMetrics.
TorchMetrics is a collection of more than 90 PyTorch metrics[^1].
Because the PyTorch tutorial uses the accuracy as metric, we use the same metric here. Currently, accuracy is computed in the tutorial’s example code. We will use TorchMetrics instead, because it offers more flexibility, e.g., it can be used for regression and classification. Furthermore, TorchMetrics offers the following advantages:
- A standardized interface to increase reproducibility
- Reduces Boilerplate
- Distributed-training compatible
- Rigorously tested
- Automatic accumulation over batches
- Automatic synchronization between multiple devices
Therefore, we set
import torchmetrics

# Multiclass accuracy over 10 classes.
metric_torch = torchmetrics.Accuracy(task="multiclass", num_classes=10)

# spotPython performs minimization by default. If accuracy should be
# maximized, then the objective function has to be multiplied by -1.
# Therefore, weights is set to -1 in this case.
# Objective function handed to the tuner.
fun = HyperTorch(seed=123, log_level=50).fun_torch
loss_function = CrossEntropyLoss()
# NOTE(review): the accompanying text says weights should be -1 when accuracy
# is maximized, but 1.0 is used here - confirm which quantity fun_torch
# actually returns for minimization.
weights = 1.0
metric_torch = torchmetrics.Accuracy(task="multiclass", num_classes=10)
shuffle = True
# NOTE: this name shadows the builtin eval(); it is kept unchanged because
# other cells may reference it.
eval = "train_hold_out"
device = DEVICE
show_batch_interval = 100_000
path = "torch_model.pt"
fun_control.update({
    "data_dir": None,
    "checkpoint_dir": None,
    "horizon": None,
    "oml_grace_period": None,
    "weights": weights,
    "step": None,
    "log_level": 50,
    "weight_coeff": None,
    "metric_torch": metric_torch,
    "metric_river": None,
    "metric_sklearn": None,
    "loss_function": loss_function,
    "shuffle": shuffle,
    "eval": eval,
    "device": device,
    "show_batch_interval": show_batch_interval,
    "path": path,
})
fun_control  # notebook cell output

# 11.7 Step 7: Calling the SPOT Function
11.7.1 Prepare the SPOT Parameters
Get types and variable names as well as lower and upper bounds for the hyperparameters.
# Collect variable types, names and bounds from the hyperparameter dictionary
# stored in fun_control.
var_type = get_var_type(fun_control)
var_name = get_var_name(fun_control)
fun_control.update({"var_type": var_type,
                    "var_name": var_name})
lower = get_bound_values(fun_control, "lower")
upper = get_bound_values(fun_control, "upper")

print(gen_design_table(fun_control))

# 11.7.2 Run the Spot Optimizer
- Run SPOT for approx. MAX_TIME minutes (max_time).
- Note: the run takes longer, because the evaluation time of the initial design (here: init_size = INIT_SIZE, i.e., 10 points) is not considered.
from spotPython.hyperparameters.values import get_default_hyperparameters_as_array

# Start the search from the default hyperparameter values.
hyper_dict = TorchHyperDict().load()
X_start = get_default_hyperparameters_as_array(fun_control, hyper_dict)
X_start  # notebook cell output

spot_tuner = spot.Spot(
    fun=fun,
    lower=lower,
    upper=upper,
    fun_evals=inf,  # no evaluation limit is set here; max_time is given below
    fun_repeats=1,
    max_time=MAX_TIME,
    noise=False,
    tolerance_x=np.sqrt(np.spacing(1)),
    var_type=var_type,
    var_name=var_name,
    infill_criterion="y",
    n_points=1,
    seed=123,
    log_level=50,
    show_models=False,
    show_progress=True,
    fun_control=fun_control,
    design_control={"init_size": INIT_SIZE,
                    "repeats": 1},
    surrogate_control={"noise": True,
                       "cod_type": "norm",
                       "min_theta": -4,
                       "max_theta": 3,
                       "n_theta": len(var_name),
                       "model_optimizer": differential_evolution,
                       "model_fun_evals": 10_000,
                       "log_level": 50,
                       },
)
spot_tuner.run(X_start=X_start)

# 11.7.3 Results
# Optional persistence of the tuner object; both switches are off by default.
SAVE = False
LOAD = False

if SAVE:
    result_file_name = "res_" + experiment_name + ".pkl"
    with open(result_file_name, 'wb') as f:
        pickle.dump(spot_tuner, f)

if LOAD:
    # NOTE(review): this hard-coded file name belongs to a different
    # experiment ("ch10-friedman") - confirm before enabling LOAD.
    result_file_name = "res_ch10-friedman-hpt-0_maans03_60min_20init_1K_2023-04-14_10-11-19.pkl"
    # SECURITY: unpickling can execute arbitrary code; only load result files
    # you created yourself or otherwise trust.
    with open(result_file_name, 'rb') as f:
        spot_tuner = pickle.load(f)

# - Show the Progress of the hyperparameter tuning:
spot_tuner.y  # notebook cell output
spot_tuner.plot_progress(log_y=False,
                         filename="../Figures.d/" + experiment_name + "_progress.pdf")

# Print the results
print(gen_design_table(fun_control=fun_control, spot=spot_tuner))

# 11.8 Show variable importance
spot_tuner.plot_importance(threshold=0.025,
                           filename="../Figures.d/" + experiment_name + "_importance.pdf")

# 11.9 Get SPOT Results
The architecture of the spotPython model can be obtained by the following code:
from spotPython.hyperparameters.values import get_one_core_model_from_X

# Build the model from the tuner's best point (min_X), reshaped to one row
# and passed through to_all_dim.
X = spot_tuner.to_all_dim(spot_tuner.min_X.reshape(1, -1))
model_spot = get_one_core_model_from_X(X, fun_control)
model_spot  # notebook cell output

# 11.10 Get Default Hyperparameters
from spotPython.hyperparameters.values import get_one_core_model_from_X

# Work on a shallow copy: a plain "fc = fun_control" is only an alias, so
# fc.update(...) would overwrite the "core_model_hyper_dict" entry of
# fun_control itself. Replacing the key on a shallow copy leaves fun_control
# untouched.
fc = copy.copy(fun_control)
fc.update({"core_model_hyper_dict":
           hyper_dict[fun_control["core_model"].__name__]})
model_default = get_one_core_model_from_X(X_start, fun_control=fc)
model_default  # notebook cell output

# 11.11 Evaluation of the Default and the Tuned Architectures
The method train_tuned takes a model architecture without trained weights and trains this model with the train data. The train data is split into train and validation data. The validation data is used for early stopping. The trained model weights are saved as a dictionary.
from spotPython.torch.traintest import (
    train_tuned,
    test_tuned,
)

# Train the default architecture on the training data, then evaluate it on
# the test data.
train_tuned(net=model_default, train_dataset=train, shuffle=True,
            loss_function=fun_control["loss_function"],
            metric=fun_control["metric_torch"],
            device=DEVICE, show_batch_interval=1_000_000,
            path=None,
            task=fun_control["task"],)
test_tuned(net=model_default, test_dataset=test,
           loss_function=fun_control["loss_function"],
           metric=fun_control["metric_torch"],
           shuffle=False,
           device=DEVICE,
           task=fun_control["task"],)

# The following code trains the model model_spot. If path is set to a
# filename, e.g., path = "model_spot_trained.pt", the weights of the trained
# model will be saved to this file.
# Train the tuned architecture on the training data, then evaluate it on the
# test data.
train_tuned(net=model_spot, train_dataset=train,
            loss_function=fun_control["loss_function"],
            metric=fun_control["metric_torch"],
            shuffle=True,
            device=DEVICE,
            path=None,
            task=fun_control["task"],)
#| echo: true
test_tuned(net=model_spot, test_dataset=test,
           shuffle=False,
           loss_function=fun_control["loss_function"],
           metric=fun_control["metric_torch"],
           device=DEVICE,
           task=fun_control["task"],)

# 11.12 Detailed Hyperparameter Plots
# Path prefix for the contour figures.
filename = "./figures/" + experiment_name
spot_tuner.plot_important_hyperparameter_contour(filename=filename)

# 11.13 Parallel Coordinates Plot
spot_tuner.parallel_plot()

# 11.14 Plot all Combinations of Hyperparameters
- Warning: this may take a while.
PLOT_ALL = False
# min_z / max_z were used below without ever being defined, which raised a
# NameError as soon as PLOT_ALL was switched on. None is assumed to be
# plot_contour's "no fixed z-limits" default - TODO confirm against the
# installed spotPython version.
min_z = None
max_z = None
if PLOT_ALL:
    n = spot_tuner.k
    # Visit every unordered pair (i, j) with i < j once.
    for i in range(n - 1):
        for j in range(i + 1, n):
            spot_tuner.plot_contour(i=i, j=j, min_z=min_z, max_z=max_z)