# Import the packages
# Utilities
import os
import logging
# For visualization
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# For modelling
import tensorflow as tf
from tensorflow import feature_column as fc
from tensorflow.keras import layers, models
from tensorflow import keras
from sklearn.model_selection import train_test_split
# Set TF logger to only print errors (dismiss warnings)
"tensorflow").setLevel(logging.ERROR) logging.getLogger(
Download and prepare the dataset
Kaggle competition: Tabular Playground Series - Aug 2021 - https://www.kaggle.com/c/tabular-playground-series-aug-2021
Dataset: The dataset used for this competition is synthetic, but based on a real dataset and generated using a CTGAN. The original dataset deals with calculating the loss associated with loan defaults. Although the features are anonymized, they have properties relating to real-world features.
Target: loss column
Notebook: The code in this notebook is inspired by the lab "Intro to Keras Tuner" by Robert Crowe, used in the course Machine Learning Modeling Pipelines in Production by DeepLearning.AI.
Loading the dataset
import os

#for dirname, _, filenames in os.walk('/kaggle/input'):
for dirname, _, filenames in os.walk('./input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

if not os.path.isdir("/tmp/data"):
    os.makedirs("/tmp/data")
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the data
#X = pd.read_csv('../input/tabular-playground-series-aug-2021/train.csv', index_col='id')
#X_test_full = pd.read_csv('../input/tabular-playground-series-aug-2021/test.csv', index_col='id')
Xx = pd.read_csv('./input/train.csv', index_col='id')
#X_test = pd.read_csv('./input/test.csv', index_col='id')

# Remove rows with missing target, separate target from predictors
X = Xx.copy(deep=True)
X.dropna(axis=0, subset=['loss'], inplace=True)
#Y = X.loss
#X.drop(['loss'], axis=1, inplace=True)

# Describe columns
X.describe(include='all').transpose()
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
#X_test_index = X_test.index
#X_test_scaled = scaler.transform(X_test)

#X_train, X_valid, Y_train, Y_valid = train_test_split(X_scaled, Y, train_size=0.8, test_size=0.2, stratify=Y, random_state=123)
train, valid = train_test_split(X, test_size=0.2, random_state=123)
print(train.shape)
print(valid.shape)

pd.DataFrame(train).to_csv("/tmp/data/tabular-train.csv", index=False)
pd.DataFrame(valid).to_csv("/tmp/data/tabular-valid.csv", index=False)
#pd.DataFrame(X_test_scaled).to_csv("/tmp/data/tabular-test.csv")

!ls -l /tmp/data/*.csv
pd.read_csv('/tmp/data/tabular-train.csv').columns
# Confirm the validation split still contains the 'loss' label, then list the remaining feature columns
valid_df = pd.read_csv('/tmp/data/tabular-valid.csv')
valid_df.pop('loss')
valid_df.columns
Create input pipeline
# Specify which column is the target
LABEL_COLUMN = 'loss'

# Specify numerical columns
# Note you should create another list with STRING_COLS if you
# had text data, but in this case all features are numerical
NUMERIC_COLS = ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6']
# A function to separate features and labels
def features_and_labels(row_data):
    label = row_data.pop(LABEL_COLUMN)
    return row_data, label

# A utility method to create a tf.data dataset from a CSV file
def load_dataset(pattern, batch_size=1, mode='eval'):
    dataset = tf.data.experimental.make_csv_dataset(pattern, batch_size)
    dataset = dataset.map(features_and_labels)  # features, label
    if mode == 'train':
        # Notice the repeat method is used so this dataset will loop infinitely
        dataset = dataset.shuffle(1000).repeat()
    # Prefetch one batch so input preparation overlaps with training
    # (tf.data.AUTOTUNE lets TensorFlow pick the buffer size automatically)
    dataset = dataset.prefetch(1)
    return dataset
Building DNN Model
def build_dnn_model():
    # Input layer: one scalar float input per numeric column
    inputs = {
        colname: layers.Input(name=colname, shape=(), dtype='float32')
        for colname in NUMERIC_COLS
    }

    # feature_columns
    feature_columns = {
        colname: fc.numeric_column(colname)
        for colname in NUMERIC_COLS
    }

    # Constructor for DenseFeatures takes a list of numeric columns
    # and the resulting tensor takes a dictionary of Input layers
    dnn_inputs = layers.DenseFeatures(feature_columns.values())(inputs)

    # two hidden layers of 32 and 8 units, respectively
    h1 = layers.Dense(32, activation='relu', name='h1')(dnn_inputs)
    h2 = layers.Dense(8, activation='relu', name='h2')(h1)

    # final output is a linear activation because this is a regression problem
    output = layers.Dense(1, activation='linear', name='output')(h2)

    # Create model with inputs and output
    model = models.Model(inputs, output)

    # compile model (Mean Squared Error is suitable for regression)
    model.compile(optimizer='adam',
                  loss='mse',
                  metrics=[
                      tf.keras.metrics.RootMeanSquaredError(name='rmse'),
                      'mse'
                  ])
    return model

# Save compiled model into a variable
model = build_dnn_model()

# Plot the layer architecture and relationship between input features
#tf.keras.utils.plot_model(model, 'dnn_model.png', show_shapes=False, rankdir='LR')
Training the model
NUM_EPOCHS = 20
TRAIN_BATCH_SIZE = 32
NUM_TRAIN_EXAMPLES = len(pd.read_csv('/tmp/data/tabular-train.csv'))
NUM_EVAL_EXAMPLES = len(pd.read_csv('/tmp/data/tabular-valid.csv'))

print(f"training split has {NUM_TRAIN_EXAMPLES} examples\n")
print(f"evaluation split has {NUM_EVAL_EXAMPLES} examples\n")

# Training dataset
trainds = load_dataset('/tmp/data/tabular-train*', TRAIN_BATCH_SIZE, 'train')

# Evaluation dataset
evalds = load_dataset('/tmp/data/tabular-valid*', 1000, 'eval').take(NUM_EVAL_EXAMPLES // 1000)

# Needs to be specified since the dataset is infinite
# This happens because the repeat method was used when creating the dataset
steps_per_epoch = NUM_TRAIN_EXAMPLES // TRAIN_BATCH_SIZE

# Train the model and save the history
history = model.fit(trainds,
                    validation_data=evalds,
                    epochs=NUM_EPOCHS,
                    steps_per_epoch=steps_per_epoch)

print(type(trainds))
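The history object returned by model.fit stores the per-epoch metrics, so a quick plot helps spot over- or under-fitting. The sketch below is an added illustration, not part of the original notebook; it assumes the 'rmse'/'val_rmse' metric names compiled in build_dnn_model above and the matplotlib import at the top.

# Sketch: plot training vs. validation RMSE from the history returned by model.fit
# Assumes the 'rmse' metric name used in build_dnn_model above
def plot_metric(history, metric='rmse'):
    plt.plot(history.history[metric], label='train ' + metric)
    plt.plot(history.history['val_' + metric], label='val ' + metric)
    plt.xlabel('epoch')
    plt.ylabel(metric)
    plt.legend()
    plt.show()

plot_metric(history, 'rmse')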
# Callback function: stop training once the MAE drops below a threshold
DESIRED_MAE = 5

class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if logs.get('mean_absolute_error') < DESIRED_MAE:
            print("\nReached an MAE below {} so cancelling training!".format(DESIRED_MAE))
            self.model.stop_training = True

# Number of training epochs.
NUM_EPOCHS = 5

# NOTE: X_train, Y_train, X_valid and Y_valid come from the train_test_split on
# X_scaled shown (commented out) in the data preparation step above
#inputs = keras.Input(shape=())
inputs = keras.Input(shape=(X_train.shape[1],))

model_dnn = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, activation='relu', name='dense_1'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='relu', name='dense_2'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='relu', name='dense_3')
])

model_dnn.compile(loss="mean_absolute_error",
                  optimizer=keras.optimizers.Adam(learning_rate=0.001),
                  metrics=["mean_absolute_error"])
#model_dnn.summary()

history = model_dnn.fit(X_train, Y_train, epochs=NUM_EPOCHS,
                        callbacks=[myCallback()])

# Print model summary
model_dnn.summary()

b_eval_dict = model_dnn.evaluate(X_valid, Y_valid, return_dict=True)
Let’s define a helper function for displaying the results so it’s easier to compare later.
# Define helper function
def print_results(model, model_name, eval_dict):
    '''
    Prints the values of the hyperparameters to tune, and the results of model evaluation

    Args:
      model (Model) - Keras model to evaluate
      model_name (string) - arbitrary string to be used in identifying the model
      eval_dict (dict) - results of model.evaluate
    '''
    print(f'\n{model_name}:')
    print(f'number of units in 1st Dense layer: {model.get_layer("dense_1").units}')
    print(f'learning rate for the optimizer: {model.optimizer.lr.numpy()}')

    for key, value in eval_dict.items():
        print(f'{key}: {value}')

# Print results for baseline model
print_results(model_dnn, 'BASELINE MODEL', b_eval_dict)
That’s it for getting the results for a single set of hyperparameters. Let’s now use Keras Tuner, an API that automatically searches for an optimal set of hyperparameters.
Keras Tuner
To perform hypertuning with Keras Tuner, you will need to:
- Define the model
- Select which hyperparameters to tune
- Define its search space
- Define the search strategy
Install and import packages
You will start by installing and importing the required packages.
# Install Keras Tuner
!pip install -q -U keras-tuner
# Import required packages
import tensorflow as tf
import kerastuner as kt
Define the model
The model you set up for hypertuning is called a hypermodel. When you build this model, you define the hyperparameter search space in addition to the model architecture.
You can define a hypermodel through two approaches:
- By using a model builder function
- By subclassing the HyperModel class of the Keras Tuner API
In this lab, you will take the first approach: you will use a model builder function to define the regression model. This function returns a compiled model and uses hyperparameters you define inline to hypertune the model.
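For comparison, the second (subclassing) approach would look roughly like the sketch below. This is an added illustration only, not part of this notebook's workflow; the class name TabularHyperModel is made up, while kt.HyperModel, hp.Int() and the Keras layers are the actual APIs.

# Illustration only: the subclassing alternative to a model builder function.
# TabularHyperModel is a hypothetical name; build(hp) must return a compiled model.
class TabularHyperModel(kt.HyperModel):
    def build(self, hp):
        model = keras.Sequential()
        model.add(keras.layers.Dense(hp.Int('units', min_value=32, max_value=512, step=32),
                                     activation='relu'))
        model.add(keras.layers.Dense(1))
        model.compile(optimizer='adam',
                      loss='mean_absolute_error',
                      metrics=['mean_absolute_error'])
        return model

# A tuner would then receive an instance, e.g. kt.Hyperband(TabularHyperModel(), ...)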
The function below basically builds the same model you used earlier. The difference is that two hyperparameters are set up for tuning:
- the number of hidden units of the first Dense layer
- the learning rate of the Adam optimizer
You will see that this is done with a HyperParameters object, which configures the hyperparameters you’d like to tune. For this exercise, you will:
- use its Int() method to define the search space for the Dense units. This allows you to set a minimum and maximum value, as well as the step size when incrementing between these values.
- use its Choice() method for the learning rate. This allows you to define discrete values to include in the search space when hypertuning.
You can view all available methods and their sample usage in the official documentation.
def model_builder(hp):
    '''
    Builds the model and sets up the hyperparameters to tune.

    Args:
      hp - Keras tuner object

    Returns:
      model with hyperparameters to tune
    '''
    # Initialize the Sequential API and start stacking the layers
    model = keras.Sequential()
    #model.add(keras.layers.Flatten(input_shape=(28, 28)))

    # Tune the number of units in the first Dense layer
    # Choose an optimal value between 32-512
    hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(keras.layers.Dense(units=hp_units, activation='relu', name='dense_1'))

    # Add next layers
    model.add(keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.Dense(10, activation='relu', name='dense_2'))
    model.add(keras.layers.Dropout(0.2))
    model.add(tf.keras.layers.Dense(1, activation='relu', name='dense_3'))

    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss="mean_absolute_error",
                  metrics=['mean_absolute_error'])

    return model
Instantiate the Tuner and perform hypertuning
Now that you have the model builder, you can then define how the tuner can find the optimal set of hyperparameters, also called the search strategy. Keras Tuner has four tuners available with built-in strategies - RandomSearch, Hyperband, BayesianOptimization, and Sklearn.
In this tutorial, you will use the Hyperband tuner. Hyperband is an algorithm specifically developed for hyperparameter optimization. It uses adaptive resource allocation and early-stopping to quickly converge on a high-performing model. This is done using a sports championship style bracket wherein the algorithm trains a large number of models for a few epochs and carries forward only the top-performing half of models to the next round. You can read about the intuition behind the algorithm in section 3 of this paper.
Hyperband determines the number of models to train in a bracket by computing 1 + log_factor(max_epochs) and rounding it up to the nearest integer. You will see these parameters (i.e. factor and max_epochs) passed into the initializer below. In addition, you will also need to define the following to instantiate the Hyperband tuner:
- the hypermodel (built by your model builder function)
- the objective to optimize (e.g. validation accuracy)
- a directory to save logs and checkpoints for every trial (model configuration) run during the hyperparameter search. If you re-run the hyperparameter search, Keras Tuner uses the existing state from these logs to resume the search. To disable this behavior, pass an additional overwrite=True argument while instantiating the tuner.
- the project_name to differentiate from other runs. This will be used as a subdirectory name under the directory.
You can refer to the documentation for other arguments you can pass in.
# Instantiate the tuner
tuner = kt.Hyperband(model_builder,
                     objective='val_mean_absolute_error',
                     max_epochs=5,
                     factor=3,
                     directory='kt_dir',
                     project_name='kt_hyperband')
Let’s see a summary of the hyperparameters that you will tune:
# Display hypertuning settings
tuner.search_space_summary()
You can pass in a callback to stop training early when a metric is not improving. Below, we define an EarlyStopping callback to monitor the validation MAE and stop training if it’s not improving after 5 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_mean_absolute_error', patience=5)
You will now run the hyperparameter search. The arguments for the search method are the same as those used for tf.keras.Model.fit, in addition to the callback above. This will take around 10 minutes to run.
# Perform hypertuning
tuner.search(X_train, Y_train, epochs=NUM_EPOCHS, validation_split=0.2, callbacks=[stop_early])
You can get the best set of hyperparameters found during the search with the get_best_hyperparameters() method.
# Get the optimal hyperparameters from the results
best_hps = tuner.get_best_hyperparameters()[0]
print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")
Build and train the model
Now that you have the best set of hyperparameters, you can rebuild the hypermodel with these values and retrain it.
# Build the model with the optimal hyperparameters
h_model = tuner.hypermodel.build(best_hps)
#h_model.summary()

# Train the hypertuned model
h_model.fit(X_train, Y_train, epochs=NUM_EPOCHS, validation_split=0.2)
You will then get its performance against the validation set.
# Evaluate the hypertuned model against the validation set
h_eval_dict = h_model.evaluate(X_valid, Y_valid, return_dict=True)
We can compare these results with the baseline model we trained at the start of the notebook. Results may vary, but you will usually get a model that has fewer units in the dense layer while achieving a comparable loss (MAE). This indicates that you reduced the model size and saved compute resources while keeping roughly the same performance.
# Print results of the baseline and hypertuned model
#print_results(model_dnn, 'BASELINE MODEL', b_eval_dict)
print_results(h_model, 'HYPERTUNED MODEL', h_eval_dict)
Possible Improvements
If you want to keep practicing with Keras Tuner in this notebook, you can do a factory reset (Runtime > Factory reset runtime) and take on any of the following:
- hypertune the dropout layer with hp.Float() or hp.Choice() (see the sketch after this list)
- hypertune the activation function of the 1st dense layer with hp.Choice()
- determine the optimal number of Dense layers you can add to improve the model. You can use the code here as reference.
- explore the pre-defined HyperModel classes - HyperXception and HyperResNet for computer vision applications.
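For the first two items, here is a minimal sketch (an added illustration, not part of the original notebook) of what the extra hyperparameters could look like inside model_builder; the hyperparameter names 'dropout_1' and 'activation_1' are arbitrary choices.

# Sketch: tune the dropout rate with hp.Float() and the activation with hp.Choice()
# These lines would replace the corresponding layers inside model_builder
hp_dropout = hp.Float('dropout_1', min_value=0.1, max_value=0.5, step=0.1)
hp_activation = hp.Choice('activation_1', values=['relu', 'tanh'])

model.add(keras.layers.Dense(units=hp_units, activation=hp_activation, name='dense_1'))
model.add(keras.layers.Dropout(hp_dropout))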
Wrap Up
In this tutorial, you used Keras Tuner to conveniently tune hyperparameters. You defined which ones to tune, the search space, and search strategy to arrive at the optimal set of hyperparameters. These concepts will again be discussed in the next sections but in the context of AutoML, a package that automates the entire machine learning pipeline. On to the next!