

Tune your forecasting models

Imports

import os
import tempfile
import time

import lightgbm as lgb
import optuna
import pandas as pd
from datasetsforecast.m4 import M4, M4Evaluation, M4Info
from sklearn.linear_model import Ridge
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from utilsforecast.plotting import plot_series

from mlforecast import MLForecast
from mlforecast.auto import (
    AutoLightGBM,
    AutoMLForecast,
    AutoModel,
    AutoRidge,
    ridge_space,
)
from mlforecast.lag_transforms import ExponentiallyWeightedMean, RollingMean

Data setup

def get_data(group, horizon):
    df, *_ = M4.load(directory='data', group=group)
    df['ds'] = df['ds'].astype('int')
    df['unique_id'] = df['unique_id'].astype('category')
    # hold out the last `horizon` observations of each series for evaluation
    return df.groupby('unique_id').head(-horizon).copy()

group = 'Hourly'
horizon = M4Info[group].horizon
train = get_data(group, horizon)

Optimization

Default optimization

We provide default search spaces for some models, and the default features to build are chosen based on the length of your data's seasonal period. For this example we'll use hourly data, so we set 24 (one day) as the season length.
optuna.logging.set_verbosity(optuna.logging.ERROR)
auto_mlf = AutoMLForecast(
    models={'lgb': AutoLightGBM(), 'ridge': AutoRidge()},
    freq=1,
    season_length=24,
)
auto_mlf.fit(
    train,
    n_windows=2,
    h=horizon,
    num_samples=2,  # number of trials to run
)
AutoMLForecast(models={'lgb': AutoModel(model=LGBMRegressor), 'ridge': AutoModel(model=Ridge)})
We can now use these models to predict.
preds = auto_mlf.predict(horizon)
preds.head()
   unique_id   ds         lgb       ridge
0         H1  701  680.534943  604.140123
1         H1  702  599.038307  523.364874
2         H1  703  572.808421  479.174481
3         H1  704  564.573783  444.540062
4         H1  705  543.046026  419.987657
And evaluate them.
def evaluate(df, group):
    results = []
    for model in df.columns.drop(['unique_id', 'ds']):
        model_res = M4Evaluation.evaluate(
            'data', group, df[model].to_numpy().reshape(-1, horizon)
        )
        model_res.index = [model]
        results.append(model_res)
    return pd.concat(results).T.round(2)

evaluate(preds, group)
         lgb  ridge
SMAPE  18.78  20.00
MASE    5.07   1.29
OWA     1.57   0.81
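
If you want to see what one of these default spaces explores, you can sample it directly. The sketch below assumes that ridge_space (imported above from mlforecast.auto) simply returns a dict of Ridge parameters when given an optuna trial, which is how it's used later in this guide.

# draw a single trial just to sample the default Ridge search space
study = optuna.create_study()
trial = study.ask()
print(ridge_space(trial))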

Tuning model parameters

You can provide your own model with its search space to perform the optimization. The search space should be a function that takes an optuna trial and returns the model parameters.
def my_lgb_config(trial: optuna.Trial):
    return {
        'learning_rate': 0.05,
        'verbosity': -1,
        'num_leaves': trial.suggest_int('num_leaves', 2, 128, log=True),
        'objective': trial.suggest_categorical('objective', ['l1', 'l2', 'mape']),
    }

my_lgb = AutoModel(
    model=lgb.LGBMRegressor(),
    config=my_lgb_config,
)
auto_mlf = AutoMLForecast(
    models={'my_lgb': my_lgb},
    freq=1,
    season_length=24,
).fit(
    train,
    n_windows=2,
    h=horizon,
    num_samples=2,
)
preds = auto_mlf.predict(horizon)
evaluate(preds, group)
       my_lgb
SMAPE   18.64
MASE     4.76
OWA      1.50
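
You can also start from one of the built-in spaces and extend it. The following sketch assumes, as above, that the space functions return a plain parameter dict you're free to modify; fit_intercept is a standard scikit-learn Ridge argument added here purely as an illustration.

def extended_ridge_config(trial: optuna.Trial):
    # reuse the default Ridge space and add one extra hyperparameter
    config = ridge_space(trial)
    config['fit_intercept'] = trial.suggest_categorical('fit_intercept', [True, False])
    return config

my_ridge = AutoModel(model=Ridge(), config=extended_ridge_config)

my_ridge can then be passed to AutoMLForecast exactly like the models above.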

Tuning scikit-learn pipelines

We internally use BaseEstimator.set_params for each configuration, so if you’re using a scikit-learn pipeline you can tune its parameters as you normally would with scikit-learn’s searches.
ridge_pipeline = make_pipeline(
    ColumnTransformer(
        [('encoder', OneHotEncoder(), ['unique_id'])],
        remainder='passthrough',
    ),
    Ridge()
)
my_auto_ridge = AutoModel(
    ridge_pipeline,
    # the space must have the name of the estimator followed by the parameter
    # you could also tune the encoder here
    lambda trial: {f'ridge__{k}': v for k, v in ridge_space(trial).items()},
)
auto_mlf = AutoMLForecast(
    models={'ridge': my_auto_ridge},
    freq=1,
    season_length=24,
    fit_config=lambda trial: {'static_features': ['unique_id']}
).fit(
    train,
    n_windows=2,
    h=horizon,
    num_samples=2,
)
preds = auto_mlf.predict(horizon)
evaluate(preds, group)
       ridge
SMAPE  18.50
MASE    1.24
OWA     0.76
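
As the comment in the search space above notes, you could tune the encoder as well. A sketch: with make_pipeline the ColumnTransformer step is named 'columntransformer', so its nested parameters are reachable with the usual double-underscore syntax. min_frequency is a standard OneHotEncoder argument (available in recent scikit-learn versions) used here only as an illustration.

def pipeline_config(trial: optuna.Trial):
    config = {f'ridge__{k}': v for k, v in ridge_space(trial).items()}
    # nested path: pipeline step -> transformer name -> estimator parameter
    config['columntransformer__encoder__min_frequency'] = trial.suggest_int('min_frequency', 1, 10)
    return config

my_auto_ridge_encoder = AutoModel(ridge_pipeline, pipeline_config)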

Tuning features

The MLForecast class defines the features to build in its constructor. You can tune the features by providing a function through the init_config argument, which will take an optuna trial and produce a configuration to pass to the MLForecast constructor.
def my_init_config(trial: optuna.Trial):
    lag_transforms = [
        ExponentiallyWeightedMean(alpha=0.3),
        RollingMean(window_size=24 * 7, min_samples=1),
    ]
    lag_to_transform = trial.suggest_categorical('lag_to_transform', [24, 48])
    return {
        'lags': [24 * i for i in range(1, 7)],  # this won't be tuned
        'lag_transforms': {lag_to_transform: lag_transforms},
    }

auto_mlf = AutoMLForecast(
    models=[AutoRidge()],
    freq=1,
    season_length=24,
    init_config=my_init_config,
).fit(
    train,
    n_windows=2,
    h=horizon,
    num_samples=2,
)
preds = auto_mlf.predict(horizon)
evaluate(preds, group)
UserWarning: `season_length` is not used when `init_config` is provided.
       AutoRidge
SMAPE      13.31
MASE        1.67
OWA         0.71
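
The init_config dict can include anything the MLForecast constructor accepts, so you can tune target transformations in the same way. The sketch below assumes mlforecast.target_transforms.Differences is available (it isn't imported in this notebook) and uses the seasonal period as the differencing lag.

from mlforecast.target_transforms import Differences

def my_init_config_with_transforms(trial: optuna.Trial):
    use_diff = trial.suggest_categorical('use_differences', [True, False])
    return {
        'lags': [24 * i for i in range(1, 7)],
        'target_transforms': [Differences([24])] if use_diff else None,
    }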

Tuning fit parameters

The MLForecast.fit method takes some arguments that could improve the forecasting performance of your models, such as dropna and static_features. If you want to tune those you can provide a function to the fit_config argument.
def my_fit_config(trial: optuna.Trial):
    if trial.suggest_int('use_id', 0, 1):
        static_features = ['unique_id']
    else:
        static_features = None
    return {
        'static_features': static_features
    }

auto_mlf = AutoMLForecast(
    models=[AutoLightGBM()],
    freq=1,
    season_length=24,
    fit_config=my_fit_config,
).fit(
    train,
    n_windows=2,
    h=horizon,
    num_samples=2,
)
preds = auto_mlf.predict(horizon)
evaluate(preds, group)
       AutoLightGBM
SMAPE         18.78
MASE           5.07
OWA            1.57
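
dropna can be tuned in the same way. A minimal sketch, assuming you want to compare keeping and dropping the rows with incomplete features:

def my_fit_config_with_dropna(trial: optuna.Trial):
    return {
        'dropna': trial.suggest_categorical('dropna', [True, False]),
        'static_features': None,
    }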

M5 example: reuse CV splits + global/group rolling means

This example shows both features in two small snippets. The first speeds up the tuning process by reusing the CV splits: in traditional tuning the CV splits are created inside each tuning round, which can be slow depending on the data size and the number of iterations, whereas with reuse_cv_splits=True the splits are created once and reused across the tuning rounds without splitting the data again.
  • We benchmark reuse_cv_splits=True against False while keeping the search space fixed to a single configuration. We still run multiple Optuna trials, but every trial evaluates the exact same model and feature settings, so the timing difference comes from reusing the cached CV windows instead of rebuilding them on each trial.
  • For the example below, setting reuse_cv_splits=True gives a speedup of around 22% compared to reuse_cv_splits=False (the default setting).
  • We inspect the resulting feature values for RollingMean(..., global_=True) and RollingMean(..., groupby=[...]) to see how the local, global, and grouped aggregations differ.
All rolling window functions support global_ and groupby; here we only show RollingMean.
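For instance, the other rolling transforms in mlforecast.lag_transforms (RollingStd, RollingMax, etc., not imported in this notebook) accept the same arguments; a minimal sketch:

from mlforecast.lag_transforms import RollingMax, RollingStd

# same global_/groupby keyword arguments as the RollingMean examples below
other_rolling_features = {
    1: [
        RollingStd(window_size=28, global_=True),
        RollingMax(window_size=28, groupby=['cat_id']),
    ]
}
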
from datasetsforecast.m5 import M5

m5_static = ['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']


def get_m5_subset(directory='data', n_series=100):
    y_df, _, S_df = M5.load(directory=directory)
    y_df['ds'] = pd.to_datetime(y_df['ds'])

    # global_ lag transforms require aligned series ends
    end_ds = y_df.groupby('unique_id')['ds'].max()
    common_end = end_ds.mode().iat[0]
    keep_ids = end_ds[end_ds == common_end].index[:n_series]

    train = y_df[y_df['unique_id'].isin(keep_ids)].copy()
    train = train.merge(S_df, on='unique_id')
    return train

m5_train = get_m5_subset()

m5_benchmark_model = AutoModel(
    model=make_pipeline(
        ColumnTransformer(
            [('encoder', OneHotEncoder(handle_unknown='ignore'), m5_static)],
            remainder='passthrough',
        ),
        Ridge(),
    ),
    config=lambda trial: {'ridge__alpha': 1.0},
)


def m5_benchmark_init_config(trial: optuna.Trial):
    return {
        'lags': [1, 7, 28],
        'lag_transforms': {
            1: [
                RollingMean(window_size=28),
                RollingMean(window_size=28, global_=True),
                RollingMean(window_size=28, groupby=['cat_id']),
                RollingMean(window_size=28, groupby=['state_id', 'cat_id']),
            ]
        },
    }


def m5_benchmark_fit_config(trial: optuna.Trial):
    return {'static_features': m5_static}


def benchmark_m5_tuning(reuse_cv_splits: bool) -> float:
    automl = AutoMLForecast(
        models={'ridge': m5_benchmark_model},
        freq='D',
        init_config=m5_benchmark_init_config,
        fit_config=m5_benchmark_fit_config,
        reuse_cv_splits=reuse_cv_splits,
    )
    start = time.perf_counter()
    automl.fit(
        m5_train,
        n_windows=20,
        h=7,
        num_samples=30,
    )
    return time.perf_counter() - start


m5_timing = pd.DataFrame(
    [
        {'reuse_cv_splits': False, 'seconds': benchmark_m5_tuning(False)},
        {'reuse_cv_splits': True, 'seconds': benchmark_m5_tuning(True)},
    ]
)

baseline = m5_timing.loc[m5_timing["reuse_cv_splits"].eq(False), "seconds"].iloc[0]
reuse = m5_timing.loc[m5_timing["reuse_cv_splits"].eq(True), "seconds"].iloc[0]

pct_faster = (baseline / reuse - 1) * 100
print(
    f"Using reuse_cv_splits=True is {pct_faster:.1f}% faster than the traditional way of tuning "
    f"(reuse_cv_splits=False): {baseline:.2f}s -> {reuse:.2f}s."
)
Using reuse_cv_splits=True is 22.5% faster than the traditional way of tuning (reuse_cv_splits=False): 46.25s -> 37.74s.
m5_feature_demo = MLForecast(
    models=Ridge(),
    freq='D',
    lags=[1],
    lag_transforms={
        1: [
            RollingMean(window_size=7),
            RollingMean(window_size=7, global_=True),
            RollingMean(window_size=7, groupby=['state_id']),
            RollingMean(window_size=7, groupby=['state_id', 'store_id']),
        ]
    },
)

m5_feature_values = m5_feature_demo.preprocess(
    m5_train,
    static_features=m5_static,
    dropna=False,
)

feature_cols = [
    'rolling_mean_lag1_window_size7',
    'global_rolling_mean_lag1_window_size7',
    'groupby_state_id_rolling_mean_lag1_window_size7',
    'groupby_state_id__store_id_rolling_mean_lag1_window_size7',
]
last_ds = m5_feature_values['ds'].max()

(
    m5_feature_values.loc[
        m5_feature_values['ds'].eq(last_ds),
        ['ds', 'unique_id', 'state_id', 'cat_id'] + feature_cols,
    ]
    .sort_values(['state_id', 'cat_id', 'unique_id'])
    .head(12)
)

         ds          unique_id        state_id  cat_id  rolling_mean_lag1_window_size7  global_rolling_mean_lag1_window_size7  groupby_state_id_rolling_mean_lag1_window_size7  groupby_state_id__store_id_rolling_mean_lag1_window_size7
1968   2016-06-19  FOODS_1_001_CA_1  CA        FOODS    0.857143                        137.428574                             64.428574                                        11.428572
3937   2016-06-19  FOODS_1_001_CA_2  CA        FOODS    1.142857                        137.428574                             64.428574                                        23.714285
5906   2016-06-19  FOODS_1_001_CA_3  CA        FOODS    1.714286                        137.428574                             64.428574                                        19.571428
7874   2016-06-19  FOODS_1_001_CA_4  CA        FOODS    0.428571                        137.428574                             64.428574                                         9.714286
21632  2016-06-19  FOODS_1_002_CA_1  CA        FOODS    1.285714                        137.428574                             64.428574                                        11.428572
23601  2016-06-19  FOODS_1_002_CA_2  CA        FOODS    0.714286                        137.428574                             64.428574                                        23.714285
25570  2016-06-19  FOODS_1_002_CA_3  CA        FOODS    0.285714                        137.428574                             64.428574                                        19.571428
27538  2016-06-19  FOODS_1_002_CA_4  CA        FOODS    0.571429                        137.428574                             64.428574                                         9.714286
41300  2016-06-19  FOODS_1_003_CA_1  CA        FOODS    0.142857                        137.428574                             64.428574                                        11.428572
43269  2016-06-19  FOODS_1_003_CA_2  CA        FOODS    1.428571                        137.428574                             64.428574                                        23.714285
45238  2016-06-19  FOODS_1_003_CA_3  CA        FOODS    0.428571                        137.428574                             64.428574                                        19.571428
47206  2016-06-19  FOODS_1_003_CA_4  CA        FOODS    0.142857                        137.428574                             64.428574                                         9.714286
m5_feature_values.describe()
       ds                             y              lag1           rolling_mean_lag1_window_size7  global_rolling_mean_lag1_window_size7  groupby_state_id_rolling_mean_lag1_window_size7  groupby_state_id__store_id_rolling_mean_lag1_window_size7
count  181878                         181878.000000  181778.000000  181178.000000                   181552.000000                          181552.000000                                     181552.000000
mean   2013-12-12 09:05:38.112361216  1.341410       1.341383       1.338734                        128.771957                             44.179596                                         12.859269
min    2011-01-29 00:00:00            0.000000       0.000000       0.000000                        32.857143                              5.285714                                          0.428571
25%    2012-09-22 00:00:00            0.000000       0.000000       0.142857                        92.285713                              29.142857                                         6.857143
50%    2013-12-23 00:00:00            0.000000       0.000000       0.428571                        129.071426                             41.571430                                         11.285714
75%    2015-03-23 00:00:00            1.000000       1.000000       1.142857                        162.000000                             57.000000                                         17.000000
max    2016-06-19 00:00:00            116.000000     116.000000     49.285713                       298.714294                             149.571426                                        61.000000
std    NaN                            3.615288       3.615864       3.123487                        51.012020                              21.452202                                         7.960689

Accessing the optimization results

After the process has finished, the results are available in the results_ attribute of the AutoMLForecast object. There is one result per model, and the best configuration can be found in the 'config' user attribute of the best trial.
len(auto_mlf.results_)
1
auto_mlf.results_['AutoLightGBM'].best_trial.user_attrs['config']
{'model_params': {'bagging_freq': 1,
  'learning_rate': 0.05,
  'verbosity': -1,
  'n_estimators': 169,
  'lambda_l1': 0.02733406969031059,
  'lambda_l2': 0.002659931083868188,
  'num_leaves': 112,
  'feature_fraction': 0.7118273996694524,
  'bagging_fraction': 0.8229470565333281,
  'objective': 'l2'},
 'mlf_init_params': {'lags': [48],
  'target_transforms': None,
  'lag_transforms': {1: [ExponentiallyWeightedMean(alpha=0.9)]},
  'date_features': None,
  'num_threads': 1},
 'mlf_fit_params': {'static_features': None}}
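
Each entry in results_ appears to be an optuna study (the best_trial access above relies on the optuna study API), so you can use optuna's regular tooling to inspect every trial. For example:

study = auto_mlf.results_['AutoLightGBM']
# one row per trial with its objective value and the sampled parameters
study.trials_dataframe(attrs=('number', 'value', 'params'))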

Individual models

There is one optimization process per model, because different models can make use of different features. Once the optimization process for a model is done, its best configuration is used to retrain the model on all of the data. These final models are MLForecast objects and are saved in the models_ attribute.
auto_mlf.models_
{'AutoLightGBM': MLForecast(models=[AutoLightGBM], freq=1, lag_features=['lag48', 'exponentially_weighted_mean_lag1_alpha0.9'], date_features=[], num_threads=1)}
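
Since these are regular MLForecast objects, you can also call them individually, for example to forecast with a single model:

auto_mlf.models_['AutoLightGBM'].predict(horizon)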

Saving

You can use the AutoMLForecast.save method to save the best models found. This produces one directory per model.
with tempfile.TemporaryDirectory() as tmpdir:
    auto_mlf.save(tmpdir)
    print(os.listdir(tmpdir))
['AutoLightGBM']
Since each model is an MLForecast object you can load it by itself.
with tempfile.TemporaryDirectory() as tmpdir:
    auto_mlf.save(tmpdir)
    loaded = MLForecast.load(f'{tmpdir}/AutoLightGBM')
    print(loaded)
MLForecast(models=[AutoLightGBM], freq=1, lag_features=['lag48', 'exponentially_weighted_mean_lag1_alpha0.9'], date_features=[], num_threads=1)
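The loaded object is a regular MLForecast, so you can forecast with it directly, e.g.:

loaded_preds = loaded.predict(horizon)
loaded_preds.head()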