

Tune your forecasting models

Imports

import os
import tempfile
import time

import lightgbm as lgb
import optuna
import pandas as pd
from datasetsforecast.m4 import M4, M4Evaluation, M4Info
from sklearn.linear_model import Ridge
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from utilsforecast.plotting import plot_series

from mlforecast import MLForecast
from mlforecast.auto import (
    AutoLightGBM,
    AutoMLForecast,
    AutoModel,
    AutoRidge,
    ridge_space,
)
from mlforecast.lag_transforms import ExponentiallyWeightedMean, RollingMean

Data setup

def get_data(group, horizon):
    df, *_ = M4.load(directory='data', group=group)
    df['ds'] = df['ds'].astype('int')
    df['unique_id'] = df['unique_id'].astype('category')
    # hold out the last `horizon` observations of each series for evaluation
    return df.groupby('unique_id').head(-horizon).copy()

group = 'Hourly'
horizon = M4Info[group].horizon
train = get_data(group, horizon)

Optimization

Default optimization

We provide default search spaces for some models, and the default features to build are chosen based on the length of your data's seasonal period. For this example we'll use hourly data, so we set 24 (one day) as the season length.
optuna.logging.set_verbosity(optuna.logging.ERROR)
auto_mlf = AutoMLForecast(
    models={'lgb': AutoLightGBM(), 'ridge': AutoRidge()},
    freq=1,
    season_length=24,
)
auto_mlf.fit(
    train,
    n_windows=2,
    h=horizon,
    num_samples=2,  # number of trials to run
)
AutoMLForecast(models={'lgb': AutoModel(model=LGBMRegressor), 'ridge': AutoModel(model=Ridge)})
We can now use these models to predict.
preds = auto_mlf.predict(horizon)
preds.head()
   unique_id   ds         lgb       ridge
0         H1  701  680.534943  604.140123
1         H1  702  599.038307  523.364874
2         H1  703  572.808421  479.174481
3         H1  704  564.573783  444.540062
4         H1  705  543.046026  419.987657
And evaluate them.
def evaluate(df, group):
    results = []
    for model in df.columns.drop(['unique_id', 'ds']):
        model_res = M4Evaluation.evaluate(
            'data', group, df[model].to_numpy().reshape(-1, horizon)
        )
        model_res.index = [model]
        results.append(model_res)
    return pd.concat(results).T.round(2)

evaluate(preds, group)
         lgb  ridge
SMAPE  18.78  20.00
MASE    5.07   1.29
OWA     1.57   0.81
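
If you want to see what one of these default spaces explores, you can sample it directly. The sketch below assumes that ridge_space (imported above from mlforecast.auto) simply returns a dict of Ridge parameters when given an optuna trial, which is how it's used later in this guide.

# draw a single trial just to sample the default Ridge search space
study = optuna.create_study()
trial = study.ask()
print(ridge_space(trial))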

Tuning model parameters

You can provide your own model with its search space to perform the optimization. The search space should be a function that takes an optuna trial and returns the model parameters.
def my_lgb_config(trial: optuna.Trial):
    return {
        'learning_rate': 0.05,
        'verbosity': -1,
        'num_leaves': trial.suggest_int('num_leaves', 2, 128, log=True),
        'objective': trial.suggest_categorical('objective', ['l1', 'l2', 'mape']),
    }

my_lgb = AutoModel(
    model=lgb.LGBMRegressor(),
    config=my_lgb_config,
)
auto_mlf = AutoMLForecast(
    models={'my_lgb': my_lgb},
    freq=1,
    season_length=24,
).fit(
    train,
    n_windows=2,
    h=horizon,
    num_samples=2,
)
preds = auto_mlf.predict(horizon)
evaluate(preds, group)
       my_lgb
SMAPE   18.64
MASE     4.76
OWA      1.50
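
You can also start from one of the built-in spaces and extend it. The following sketch assumes, as above, that the space functions return a plain parameter dict you're free to modify; fit_intercept is a standard scikit-learn Ridge argument added here purely as an illustration.

def extended_ridge_config(trial: optuna.Trial):
    # reuse the default Ridge space and add one extra hyperparameter
    config = ridge_space(trial)
    config['fit_intercept'] = trial.suggest_categorical('fit_intercept', [True, False])
    return config

my_ridge = AutoModel(model=Ridge(), config=extended_ridge_config)

my_ridge can then be passed to AutoMLForecast exactly like the models above.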

Tuning scikit-learn pipelines

We internally use BaseEstimator.set_params for each configuration, so if you’re using a scikit-learn pipeline you can tune its parameters as you normally would with scikit-learn’s searches.
ridge_pipeline = make_pipeline(
    ColumnTransformer(
        [('encoder', OneHotEncoder(), ['unique_id'])],
        remainder='passthrough',
    ),
    Ridge()
)
my_auto_ridge = AutoModel(
    ridge_pipeline,
    # the space must have the name of the estimator followed by the parameter
    # you could also tune the encoder here
    lambda trial: {f'ridge__{k}': v for k, v in ridge_space(trial).items()},
)
auto_mlf = AutoMLForecast(
    models={'ridge': my_auto_ridge},
    freq=1,
    season_length=24,
    fit_config=lambda trial: {'static_features': ['unique_id']}
).fit(
    train,
    n_windows=2,
    h=horizon,
    num_samples=2,
)
preds = auto_mlf.predict(horizon)
evaluate(preds, group)
       ridge
SMAPE  18.50
MASE    1.24
OWA     0.76
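
As the comment in the search space above notes, you could tune the encoder as well. A sketch: with make_pipeline the ColumnTransformer step is named 'columntransformer', so its nested parameters are reachable with the usual double-underscore syntax. min_frequency is a standard OneHotEncoder argument (available in recent scikit-learn versions) used here only as an illustration.

def pipeline_config(trial: optuna.Trial):
    config = {f'ridge__{k}': v for k, v in ridge_space(trial).items()}
    # nested path: pipeline step -> transformer name -> estimator parameter
    config['columntransformer__encoder__min_frequency'] = trial.suggest_int('min_frequency', 1, 10)
    return config

my_auto_ridge_encoder = AutoModel(ridge_pipeline, pipeline_config)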

Tuning features

The MLForecast class defines the features to build in its constructor. You can tune the features by providing a function through the init_config argument, which will take an optuna trial and produce a configuration to pass to the MLForecast constructor.
def my_init_config(trial: optuna.Trial):
    lag_transforms = [
        ExponentiallyWeightedMean(alpha=0.3),
        RollingMean(window_size=24 * 7, min_samples=1),
    ]
    lag_to_transform = trial.suggest_categorical('lag_to_transform', [24, 48])
    return {
        'lags': [24 * i for i in range(1, 7)],  # this won't be tuned
        'lag_transforms': {lag_to_transform: lag_transforms},
    }

auto_mlf = AutoMLForecast(
    models=[AutoRidge()],
    freq=1,
    season_length=24,
    init_config=my_init_config,
).fit(
    train,
    n_windows=2,
    h=horizon,
    num_samples=2,
)
preds = auto_mlf.predict(horizon)
evaluate(preds, group)
UserWarning: `season_length` is not used when `init_config` is provided.
       AutoRidge
SMAPE      13.31
MASE        1.67
OWA         0.71
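
The init_config dict can include anything the MLForecast constructor accepts, so you can tune target transformations in the same way. The sketch below assumes mlforecast.target_transforms.Differences is available (it isn't imported in this notebook) and uses the seasonal period as the differencing lag.

from mlforecast.target_transforms import Differences

def my_init_config_with_transforms(trial: optuna.Trial):
    use_diff = trial.suggest_categorical('use_differences', [True, False])
    return {
        'lags': [24 * i for i in range(1, 7)],
        'target_transforms': [Differences([24])] if use_diff else None,
    }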

Tuning fit parameters

The MLForecast.fit method takes some arguments that could improve the forecasting performance of your models, such as dropna and static_features. If you want to tune those you can provide a function to the fit_config argument.
def my_fit_config(trial: optuna.Trial):
    if trial.suggest_int('use_id', 0, 1):
        static_features = ['unique_id']
    else:
        static_features = None
    return {
        'static_features': static_features
    }

auto_mlf = AutoMLForecast(
    models=[AutoLightGBM()],
    freq=1,
    season_length=24,
    fit_config=my_fit_config,
).fit(
    train,
    n_windows=2,
    h=horizon,
    num_samples=2,
)
preds = auto_mlf.predict(horizon)
evaluate(preds, group)
       AutoLightGBM
SMAPE         18.78
MASE           5.07
OWA            1.57
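
dropna can be tuned in the same way. A minimal sketch, assuming you want to compare keeping and dropping the rows with incomplete features:

def my_fit_config_with_dropna(trial: optuna.Trial):
    return {
        'dropna': trial.suggest_categorical('dropna', [True, False]),
        'static_features': None,
    }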

M5 example: reuse CV splits + global/group rolling means

This example shows both features in two small snippets. The first speeds up the tuning process by reusing the CV splits: in traditional tuning the CV splits are created inside each tuning round, which can be slow depending on the data size and the number of iterations, whereas with reuse_cv_splits=True the splits are created once and reused across the tuning rounds without splitting the data again.
  • We benchmark reuse_cv_splits=True against False while keeping the search space fixed to a single configuration. We still run multiple Optuna trials, but every trial evaluates the exact same model and feature settings, so the timing difference comes from reusing the cached CV windows instead of rebuilding them on each trial.
  • For the example below, setting reuse_cv_splits=True gives a speedup of around 22% compared to reuse_cv_splits=False (the default setting).
  • We inspect the resulting feature values for RollingMean(..., global_=True) and RollingMean(..., groupby=[...]) to see how the local, global, and grouped aggregations differ.
All rolling window functions support global_ and groupby; here we only show RollingMean.
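For instance, the other rolling transforms in mlforecast.lag_transforms (RollingStd, RollingMax, etc., not imported in this notebook) accept the same arguments; a minimal sketch:

from mlforecast.lag_transforms import RollingMax, RollingStd

# same global_/groupby keyword arguments as the RollingMean examples below
other_rolling_features = {
    1: [
        RollingStd(window_size=28, global_=True),
        RollingMax(window_size=28, groupby=['cat_id']),
    ]
}
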
from datasetsforecast.m5 import M5

m5_static = ['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']


def get_m5_subset(directory='data', n_series=100):
    y_df, _, S_df = M5.load(directory=directory)
    y_df['ds'] = pd.to_datetime(y_df['ds'])

    # global_ lag transforms require aligned series ends
    end_ds = y_df.groupby('unique_id')['ds'].max()
    common_end = end_ds.mode().iat[0]
    keep_ids = end_ds[end_ds == common_end].index[:n_series]

    train = y_df[y_df['unique_id'].isin(keep_ids)].copy()
    train = train.merge(S_df, on='unique_id')
    return train

m5_train = get_m5_subset()

m5_benchmark_model = AutoModel(
    model=make_pipeline(
        ColumnTransformer(
            [('encoder', OneHotEncoder(handle_unknown='ignore'), m5_static)],
            remainder='passthrough',
        ),
        Ridge(),
    ),
    config=lambda trial: {'ridge__alpha': 1.0},
)


def m5_benchmark_init_config(trial: optuna.Trial):
    return {
        'lags': [1, 7, 28],
        'lag_transforms': {
            1: [
                RollingMean(window_size=28),
                RollingMean(window_size=28, global_=True),
                RollingMean(window_size=28, groupby=['cat_id']),
                RollingMean(window_size=28, groupby=['state_id', 'cat_id']),
            ]
        },
    }


def m5_benchmark_fit_config(trial: optuna.Trial):
    return {'static_features': m5_static}


def benchmark_m5_tuning(reuse_cv_splits: bool) -> float:
    automl = AutoMLForecast(
        models={'ridge': m5_benchmark_model},
        freq='D',
        init_config=m5_benchmark_init_config,
        fit_config=m5_benchmark_fit_config,
        reuse_cv_splits=reuse_cv_splits,
    )
    start = time.perf_counter()
    automl.fit(
        m5_train,
        n_windows=20,
        h=7,
        num_samples=30,
    )
    return time.perf_counter() - start


m5_timing = pd.DataFrame(
    [
        {'reuse_cv_splits': False, 'seconds': benchmark_m5_tuning(False)},
        {'reuse_cv_splits': True, 'seconds': benchmark_m5_tuning(True)},
    ]
)

baseline = m5_timing.loc[m5_timing["reuse_cv_splits"].eq(False), "seconds"].iloc[0]
reuse = m5_timing.loc[m5_timing["reuse_cv_splits"].eq(True), "seconds"].iloc[0]

pct_faster = (baseline / reuse - 1) * 100
print(
    f"Using reuse_cv_splits=True is {pct_faster:.1f}% faster than the traditional way of tuning "
    f"(reuse_cv_splits=False): {baseline:.2f}s -> {reuse:.2f}s."
)
Using reuse_cv_splits=True is 22.5% faster than the traditional way of tuning (reuse_cv_splits=False): 46.25s -> 37.74s.
m5_feature_demo = MLForecast(
    models=Ridge(),
    freq='D',
    lags=[1],
    lag_transforms={
        1: [
            RollingMean(window_size=7),
            RollingMean(window_size=7, global_=True),
            RollingMean(window_size=7, groupby=['state_id']),
            RollingMean(window_size=7, groupby=['state_id', 'store_id']),
        ]
    },
)

m5_feature_values = m5_feature_demo.preprocess(
    m5_train,
    static_features=m5_static,
    dropna=False,
)

feature_cols = [
    'rolling_mean_lag1_window_size7',
    'global_rolling_mean_lag1_window_size7',
    'groupby_state_id_rolling_mean_lag1_window_size7',
    'groupby_state_id__store_id_rolling_mean_lag1_window_size7',
]
last_ds = m5_feature_values['ds'].max()

(
    m5_feature_values.loc[
        m5_feature_values['ds'].eq(last_ds),
        ['ds', 'unique_id', 'state_id', 'cat_id'] + feature_cols,
    ]
    .sort_values(['state_id', 'cat_id', 'unique_id'])
    .head(12)
)

         ds          unique_id        state_id  cat_id  rolling_mean_lag1_window_size7  global_rolling_mean_lag1_window_size7  groupby_state_id_rolling_mean_lag1_window_size7  groupby_state_id__store_id_rolling_mean_lag1_window_size7
1968   2016-06-19  FOODS_1_001_CA_1  CA        FOODS    0.857143                        137.428574                             64.428574                                        11.428572
3937   2016-06-19  FOODS_1_001_CA_2  CA        FOODS    1.142857                        137.428574                             64.428574                                        23.714285
5906   2016-06-19  FOODS_1_001_CA_3  CA        FOODS    1.714286                        137.428574                             64.428574                                        19.571428
7874   2016-06-19  FOODS_1_001_CA_4  CA        FOODS    0.428571                        137.428574                             64.428574                                         9.714286
21632  2016-06-19  FOODS_1_002_CA_1  CA        FOODS    1.285714                        137.428574                             64.428574                                        11.428572
23601  2016-06-19  FOODS_1_002_CA_2  CA        FOODS    0.714286                        137.428574                             64.428574                                        23.714285
25570  2016-06-19  FOODS_1_002_CA_3  CA        FOODS    0.285714                        137.428574                             64.428574                                        19.571428
27538  2016-06-19  FOODS_1_002_CA_4  CA        FOODS    0.571429                        137.428574                             64.428574                                         9.714286
41300  2016-06-19  FOODS_1_003_CA_1  CA        FOODS    0.142857                        137.428574                             64.428574                                        11.428572
43269  2016-06-19  FOODS_1_003_CA_2  CA        FOODS    1.428571                        137.428574                             64.428574                                        23.714285
45238  2016-06-19  FOODS_1_003_CA_3  CA        FOODS    0.428571                        137.428574                             64.428574                                        19.571428
47206  2016-06-19  FOODS_1_003_CA_4  CA        FOODS    0.142857                        137.428574                             64.428574                                         9.714286
m5_feature_values.describe()
       ds                             y              lag1           rolling_mean_lag1_window_size7  global_rolling_mean_lag1_window_size7  groupby_state_id_rolling_mean_lag1_window_size7  groupby_state_id__store_id_rolling_mean_lag1_window_size7
count  181878                         181878.000000  181778.000000  181178.000000                   181552.000000                          181552.000000                                     181552.000000
mean   2013-12-12 09:05:38.112361216  1.341410       1.341383       1.338734                        128.771957                             44.179596                                         12.859269
min    2011-01-29 00:00:00            0.000000       0.000000       0.000000                        32.857143                              5.285714                                          0.428571
25%    2012-09-22 00:00:00            0.000000       0.000000       0.142857                        92.285713                              29.142857                                         6.857143
50%    2013-12-23 00:00:00            0.000000       0.000000       0.428571                        129.071426                             41.571430                                         11.285714
75%    2015-03-23 00:00:00            1.000000       1.000000       1.142857                        162.000000                             57.000000                                         17.000000
max    2016-06-19 00:00:00            116.000000     116.000000     49.285713                       298.714294                             149.571426                                        61.000000
std    NaN                            3.615288       3.615864       3.123487                        51.012020                              21.452202                                         7.960689

Accessing the optimization results

After the process has finished, the results are available in the results_ attribute of the AutoMLForecast object. There is one result per model, and the best configuration can be found in the 'config' user attribute of the best trial.
len(auto_mlf.results_)
1
auto_mlf.results_['AutoLightGBM'].best_trial.user_attrs['config']
{'model_params': {'bagging_freq': 1,
  'learning_rate': 0.05,
  'verbosity': -1,
  'n_estimators': 169,
  'lambda_l1': 0.02733406969031059,
  'lambda_l2': 0.002659931083868188,
  'num_leaves': 112,
  'feature_fraction': 0.7118273996694524,
  'bagging_fraction': 0.8229470565333281,
  'objective': 'l2'},
 'mlf_init_params': {'lags': [48],
  'target_transforms': None,
  'lag_transforms': {1: [ExponentiallyWeightedMean(alpha=0.9)]},
  'date_features': None,
  'num_threads': 1},
 'mlf_fit_params': {'static_features': None}}
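
Each entry in results_ appears to be an optuna study (the best_trial access above relies on the optuna study API), so you can use optuna's regular tooling to inspect every trial. For example:

study = auto_mlf.results_['AutoLightGBM']
# one row per trial with its objective value and the sampled parameters
study.trials_dataframe(attrs=('number', 'value', 'params'))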

Individual models

There is one optimization process per model, because different models can make use of different features. Once the optimization process for a model is done, its best configuration is used to retrain the model on all of the data. These final models are MLForecast objects and are saved in the models_ attribute.
auto_mlf.models_
{'AutoLightGBM': MLForecast(models=[AutoLightGBM], freq=1, lag_features=['lag48', 'exponentially_weighted_mean_lag1_alpha0.9'], date_features=[], num_threads=1)}
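
Since these are regular MLForecast objects, you can also call them individually, for example to forecast with a single model:

auto_mlf.models_['AutoLightGBM'].predict(horizon)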

Saving

You can use the AutoMLForecast.save method to save the best models found. This produces one directory per model.
with tempfile.TemporaryDirectory() as tmpdir:
    auto_mlf.save(tmpdir)
    print(os.listdir(tmpdir))
['AutoLightGBM']
Since each model is an MLForecast object you can load it by itself.
with tempfile.TemporaryDirectory() as tmpdir:
    auto_mlf.save(tmpdir)
    loaded = MLForecast.load(f'{tmpdir}/AutoLightGBM')
    print(loaded)
MLForecast(models=[AutoLightGBM], freq=1, lag_features=['lag48', 'exponentially_weighted_mean_lag1_alpha0.9'], date_features=[], num_threads=1)
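The loaded object is a regular MLForecast, so you can forecast with it directly, e.g.:

loaded_preds = loaded.predict(horizon)
loaded_preds.head()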