This part of the project documentation takes an information-oriented approach: use it as a reference for the technical implementation of the mlpForecaster project code.

MLPForecastModel

MLPForecastModel(
    data_pipeline=None,
    target_series: list[str] | str = ["NetLoad"],
    unknown_features: list[str] = [],
    calendar_variables: list[str] = [],
    known_calendar_features: list[str] = [],
    known_continuous_features: list[str] = [],
    input_window_size: int = 96,
    forecast_horizon: int = 48,
    embedding_size: int = 28,
    embedding_type: str = None,
    combination_type: str = "addition-comb",
    hidden_size: int = 64,
    num_layers: int = 2,
    expansion_factor: int = 2,
    residual: bool = False,
    activation_function: str = "ReLU",
    out_activation_function: str = "Identity",
    dropout_rate: float = 0.25,
    alpha: float = 0.1,
    num_attention_heads: int = 4,
    metric: str = "mae",
    learning_rate: float = 0.001,
    weight_decay: float = 1e-06,
    prob_decay_1: float = 0.75,
    prob_decay_2: float = 0.9,
    gamma: float = 0.01,
    max_epochs: int = 10,
)

Bases: BaseForecastModel

MLP Forecast Model for time series point forecasting.

Attributes:

  • n_out (int) –

    Number of output series.

  • n_channels (int) –

    Number of input channels.

  • model (object) –

    The underlying MLPForecastNetwork instance.

  • hparams (dict) –

    Hyperparameters for the model.

Parameters:

  • data_pipeline (object, default: None ) –

    Data pipeline object containing the series and features. Defaults to None.

  • target_series (list[str] | str, default: ['NetLoad'] ) –

    Name(s) of the target series to forecast. Defaults to ["NetLoad"].

  • unknown_features (list[str], default: [] ) –

    Input features that are not known over the forecast horizon. Defaults to [].

  • calendar_variables (list[str], default: [] ) –

    Calendar variables used by the data pipeline. Defaults to [].

  • known_calendar_features (list[str], default: [] ) –

    Calendar features known over the forecast horizon. Defaults to [].

  • known_continuous_features (list[str], default: [] ) –

    Continuous features known over the forecast horizon. Defaults to [].

  • input_window_size (int, default: 96 ) –

    Number of past time steps fed to the model. Defaults to 96.

  • forecast_horizon (int, default: 48 ) –

    Number of future time steps to predict. Defaults to 48.

  • embedding_size (int, default: 28 ) –

    Dimensionality of the embedding space. Defaults to 28.

  • embedding_type (str, default: None ) –

    Type of embedding to use. Options: 'PosEmb', 'RotaryEmb', 'CombinedEmb'. Defaults to None.

  • combination_type (str, default: 'addition-comb' ) –

    Type of combination to use. Options: 'attn-comb', 'weighted-comb', 'addition-comb'. Defaults to 'addition-comb'.

  • hidden_size (int, default: 64 ) –

    Dimensionality of the hidden layers. Defaults to 64.

  • num_layers (int, default: 2 ) –

    Number of layers in the MLP. Defaults to 2.

  • expansion_factor (int, default: 2 ) –

    Factor to expand the size of layers. Defaults to 2.

  • residual (bool, default: False ) –

    Whether to use residual connections. Defaults to False.

  • activation_function (str, default: 'ReLU' ) –

    Activation function to use in the hidden layers. Defaults to "ReLU".

  • out_activation_function (str, default: 'Identity' ) –

    Activation function to use in the output layer. Defaults to "Identity".

  • dropout_rate (float, default: 0.25 ) –

    Dropout rate for regularization. Defaults to 0.25.

  • alpha (float, default: 0.1 ) –

    Alpha parameter for the loss function. Defaults to 0.1.

  • num_attention_heads (int, default: 4 ) –

    Number of attention heads. Defaults to 4.

  • metric (str, default: 'mae' ) –

    Metric to evaluate the model. Defaults to "mae".

  • learning_rate (float, default: 0.001 ) –

    Learning rate for the optimizer. Defaults to 1e-3.

  • weight_decay (float, default: 1e-06 ) –

    Weight decay for the optimizer. Defaults to 1e-6.

  • prob_decay_1 (float, default: 0.75 ) –

    Fraction of max_epochs at which the first learning-rate milestone occurs (see configure_optimizers). Defaults to 0.75.

  • prob_decay_2 (float, default: 0.9 ) –

    Fraction of max_epochs at which the second learning-rate milestone occurs. Defaults to 0.9.

  • gamma (float, default: 0.01 ) –

    Multiplicative factor applied to the learning rate at each scheduler milestone. Defaults to 0.01.

  • max_epochs (int, default: 10 ) –

    Maximum number of epochs for training. Defaults to 10.

Example

kwargs = {
    'data_pipeline': data_pipeline,
    'embedding_size': 20,
    'embedding_type': None,
    'combination_type': 'addition-comb',
    'hidden_size': 64,
    'num_layers': 2,
    'activation_function': 'ReLU',
    'out_activation_function': 'ReLU',
    'dropout_rate': 0.25,
    'alpha': 0.25,
    'num_attention_heads': 4,
    'metric': 'smape',
    'learning_rate': 1e-4,
    'weight_decay': 1e-6,
    'prob_decay_1': 0.75,
    'prob_decay_2': 0.9,
    'gamma': 0.01,
    'max_epochs': 50
}

model = MLPForecastModel(**kwargs)
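
Because MLPForecastModel is a LightningModule (via BaseForecastModel), training typically goes through a standard Lightning Trainer. The sketch below assumes the lightning.pytorch import path (older installs use pytorch_lightning); train_loader and val_loader are hypothetical DataLoaders whose batches match what model.step expects, and are not part of this API.

import lightning.pytorch as pl

# Minimal training sketch. `train_loader` / `val_loader` are hypothetical
# DataLoaders producing batches in the format consumed by
# `training_step` / `validation_step`.
trainer = pl.Trainer(max_epochs=kwargs["max_epochs"])
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)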

Source code in mlpforecast/model/deterministic.py
def __init__(
    self,
    data_pipeline=None,
    target_series: list[str] | str = ["NetLoad"],
    unknown_features: list[str] = [],
    calendar_variables: list[str] = [],
    known_calendar_features: list[str] = [],
    known_continuous_features: list[str] = [],
    input_window_size: int = 96,
    forecast_horizon: int = 48,
    embedding_size: int = 28,
    embedding_type: str = None,
    combination_type: str = "addition-comb",
    hidden_size: int = 64,
    num_layers: int = 2,
    expansion_factor: int = 2,
    residual: bool = False,
    activation_function: str = "ReLU",
    out_activation_function: str = "Identity",
    dropout_rate: float = 0.25,
    alpha: float = 0.1,
    num_attention_heads: int = 4,
    metric: str = "mae",
    learning_rate: float = 1e-3,
    weight_decay: float = 1e-6,
    prob_decay_1: float = 0.75,
    prob_decay_2: float = 0.9,
    gamma: float = 0.01,
    max_epochs: int = 10,
):
    r"""
    Multilayer Perceptron (MLP) Forecast Model for time series forecasting.

    Args:
        data_pipeline (object, optional): Data pipeline object containing the series and features. Defaults to None.
        embedding_size (int, optional): Dimensionality of the embedding space. Defaults to 28.
        embedding_type (str, optional): Type of embedding to use.\
              Options: 'PosEmb', 'RotaryEmb', 'CombinedEmb'. Defaults to None.
        combination_type (str, optional): Type of combination to use. Options: \
            'attn-comb', 'weighted-comb', 'addition-comb'. Defaults to 'addition-comb'.
        hidden_size (int, optional): Dimensionality of the hidden layers. Defaults to 64.
        num_layers (int, optional): Number of layers in the MLP. Defaults to 2.
        expansion_factor (int, optional): Factor to expand the size of layers. Defaults to 2.
        residual (bool, optional): Whether to use residual connections. Defaults to False.
        activation_function (str, optional): \
              Activation function to use in the hidden layers. Defaults to "ReLU".
        out_activation_function (str, optional): Activation function to use in the output layer. \
            Defaults to "Identity".
        dropout_rate (float, optional): Dropout rate for regularization. Defaults to 0.25.
        alpha (float, optional): Alpha parameter for the loss function. Defaults to 0.1.
        num_attention_heads (int, optional): Number of attention heads. Defaults to 4.
        metric (str, optional): Metric to evaluate the model. Defaults to "mae".
        learning_rate (float, optional): Learning rate for the optimizer. Defaults to 1e-3.
        weight_decay (float, optional): Weight decay for the optimizer. Defaults to 1e-6.
        prob_decay_1 (float, optional): First probability decay rate. Defaults to 0.75.
        prob_decay_2 (float, optional): Second probability decay rate. Defaults to 0.9.
        gamma (float, optional): Gamma parameter. Defaults to 0.01.
        max_epochs (int, optional): Maximum number of epochs for training. Defaults to 10.

    Example:
        kwargs = {
            'data_pipeline': data_pipeline,
            'embedding_size': 20,
            'embedding_type': None,
            'combination_type': 'addition-comb',
            'hidden_size': 64,
            'num_layers': 2,
            'activation_function': 'ReLU',
            'out_activation_function': 'ReLU',
            'dropout_rate': 0.25,
            'alpha': 0.25,
            'num_attention_heads': 4,
            'metric': 'smape',
            'learning_rate': 1e-4,
            'weight_decay': 1e-6,
            'prob_decay_1': 0.75,
            'prob_decay_2': 0.9,
            'gamma': 0.01,
            'max_epochs': 50
        }

        model = MLPForecastModel(**kwargs)
    """
    super().__init__(data_pipeline, metric)

    assert len(target_series) > 0, "target_series should not be empty."

    self.n_out = len(target_series)
    n_unknown = len(unknown_features) + self.n_out
    n_covariates = len(known_calendar_features) + len(known_continuous_features)
    self.n_channels = n_unknown + n_covariates

    self.model = MLPForecastNetwork(
        n_target_series=self.n_out,
        n_unknown_features=len(unknown_features),
        n_known_calendar_features=len(known_calendar_features),
        n_known_continuous_features=len(known_continuous_features),
        embedding_size=embedding_size,
        embedding_type=embedding_type,
        combination_type=combination_type,
        hidden_size=hidden_size,
        num_layers=num_layers,
        forecast_horizon=forecast_horizon,
        input_window_size=input_window_size,
        activation_function=activation_function,
        out_activation_function=out_activation_function,
        dropout_rate=dropout_rate,
        alpha=alpha,
        num_attention_heads=num_attention_heads,
    )

configure_optimizers

configure_optimizers()

Configure optimizers and learning rate schedulers.

Returns:

  • tuple

    A tuple of two lists, ([optimizer], [scheduler]), following the Lightning convention.

Source code in mlpforecast/model/deterministic.py
def configure_optimizers(self):
    """
    Configure optimizers and learning rate schedulers.

    Returns:
        (tuple): A tuple containing the optimizer and the scheduler.
    """
    p1 = int(self.hparams["prob_decay_1"] * self.hparams["max_epochs"])
    p2 = int(self.hparams["prob_decay_2"] * self.hparams["max_epochs"])

    optimizer = torch.optim.Adam(
        self.parameters(),
        lr=self.hparams["learning_rate"],
        weight_decay=self.hparams["weight_decay"],
    )
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[p1, p2], gamma=self.hparams["gamma"]
    )
    return [optimizer], [scheduler]
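
In other words, prob_decay_1 and prob_decay_2 place the two MultiStepLR milestones as fractions of max_epochs: with the defaults (max_epochs=10, prob_decay_1=0.75, prob_decay_2=0.9, gamma=0.01), the learning rate is multiplied by 0.01 at epochs 7 and 9. A standalone sketch of the same schedule on a dummy parameter:

import torch

# Reproduces the schedule built above with the default hyperparameters.
max_epochs, gamma = 10, 0.01
p1, p2 = int(0.75 * max_epochs), int(0.9 * max_epochs)  # milestones at 7 and 9

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=1e-3, weight_decay=1e-6)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[p1, p2], gamma=gamma
)

for epoch in range(max_epochs):
    optimizer.step()
    scheduler.step()
    # lr: 1e-3 until epoch 7, 1e-5 until epoch 9, then 1e-7
    print(epoch, scheduler.get_last_lr())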

forecast

forecast(x)

Generate forecast for the given input.

Parameters:

  • x (tensor) –

    Input data for forecasting.

Returns:

  • tensor

    Forecasted values.

Source code in mlpforecast/model/deterministic.py
def forecast(self, x):
    """
    Generate forecast for the given input.

    Args:
        x (tensor): Input data for forecasting.

    Returns:
        (tensor): Forecasted values.
    """
    return self.model.forecast(x)
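
At inference time, forecast takes the encoded input tensor produced by the data pipeline. The shape used below is an assumption based on the constructor defaults (input_window_size=96 and n_channels input features); this page does not specify the exact layout.

import torch

# Hypothetical input: 8 windows of 96 time steps with `model.n_channels`
# features each. The authoritative tensor layout comes from the data pipeline.
x = torch.randn(8, 96, model.n_channels)
model.eval()
with torch.no_grad():
    y_hat = model.forecast(x)  # point forecasts over the next `forecast_horizon` steps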

forward

forward(x)

Forward pass of the model.

Parameters:

  • x (tensor) –

    Input data.

Source code in mlpforecast/model/deterministic.py
def forward(self, x):
    """
    Forward pass of the model.

    Args:
        x (tensor): Input data.
    """
    return self.model(x)

on_load_checkpoint

on_load_checkpoint(checkpoint)

Load the data pipeline from a file.

Parameters:

  • checkpoint (dict) –

    Checkpoint dictionary.

Source code in mlpforecast/model/base_model.py
def on_load_checkpoint(self, checkpoint):
    """
    Load the data pipeline from a file.

    Args:
        checkpoint (dict): Checkpoint dictionary.
    """
    self.data_pipeline = joblib.load(checkpoint["data_pipeline_path"])

on_save_checkpoint

on_save_checkpoint(checkpoint)

Save the data pipeline to a file and add the file path to the checkpoint dictionary.

Parameters:

  • checkpoint (dict) –

    Checkpoint dictionary.

Source code in mlpforecast/model/base_model.py
def on_save_checkpoint(self, checkpoint):
    """
    Save the data pipeline to a file and add the file path to the checkpoint dictionary.

    Args:
        checkpoint (dict): Checkpoint dictionary.
    """
    # Save the pipeline to a file
    data_pipeline_path = f"{self.checkpoint_path}/data_pipeline.pkl"
    joblib.dump(self.data_pipeline, data_pipeline_path)
    # Add the pipeline file path to the checkpoint dictionary
    checkpoint["data_pipeline_path"] = data_pipeline_path

training_step

training_step(batch, batch_idx)

Perform a single training step.

Parameters:

  • batch (tuple) –

    A batch of training data.

  • batch_idx (int) –

    Index of the batch.

Returns:

  • tensor

    The loss value for the batch.

Source code in mlpforecast/model/deterministic.py
def training_step(self, batch, batch_idx):
    """
    Perform a single training step.

    Args:
        batch (tuple): A batch of training data.
        batch_idx (int): Index of the batch.

    Returns:
        (tensor): The loss value for the batch.
    """
    loss, metric = self.model.step(batch, self.tra_metric_fcn)
    self.log("train_loss", loss, prog_bar=True, logger=True)
    self.log(f"train_{self.hparams['metric']}", metric, prog_bar=True, logger=True)
    return loss

validation_step

validation_step(batch, batch_idx)

Perform a single validation step.

Parameters:

  • batch (tuple) –

    A batch of validation data.

  • batch_idx (int) –

    Index of the batch.

Returns:

  • tensor

    The loss value for the batch.

Source code in mlpforecast/model/deterministic.py
def validation_step(self, batch, batch_idx):
    """
    Perform a single validation step.

    Args:
        batch (tuple): A batch of validation data.
        batch_idx (int): Index of the batch.

    Returns:
        (tensor): The loss value for the batch.
    """
    loss, metric = self.model.step(batch, self.val_metric_fcn)
    self.log("val_loss", loss, prog_bar=True, logger=True)
    self.log(f"val_{self.hparams['metric']}", metric, prog_bar=True, logger=True)

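Because the steps log under fixed keys ("train_loss", "val_loss", and metric-suffixed variants), a Lightning ModelCheckpoint can monitor them directly. A hedged sketch, assuming the lightning.pytorch import path:

import lightning.pytorch as pl
from lightning.pytorch.callbacks import ModelCheckpoint

# Tracks the "val_loss" key logged in validation_step above.
checkpoint_cb = ModelCheckpoint(monitor="val_loss", mode="min")
trainer = pl.Trainer(max_epochs=10, callbacks=[checkpoint_cb])
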
BaseForecastModel

BaseForecastModel(data_pipeline=None, metric='mae')

Bases: LightningModule

Base class for all forecasting models.

Attributes:

  • model (Module) –

    PyTorch model.

  • data_pipeline (Pipeline) –

    Data pipeline.

  • tra_metric_fcn (Metric) –

    Training metric function.

  • val_metric_fcn (Metric) –

    Validation metric function.

  • size (float) –

    Model size in MB.

  • checkpoint_path (str) –

    Path to save checkpoints.

Parameters:

  • data_pipeline (Pipeline, default: None ) –

    Data pipeline.

  • metric (str, default: 'mae' ) –

    Metric to use for evaluation. Options: 'mae', 'mse', 'smape'.

Source code in mlpforecast/model/base_model.py
def __init__(self, data_pipeline=None, metric="mae"):
    """
    Initialize the model.

    Args:
        data_pipeline (sklearn.pipeline.Pipeline): Data pipeline.
        metric (str): Metric to use for evaluation. Options: 'mae', 'mse', 'smape'.
    """
    super().__init__()

    self.model = None
    self.data_pipeline = data_pipeline
    # get model size

    if self.model is not None:
        param_size = 0
        for param in self.model.parameters():
            param_size += param.nelement() * param.element_size()
        buffer_size = 0
        for buffer in self.model.buffers():
            buffer_size += buffer.nelement() * buffer.element_size()

        self.size = (param_size + buffer_size) / 1024**2
        logger.info(f"Model size: {self.size:.3f}MB")

    # Initialize metric functions
    if metric == "mae":
        self.tra_metric_fcn = torchmetrics.MeanAbsoluteError()
        self.val_metric_fcn = torchmetrics.MeanAbsoluteError()

    elif metric == "mse":
        self.tra_metric_fcn = torchmetrics.MeanSquaredError()
        self.val_metric_fcn = torchmetrics.MeanSquaredError()

    elif metric == "smape":
        self.tra_metric_fcn = torchmetrics.SymmetricMeanAbsolutePercentageError()
        self.val_metric_fcn = torchmetrics.SymmetricMeanAbsolutePercentageError()
    else:
        raise ValueError("Invalid metric. Please select 'mae', 'smape', 'mse'.")
    self.save_hyperparameters()
    self.checkpoint_path = "./"

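The size computation above is a generic measurement of an nn.Module's parameter-plus-buffer footprint; the same logic as a standalone helper (a sketch, not part of this API):

import torch

def model_size_mb(module: torch.nn.Module) -> float:
    """Parameter + buffer footprint in MB, matching the computation above."""
    param_bytes = sum(p.nelement() * p.element_size() for p in module.parameters())
    buffer_bytes = sum(b.nelement() * b.element_size() for b in module.buffers())
    return (param_bytes + buffer_bytes) / 1024**2

print(f"{model_size_mb(torch.nn.Linear(96, 48)):.3f} MB")
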
on_load_checkpoint

on_load_checkpoint(checkpoint)

Load the data pipeline from a file.

Parameters:

  • checkpoint (dict) –

    Checkpoint dictionary.

Source code in mlpforecast/model/base_model.py
def on_load_checkpoint(self, checkpoint):
    """
    Load the data pipeline from a file.

    Args:
        checkpoint (dict): Checkpoint dictionary.
    """
    self.data_pipeline = joblib.load(checkpoint["data_pipeline_path"])

on_save_checkpoint

on_save_checkpoint(checkpoint)

Save the data pipeline to a file and add the file path to the checkpoint dictionary.

Parameters:

  • checkpoint (dict) –

    Checkpoint dictionary.

Source code in mlpforecast/model/base_model.py
def on_save_checkpoint(self, checkpoint):
    """
    Save the data pipeline to a file and add the file path to the checkpoint dictionary.

    Args:
        checkpoint (dict): Checkpoint dictionary.
    """
    # Save the pipeline to a file
    data_pipeline_path = f"{self.checkpoint_path}/data_pipeline.pkl"
    joblib.dump(self.data_pipeline, data_pipeline_path)
    # Add the pipeline file path to the checkpoint dictionary
    checkpoint["data_pipeline_path"] = data_pipeline_path