Skip to content

This part of the project documentation focuses on an information-oriented approach. Use it as a reference for the technical implementation of the mlpForecaster project code.

plot_cdf_

plot_cdf_(ax, data, x_col, hue_col, label)

Plot a Cumulative Distribution Function (CDF) on the given axes.

Parameters:

  • ax (Axes) –

    The axes on which to plot.

  • data (DataFrame) –

    The data to plot.

  • x_col (str) –

    The column in data to plot on the x-axis.

  • hue_col (str) –

    The column in data to use for color grouping.

  • label (str) –

    The label for the x-axis.

Returns:

  • ax ( Axes ) –

    The axes with the plot.

Source code in mlpforecast/plot/visual_functions.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def plot_cdf_(ax, data, x_col, hue_col, label):
    """
    Draw cumulative kernel-density curves (one per hue group) on ``ax``.

    Args:
        ax (matplotlib.axes.Axes): Target axes that receive the curves.
        data (DataFrame): Table holding the values to plot.
        x_col (str): Name of the column in `data` placed on the x-axis.
        hue_col (str): Name of the column in `data` that splits the curves by color.
        label (str): Text used as the x-axis label.

    Returns:
        ax (matplotlib.axes.Axes): The same axes, after drawing.
    """
    # Each hue group is normalised and gridded on its own
    # (common_norm / common_grid are both switched off).
    kde_options = {
        "x": x_col,
        "ax": ax,
        "hue": hue_col,
        "cumulative": True,
        "common_norm": False,
        "common_grid": False,
        "palette": "tab20",
    }
    sns.kdeplot(data, **kde_options)

    # Re-fit the view to the drawn curves, then pin the y-range to [0, 1].
    ax.autoscale()
    ax.set_xlabel(label)
    ax.set_ylim(0, 1)
    return ax

plot_correlation

plot_correlation(
    ax,
    corr_df,
    cmap=sns.diverging_palette(240, 10, as_cmap=True),
)

Plots a heatmap of the correlation matrix.

Parameters:

  • ax (Axes) –

    The axes on which to plot the heatmap.

  • corr_df (DataFrame) –

    DataFrame containing the correlation data with three columns: two for the pairs of items and one for the correlation values.

  • cmap (Colormap, default: diverging_palette(240, 10, as_cmap=True) ) –

    Colormap to use for the heatmap. Default is a diverging palette from Seaborn.

Returns:

  • ax ( Axes ) –

    The Axes object with the heatmap.

Source code in mlpforecast/plot/visual_functions.py
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def plot_correlation(ax, corr_df, cmap=sns.diverging_palette(240, 10, as_cmap=True)):
    """
    Plots a heatmap of the correlation matrix.

    The long-format `corr_df` is pivoted into a matrix whose rows are the
    values of its first column and whose columns are the values of its second
    column; each cell holds the value from the last column.

    Args:
        ax (matplotlib.axes.Axes): The axes on which to plot the heatmap. \
            If None, seaborn falls back to the currently active axes.
        corr_df (pandas.DataFrame): \
            DataFrame containing the correlation data with three columns: \
                two for the pairs of items and one for the correlation values.
        cmap (matplotlib.colors.Colormap, optional): \
            Colormap to use for the heatmap. Default is a diverging palette from Seaborn.

    Returns:
        ax (matplotlib.axes.Axes): The Axes object with the heatmap.
    """
    columns = list(corr_df.columns)
    corr = corr_df.pivot(index=columns[1], columns=columns[0], values=columns[-1])

    # Forward `ax` explicitly: without it seaborn draws on the *current* axes,
    # which silently ignores the axes the caller asked for.
    ax = sns.heatmap(
        corr.T,
        linewidths=0.5,
        cmap=cmap,
        center=0,
        annot=True,
        fmt=".1g",
        ax=ax,
    )
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90, horizontalalignment="right")
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, horizontalalignment="right")

    # Drop the title and the axis labels that the pivot would otherwise add.
    ax.set_title("")
    ax.set_ylabel("")
    ax.set_xlabel("")
    return ax

plot_distribution

plot_distribution(
    ax,
    df,
    index_col="HOUR",
    val_col="WindSpeed",
    hue_col=None,
)

Plot the distribution of a specified variable with mean and standard deviation bands.

This function creates a pivot table to calculate the mean and standard deviation of val_col grouped by index_col and optionally by hue_col. It then plots the mean values with bands representing one standard deviation above and below the mean.

Parameters:

  • ax (Axes) –

    The axes on which to plot.

  • df (DataFrame) –

    The data frame containing the data.

  • index_col (str, default: 'HOUR' ) –

    The column in df to use as the index for pivoting.

  • val_col (str, default: 'WindSpeed' ) –

    The column in df containing the values to plot.

  • hue_col (str, default: None ) –

    The column in df to use for color grouping.

Returns:

  • ax ( Axes ) –

    The axes with the plot.

Source code in mlpforecast/plot/visual_functions.py
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
def plot_distribution(ax, df, index_col="HOUR", val_col="WindSpeed", hue_col=None):
    """
    Plot the distribution of a specified variable with mean and standard deviation bands.

    This function creates a pivot table to \
        calculate the mean and standard deviation of `val_col`
    grouped by `index_col` and optionally by `hue_col`. \
          It then plots the mean values with a single shaded band
    spanning the lowest (mean - std) to the highest (mean + std) across all groups.

    Args:
        ax (matplotlib.axes.Axes): The axes on which to plot.
        df (pd.DataFrame): The data frame containing the data.
        index_col (str): The column in `df` to use as the index for pivoting.
        val_col (str): The column in `df` containing the values to plot.
        hue_col (str, optional): The column in `df` to use for color grouping.

    Returns:
        ax (matplotlib.axes.Axes): The axes with the plot.
    """
    # Aggregations are named by string: passing np.mean / np.std to pandas is
    # deprecated, and "std" keeps the sample standard deviation pandas has
    # been computing for np.std in this context.
    mean_values = pd.pivot_table(
        df, index=index_col, values=val_col, columns=hue_col, aggfunc="mean"
    )
    std_dev = pd.pivot_table(
        df, index=index_col, values=val_col, columns=hue_col, aggfunc="std"
    )

    # Plot the mean values (one line per hue group, or a single line)
    mean_values.plot(ax=ax)

    # The band must share the x-coordinates of the mean lines. pandas draws a
    # numeric index at its actual values, so reuse them; a 0..n-1 range would
    # shift the band whenever the index does not start at 0 with step 1.
    # Non-numeric indexes keep the positional fallback.
    index = mean_values.index
    if pd.api.types.is_numeric_dtype(index):
        x_positions = index.to_numpy()
    else:
        x_positions = np.arange(len(index))

    # Fill the envelope between the lowest (mean - std) and highest (mean + std)
    ax.fill_between(
        x_positions,
        (mean_values - std_dev).min(axis=1),
        (mean_values + std_dev).max(axis=1),
        color="lightsteelblue",
        alpha=0.5,
    )

    # Set plot labels and title
    ax.set_xlabel(index_col)
    ax.set_ylabel(val_col)
    ax.set_title(f"Distribution of {val_col} by {index_col}")

    return ax

plot_kde_

plot_kde_(ax, data, x_col, hue_col, label)

Plot a Kernel Density Estimate (KDE) and histogram on the given axes.

Parameters:

  • ax (Axes) –

    The axes on which to plot.

  • data (DataFrame) –

    The data to plot.

  • x_col (str) –

    The column in data to plot on the x-axis.

  • hue_col (str) –

    The column in data to use for color grouping.

  • label (str) –

    The label for the x-axis.

Returns:

  • ax ( Axes ) –

    The axes with the plot.

Source code in mlpforecast/plot/visual_functions.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def plot_kde_(ax, data, x_col, hue_col, label):
    """
    Draw a histogram with an overlaid KDE curve on ``ax``.

    Args:
        ax (matplotlib.axes.Axes): Target axes that receive the plot.
        data (DataFrame): Table holding the values to plot.
        x_col (str): Name of the column in `data` placed on the x-axis.
        hue_col (str): Name of the column in `data` that splits the plot by color.
        label (str): Text used as the x-axis label.

    Returns:
        ax (matplotlib.axes.Axes): The same axes, after drawing.
    """
    hist_options = {
        "x": x_col,
        "ax": ax,
        "hue": hue_col,
        "palette": "tab20",
        "kde": True,  # overlay the density estimate on the bars
    }
    sns.histplot(data, **hist_options)

    # Re-fit the view to what was just drawn before labelling.
    ax.autoscale()
    ax.set_xlabel(label)
    return ax

plot_prediction

plot_prediction(ax, true, mu, date=None, true_max=None)

Plots the true values and predicted values on the provided axes.

Parameters:

  • ax (Axes) –

    The axes on which to plot.

  • true (array-like) –

    The true values to be plotted.

  • mu (array-like) –

    The predicted values to be plotted.

  • date (array-like, default: None ) –

    The date or time values for the x-axis. If None, uses an array of indices.

  • true_max (float, default: None ) –

    The maximum value of the true values for scaling. If None, it is computed from true.

Returns:

  • ax ( Axes ) –

    The axes with the plot.

Source code in mlpforecast/plot/visual_functions.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def plot_prediction(ax, true, mu, date=None, true_max=None):
    """
    Plots the true values and predicted values on the provided axes.

    True values are drawn as hollow point markers and predictions as a line;
    a two-entry legend is placed below the axes.

    Args:
        ax (matplotlib.axes.Axes): The axes on which to plot.
        true (array-like): The true values to be plotted.
        mu (array-like): The predicted values to be plotted.
        date (array-like, optional): \
            The date or time values for the x-axis. If None, uses an array of indices.
        true_max (float, optional): \
            Accepted for backward compatibility. The value is not used by the plot, \
            so it is no longer derived from `true` when omitted.

    Returns:
        ax (matplotlib.axes.Axes): The axes with the plot.
    """
    # Fall back to positional indices when no x-values are supplied
    x_values = np.arange(len(true)) if date is None else date

    # True values: marker-only, hollow faces
    (true_line,) = ax.plot(
        x_values, true, ".", mec="#ff7f0e", mfc="None", label="True"
    )

    # Predicted values: slightly transparent line
    (pred_line,) = ax.plot(x_values, mu, c="#1f77b4", alpha=0.8, label="Pred")

    ax.set_ylabel("Power (W)")

    # Auto-scale the axes tightly around the data
    ax.autoscale(tight=True)

    # NOTE: `true_max` used to be computed here via np.max(true) and then
    # discarded; that dead computation only made empty input raise, so it
    # has been removed.

    # Place the legend below the plot, horizontally centred
    ax.legend(
        [true_line, pred_line],
        ["True", "Pred"],
        loc="upper center",
        bbox_to_anchor=(0.5, -0.15),
        ncol=3,
    )

    return ax

scatter_plot

scatter_plot(
    data,
    variables,
    targets,
    hue_col=None,
    n_sample=1000,
    random_state=111,
)

Creates a scatter plot matrix using Altair.

Parameters:

  • data (DataFrame) –

    The data to plot.

  • variables (list of str) –

    List of column names to be used as variables for the x-axis.

  • targets (list of str) –

    List of column names to be used as targets for the y-axis.

  • hue_col (str, default: None ) –

    Column name for the color encoding. Default is None.

  • n_sample (int, default: 1000 ) –

    Number of samples to draw from the data for plotting. Default is 1000.

  • random_state (int, default: 111 ) –

    Seed for random sampling. Default is 111.

Returns:

  • chart ( Chart ) –

    The Altair chart object with the scatter plot matrix.

Source code in mlpforecast/plot/visual_functions.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def scatter_plot(
    data, variables, targets, hue_col=None, n_sample=1000, random_state=111
):
    """
    Creates a scatter plot matrix using Altair.

    Each target becomes a row and each variable a column of the matrix.

    Args:
        data (pandas.DataFrame): The data to plot.
        variables (str or list of str): Column name(s) to be used as variables for the x-axis.
        targets (str or list of str): Column name(s) to be used as targets for the y-axis.
        hue_col (str, optional): Column name for the color encoding. Default is None.
        n_sample (int, optional): Number of samples to draw from the data for plotting. \
            Capped at the number of rows in `data`; pass None to plot every row. Default is 1000.
        random_state (int, optional): Seed for random sampling. Default is 111.

    Returns:
        chart (alt.Chart): The Altair chart object with the scatter plot matrix.
    """
    if n_sample is not None:
        # Cap the sample size: sampling without replacement raises when more
        # rows are requested than the frame contains.
        data = data.sample(n=min(n_sample, len(data)), random_state=random_state)

    # `repeat` needs flat lists of field names. The previous `row=[targets]`
    # only worked for a single string and nested a real list one level too
    # deep, so accept both spellings for either argument.
    row_fields = [targets] if isinstance(targets, str) else list(targets)
    column_fields = [variables] if isinstance(variables, str) else list(variables)

    positional_channels = (
        alt.X(alt.repeat("column"), type="quantitative", scale=alt.Scale(zero=False)),
        alt.Y(alt.repeat("row"), type="quantitative", scale=alt.Scale(zero=False)),
    )
    # Color is only encoded when a hue column is requested (nominal type).
    optional_channels = {} if hue_col is None else {"color": f"{hue_col}:N"}

    chart = (
        alt.Chart(data)
        .mark_point(filled=True, opacity=0.7)
        .encode(*positional_channels, **optional_channels)
    )

    chart = (
        chart.properties(width=150, height=150)
        .repeat(row=row_fields, column=column_fields)
        .configure_axis(grid=False, labelFontSize=12, titleFontSize=12)
        .configure_view(strokeOpacity=0)
    )
    return chart

visualise_timeseries_altair

visualise_timeseries_altair(
    data, y_col, figure_path=None, y_label="Power (kW)"
)

Visualizes time series data using Altair.

Parameters:

  • data (DataFrame) –

    The data to plot, with a datetime index and the columns to be plotted.

  • y_col (list of str) –

    List of column names to plot on the y-axis.

  • figure_path (str, default: None ) –

    Path to save the figure. If None, the figure is not saved. Default is None.

  • y_label (str, default: 'Power (kW)' ) –

    Label for the y-axis. Default is 'Power (kW)'.

Returns:

  • chart ( Chart ) –

    The Altair chart object with the time series plot.

Source code in mlpforecast/plot/visual_functions.py
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def visualise_timeseries_altair(data, y_col, figure_path=None, y_label="Power (kW)"):
    """
    Visualizes time series data using Altair.

    One point layer is drawn per entry of `y_col`; all layers share the time
    axis and are colored from the module-level `colors` palette.

    Args:
        data (pandas.DataFrame): The data to plot, with a datetime index and the columns to be plotted. \
            The index is expected to be named 'timestamp'; otherwise the first column \
            produced by `reset_index()` is used as the time field.
        y_col (str or list of str): Column name(s) to plot on the y-axis.
        figure_path (str, optional): Path to save the figure. If None, the figure is not saved. Default is None.
        y_label (str, optional): Label for the y-axis. Default is 'Power (kW)'.

    Returns:
        chart (alt.Chart): The Altair chart object with the time series plot.
    """
    # A bare string would otherwise be indexed character by character.
    series = [y_col] if isinstance(y_col, str) else list(y_col)

    # Materialise the index once instead of once per layer.
    frame = data.reset_index()
    # Keep the historical 'timestamp' field when present; fall back to the
    # leading column, which is where reset_index() places the former index.
    time_field = (
        "timestamp" if "timestamp" in frame.columns else str(frame.columns[0])
    )

    # First layer: both scales opt out of the forced zero baseline.
    chart = (
        alt.Chart(frame)
        .mark_point(filled=True, opacity=0.7)
        .encode(
            x=alt.X(
                f"{time_field}:T",
                scale=alt.Scale(zero=False),
                axis=alt.Axis(title="Date"),
            ),
            # The y channel must be built with alt.Y (it was alt.X before).
            y=alt.Y(f"{series[0]}:Q", scale=alt.Scale(zero=False), title=y_label),
            color=alt.value(colors[0]),
        )
    )

    # Remaining columns are stacked on top as additional layers.
    for position, column in enumerate(series[1:], start=1):
        chart += (
            alt.Chart(frame)
            .mark_point(filled=True, opacity=0.7)
            .encode(
                x=alt.X(f"{time_field}:T", axis=alt.Axis(title="Date")),
                y=alt.Y(f"{column}:Q", title=y_label),
                # Wrap around so more columns than palette entries cannot
                # raise IndexError (assumes `colors` is a sized sequence).
                color=alt.value(colors[position % len(colors)]),
            )
        )

    chart = (
        chart.configure_axis(grid=False, labelFontSize=12, titleFontSize=12)
        .configure_view(strokeOpacity=0)
        .properties(width=900, height=100)
    )

    # Honour the documented contract: persist the chart when a path is given.
    # The output format is whatever Altair infers from the file extension.
    if figure_path is not None:
        chart.save(figure_path)

    return chart