GroupedProphet Example Scripts

The scripts below show the various options available when using the GroupedProphet API. For an alternative view of these examples with data visualizations, see the example notebooks.

GroupedProphet Example

This script shows a simple example of training a series of Prophet models, saving a GroupedProphet instance, loading that instance, cross validating through backtesting, and generating a forecast for each group.

GroupedProphet Script
 1from diviner.utils.example_utils.example_data_generator import generate_example_data
 2from diviner import GroupedProphet
 3
 4
 5def execute_grouped_prophet():
 6    """
 7    This function call will generate synthetic group time series data in a normalized format.
 8    The structure will be of:
 9
10    ============ ====== =========== =========== ===========
11    ds           y      group_key_1 group_key_2 group_key_3
12    ============ ====== =========== =========== ===========
13    "2016-02-01" 1234.5 A           B           C
14    ============ ====== =========== =========== ===========
15
16    With the grouping key values that are generated per ``ds`` and ``y`` values assigned in a
17    non-deterministic fashion.
18
19    For utililzation of this API, the normalized representation of the data is required, such that
20    a particular target variables' data 'y' and the associated indexed datetime values in ``'ds'``
21    are 'stacked' (unioned) from a more traditional denormalized data storage paradigm.
22
23    For guidance on this data transposition from denormalized representations, see:
24    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.melt.html
25    """
26
27    generated_data = generate_example_data(
28        column_count=3,
29        series_count=10,
30        series_size=365 * 5,
31        start_dt="2016-02-01",
32        days_period=1,
33    )
34
35    # Extract the normalized grouped datetime series data
36    training_data = generated_data.df
37
38    # Extract the names of the grouping columns that define the unique series data
39    grouping_key_columns = generated_data.key_columns
40
41    # Create a GroupedProphet model instance
42    grouped_model = GroupedProphet(n_changepoints=20, uncertainty_samples=0).fit(
43        training_data, grouping_key_columns
44    )
45
46    # Save the model to the local file system
47    save_path = "/tmp/grouped_prophet.gpm"
48    grouped_model.save(path="/tmp/grouped_prophet.gpm")
49
50    # Load the model from the local storage location
51    retrieved_model = GroupedProphet.load(save_path)
52
53    # Score the model and print the results
54    model_scores = retrieved_model.cross_validate_and_score(
55        horizon="30 days",
56        period="180 days",
57        initial="365 days",
58        parallel="threads",
59        rolling_window=0.05,
60        monthly=False,
61    )
62
63    print(f"Model scores:\n{model_scores.to_string()}")
64
65    # Run a forecast for each group
66    forecasts = retrieved_model.forecast(horizon=20, frequency="D")
67
68    print(f"Forecasted data:\n{forecasts[:50].to_string()}")
69
70    # Extract the parameters from each model for logging
71    params = retrieved_model.extract_model_params()
72
73    print(f"Model parameters:\n{params.to_string()}")
74
75
76if __name__ == "__main__":
77    execute_grouped_prophet()

GroupedProphet Subset Group Prediction Example

This script shows a simple example of training a series of Prophet models and generating a group subset prediction.

GroupedProphet Subset Groups Script
 1from diviner.utils.example_utils.example_data_generator import generate_example_data
 2from diviner import GroupedProphet
 3
 4if __name__ == "__main__":
 5
 6    generated_data = generate_example_data(
 7        column_count=3,
 8        series_count=10,
 9        series_size=365 * 5,
10        start_dt="2016-02-01",
11        days_period=1,
12    )
13
14    # Extract the normalized grouped datetime series data
15    training_data = generated_data.df
16
17    # Extract the names of the grouping columns that define the unique series data
18    group_key_columns = generated_data.key_columns
19
20    # Create a GroupedProphet model instance
21    grouped_model = GroupedProphet(n_changepoints=20, uncertainty_samples=0).fit(
22        training_data, group_key_columns
23    )
24
25    # Get a subset of group keys to generate forecasts for
26    group_df = training_data.copy()
27    group_df["groups"] = list(zip(*[group_df[c] for c in group_key_columns]))
28    distinct_groups = group_df["groups"].unique()
29    groups_to_predict = list(distinct_groups[:3])
30
31    print("-" * 65)
32    print(f"\nUnique groups that have been modeled: \n{distinct_groups}\n")
33    print(f"Subset of groups to generate predictions for: \n{groups_to_predict}\n")
34    print("-" * 65)
35
36    forecasts = grouped_model.predict_groups(
37        groups=groups_to_predict,
38        horizon=60,
39        frequency="D",
40        predict_col="forecast_values",
41        on_error="warn",
42    )
43
44    print(f"\nForecast values:\n{forecasts.to_string()}")

Supplementary

Note

To run these examples yourself, use the following synthetic data generator code:

Synthetic Data Generator
 1import itertools
 2import pandas as pd
 3import numpy as np
 4import string
 5import random
 6from datetime import timedelta, datetime
 7from collections import namedtuple
 8
 9
10def _generate_time_series(series_size: int):
11    residuals = np.random.lognormal(
12        mean=np.random.uniform(low=0.5, high=3.0),
13        sigma=np.random.uniform(low=0.6, high=0.98),
14        size=series_size,
15    )
16    trend = [
17        np.polyval([23.0, 1.0, 5], x)
18        for x in np.linspace(start=0, stop=np.random.randint(low=0, high=4), num=series_size)
19    ]
20    seasonality = [
21        90 * np.sin(2 * np.pi * 1000 * (i / (series_size * 200))) + 40
22        for i in np.arange(0, series_size)
23    ]
24
25    return residuals + trend + seasonality + np.random.uniform(low=20.0, high=1000.0)
26
27
28def _generate_grouping_columns(column_count: int, series_count: int):
29    candidate_list = list(string.ascii_uppercase)
30    candidates = random.sample(
31        list(itertools.permutations(candidate_list, column_count)), series_count
32    )
33    column_names = sorted([f"key{x}" for x in range(column_count)], reverse=True)
34    return [dict(zip(column_names, entries)) for entries in candidates]
35
36
def _generate_raw_df(
    column_count: int,
    series_count: int,
    series_size: int,
    start_dt: str,
    days_period: int,
):
    """
    Build a normalized ("stacked") DataFrame of synthetic series, one series per group.

    :param column_count: number of grouping key columns attached to each series.
    :param series_count: number of distinct series (groups) to generate.
    :param series_size: number of rows in each series.
    :param start_dt: first datetime of every series, formatted as ``YYYY-MM-DD``.
    :param days_period: spacing, in days, between consecutive ``ds`` values.
    :return: the concatenation of the per-group frames, holding the columns ``ds`` and ``y``
             plus one column per grouping key. The row index restarts for each series.
    """
    candidates = _generate_grouping_columns(column_count, series_count)
    # ``%m`` is the month directive. ``%M`` is minutes, which would parse the month field
    # as minutes and silently move every series to January 1st of the given year.
    start_date = datetime.strptime(start_dt, "%Y-%m-%d")
    dates = np.arange(
        start_date,
        start_date + timedelta(days=series_size * days_period),
        timedelta(days=days_period),
    )
    df_collection = []
    for entry in candidates:
        generated_series = _generate_time_series(series_size)
        series_dict = {"ds": dates, "y": generated_series}
        series_df = pd.DataFrame.from_dict(series_dict)
        # Attach the grouping key values as constant columns on this series
        for column, value in entry.items():
            series_df[column] = value
        df_collection.append(series_df)
    return pd.concat(df_collection)
60
61
def generate_example_data(
    column_count: int,
    series_count: int,
    series_size: int,
    start_dt: str,
    days_period: int = 1,
):
    """
    Generate synthetic grouped time series data together with its grouping column names.

    :param column_count: number of grouping key columns.
    :param series_count: number of distinct series (groups).
    :param series_size: number of rows in each series.
    :param start_dt: start date string passed through to the raw data generation.
    :param days_period: spacing, in days, between consecutive datetime values.
    :return: a ``Structure`` namedtuple with fields ``df`` (the generated DataFrame) and
             ``key_columns`` (every column name of ``df`` except ``ds`` and ``y``).
    """
    raw_frame = _generate_raw_df(column_count, series_count, series_size, start_dt, days_period)

    # Everything other than the datetime and target columns is a grouping key
    grouping_columns = raw_frame.columns.tolist()
    grouping_columns.remove("ds")
    grouping_columns.remove("y")

    Structure = namedtuple("Structure", "df key_columns")
    return Structure(raw_frame, grouping_columns)