GroupedProphet Example Scripts
The scripts included below show the various options available for utilizing the GroupedProphet
API.
For an alternative view of these examples with data visualizations, see the accompanying example notebooks.
GroupedProphet Example
This script shows a simple example of training a series of Prophet models, saving a GroupedProphet
instance, loading that instance, cross-validating through backtesting, and generating a forecast
for each group.
1from diviner.utils.example_utils.example_data_generator import generate_example_data
2from diviner import GroupedProphet
3
4
def execute_grouped_prophet():
    """
    Train, persist, reload, cross-validate, and forecast a ``GroupedProphet``
    model over synthetic grouped time series data.

    This function call will generate synthetic group time series data in a
    normalized format. The structure will be of:

    ============ ====== =========== =========== ===========
    ds           y      group_key_1 group_key_2 group_key_3
    ============ ====== =========== =========== ===========
    "2016-02-01" 1234.5 A           B           C
    ============ ====== =========== =========== ===========

    With the grouping key values that are generated per ``ds`` and ``y`` values
    assigned in a non-deterministic fashion.

    For utilization of this API, the normalized representation of the data is
    required, such that a particular target variable's data ``'y'`` and the
    associated indexed datetime values in ``'ds'`` are 'stacked' (unioned) from
    a more traditional denormalized data storage paradigm.

    For guidance on this data transposition from denormalized representations, see:
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.melt.html
    """

    generated_data = generate_example_data(
        column_count=3,
        series_count=10,
        series_size=365 * 5,
        start_dt="2016-02-01",
        days_period=1,
    )

    # Extract the normalized grouped datetime series data
    training_data = generated_data.df

    # Extract the names of the grouping columns that define the unique series data
    grouping_key_columns = generated_data.key_columns

    # Create a GroupedProphet model instance and fit one Prophet model per group
    grouped_model = GroupedProphet(n_changepoints=20, uncertainty_samples=0).fit(
        training_data, grouping_key_columns
    )

    # Save the model to the local file system.
    # Fix: reuse ``save_path`` rather than duplicating the path literal in the
    # save() call, so the save and load locations cannot silently drift apart.
    save_path = "/tmp/grouped_prophet.gpm"
    grouped_model.save(path=save_path)

    # Load the model from the local storage location
    retrieved_model = GroupedProphet.load(save_path)

    # Score the model via backtesting cross validation and print the results
    model_scores = retrieved_model.cross_validate_and_score(
        horizon="30 days",
        period="180 days",
        initial="365 days",
        parallel="threads",
        rolling_window=0.05,
        monthly=False,
    )

    print(f"Model scores:\n{model_scores.to_string()}")

    # Run a forecast for each group
    forecasts = retrieved_model.forecast(horizon=20, frequency="D")

    print(f"Forecasted data:\n{forecasts[:50].to_string()}")

    # Extract the parameters from each model for logging
    params = retrieved_model.extract_model_params()

    print(f"Model parameters:\n{params.to_string()}")


if __name__ == "__main__":
    execute_grouped_prophet()
GroupedProphet Subset Group Prediction Example
This script shows a simple example of training a series of Prophet models and generating a group subset prediction.
1from diviner.utils.example_utils.example_data_generator import generate_example_data
2from diviner import GroupedProphet
3
if __name__ == "__main__":

    # Build a synthetic, normalized grouped-series dataset to train on.
    generated_data = generate_example_data(
        column_count=3,
        series_count=10,
        series_size=365 * 5,
        start_dt="2016-02-01",
        days_period=1,
    )

    # The normalized grouped datetime series data
    training_data = generated_data.df

    # Names of the grouping columns that define each unique series
    group_key_columns = generated_data.key_columns

    # Fit one Prophet model per distinct group in the training data
    grouped_model = GroupedProphet(n_changepoints=20, uncertainty_samples=0).fit(
        training_data, group_key_columns
    )

    # Collect the distinct group-key tuples, then pick a small subset of them
    # to demonstrate subset-only prediction.
    keyed_frame = training_data.copy()
    keyed_frame["groups"] = list(zip(*(keyed_frame[column] for column in group_key_columns)))
    distinct_groups = keyed_frame["groups"].unique()
    groups_to_predict = list(distinct_groups[:3])

    print("-" * 65)
    print(f"\nUnique groups that have been modeled: \n{distinct_groups}\n")
    print(f"Subset of groups to generate predictions for: \n{groups_to_predict}\n")
    print("-" * 65)

    # Generate forecasts only for the selected subset of groups
    forecasts = grouped_model.predict_groups(
        groups=groups_to_predict,
        horizon=60,
        frequency="D",
        predict_col="forecast_values",
        on_error="warn",
    )

    print(f"\nForecast values:\n{forecasts.to_string()}")
Supplementary
Note
To run these examples yourself, use the following data generator code:
1import itertools
2import pandas as pd
3import numpy as np
4import string
5import random
6from datetime import timedelta, datetime
7from collections import namedtuple
8
9
10def _generate_time_series(series_size: int):
11 residuals = np.random.lognormal(
12 mean=np.random.uniform(low=0.5, high=3.0),
13 sigma=np.random.uniform(low=0.6, high=0.98),
14 size=series_size,
15 )
16 trend = [
17 np.polyval([23.0, 1.0, 5], x)
18 for x in np.linspace(start=0, stop=np.random.randint(low=0, high=4), num=series_size)
19 ]
20 seasonality = [
21 90 * np.sin(2 * np.pi * 1000 * (i / (series_size * 200))) + 40
22 for i in np.arange(0, series_size)
23 ]
24
25 return residuals + trend + seasonality + np.random.uniform(low=20.0, high=1000.0)
26
27
28def _generate_grouping_columns(column_count: int, series_count: int):
29 candidate_list = list(string.ascii_uppercase)
30 candidates = random.sample(
31 list(itertools.permutations(candidate_list, column_count)), series_count
32 )
33 column_names = sorted([f"key{x}" for x in range(column_count)], reverse=True)
34 return [dict(zip(column_names, entries)) for entries in candidates]
35
36
def _generate_raw_df(
    column_count: int,
    series_count: int,
    series_size: int,
    start_dt: str,
    days_period: int,
):
    """
    Build one normalized DataFrame containing ``series_count`` synthetic
    series stacked vertically, each row tagged with its grouping-key values.

    :param column_count: Number of grouping-key columns to generate.
    :param series_count: Number of distinct series (groups) to generate.
    :param series_size: Number of rows (datetime periods) per series.
    :param start_dt: Series start date as a ``"YYYY-MM-DD"`` string.
    :param days_period: Day interval between consecutive datetime entries.
    :return: A ``pandas.DataFrame`` with ``ds``, ``y``, and key columns.
    """
    candidates = _generate_grouping_columns(column_count, series_count)
    # Bug fix: the format string previously used "%M" (minute) instead of
    # "%m" (month), so "2016-02-01" parsed as Jan 1 2016 00:02 rather than
    # the intended Feb 1 2016.
    start_date = datetime.strptime(start_dt, "%Y-%m-%d")
    dates = np.arange(
        start_date,
        start_date + timedelta(days=series_size * days_period),
        timedelta(days=days_period),
    )
    df_collection = []
    for entry in candidates:
        generated_series = _generate_time_series(series_size)
        series_dict = {"ds": dates, "y": generated_series}
        series_df = pd.DataFrame.from_dict(series_dict)
        # Tag every row of this series with its grouping-key values.
        for column, value in entry.items():
            series_df[column] = value
        df_collection.append(series_df)
    return pd.concat(df_collection)
60
61
def generate_example_data(
    column_count: int,
    series_count: int,
    series_size: int,
    start_dt: str,
    days_period: int = 1,
):
    """
    Generate normalized synthetic grouped time series data for the examples.

    :param column_count: Number of grouping-key columns to create.
    :param series_count: Number of distinct series (groups) to create.
    :param series_size: Number of rows per series.
    :param start_dt: Start date for every series, e.g. ``"2016-02-01"``.
    :param days_period: Day interval between consecutive rows (default ``1``).
    :return: A named tuple with fields ``df`` (the stacked DataFrame) and
        ``key_columns`` (the grouping column names, i.e. all columns other
        than ``ds`` and ``y``).
    """
    Structure = namedtuple("Structure", "df key_columns")
    data = _generate_raw_df(column_count, series_count, series_size, start_dt, days_period)

    # Everything that is not the datetime index or the target is a grouping
    # key; filtering avoids the mutate-in-place remove() of the original,
    # which would raise if either column were ever absent.
    key_columns = [column for column in data.columns if column not in ("ds", "y")]

    return Structure(data, key_columns)