From 27e0a511a4bb91b024fd61e8630f5aa1a1db9d51 Mon Sep 17 00:00:00 2001 From: Jared Snyder Date: Wed, 9 Oct 2024 11:07:12 -0500 Subject: [PATCH 1/6] fix attribute that caused error --- jobs/kpi-forecasting/kpi_forecasting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jobs/kpi-forecasting/kpi_forecasting.py b/jobs/kpi-forecasting/kpi_forecasting.py index e340a567..12544b3e 100644 --- a/jobs/kpi-forecasting/kpi_forecasting.py +++ b/jobs/kpi-forecasting/kpi_forecasting.py @@ -71,7 +71,7 @@ def get_predict_dates(self, observed_df): or self._default_end_date() ) return pd.DataFrame( - {"submission_date": pd.date_range(start_date, end_date).date} + {"submission_date": pd.date_range(self.start_date, self.end_date).date} ) def fit(self, observed_df): From 85037e3a6bb88b6d4674b25a2b1dce874c780d90 Mon Sep 17 00:00:00 2001 From: Jared Snyder Date: Wed, 9 Oct 2024 11:30:08 -0500 Subject: [PATCH 2/6] add integration test to ensure code runs end-to-end --- .circleci/config.yml | 5 +++++ jobs/kpi-forecasting/kpi_forecasting.py | 9 ++++----- jobs/kpi-forecasting/kpi_forecasting/inputs.py | 7 +++++++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5a7c497c..477fbd4c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -258,6 +258,11 @@ jobs: - run: name: Test Code command: docker run app:build pytest --ruff --ruff-format + - run: + name: integration test for kpi + command: | + docker un app:build python ./kpi_forecasting.py -c ./kpi_forecasting/configs/dau_mobile.yaml --no-write + docker un app:build python ./kpi_forecasting.py -c ./kpi_forecasting/configs/dau_desktop.yaml --no-write build-job-mozaggregator2bq: diff --git a/jobs/kpi-forecasting/kpi_forecasting.py b/jobs/kpi-forecasting/kpi_forecasting.py index 12544b3e..2cd0292e 100644 --- a/jobs/kpi-forecasting/kpi_forecasting.py +++ b/jobs/kpi-forecasting/kpi_forecasting.py @@ -1,7 +1,6 @@ import pandas as pd from datetime import datetime, timezone, timedelta import json -import pickle from kpi_forecasting.inputs import CLI, load_yaml from kpi_forecasting.models.prophet_forecast import ( @@ -160,6 +159,8 @@ def _default_end_date(self) -> str: def main() -> None: # Load the config config_path = CLI().args.config + will_write = CLI().args.write + print(will_write) pipeline = KPIPipeline(config_path) @@ -169,10 +170,8 @@ def main() -> None: summarized = pipeline.predict_and_summarize( fit_model, predict_dates.copy(), observed_df ) - pipeline.write_results(fit_model, summarized, predict_dates.copy()) - - with open("main_model.pkl", "wb") as f: - pickle.dump(fit_model, f) + if will_write: + pipeline.write_results(fit_model, summarized, predict_dates.copy()) if __name__ == "__main__": diff --git a/jobs/kpi-forecasting/kpi_forecasting/inputs.py b/jobs/kpi-forecasting/kpi_forecasting/inputs.py index 14da5545..4271e47e 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/inputs.py +++ b/jobs/kpi-forecasting/kpi_forecasting/inputs.py @@ -16,6 +16,13 @@ def __post_init__(self) -> None: self.parser.add_argument( "-c", "--config", type=str, help="Path to configuration yaml file" ) + self.parser.add_argument( + "--write", + type=bool, + help="If true, write results", + default=True, + action=argparse.BooleanOptionalAction, + ) self.args = self.parser.parse_args() From 24f6b34d41de696a945623260c57f96c10ee392b Mon Sep 17 00:00:00 2001 From: Jared Snyder Date: Wed, 9 Oct 2024 11:33:33 -0500 Subject: [PATCH 3/6] fix typo --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 477fbd4c..67152e2a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -261,8 +261,8 @@ jobs: - run: name: integration test for kpi command: | - docker un app:build python ./kpi_forecasting.py -c ./kpi_forecasting/configs/dau_mobile.yaml --no-write - docker un app:build python ./kpi_forecasting.py -c ./kpi_forecasting/configs/dau_desktop.yaml --no-write + docker run app:build python ./kpi_forecasting.py -c ./kpi_forecasting/configs/dau_mobile.yaml --no-write + docker run app:build python ./kpi_forecasting.py -c ./kpi_forecasting/configs/dau_desktop.yaml --no-write build-job-mozaggregator2bq: From 685e92acc249749e58712fbfb82f00502bc4ca61 Mon Sep 17 00:00:00 2001 From: Jared Snyder Date: Wed, 9 Oct 2024 11:40:37 -0500 Subject: [PATCH 4/6] revert CI --- .circleci/config.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 67152e2a..5a7c497c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -258,11 +258,6 @@ jobs: - run: name: Test Code command: docker run app:build pytest --ruff --ruff-format - - run: - name: integration test for kpi - command: | - docker run app:build python ./kpi_forecasting.py -c ./kpi_forecasting/configs/dau_mobile.yaml --no-write - docker run app:build python ./kpi_forecasting.py -c ./kpi_forecasting/configs/dau_desktop.yaml --no-write build-job-mozaggregator2bq: From 184675e3910fd7e91f7ee5382b4ec5138eddc55f Mon Sep 17 00:00:00 2001 From: Jared Snyder Date: Wed, 9 Oct 2024 11:42:41 -0500 Subject: [PATCH 5/6] updated README --- jobs/kpi-forecasting/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/jobs/kpi-forecasting/README.md b/jobs/kpi-forecasting/README.md index 2f4c51e4..cffc3299 100644 --- a/jobs/kpi-forecasting/README.md +++ b/jobs/kpi-forecasting/README.md @@ -66,8 +66,7 @@ install_name_tool -add_rpath /PATH/TO/CONDA/envs/kpi-forecasting-dev/lib/cmdstan ### Running locally A metric can be forecasted by using a command line argument that passes the relevant YAML file to the `kpi_forecasting.py` script. -[Here are approaches for accessing a Docker container's terminal](https://docs.docker.com/desktop/use-desktop/container/#integrated-terminal). - +[Here are approaches for accessing a Docker container's terminal](https://docs.docker.com/desktop/use-desktop/container/#integrated-terminal). The `--no-write` argument can also be passed to essentially run a test and ensure the pipeline runs end-to-end. For example, the following command forecasts Desktop DAU numbers: ```sh @@ -182,3 +181,8 @@ The forecast objects in this repo implement an interface similar to `sklearn` or The `BaseEnsembleForecast` makes it possible to fit multiple models over the data, where different subsets of the data have different models applied to them. These subsets are referred to as "segments" in the code. Only one kind of model is supported, and different instances of this model are fit over the different segments. The type of model is set by the `model_class` argument, and should be a class that implements the same interface as `BaseForecast`. The `fit` and `predict` methods in `BaseEnsembleForecast` determine which segment each row of incoming data belongs to and uses the `fit` and `predict` methods of the model class on the segment. This can be seen in the `FunnelForecast` object, which uses the `BaseEnsembleForecast` with `ProphetAutotunerForecast` as the model_class. +## Testing +Before merging, run the pipeline with the `--no-write` flag to ensure it runs end-to-end, IE: + +`python ./kpi_forecasting.py --no-write -c ./kpi_forecasting/configs/dau_mobile.yaml` + From 34fb8e0722a6b345cf97018b8db4ff7ceab5de05 Mon Sep 17 00:00:00 2001 From: Jared Snyder Date: Wed, 9 Oct 2024 11:45:41 -0500 Subject: [PATCH 6/6] remove print --- jobs/kpi-forecasting/kpi_forecasting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jobs/kpi-forecasting/kpi_forecasting.py b/jobs/kpi-forecasting/kpi_forecasting.py index 2cd0292e..a5ee32ce 100644 --- a/jobs/kpi-forecasting/kpi_forecasting.py +++ b/jobs/kpi-forecasting/kpi_forecasting.py @@ -160,7 +160,6 @@ def main() -> None: # Load the config config_path = CLI().args.config will_write = CLI().args.write - print(will_write) pipeline = KPIPipeline(config_path)