From f3053cc71b73f005563a8ccc6ab5a284a1a24ea2 Mon Sep 17 00:00:00 2001 From: Jon Bannister Date: Wed, 14 Jan 2026 09:45:30 +0000 Subject: [PATCH 1/4] Add standalone scheduler process for improved reliability The scheduler can now run as a separate process instead of a background thread in the webapp. This allows the scheduler to be restarted independently in Kubernetes deployments without affecting the webapp. New CLI command: notebooker-cli start-scheduler New webapp flag: --scheduler-management-only When using the standalone scheduler, the webapp should be started with --scheduler-management-only so it can manage jobs without executing them. --- CLAUDE.md | 120 ++++++++++++++++++++++++ docs/webapp/webapp.rst | 47 ++++++++++ notebooker/_entrypoints.py | 52 ++++++++++ notebooker/scheduler_core.py | 108 +++++++++++++++++++++ notebooker/settings.py | 1 + notebooker/standalone_scheduler.py | 88 +++++++++++++++++ notebooker/web/app.py | 28 ++---- tests/integration/web/test_app.py | 23 ++++- tests/unit/test_scheduler_core.py | 120 ++++++++++++++++++++++++ tests/unit/test_standalone_scheduler.py | 103 ++++++++++++++++++++ 10 files changed, 670 insertions(+), 20 deletions(-) create mode 100644 CLAUDE.md create mode 100644 notebooker/scheduler_core.py create mode 100644 notebooker/standalone_scheduler.py create mode 100644 tests/unit/test_scheduler_core.py create mode 100644 tests/unit/test_standalone_scheduler.py diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..7193c93f --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,120 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Notebooker is a production system for executing and scheduling Jupyter Notebooks as parametrized reports. It converts notebooks (stored as .py files via Jupytext) into web-based reports with results stored in MongoDB. 
+ +## Common Commands + +### Python Development +```bash +# Install in development mode +pip install -e ".[test]" + +# Install kernel for notebook execution +python -m ipykernel install --user --name=notebooker_kernel + +# Run tests (requires MongoDB) +pytest -svvvvv --junitxml=test-results/junit.xml + +# Code quality +flake8 notebooker tests +black --check -l 120 notebooker tests + +# Build docs +pip install -e ".[docs]" +sphinx-build -b html docs/ build/sphinx/html +``` + +### JavaScript Development +```bash +cd notebooker/web/static/ + +yarn install --frozen-lockfile +yarn run lint # ESLint +yarn run format # Prettier +yarn run bundle # Browserify scheduler.js +yarn test # Jest +``` + +### Quick Demo +```bash +cd docker && docker-compose up +# Access at http://localhost:8080/ +``` + +## Architecture + +### Core Components + +- **`notebooker/execute_notebook.py`** - Notebook execution engine using Papermill +- **`notebooker/_entrypoints.py`** - Click-based CLI (`notebooker-cli`) +- **`notebooker/web/app.py`** - Flask webapp with Gevent WSGI server +- **`notebooker/serialization/`** - Storage backend interfaces +- **`notebooker/serializers/`** - MongoDB implementation (PyMongoResultSerializer) + +### Entry Points +- `notebooker-cli` - Main CLI with subcommands: `start-webapp`, `start-scheduler`, `execute-notebook`, `cleanup-old-reports` +- `notebooker_execute` - Docker-compatible entrypoint +- `notebooker_template_sanity_check` - Template validation +- `notebooker_template_regression_test` - Regression testing + +### Execution Flow +1. Templates stored as .py files (Jupytext format) in git +2. Converted to .ipynb via `generate_ipynb_from_py()` +3. Executed with Papermill using parameters +4. Output converted to HTML/PDF via nbconvert +5. 
Results stored in MongoDB with GridFS for large files + +### Web App Routes +- `/run_report/` - Execute notebooks +- `/results/` - Serve completed reports +- `/pending/` - Monitor running reports +- `/scheduler/` - Schedule management + +### Template Parameters +Define parameters in templates using the Jupytext tag format: +```python +# + {"tags": ["parameters"]} +param_name = "default_value" +``` + +## Key Configuration + +- `NOTEBOOK_KERNEL_NAME` - Kernel for execution (default: `notebooker_kernel`) +- `PY_TEMPLATE_BASE_DIR` - Git repo containing templates +- `SERIALIZER_CLS` / `SERIALIZER_CONFIG` - Storage backend config +- `NOTEBOOKER_DISABLE_GIT` - Skip git pulls during execution +- `SCHEDULER_MANAGEMENT_ONLY` - Webapp manages jobs but doesn't execute them (use with standalone scheduler) + +## Standalone Scheduler + +The scheduler can run as a standalone process instead of a background thread in the webapp: + +```bash +# Webapp (manages jobs, doesn't execute) +notebooker-cli start-webapp --scheduler-management-only + +# Standalone scheduler (executes jobs) +notebooker-cli start-scheduler +``` + +Key files: `scheduler_core.py` (shared infrastructure), `standalone_scheduler.py` (standalone process) + +## Version Consistency + +When bumping versions, update all of: +- `notebooker/version.py` +- `CHANGELOG.md` +- `docs/conf.py` +- `notebooker/web/static/package.json` + +## Testing + +Tests require MongoDB. The test suite uses pytest-server-fixtures for MongoDB test servers. Test directories: +- `tests/unit/` - Unit tests +- `tests/integration/` - Integration tests +- `tests/regression/` - Regression tests +- `tests/sanity/` - Sanity checks diff --git a/docs/webapp/webapp.rst b/docs/webapp/webapp.rst index d26edcdd..770f3504 100644 --- a/docs/webapp/webapp.rst +++ b/docs/webapp/webapp.rst @@ -181,3 +181,50 @@ environments or where the reports can reveal sensitive data if misconfigured. 
Please note that read-only mode does not change the functionality of the scheduler; users will still be able to modify schedules and it will execute as intended. To disable the scheduler you can add :code:`--disable-scheduler` to the command line arguments of the webapp; likewise git pulls can be prevented by using :code:`--disable-git`. + + +Standalone scheduler +-------------------- + +.. note:: + Available from version 0.8.0 onwards. + +By default, the scheduler runs as a background thread within the webapp process. While convenient, +this approach has a drawback: if the scheduler thread dies, the only way to recover is to restart +the entire webapp. + +For production deployments, especially in Kubernetes, you can run the scheduler as a standalone +process. This allows the scheduler to be restarted independently without affecting the webapp, +improving reliability. + +**Starting the standalone scheduler:** + +.. code-block:: bash + + notebooker-cli start-scheduler --mongo-host localhost:27017 + +**Starting the webapp in management-only mode:** + +When using a standalone scheduler, the webapp should be started with :code:`--scheduler-management-only`. +This allows users to create, update, and delete scheduled jobs via the UI, but the webapp won't +execute them - that's handled by the standalone scheduler. + +.. 
code-block:: bash + + notebooker-cli start-webapp --scheduler-management-only --mongo-host localhost:27017 + +**Deployment configuration:** + ++---------------------------+----------------------------------------+-----------------------------------+ +| Deployment | Webapp flags | Scheduler process | ++===========================+========================================+===================================+ +| Traditional (default) | (none) | Not needed | ++---------------------------+----------------------------------------+-----------------------------------+ +| Standalone scheduler | :code:`--scheduler-management-only` | :code:`start-scheduler` | ++---------------------------+----------------------------------------+-----------------------------------+ +| No scheduling | :code:`--disable-scheduler` | Not needed | ++---------------------------+----------------------------------------+-----------------------------------+ + +.. warning:: + Only run one scheduler process at a time. APScheduler does not synchronise schedulers that share a + job store, so running more than one executing scheduler can lead to duplicate or missed job runs. diff --git a/notebooker/_entrypoints.py b/notebooker/_entrypoints.py index 631fcc87..13cf2443 100644 --- a/notebooker/_entrypoints.py +++ b/notebooker/_entrypoints.py @@ -11,6 +11,7 @@ from notebooker.serialization import SERIALIZER_TO_CLI_OPTIONS from notebooker.settings import BaseConfig, WebappConfig from notebooker.snapshot import snap_latest_successful_notebooks +from notebooker.standalone_scheduler import run_standalone_scheduler from notebooker.utils.cleanup import delete_old_reports from notebooker.web.app import main @@ -137,6 +138,13 @@ def base_notebooker( help="If --disable-scheduler is given, then the scheduling back-end of the webapp will not start up. 
It will also " "not display the scheduler from the front-end of the webapp.", ) +@click.option( + "--scheduler-management-only", + default=False, + is_flag=True, + help="If --scheduler-management-only is given, the webapp can create/update/delete scheduled jobs but will not " + "execute them. Use this when running a separate standalone scheduler process.", +) @click.option( "--scheduler-mongo-database", default="", @@ -164,6 +172,7 @@ def start_webapp( debug, base_cache_dir, disable_scheduler, + scheduler_management_only, scheduler_mongo_database, scheduler_mongo_collection, readonly_mode, @@ -174,12 +183,55 @@ def start_webapp( web_config.DEBUG = debug web_config.CACHE_DIR = base_cache_dir web_config.DISABLE_SCHEDULER = disable_scheduler + web_config.SCHEDULER_MANAGEMENT_ONLY = scheduler_management_only web_config.SCHEDULER_MONGO_DATABASE = scheduler_mongo_database web_config.SCHEDULER_MONGO_COLLECTION = scheduler_mongo_collection web_config.READONLY_MODE = readonly_mode return main(web_config) +@base_notebooker.command() +@click.option("--logging-level", default="INFO", help="The logging level. Set to DEBUG for lots of extra info.") +@click.option( + "--scheduler-mongo-database", + default="", + help="The name of the mongo database which is used for the scheduler. " + "Defaults to the same as the serializer's mongo database.", +) +@click.option( + "--scheduler-mongo-collection", + default="", + help="The name of the mongo collection for the scheduler. " + "Defaults to the same as the serializer's mongo collection + '_scheduler'.", +) +@pass_config +def start_scheduler( + config: BaseConfig, + logging_level, + scheduler_mongo_database, + scheduler_mongo_collection, +): + """ + Start the scheduler as a standalone process. + + Use this when you want to run the scheduler separately from the webapp, + for example in a Kubernetes deployment where the scheduler can be + restarted independently. 
+ + The webapp should be started with --scheduler-management-only when + using a standalone scheduler. + """ + import logging + logging.basicConfig(level=logging.getLevelName(logging_level)) + + # Copy config and add scheduler-specific settings + scheduler_config = BaseConfig.copy_existing(config) + scheduler_config.SCHEDULER_MONGO_DATABASE = scheduler_mongo_database + scheduler_config.SCHEDULER_MONGO_COLLECTION = scheduler_mongo_collection + + return run_standalone_scheduler(scheduler_config) + + @base_notebooker.command() @click.option("--report-name", help="The name of the template to execute, relative to the template directory.") @click.option( diff --git a/notebooker/scheduler_core.py b/notebooker/scheduler_core.py new file mode 100644 index 00000000..66eb7785 --- /dev/null +++ b/notebooker/scheduler_core.py @@ -0,0 +1,108 @@ +""" +Shared scheduler infrastructure for Notebooker. + +This module provides common functions for setting up the APScheduler-based +job scheduler, used by both the webapp (in-process or management-only mode) +and the standalone scheduler process. +""" +import logging +from typing import Dict, Any + +from apscheduler.schedulers.background import BackgroundScheduler +from apscheduler.jobstores.mongodb import MongoDBJobStore + +from notebooker.serialization.mongo import MongoResultSerializer +from notebooker.serialization.serialization import get_serializer_from_cls +from notebooker.settings import BaseConfig + +logger = logging.getLogger(__name__) + + +def get_jobstore_config(config: BaseConfig) -> Dict[str, Any]: + """ + Extract MongoDB jobstore configuration from the serializer config. + + Parameters + ---------- + config : BaseConfig + The notebooker configuration containing serializer settings. 
+ + Returns + ------- + dict + A dictionary containing: + - 'client': The MongoDB client instance + - 'database': The database name for the scheduler + - 'collection': The collection name for the scheduler + + Raises + ------ + ValueError + If the serializer is not a MongoResultSerializer. + """ + serializer = get_serializer_from_cls(config.SERIALIZER_CLS, **config.SERIALIZER_CONFIG) + if not isinstance(serializer, MongoResultSerializer): + raise ValueError( + "We cannot support scheduling if we are not using a Mongo Result serializer, " + "since we re-use the connection details from the serializer to store metadata " + "about scheduling." + ) + + client = serializer.get_mongo_connection() + + # Allow config overrides for database/collection, with sensible defaults + scheduler_db = getattr(config, "SCHEDULER_MONGO_DATABASE", "") or serializer.database_name + scheduler_collection = ( + getattr(config, "SCHEDULER_MONGO_COLLECTION", "") + or f"{serializer.result_collection_name}_scheduler" + ) + + return { + "client": client, + "database": scheduler_db, + "collection": scheduler_collection, + } + + +def create_scheduler(jobstore_config: Dict[str, Any], paused: bool = False) -> BackgroundScheduler: + """ + Create and start a BackgroundScheduler with MongoDB jobstore. + + Parameters + ---------- + jobstore_config : dict + Configuration from get_jobstore_config() containing client, database, collection. + paused : bool, optional + If True, the scheduler is started but immediately paused. This allows + job CRUD operations to work without actually executing jobs. Useful for + the webapp when running in management-only mode. Default is False. + + Returns + ------- + BackgroundScheduler + A started (and optionally paused) scheduler instance. 
+ """ + jobstores = { + "mongo": MongoDBJobStore( + database=jobstore_config["database"], + collection=jobstore_config["collection"], + client=jobstore_config["client"], + ) + } + + scheduler = BackgroundScheduler( + jobstores=jobstores, + job_defaults={"misfire_grace_time": 60 * 60}, # 1 hour grace time + ) + + scheduler.start() + + if paused: + scheduler.pause() + logger.info("Scheduler started in paused (management-only) mode") + else: + logger.info("Scheduler started") + + scheduler.print_jobs() + + return scheduler diff --git a/notebooker/settings.py b/notebooker/settings.py index daf2d01e..1e6b1ec9 100644 --- a/notebooker/settings.py +++ b/notebooker/settings.py @@ -61,4 +61,5 @@ class WebappConfig(BaseConfig): SCHEDULER_MONGO_DATABASE: str = "" SCHEDULER_MONGO_COLLECTION: str = "" DISABLE_SCHEDULER: bool = False + SCHEDULER_MANAGEMENT_ONLY: bool = False # When True, job CRUD works but jobs aren't executed READONLY_MODE: bool = False diff --git a/notebooker/standalone_scheduler.py b/notebooker/standalone_scheduler.py new file mode 100644 index 00000000..29446d2b --- /dev/null +++ b/notebooker/standalone_scheduler.py @@ -0,0 +1,88 @@ +""" +Standalone scheduler process for Notebooker. + +This module provides a standalone scheduler that can run separately from +the webapp, allowing for better reliability in Kubernetes deployments. +When the scheduler runs as a separate process, it can be restarted +independently without affecting the webapp. + +Usage: + notebooker-cli start-scheduler [OPTIONS] + +The webapp should be started with --scheduler-management-only when using +a standalone scheduler, so that it can manage jobs without executing them. 
+""" +import logging +import signal +import sys +import time + +from notebooker.scheduler_core import get_jobstore_config, create_scheduler +from notebooker.settings import BaseConfig + +logger = logging.getLogger(__name__) + +# Global reference to scheduler for signal handler +_scheduler = None + + +def _shutdown_handler(signum, frame): + """Handle SIGTERM/SIGINT for graceful shutdown.""" + global _scheduler + sig_name = signal.Signals(signum).name + logger.info(f"Received {sig_name}, shutting down scheduler...") + + if _scheduler is not None: + try: + _scheduler.shutdown(wait=True) + logger.info("Scheduler shutdown complete") + except Exception as e: + logger.error(f"Error during scheduler shutdown: {e}") + + sys.exit(0) + + +def run_standalone_scheduler(config: BaseConfig): + """ + Run the scheduler as a standalone process. + + This function: + 1. Sets up the GLOBAL_CONFIG for run_report() to use + 2. Creates and starts the scheduler with MongoDB jobstore + 3. Registers signal handlers for graceful shutdown + 4. Keeps the process alive until terminated + + Parameters + ---------- + config : BaseConfig + The notebooker configuration containing serializer settings and + scheduler configuration (SCHEDULER_MONGO_DATABASE, SCHEDULER_MONGO_COLLECTION). 
+ """ + global _scheduler + + # Set up GLOBAL_CONFIG so run_report() can access it + # This is needed because scheduled jobs call run_report() which + # relies on GLOBAL_CONFIG being set + from notebooker.web import app as app_module + app_module.GLOBAL_CONFIG = config + + logger.info("Starting standalone scheduler...") + + # Get jobstore configuration from serializer + jobstore_config = get_jobstore_config(config) + + # Create and start scheduler (not paused - we want to execute jobs) + _scheduler = create_scheduler(jobstore_config, paused=False) + + # Register signal handlers for graceful shutdown + signal.signal(signal.SIGTERM, _shutdown_handler) + signal.signal(signal.SIGINT, _shutdown_handler) + + logger.info("Standalone scheduler is running. Press Ctrl+C to stop.") + + # Keep the process alive + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + _shutdown_handler(signal.SIGINT, None) diff --git a/notebooker/web/app.py b/notebooker/web/app.py index c7aaeb7d..08a95e1e 100644 --- a/notebooker/web/app.py +++ b/notebooker/web/app.py @@ -6,13 +6,11 @@ import sys import time -from apscheduler.schedulers.background import BackgroundScheduler -from apscheduler.jobstores.mongodb import MongoDBJobStore from flask import Flask from gevent.pywsgi import WSGIServer from notebooker.constants import CANCEL_MESSAGE, JobStatus -from notebooker.serialization.mongo import MongoResultSerializer +from notebooker.scheduler_core import get_jobstore_config, create_scheduler from notebooker.serialization.serialization import initialize_serializer_from_config, get_serializer_from_cls from notebooker.settings import WebappConfig from notebooker.utils.filesystem import _cleanup_dirs, initialise_base_dirs @@ -99,21 +97,15 @@ def setup_scheduler(flask_app, web_config): if web_config.DISABLE_SCHEDULER: flask_app.apscheduler = None return flask_app - serializer = get_serializer_from_cls(web_config.SERIALIZER_CLS, **web_config.SERIALIZER_CONFIG) - if isinstance(serializer, 
MongoResultSerializer): - client = serializer.get_mongo_connection() - database = web_config.SCHEDULER_MONGO_DATABASE or serializer.database_name - collection = web_config.SCHEDULER_MONGO_COLLECTION or f"{serializer.result_collection_name}_scheduler" - jobstores = {"mongo": MongoDBJobStore(database=database, collection=collection, client=client)} - else: - raise ValueError( - "We cannot support scheduling if we are not using a Mongo Result serializer, " - "since we re-use the connection details from the serializer to store metadata " - "about scheduling." - ) - scheduler = BackgroundScheduler(jobstores=jobstores, job_defaults={"misfire_grace_time": 60 * 60}) - scheduler.start() - scheduler.print_jobs() + + jobstore_config = get_jobstore_config(web_config) + + # In management-only mode, the scheduler is paused so jobs can be + # created/updated/deleted but won't be executed. Use this when running + # a separate standalone scheduler process. + paused = getattr(web_config, "SCHEDULER_MANAGEMENT_ONLY", False) + + scheduler = create_scheduler(jobstore_config, paused=paused) flask_app.apscheduler = scheduler return flask_app diff --git a/tests/integration/web/test_app.py b/tests/integration/web/test_app.py index f7c2fca5..c17ba646 100644 --- a/tests/integration/web/test_app.py +++ b/tests/integration/web/test_app.py @@ -12,8 +12,27 @@ def test_setup_scheduler_disabled(flask_app, webapp_config): def test_setup_scheduler(flask_app, webapp_config, test_db_name, test_lib_name): webapp_config.DISABLE_SCHEDULER = False scheduler_coll = f"{test_lib_name}_scheduler" - with mock.patch("notebooker.web.app.BackgroundScheduler") as sched: - with mock.patch("notebooker.web.app.MongoDBJobStore") as jobstore: + with mock.patch("notebooker.scheduler_core.BackgroundScheduler") as sched: + with mock.patch("notebooker.scheduler_core.MongoDBJobStore") as jobstore: app = setup_scheduler(flask_app, webapp_config) assert app.apscheduler is not None 
jobstore.assert_called_with(database=test_db_name, collection=scheduler_coll, client=mock.ANY) + + +def test_setup_scheduler_management_only(flask_app, webapp_config, test_db_name, test_lib_name): + """Test that SCHEDULER_MANAGEMENT_ONLY creates a paused scheduler.""" + webapp_config.DISABLE_SCHEDULER = False + webapp_config.SCHEDULER_MANAGEMENT_ONLY = True + scheduler_coll = f"{test_lib_name}_scheduler" + with mock.patch("notebooker.scheduler_core.BackgroundScheduler") as sched_cls: + with mock.patch("notebooker.scheduler_core.MongoDBJobStore") as jobstore: + mock_scheduler = mock.MagicMock() + sched_cls.return_value = mock_scheduler + + app = setup_scheduler(flask_app, webapp_config) + + assert app.apscheduler is not None + # Verify scheduler was started AND paused + mock_scheduler.start.assert_called_once() + mock_scheduler.pause.assert_called_once() + jobstore.assert_called_with(database=test_db_name, collection=scheduler_coll, client=mock.ANY) diff --git a/tests/unit/test_scheduler_core.py b/tests/unit/test_scheduler_core.py new file mode 100644 index 00000000..2b2c6c87 --- /dev/null +++ b/tests/unit/test_scheduler_core.py @@ -0,0 +1,120 @@ +import mock +import pytest + +from notebooker.constants import DEFAULT_SERIALIZER, DEFAULT_DATABASE_NAME, DEFAULT_RESULT_COLLECTION_NAME +from notebooker.settings import WebappConfig +from notebooker.scheduler_core import get_jobstore_config, create_scheduler + + +class TestGetJobstoreConfig: + def test_extracts_config_from_serializer(self, webapp_config): + """Test that get_jobstore_config extracts correct values from serializer.""" + config = get_jobstore_config(webapp_config) + + assert "client" in config + assert config["client"] is not None + assert config["database"] == DEFAULT_DATABASE_NAME + assert config["collection"] == f"{DEFAULT_RESULT_COLLECTION_NAME}_scheduler" + + def test_respects_custom_database_override(self, webapp_config): + """Test that custom scheduler database is respected.""" + 
webapp_config.SCHEDULER_MONGO_DATABASE = "custom_db" + config = get_jobstore_config(webapp_config) + + assert config["database"] == "custom_db" + assert config["collection"] == f"{DEFAULT_RESULT_COLLECTION_NAME}_scheduler" + + def test_respects_custom_collection_override(self, webapp_config): + """Test that custom scheduler collection is respected.""" + webapp_config.SCHEDULER_MONGO_COLLECTION = "custom_scheduler_coll" + config = get_jobstore_config(webapp_config) + + assert config["database"] == DEFAULT_DATABASE_NAME + assert config["collection"] == "custom_scheduler_coll" + + def test_raises_for_non_mongo_serializer(self): + """Test that a non-Mongo serializer raises ValueError.""" + config = WebappConfig( + SERIALIZER_CLS="PyMongoResultSerializer", # valid but we'll mock it + SERIALIZER_CONFIG={}, + ) + + # Mock get_serializer_from_cls to return a non-Mongo serializer (just a mock object) + with mock.patch("notebooker.scheduler_core.get_serializer_from_cls") as mock_get_serializer: + mock_get_serializer.return_value = mock.MagicMock() # Not a MongoResultSerializer + with pytest.raises(ValueError, match="Mongo Result serializer"): + get_jobstore_config(config) + + +class TestCreateScheduler: + def test_creates_running_scheduler(self): + """Test that create_scheduler creates a running scheduler.""" + mock_client = mock.MagicMock() + jobstore_config = { + "client": mock_client, + "database": "test_db", + "collection": "test_scheduler", + } + + with mock.patch("notebooker.scheduler_core.BackgroundScheduler") as mock_scheduler_cls: + with mock.patch("notebooker.scheduler_core.MongoDBJobStore") as mock_jobstore_cls: + mock_scheduler = mock.MagicMock() + mock_scheduler_cls.return_value = mock_scheduler + + scheduler = create_scheduler(jobstore_config, paused=False) + + # Verify jobstore was created with correct params + mock_jobstore_cls.assert_called_once_with( + database="test_db", + collection="test_scheduler", + client=mock_client, + ) + + # Verify scheduler was 
started but not paused + mock_scheduler.start.assert_called_once() + mock_scheduler.pause.assert_not_called() + mock_scheduler.print_jobs.assert_called_once() + + assert scheduler is mock_scheduler + + def test_creates_paused_scheduler(self): + """Test that create_scheduler with paused=True pauses the scheduler.""" + mock_client = mock.MagicMock() + jobstore_config = { + "client": mock_client, + "database": "test_db", + "collection": "test_scheduler", + } + + with mock.patch("notebooker.scheduler_core.BackgroundScheduler") as mock_scheduler_cls: + with mock.patch("notebooker.scheduler_core.MongoDBJobStore"): + mock_scheduler = mock.MagicMock() + mock_scheduler_cls.return_value = mock_scheduler + + scheduler = create_scheduler(jobstore_config, paused=True) + + # Verify scheduler was started AND paused + mock_scheduler.start.assert_called_once() + mock_scheduler.pause.assert_called_once() + + assert scheduler is mock_scheduler + + def test_scheduler_created_with_correct_defaults(self): + """Test that scheduler is created with correct job defaults.""" + mock_client = mock.MagicMock() + jobstore_config = { + "client": mock_client, + "database": "test_db", + "collection": "test_scheduler", + } + + with mock.patch("notebooker.scheduler_core.BackgroundScheduler") as mock_scheduler_cls: + with mock.patch("notebooker.scheduler_core.MongoDBJobStore"): + mock_scheduler = mock.MagicMock() + mock_scheduler_cls.return_value = mock_scheduler + + create_scheduler(jobstore_config) + + # Verify misfire_grace_time is set to 1 hour + call_kwargs = mock_scheduler_cls.call_args[1] + assert call_kwargs["job_defaults"] == {"misfire_grace_time": 60 * 60} diff --git a/tests/unit/test_standalone_scheduler.py b/tests/unit/test_standalone_scheduler.py new file mode 100644 index 00000000..e36b148c --- /dev/null +++ b/tests/unit/test_standalone_scheduler.py @@ -0,0 +1,103 @@ +import mock +import signal + +from notebooker.standalone_scheduler import run_standalone_scheduler, _shutdown_handler + + 
+class TestStandaloneScheduler: + def test_sets_global_config(self, webapp_config): + """Test that run_standalone_scheduler sets GLOBAL_CONFIG.""" + with mock.patch("notebooker.standalone_scheduler.get_jobstore_config") as mock_get_config: + with mock.patch("notebooker.standalone_scheduler.create_scheduler") as mock_create: + with mock.patch("notebooker.standalone_scheduler.signal.signal"): + with mock.patch("notebooker.standalone_scheduler.time.sleep", side_effect=KeyboardInterrupt): + with mock.patch("notebooker.standalone_scheduler._shutdown_handler"): + mock_get_config.return_value = {"client": mock.MagicMock(), "database": "db", "collection": "coll"} + mock_scheduler = mock.MagicMock() + mock_create.return_value = mock_scheduler + + # Import the app module to check GLOBAL_CONFIG + from notebooker.web import app as app_module + original_config = app_module.GLOBAL_CONFIG + + try: + run_standalone_scheduler(webapp_config) + except (KeyboardInterrupt, SystemExit): + pass + + # Verify GLOBAL_CONFIG was set + assert app_module.GLOBAL_CONFIG is webapp_config + + # Restore original + app_module.GLOBAL_CONFIG = original_config + + def test_creates_scheduler_not_paused(self, webapp_config): + """Test that standalone scheduler is created without pausing.""" + with mock.patch("notebooker.standalone_scheduler.get_jobstore_config") as mock_get_config: + with mock.patch("notebooker.standalone_scheduler.create_scheduler") as mock_create: + with mock.patch("notebooker.standalone_scheduler.signal.signal"): + with mock.patch("notebooker.standalone_scheduler.time.sleep", side_effect=KeyboardInterrupt): + with mock.patch("notebooker.standalone_scheduler._shutdown_handler"): + mock_get_config.return_value = {"client": mock.MagicMock(), "database": "db", "collection": "coll"} + mock_scheduler = mock.MagicMock() + mock_create.return_value = mock_scheduler + + try: + run_standalone_scheduler(webapp_config) + except (KeyboardInterrupt, SystemExit): + pass + + # Verify scheduler was 
created with paused=False + mock_create.assert_called_once() + call_args = mock_create.call_args + assert call_args[1].get("paused", False) is False + + def test_registers_signal_handlers(self, webapp_config): + """Test that SIGTERM and SIGINT handlers are registered.""" + with mock.patch("notebooker.standalone_scheduler.get_jobstore_config") as mock_get_config: + with mock.patch("notebooker.standalone_scheduler.create_scheduler") as mock_create: + with mock.patch("notebooker.standalone_scheduler.signal.signal") as mock_signal: + with mock.patch("notebooker.standalone_scheduler.time.sleep", side_effect=KeyboardInterrupt): + with mock.patch("notebooker.standalone_scheduler._shutdown_handler"): + mock_get_config.return_value = {"client": mock.MagicMock(), "database": "db", "collection": "coll"} + mock_scheduler = mock.MagicMock() + mock_create.return_value = mock_scheduler + + try: + run_standalone_scheduler(webapp_config) + except (KeyboardInterrupt, SystemExit): + pass + + # Verify signal handlers were registered + signal_calls = [call[0][0] for call in mock_signal.call_args_list] + assert signal.SIGTERM in signal_calls + assert signal.SIGINT in signal_calls + + +class TestShutdownHandler: + def test_shutdown_handler_shuts_down_scheduler(self): + """Test that _shutdown_handler properly shuts down the scheduler.""" + import notebooker.standalone_scheduler as scheduler_module + + mock_scheduler = mock.MagicMock() + scheduler_module._scheduler = mock_scheduler + + with mock.patch("notebooker.standalone_scheduler.sys.exit") as mock_exit: + _shutdown_handler(signal.SIGTERM, None) + + mock_scheduler.shutdown.assert_called_once_with(wait=True) + mock_exit.assert_called_once_with(0) + + # Clean up + scheduler_module._scheduler = None + + def test_shutdown_handler_handles_no_scheduler(self): + """Test that _shutdown_handler handles case when scheduler is None.""" + import notebooker.standalone_scheduler as scheduler_module + + scheduler_module._scheduler = None + + with 
mock.patch("notebooker.standalone_scheduler.sys.exit") as mock_exit: + _shutdown_handler(signal.SIGTERM, None) + + mock_exit.assert_called_once_with(0) From f3caeb3e573606d00d3d9029f1361f95860f95b9 Mon Sep 17 00:00:00 2001 From: Jon Bannister Date: Thu, 15 Jan 2026 13:21:27 +0000 Subject: [PATCH 2/4] Fix scheduler race condition and template dropdown bug - Use scheduler.start(paused=True) to prevent jobs firing before pause - Fix _gen_all_templates to skip empty folders (was showing folder names) - Fix test ordering assertion for templates endpoint - Apply black and prettier formatting --- notebooker/_entrypoints.py | 8 ++---- notebooker/scheduler_core.py | 18 ++++-------- notebooker/standalone_scheduler.py | 2 +- notebooker/utils/templates.py | 5 ++-- notebooker/web/app.py | 1 - tests/integration/web/routes/test_core.py | 2 +- tests/integration/web/test_app.py | 7 ++--- tests/unit/test_scheduler_core.py | 34 ++++++----------------- tests/unit/test_standalone_scheduler.py | 19 +++++++++++-- 9 files changed, 40 insertions(+), 56 deletions(-) diff --git a/notebooker/_entrypoints.py b/notebooker/_entrypoints.py index 13cf2443..705913a9 100644 --- a/notebooker/_entrypoints.py +++ b/notebooker/_entrypoints.py @@ -205,12 +205,7 @@ def start_webapp( "Defaults to the same as the serializer's mongo collection + '_scheduler'.", ) @pass_config -def start_scheduler( - config: BaseConfig, - logging_level, - scheduler_mongo_database, - scheduler_mongo_collection, -): +def start_scheduler(config: BaseConfig, logging_level, scheduler_mongo_database, scheduler_mongo_collection): """ Start the scheduler as a standalone process. @@ -222,6 +217,7 @@ def start_scheduler( using a standalone scheduler. 
""" import logging + logging.basicConfig(level=logging.getLevelName(logging_level)) # Copy config and add scheduler-specific settings diff --git a/notebooker/scheduler_core.py b/notebooker/scheduler_core.py index 66eb7785..a8797b79 100644 --- a/notebooker/scheduler_core.py +++ b/notebooker/scheduler_core.py @@ -53,15 +53,10 @@ def get_jobstore_config(config: BaseConfig) -> Dict[str, Any]: # Allow config overrides for database/collection, with sensible defaults scheduler_db = getattr(config, "SCHEDULER_MONGO_DATABASE", "") or serializer.database_name scheduler_collection = ( - getattr(config, "SCHEDULER_MONGO_COLLECTION", "") - or f"{serializer.result_collection_name}_scheduler" + getattr(config, "SCHEDULER_MONGO_COLLECTION", "") or f"{serializer.result_collection_name}_scheduler" ) - return { - "client": client, - "database": scheduler_db, - "collection": scheduler_collection, - } + return {"client": client, "database": scheduler_db, "collection": scheduler_collection} def create_scheduler(jobstore_config: Dict[str, Any], paused: bool = False) -> BackgroundScheduler: @@ -91,16 +86,15 @@ def create_scheduler(jobstore_config: Dict[str, Any], paused: bool = False) -> B } scheduler = BackgroundScheduler( - jobstores=jobstores, - job_defaults={"misfire_grace_time": 60 * 60}, # 1 hour grace time + jobstores=jobstores, job_defaults={"misfire_grace_time": 60 * 60} # 1 hour grace time ) - scheduler.start() - if paused: - scheduler.pause() + # Start in paused state to prevent any jobs from firing + scheduler.start(paused=True) logger.info("Scheduler started in paused (management-only) mode") else: + scheduler.start() logger.info("Scheduler started") scheduler.print_jobs() diff --git a/notebooker/standalone_scheduler.py b/notebooker/standalone_scheduler.py index 29446d2b..db47abb7 100644 --- a/notebooker/standalone_scheduler.py +++ b/notebooker/standalone_scheduler.py @@ -28,7 +28,6 @@ def _shutdown_handler(signum, frame): """Handle SIGTERM/SIGINT for graceful shutdown.""" - 
global _scheduler sig_name = signal.Signals(signum).name logger.info(f"Received {sig_name}, shutting down scheduler...") @@ -64,6 +63,7 @@ def run_standalone_scheduler(config: BaseConfig): # This is needed because scheduled jobs call run_report() which # relies on GLOBAL_CONFIG being set from notebooker.web import app as app_module + app_module.GLOBAL_CONFIG = config logger.info("Starting standalone scheduler...") diff --git a/notebooker/utils/templates.py b/notebooker/utils/templates.py index 86e86fe6..07564d75 100644 --- a/notebooker/utils/templates.py +++ b/notebooker/utils/templates.py @@ -66,8 +66,9 @@ def _get_preview( def _gen_all_templates(template_dict): for template_name, children in template_dict.items(): - if children: + if children is not None: for x in _gen_all_templates(children): # Replace with "yield from" when we have py3 yield x - else: + elif children is None: yield template_name + # Skip entries with empty dicts (folders with no valid templates) diff --git a/notebooker/web/app.py b/notebooker/web/app.py index 08a95e1e..950a2a93 100644 --- a/notebooker/web/app.py +++ b/notebooker/web/app.py @@ -40,7 +40,6 @@ def _cancel_all_jobs(): @atexit.register def _cleanup_on_exit(): - global all_report_refresher if "pytest" in sys.modules or not all_report_refresher: return os.environ["NOTEBOOKER_APP_STOPPING"] = "1" diff --git a/tests/integration/web/routes/test_core.py b/tests/integration/web/routes/test_core.py index a92349cb..f91c0398 100644 --- a/tests/integration/web/routes/test_core.py +++ b/tests/integration/web/routes/test_core.py @@ -14,7 +14,7 @@ def test_create_schedule(flask_app, setup_workspace): rv = client.get("/core/all_possible_templates_flattened") assert rv.status_code == 200 data = json.loads(rv.data) - assert data == {"result": ["fake/py_report", "fake/ipynb_report", "fake/report_failing"]} + assert set(data["result"]) == {"fake/py_report", "fake/ipynb_report", "fake/report_failing"} def test_version_number(flask_app, 
setup_workspace): diff --git a/tests/integration/web/test_app.py b/tests/integration/web/test_app.py index c17ba646..bebfc360 100644 --- a/tests/integration/web/test_app.py +++ b/tests/integration/web/test_app.py @@ -20,7 +20,7 @@ def test_setup_scheduler(flask_app, webapp_config, test_db_name, test_lib_name): def test_setup_scheduler_management_only(flask_app, webapp_config, test_db_name, test_lib_name): - """Test that SCHEDULER_MANAGEMENT_ONLY creates a paused scheduler.""" + """Test that SCHEDULER_MANAGEMENT_ONLY starts scheduler in paused state.""" webapp_config.DISABLE_SCHEDULER = False webapp_config.SCHEDULER_MANAGEMENT_ONLY = True scheduler_coll = f"{test_lib_name}_scheduler" @@ -32,7 +32,6 @@ def test_setup_scheduler_management_only(flask_app, webapp_config, test_db_name, app = setup_scheduler(flask_app, webapp_config) assert app.apscheduler is not None - # Verify scheduler was started AND paused - mock_scheduler.start.assert_called_once() - mock_scheduler.pause.assert_called_once() + # Verify scheduler was started with paused=True (no race condition) + mock_scheduler.start.assert_called_once_with(paused=True) jobstore.assert_called_with(database=test_db_name, collection=scheduler_coll, client=mock.ANY) diff --git a/tests/unit/test_scheduler_core.py b/tests/unit/test_scheduler_core.py index 2b2c6c87..461a132f 100644 --- a/tests/unit/test_scheduler_core.py +++ b/tests/unit/test_scheduler_core.py @@ -34,10 +34,7 @@ def test_respects_custom_collection_override(self, webapp_config): def test_raises_for_non_mongo_serializer(self): """Test that a non-Mongo serializer raises ValueError.""" - config = WebappConfig( - SERIALIZER_CLS="PyMongoResultSerializer", # valid but we'll mock it - SERIALIZER_CONFIG={}, - ) + config = WebappConfig(SERIALIZER_CLS="PyMongoResultSerializer", SERIALIZER_CONFIG={}) # valid but we'll mock it # Mock get_serializer_from_cls to return a non-Mongo serializer (just a mock object) with 
mock.patch("notebooker.scheduler_core.get_serializer_from_cls") as mock_get_serializer: @@ -50,11 +47,7 @@ class TestCreateScheduler: def test_creates_running_scheduler(self): """Test that create_scheduler creates a running scheduler.""" mock_client = mock.MagicMock() - jobstore_config = { - "client": mock_client, - "database": "test_db", - "collection": "test_scheduler", - } + jobstore_config = {"client": mock_client, "database": "test_db", "collection": "test_scheduler"} with mock.patch("notebooker.scheduler_core.BackgroundScheduler") as mock_scheduler_cls: with mock.patch("notebooker.scheduler_core.MongoDBJobStore") as mock_jobstore_cls: @@ -65,9 +58,7 @@ def test_creates_running_scheduler(self): # Verify jobstore was created with correct params mock_jobstore_cls.assert_called_once_with( - database="test_db", - collection="test_scheduler", - client=mock_client, + database="test_db", collection="test_scheduler", client=mock_client ) # Verify scheduler was started but not paused @@ -78,13 +69,9 @@ def test_creates_running_scheduler(self): assert scheduler is mock_scheduler def test_creates_paused_scheduler(self): - """Test that create_scheduler with paused=True pauses the scheduler.""" + """Test that create_scheduler with paused=True starts scheduler in paused state.""" mock_client = mock.MagicMock() - jobstore_config = { - "client": mock_client, - "database": "test_db", - "collection": "test_scheduler", - } + jobstore_config = {"client": mock_client, "database": "test_db", "collection": "test_scheduler"} with mock.patch("notebooker.scheduler_core.BackgroundScheduler") as mock_scheduler_cls: with mock.patch("notebooker.scheduler_core.MongoDBJobStore"): @@ -93,20 +80,15 @@ def test_creates_paused_scheduler(self): scheduler = create_scheduler(jobstore_config, paused=True) - # Verify scheduler was started AND paused - mock_scheduler.start.assert_called_once() - mock_scheduler.pause.assert_called_once() + # Verify scheduler was started with paused=True (no race 
condition) + mock_scheduler.start.assert_called_once_with(paused=True) assert scheduler is mock_scheduler def test_scheduler_created_with_correct_defaults(self): """Test that scheduler is created with correct job defaults.""" mock_client = mock.MagicMock() - jobstore_config = { - "client": mock_client, - "database": "test_db", - "collection": "test_scheduler", - } + jobstore_config = {"client": mock_client, "database": "test_db", "collection": "test_scheduler"} with mock.patch("notebooker.scheduler_core.BackgroundScheduler") as mock_scheduler_cls: with mock.patch("notebooker.scheduler_core.MongoDBJobStore"): diff --git a/tests/unit/test_standalone_scheduler.py b/tests/unit/test_standalone_scheduler.py index e36b148c..0ad09557 100644 --- a/tests/unit/test_standalone_scheduler.py +++ b/tests/unit/test_standalone_scheduler.py @@ -12,12 +12,17 @@ def test_sets_global_config(self, webapp_config): with mock.patch("notebooker.standalone_scheduler.signal.signal"): with mock.patch("notebooker.standalone_scheduler.time.sleep", side_effect=KeyboardInterrupt): with mock.patch("notebooker.standalone_scheduler._shutdown_handler"): - mock_get_config.return_value = {"client": mock.MagicMock(), "database": "db", "collection": "coll"} + mock_get_config.return_value = { + "client": mock.MagicMock(), + "database": "db", + "collection": "coll", + } mock_scheduler = mock.MagicMock() mock_create.return_value = mock_scheduler # Import the app module to check GLOBAL_CONFIG from notebooker.web import app as app_module + original_config = app_module.GLOBAL_CONFIG try: @@ -38,7 +43,11 @@ def test_creates_scheduler_not_paused(self, webapp_config): with mock.patch("notebooker.standalone_scheduler.signal.signal"): with mock.patch("notebooker.standalone_scheduler.time.sleep", side_effect=KeyboardInterrupt): with mock.patch("notebooker.standalone_scheduler._shutdown_handler"): - mock_get_config.return_value = {"client": mock.MagicMock(), "database": "db", "collection": "coll"} + 
mock_get_config.return_value = { + "client": mock.MagicMock(), + "database": "db", + "collection": "coll", + } mock_scheduler = mock.MagicMock() mock_create.return_value = mock_scheduler @@ -59,7 +68,11 @@ def test_registers_signal_handlers(self, webapp_config): with mock.patch("notebooker.standalone_scheduler.signal.signal") as mock_signal: with mock.patch("notebooker.standalone_scheduler.time.sleep", side_effect=KeyboardInterrupt): with mock.patch("notebooker.standalone_scheduler._shutdown_handler"): - mock_get_config.return_value = {"client": mock.MagicMock(), "database": "db", "collection": "coll"} + mock_get_config.return_value = { + "client": mock.MagicMock(), + "database": "db", + "collection": "coll", + } mock_scheduler = mock.MagicMock() mock_create.return_value = mock_scheduler From 5b6bd0b19d9e86276b33495ddc9cbc2ec0d1f351 Mon Sep 17 00:00:00 2001 From: Jon Bannister Date: Thu, 15 Jan 2026 14:19:43 +0000 Subject: [PATCH 3/4] Bump version to 0.8.0 --- .circleci/config.yml | 8 ++++---- CHANGELOG.md | 8 ++++++++ docs/conf.py | 2 +- notebooker/version.py | 2 +- notebooker/web/static/package.json | 2 +- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index bea045eb..1b1e69a6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -213,7 +213,7 @@ jobs: PYTHON_VERSION: "3_6" CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_6 CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_6 - VERSION: 0.7.2 + VERSION: 0.8.0 PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases YARN_STATIC_DIR: notebooker/web/static/ IMAGE_NAME: mangroup/notebooker @@ -229,7 +229,7 @@ jobs: environment: CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_7 CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_7 - VERSION: 0.7.2 + VERSION: 0.8.0 PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases YARN_STATIC_DIR: notebooker/web/static/ IMAGE_NAME: mangroup/notebooker @@ -243,7 +243,7 @@ jobs: environment: CIRCLE_ARTIFACTS: 
/tmp/circleci-artifacts/3_8 CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_8 - VERSION: 0.7.2 + VERSION: 0.8.0 PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases YARN_STATIC_DIR: notebooker/web/static/ IMAGE_NAME: mangroup/notebooker @@ -257,7 +257,7 @@ jobs: environment: CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_11 CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_11 - VERSION: 0.7.2 + VERSION: 0.8.0 PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases YARN_STATIC_DIR: notebooker/web/static/ IMAGE_NAME: mangroup/notebooker diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d785b61..3f700600 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +0.8.0 (2026-01-15) +------------------ + +* feature: standalone scheduler process for improved reliability in Kubernetes deployments +* feature: new `--scheduler-management-only` flag for webapp to manage jobs without executing them +* bugfix: fix scheduler race condition by starting in paused state +* bugfix: fix template dropdown showing folder names instead of templates + 0.7.2 (2025-01-17) ------------------ diff --git a/docs/conf.py b/docs/conf.py index 9951f5ec..2ec9e677 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,7 +23,7 @@ author = "Man Group Quant Tech" # The full version, including alpha/beta/rc tags -release = "0.7.2" +release = "0.8.0" # -- General configuration --------------------------------------------------- diff --git a/notebooker/version.py b/notebooker/version.py index bc8c296f..777f190d 100644 --- a/notebooker/version.py +++ b/notebooker/version.py @@ -1 +1 @@ -__version__ = "0.7.2" +__version__ = "0.8.0" diff --git a/notebooker/web/static/package.json b/notebooker/web/static/package.json index 0f81b6cb..f1288515 100644 --- a/notebooker/web/static/package.json +++ b/notebooker/web/static/package.json @@ -1,6 +1,6 @@ { "name": "notebooker", - "version": "0.7.2", + "version": "0.8.0", "description": "Notebooker - Turn notebooks into reports", "dependencies": { 
"bootstrap-table": "1.20.2", From ec187daf4dab04af382cd76d55b7e596625d4827 Mon Sep 17 00:00:00 2001 From: Jon Bannister Date: Fri, 16 Jan 2026 09:18:31 +0000 Subject: [PATCH 4/4] Add complete CLI examples to standalone scheduler docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/webapp/webapp.rst | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/webapp/webapp.rst b/docs/webapp/webapp.rst index 770f3504..4aa7d4ef 100644 --- a/docs/webapp/webapp.rst +++ b/docs/webapp/webapp.rst @@ -201,7 +201,13 @@ improving reliability. .. code-block:: bash - notebooker-cli start-scheduler --mongo-host localhost:27017 + notebooker-cli \ + --py-template-base-dir /path/to/your/repo \ + --py-template-subdir notebook_templates \ + --mongo-host localhost:27017 \ + --database-name notebooker \ + --result-collection-name notebooker_results \ + start-scheduler **Starting the webapp in management-only mode:** @@ -211,7 +217,15 @@ execute them - that's handled by the standalone scheduler. .. code-block:: bash - notebooker-cli start-webapp --scheduler-management-only --mongo-host localhost:27017 + notebooker-cli \ + --py-template-base-dir /path/to/your/repo \ + --py-template-subdir notebook_templates \ + --mongo-host localhost:27017 \ + --database-name notebooker \ + --result-collection-name notebooker_results \ + start-webapp \ + --port 8080 \ + --scheduler-management-only **Deployment configuration:**