posit-dev · cpsievert · Feb 11, 2026 · Feb 9, 2026 · Feb 9, 2026 · Feb 11, 2026
diff --git a/.gitignore b/.gitignore
@@ -270,4 +270,7 @@ docs/plans/
 # Playwright MCP
 .playwright-mcp/
 
+# Git worktrees
+.worktrees/
+
 /.luarc.json
diff --git a/pkg-py/CHANGELOG.md b/pkg-py/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Improvements
+
+* When a custom `prompt_template` is provided that doesn't contain Mustache references to `{{schema}}`, the expensive `get_schema()` call is now skipped entirely. This allows users with large databases to avoid slow startup by providing their own prompt that includes schema information inline (or omits it). (#208)
+
 ### New features
 
 * Added support for Snowflake Semantic Views. When connected to Snowflake (via SQLAlchemy or Ibis), querychat automatically discovers available Semantic Views and includes their definitions in the system prompt. This helps the LLM generate correct queries using the `SEMANTIC_VIEW()` table function with certified business metrics and dimensions. (#200)

diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py
@@ -1,10 +1,13 @@
 from __future__ import annotations
 
+import re
 from pathlib import Path
 from typing import TYPE_CHECKING
 
 import chevron
 
+_SCHEMA_TAG_RE = re.compile(r"\{\{[{#^/]?\s*schema\b")
+
 if TYPE_CHECKING:
     from ._datasource import DataSource
     from ._querychat_base import TOOL_GROUPS
@@ -47,9 +50,12 @@ def __init__(
         else:
             self.extra_instructions = extra_instructions
 
-        self.schema = data_source.get_schema(
-            categorical_threshold=categorical_threshold
-        )
+        if _SCHEMA_TAG_RE.search(self.template):
+            self.schema = data_source.get_schema(
+                categorical_threshold=categorical_threshold
+            )
+        else:
+            self.schema = ""
 
         self.categorical_threshold = categorical_threshold
         self.data_source = data_source

diff --git a/pkg-py/tests/test_base.py b/pkg-py/tests/test_base.py
@@ -208,7 +208,7 @@ def test_client_with_callbacks(self, sample_df):
         reset_called = []
 
         client = qc.client(
-            update_dashboard=lambda data: update_called.append(data),
+            update_dashboard=update_called.append,
             reset_dashboard=lambda: reset_called.append(True),
         )
         assert isinstance(client, chatlas.Chat)

diff --git a/pkg-py/tests/test_system_prompt.py b/pkg-py/tests/test_system_prompt.py
@@ -257,3 +257,44 @@ def test_render_includes_db_type(self, sample_data_source, sample_prompt_templat
 
         assert "Database Type:" in rendered
         assert sample_data_source.get_db_type() in rendered
+
+
+class TestSchemaInferenceSkip:
+    """Tests that `schema` is only populated when the template references {{schema}}."""
+
+    def test_schema_skipped_when_not_in_template(self, sample_data_source):
+        """A template with no schema tag leaves `prompt.schema` as the empty string."""
+        prompt = QueryChatSystemPrompt(
+            prompt_template="No schema here: {{db_type}}",
+            data_source=sample_data_source,
+        )
+
+        assert prompt.schema == ""
+
+    def test_schema_computed_when_in_template(self, sample_data_source):
+        """A plain {{schema}} tag yields a non-empty schema that mentions test_table."""
+        prompt = QueryChatSystemPrompt(
+            prompt_template="Schema: {{schema}}",
+            data_source=sample_data_source,
+        )
+
+        assert prompt.schema != ""
+        assert "test_table" in prompt.schema
+
+    def test_schema_computed_for_triple_braces(self, sample_data_source):
+        """An unescaped {{{schema}}} triple-brace tag yields a non-empty schema."""
+        prompt = QueryChatSystemPrompt(
+            prompt_template="Schema: {{{schema}}}",
+            data_source=sample_data_source,
+        )
+
+        assert prompt.schema != ""
+
+    def test_schema_computed_for_conditional_section(self, sample_data_source):
+        """A {{#schema}}...{{/schema}} section tag yields a non-empty schema."""
+        prompt = QueryChatSystemPrompt(
+            prompt_template="{{#schema}}Has schema{{/schema}}",
+            data_source=sample_data_source,
+        )
+
+        assert prompt.schema != ""
diff --git a/pkg-r/NEWS.md b/pkg-r/NEWS.md
@@ -1,5 +1,6 @@
 # querychat (development version)
 
+* When a custom `prompt_template` is provided that doesn't contain Mustache references to `{{schema}}`, the expensive `get_schema()` call is now skipped entirely. This allows users with large databases to avoid slow startup by providing their own prompt that includes schema information inline (or omits it). (#208)
 
 * Added support for Snowflake Semantic Views. When connected to Snowflake via DBI, querychat automatically discovers available Semantic Views and includes their definitions in the system prompt. This helps the LLM generate correct queries using the `SEMANTIC_VIEW()` table function with certified business metrics and dimensions. (#200)
 

diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R
@@ -62,10 +62,14 @@ QueryChatSystemPrompt <- R6::R6Class(
         self$extra_instructions <- read_text(extra_instructions)
       }
 
-      # Store schema and other fields
-      self$schema <- data_source$get_schema(
-        categorical_threshold = categorical_threshold
-      )
+      # Only compute schema if the template references it: {{schema}}, {{{schema}}}, {{&schema}}, or a {{#schema}}/{{^schema}} section
+      if (grepl("\\{\\{[{&#^/]?\\s*schema\\b", self$template)) {
+        self$schema <- data_source$get_schema(
+          categorical_threshold = categorical_threshold
+        )
+      } else {
+        self$schema <- ""
+      }
       self$categorical_threshold <- categorical_threshold
       self$data_source <- data_source
     },

diff --git a/pkg-r/tests/testthat/test-QueryChatSystemPrompt.R b/pkg-r/tests/testthat/test-QueryChatSystemPrompt.R
@@ -455,3 +455,60 @@ describe("QueryChatSystemPrompt with full prompt.md template", {
     expect_match(prompt_high, "Categorical values:")
   })
 })
+
+describe("Schema inference skip", {
+  skip_if_no_dataframe_engine()
+
+  it("skips schema when template doesn't reference {{schema}}", {
+    df <- new_test_df()
+    ds <- DataFrameSource$new(df, "test_table")
+    withr::defer(ds$cleanup())
+
+    sp <- QueryChatSystemPrompt$new(
+      prompt_template = "No schema here: {{db_type}}",
+      data_source = ds
+    )
+
+    expect_equal(sp$schema, "")
+  })
+
+  it("computes schema when template uses {{schema}}", {
+    df <- new_test_df()
+    ds <- DataFrameSource$new(df, "test_table")
+    withr::defer(ds$cleanup())
+
+    sp <- QueryChatSystemPrompt$new(
+      prompt_template = "Schema: {{schema}}",
+      data_source = ds
+    )
+
+    expect_true(nchar(sp$schema) > 0)
+    expect_match(sp$schema, "test_table")
+  })
+
+  it("computes schema for {{{schema}}} triple braces", {
+    df <- new_test_df()
+    ds <- DataFrameSource$new(df, "test_table")
+    withr::defer(ds$cleanup())
+
+    sp <- QueryChatSystemPrompt$new(
+      prompt_template = "Schema: {{{schema}}}",
+      data_source = ds
+    )
+
+    expect_true(nchar(sp$schema) > 0)
+  })
+
+  it("computes schema for {{#schema}} conditional sections", {
+    df <- new_test_df()
+    ds <- DataFrameSource$new(df, "test_table")
+    withr::defer(ds$cleanup())
+
+    sp <- QueryChatSystemPrompt$new(
+      prompt_template = "{{#schema}}Has schema{{/schema}}",
+      data_source = ds
+    )
+
+    expect_true(nchar(sp$schema) > 0)
+  })
+})
diff --git a/pyproject.toml b/pyproject.toml
@@ -66,7 +66,7 @@ include = ["pkg-py/src/querychat", "pkg-py/LICENSE", "pkg-py/README.md"]
 
 [dependency-groups]
 dev = ["ruff>=0.6.5", "pyright>=1.1.401", "tox-uv>=1.11.4", "pytest>=8.4.0", "polars>=1.0.0", "pyarrow>=14.0.0", "ibis-framework[duckdb]>=9.0.0"]
-docs = ["quartodoc>=0.11.1", "nbformat", "nbclient", "ipykernel"]
+docs = ["quartodoc>=0.11.1", "griffe<2", "nbformat", "nbclient", "ipykernel"]
 examples = [
     "openai",
     "palmerpenguins>=0.1.4",