From 5269b3b6f936e01b41d5d29ea64ba730829b4d6c Mon Sep 17 00:00:00 2001
From: Bernhard Merkle <bernhard.merkle@gmail.com>
Date: Thu, 1 Jan 2026 23:42:03 +0100
Subject: [PATCH 1/6] fix the duplication and inconsistency in testdata path
 computation and usage:

- pytest fixture (adding code and info to conftest.py)
- compute the testdata_path in only one place
- provide used test files as constants
---
 tests/test/conftest.py               | 45 +++++++++++++++++++++++++++-
 tests/test/test_demo.py              |  9 ++----
 tests/test/test_incremental_index.py |  9 ++----
 tests/test/test_podcasts.py          |  6 ++--
 tests/test/test_transcripts.py       | 12 ++++----
 5 files changed, 59 insertions(+), 22 deletions(-)

diff --git a/tests/test/conftest.py b/tests/test/conftest.py
index e5f326b..b027016 100644
--- a/tests/test/conftest.py
+++ b/tests/test/conftest.py
@@ -1,8 +1,9 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-from collections.abc import AsyncGenerator, Iterator
+from collections.abc import AsyncGenerator, Callable, Iterator
 import os
+from pathlib import Path
 import tempfile
 from typing import Any
 
@@ -43,6 +44,37 @@
     MemorySemanticRefCollection,
 )
 
+# --- Testdata path utilities ---
+# Locate the tests directory relative to this file
+_TESTS_DIR = Path(__file__).resolve().parent.parent  # tests/test -> tests
+_TESTDATA_DIR = _TESTS_DIR / "testdata"
+_REPO_ROOT = _TESTS_DIR.parent
+
+
+def get_testdata_path(filename: str) -> str:
+    """Return absolute path to a file in tests/testdata/."""
+    return str(_TESTDATA_DIR / filename)
+
+
+def get_repo_root() -> Path:
+    """Return the repository root path."""
+    return _REPO_ROOT
+
+
+def has_testdata_file(filename: str) -> bool:
+    """Check if a testdata file exists (for use in skipif conditions)."""
+    return (_TESTDATA_DIR / filename).exists()
+
+
+# Commonly used test files as constants
+CONFUSE_A_CAT_VTT = get_testdata_path("Confuse-A-Cat.vtt")
+PARROT_SKETCH_VTT = get_testdata_path("Parrot_Sketch.vtt")
+FAKE_PODCAST_TXT = get_testdata_path("FakePodcast.txt")
+EPISODE_53_INDEX = get_testdata_path("Episode_53_AdrianTchaikovsky_index")
+EPISODE_53_TRANSCRIPT = get_testdata_path("Episode_53_AdrianTchaikovsky.txt")
+EPISODE_53_ANSWERS = get_testdata_path("Episode_53_Answer_results.json")
+EPISODE_53_SEARCH = get_testdata_path("Episode_53_Search_results.json")
+
 
 @pytest.fixture(scope="session")
 def needs_auth() -> None:
@@ -63,6 +95,17 @@ def embedding_model() -> AsyncEmbeddingModel:
     return AsyncEmbeddingModel(model_name=TEST_MODEL_NAME)
 
 
+@pytest.fixture(scope="session")
+def testdata_path() -> Callable[[str], str]:
+    """Fixture returning a function to get absolute paths to testdata files.
+
+    Usage:
+        def test_something(testdata_path):
+            path = testdata_path("Confuse-A-Cat.vtt")
+    """
+    return get_testdata_path
+
+
 @pytest.fixture
 def temp_dir() -> Iterator[str]:
     with tempfile.TemporaryDirectory() as dir:
diff --git a/tests/test/test_demo.py b/tests/test/test_demo.py
index 36acca3..599f006 100644
--- a/tests/test/test_demo.py
+++ b/tests/test/test_demo.py
@@ -3,7 +3,6 @@
 
 import argparse
 import asyncio
-import os
 import textwrap
 import time
 
@@ -12,23 +11,21 @@
 from typeagent.knowpro.interfaces import ScoredSemanticRefOrdinal
 from typeagent.podcasts import podcast
 
-tests_dir = os.path.dirname(__file__)
-root_dir = os.path.dirname(tests_dir)
-DEFAULT_FILE = os.path.join(root_dir, "testdata", "Episode_53_AdrianTchaikovsky_index")
+from conftest import EPISODE_53_INDEX
 
 parser = argparse.ArgumentParser()
 parser.add_argument(
     "filename",
     nargs="?",
     type=str,
-    default=DEFAULT_FILE,
+    default=EPISODE_53_INDEX,
 )
 
 
 def test_main(really_needs_auth: None):
     # auth is needed because we use embeddings.
     # TODO: Only use the embeddings loaded from the file and cached.
-    asyncio.run(main(DEFAULT_FILE))
+    asyncio.run(main(EPISODE_53_INDEX))
 
 
 async def main(filename_prefix: str):
diff --git a/tests/test/test_incremental_index.py b/tests/test/test_incremental_index.py
index 077db4b..12f706a 100644
--- a/tests/test/test_incremental_index.py
+++ b/tests/test/test_incremental_index.py
@@ -18,8 +18,7 @@
 )
 from typeagent.transcripts.transcript_ingest import ingest_vtt_transcript
 
-tests_dir = os.path.dirname(__file__)
-root_dir = os.path.dirname(tests_dir)
+from conftest import CONFUSE_A_CAT_VTT, PARROT_SKETCH_VTT
 
 
 @pytest.mark.asyncio
@@ -144,9 +143,8 @@ async def test_incremental_index_with_vtt_files():
         # First VTT file ingestion
         print("\n=== Import first VTT file ===")
         # Import the first transcript
-        DEFAULT_FILE = os.path.join(root_dir, "testdata", "Confuse-A-Cat.vtt")
         transcript1 = await ingest_vtt_transcript(
-            DEFAULT_FILE,
+            CONFUSE_A_CAT_VTT,
             settings,
             dbname=db_path,
         )
@@ -169,9 +167,8 @@ async def test_incremental_index_with_vtt_files():
         settings2.semantic_ref_index_settings.auto_extract_knowledge = False
 
         # Ingest the second transcript
-        DEFAULT_FILE = os.path.join(root_dir, "testdata", "Parrot_Sketch.vtt")
         transcript2 = await ingest_vtt_transcript(
-            DEFAULT_FILE,
+            PARROT_SKETCH_VTT,
             settings2,
             dbname=db_path,
         )
diff --git a/tests/test/test_podcasts.py b/tests/test/test_podcasts.py
index 2ac9b3a..6f901a7 100644
--- a/tests/test/test_podcasts.py
+++ b/tests/test/test_podcasts.py
@@ -13,9 +13,7 @@
 from typeagent.podcasts import podcast_ingest
 from typeagent.podcasts.podcast import Podcast
 
-tests_dir = os.path.dirname(__file__)
-root_dir = os.path.dirname(tests_dir)
-DEFAULT_FILE = os.path.join(root_dir, "testdata", "FakePodcast.txt")
+from conftest import FAKE_PODCAST_TXT
 
 
 @pytest.mark.asyncio
@@ -25,7 +23,7 @@ async def test_ingest_podcast(
     # Import the podcast
     settings = ConversationSettings(embedding_model)
     pod = await podcast_ingest.ingest_podcast(
-        DEFAULT_FILE,
+        FAKE_PODCAST_TXT,
         settings,
         None,
         Datetime.now(timezone.utc),  # Use timezone-aware datetime
diff --git a/tests/test/test_transcripts.py b/tests/test/test_transcripts.py
index 2efef03..9079833 100644
--- a/tests/test/test_transcripts.py
+++ b/tests/test/test_transcripts.py
@@ -21,6 +21,8 @@
     webvtt_timestamp_to_seconds,
 )
 
+from conftest import CONFUSE_A_CAT_VTT, has_testdata_file, PARROT_SKETCH_VTT
+
 
 def test_extract_speaker_from_text():
     """Test speaker extraction from various text formats."""
@@ -67,12 +69,12 @@ def test_webvtt_timestamp_conversion():
 
 
 @pytest.mark.skipif(
-    not os.path.exists("tests/testdata/Confuse-A-Cat.vtt"),
+    not has_testdata_file("Confuse-A-Cat.vtt"),
     reason="Test VTT file not found",
 )
 def test_get_transcript_info():
     """Test getting basic information from a VTT file."""
-    vtt_file = "tests/testdata/Confuse-A-Cat.vtt"
+    vtt_file = CONFUSE_A_CAT_VTT
 
     # Test duration
     duration = get_transcript_duration(vtt_file)
@@ -93,7 +95,7 @@ def conversation_settings(
 
 
 @pytest.mark.skipif(
-    not os.path.exists("tests/testdata/Confuse-A-Cat.vtt"),
+    not has_testdata_file("Confuse-A-Cat.vtt"),
     reason="Test VTT file not found",
 )
 @pytest.mark.asyncio
@@ -108,7 +110,7 @@ async def test_ingest_vtt_transcript(conversation_settings: ConversationSettings
     from typeagent.storage.memory.semrefindex import TermToSemanticRefIndex
     from typeagent.transcripts.transcript_ingest import parse_voice_tags
 
-    vtt_file = "tests/testdata/Confuse-A-Cat.vtt"
+    vtt_file = CONFUSE_A_CAT_VTT
 
     # Use in-memory storage to avoid database cleanup issues
     settings = conversation_settings
@@ -264,7 +266,7 @@ async def test_transcript_knowledge_extraction_slow(
     settings = ConversationSettings(embedding_model)
 
     # Parse first 5 captions from Parrot Sketch
-    vtt_file = "tests/testdata/Parrot_Sketch.vtt"
+    vtt_file = PARROT_SKETCH_VTT
     if not os.path.exists(vtt_file):
         pytest.skip(f"Test file {vtt_file} not found")
 

From e3496a6d0ccd61ffb88d123ab7e1c5ba2c505c48 Mon Sep 17 00:00:00 2001
From: Bernhard Merkle <bernhard.merkle@gmail.com>
Date: Thu, 1 Jan 2026 23:43:26 +0100
Subject: [PATCH 2/6] fix the duplication and inconsistency in testdata path
 computation and usage: - to non test code we shall discuss wether testdata
 makes sense at all - here i have implemented a small workaround with reusage
 of conftest.py via a wrapper

---
 src/typeagent/mcp/server.py | 12 +++++++---
 tools/ingest_podcast.py     |  9 ++++----
 tools/query.py              | 21 ++++++++++++-----
 tools/util_testdata.py      | 45 +++++++++++++++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 13 deletions(-)
 create mode 100644 tools/util_testdata.py

diff --git a/src/typeagent/mcp/server.py b/src/typeagent/mcp/server.py
index 0c106ec..433a081 100644
--- a/src/typeagent/mcp/server.py
+++ b/src/typeagent/mcp/server.py
@@ -6,6 +6,8 @@
 
 import argparse
 from dataclasses import dataclass
+from pathlib import Path
+import sys
 import time
 from typing import Any
 
@@ -19,6 +21,12 @@
 # Enable coverage.py before local imports (a no-op unless COVERAGE_PROCESS_START is set).
 coverage.process_startup()
 
+# Add tools/ to path for util_testdata imports
+_REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
+sys.path.insert(0, str(_REPO_ROOT / "tools"))
+
+from util_testdata import EPISODE_53_INDEX  # type: ignore[import-not-found]
+
 from typeagent.aitools import embeddings, utils
 from typeagent.knowpro import answers, query, searchlang
 from typeagent.knowpro.answer_response_schema import AnswerResponse
@@ -166,9 +174,7 @@ async def load_podcast_index_or_database(
     dbname: str | None = None,
 ) -> query.QueryEvalContext[podcast.PodcastMessage, Any]:
     if dbname is None:
-        conversation = await podcast.Podcast.read_from_file(
-            "tests/testdata/Episode_53_AdrianTchaikovsky_index", settings
-        )
+        conversation = await podcast.Podcast.read_from_file(EPISODE_53_INDEX, settings)
     else:
         conversation = await podcast.Podcast.create(settings)
     return query.QueryEvalContext(conversation)
diff --git a/tools/ingest_podcast.py b/tools/ingest_podcast.py
index 90e80ec..5bdff25 100644
--- a/tools/ingest_podcast.py
+++ b/tools/ingest_podcast.py
@@ -2,11 +2,12 @@
 import asyncio
 import os
 
+from util_testdata import EPISODE_53_TRANSCRIPT  # type: ignore[attr-defined]
+
 from typeagent.aitools.utils import load_dotenv
 from typeagent.knowpro.convsettings import ConversationSettings
 from typeagent.podcasts.podcast_ingest import ingest_podcast
 
-DEFAULT_TRANSCRIPT = "testdata/Episode_53_AdrianTchaikovsky.txt"
 CHARS_PER_MINUTE = 1050  # My guess for average speech rate incl. overhead
 
 
@@ -50,9 +51,9 @@ async def main():
     if args.database is not None and args.json_output is not None:
         raise SystemExit("Please use at most one of --database and --json-output")
     if args.transcript is None:
-        if os.path.exists(DEFAULT_TRANSCRIPT):
-            args.transcript = DEFAULT_TRANSCRIPT
-            print("Reading default transcript:", DEFAULT_TRANSCRIPT)
+        if os.path.exists(EPISODE_53_TRANSCRIPT):
+            args.transcript = EPISODE_53_TRANSCRIPT
+            print("Reading default transcript:", EPISODE_53_TRANSCRIPT)
         else:
             raise SystemExit("Please provide a transcript file to ingest")
 
diff --git a/tools/query.py b/tools/query.py
index d11f643..c8b5db1 100644
--- a/tools/query.py
+++ b/tools/query.py
@@ -29,6 +29,13 @@
 except ImportError:
     pass
 
+# fmt: off
+from util_testdata import (  # type: ignore[attr-defined]
+    EPISODE_53_ANSWERS, # type: ignore[import-not-found]
+    EPISODE_53_INDEX, # type: ignore[import-not-found]
+    EPISODE_53_SEARCH, # type: ignore[import-not-found]
+)
+
 import typechat
 
 from typeagent.aitools import embeddings, utils
@@ -58,6 +65,11 @@
 from typeagent.storage.sqlite.provider import SqliteStorageProvider
 from typeagent.storage.utils import create_storage_provider
 
+# fmt: on
+
+
+# fmt: on
+
 ### Classes ###
 
 
@@ -925,27 +937,24 @@ def make_arg_parser(description: str) -> argparse.ArgumentParser:
         ),
     )
 
-    default_podcast_file = "tests/testdata/Episode_53_AdrianTchaikovsky_index"
     parser.add_argument(
         "--podcast",
         type=str,
-        default=default_podcast_file,
+        default=EPISODE_53_INDEX,
         help="Path to the podcast index files (excluding the '_index.json' suffix)",
     )
-    default_qafile = "tests/testdata/Episode_53_Answer_results.json"
     explain_qa = "a list of questions and answers to test the full pipeline"
     parser.add_argument(
         "--qafile",
         type=str,
-        default=default_qafile,
+        default=EPISODE_53_ANSWERS,
         help=f"Path to the Answer_results.json file ({explain_qa})",
     )
-    default_srfile = "tests/testdata/Episode_53_Search_results.json"
     explain_sr = "a list of intermediate results from stages 1, 2 and 3"
     parser.add_argument(
         "--srfile",
         type=str,
-        default=default_srfile,
+        default=EPISODE_53_SEARCH,
         help=f"Path to the Search_results.json file ({explain_sr})",
     )
     parser.add_argument(
diff --git a/tools/util_testdata.py b/tools/util_testdata.py
new file mode 100644
index 0000000..ee5ee18
--- /dev/null
+++ b/tools/util_testdata.py
@@ -0,0 +1,45 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Utility to import testdata path constants from conftest.py.
+
+This module handles adding tests/test to sys.path so that conftest.py
+can be imported from non-test code (tools/, src/).
+
+Usage:
+    from util_testdata import EPISODE_53_INDEX, EPISODE_53_ANSWERS, ...
+"""
+
+from pathlib import Path
+import sys
+
+# Add tests/test to path for conftest imports
+_REPO_ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(_REPO_ROOT / "tests" / "test"))
+
+# Re-export all testdata constants from conftest
+from conftest import (  # type: ignore[import-not-found]  # noqa: E402
+    CONFUSE_A_CAT_VTT,
+    EPISODE_53_ANSWERS,
+    EPISODE_53_INDEX,
+    EPISODE_53_SEARCH,
+    EPISODE_53_TRANSCRIPT,
+    FAKE_PODCAST_TXT,
+    get_repo_root,
+    get_testdata_path,
+    has_testdata_file,
+    PARROT_SKETCH_VTT,
+)
+
+__all__ = [
+    "CONFUSE_A_CAT_VTT",
+    "EPISODE_53_ANSWERS",
+    "EPISODE_53_INDEX",
+    "EPISODE_53_SEARCH",
+    "EPISODE_53_TRANSCRIPT",
+    "FAKE_PODCAST_TXT",
+    "PARROT_SKETCH_VTT",
+    "get_repo_root",
+    "get_testdata_path",
+    "has_testdata_file",
+]

From 6a85f9e8ad1b378ceaef09a4d0c3addf27f52a58 Mon Sep 17 00:00:00 2001
From: Bernhard Merkle <bernhard.merkle@gmail.com>
Date: Fri, 2 Jan 2026 07:47:10 +0100
Subject: [PATCH 3/6] Refactor podcast ingestion and query tools: - removed
 dependency on tests aka conftest.py - added explicit arguments for  passing
 in testdata and validation logic

---
 src/typeagent/mcp/server.py | 78 +++++++++++++++++++++++++++++--------
 tools/ingest_podcast.py     | 17 ++++----
 tools/query.py              | 35 ++++++++++-------
 tools/util_testdata.py      | 45 ---------------------
 4 files changed, 92 insertions(+), 83 deletions(-)
 delete mode 100644 tools/util_testdata.py

diff --git a/src/typeagent/mcp/server.py b/src/typeagent/mcp/server.py
index 433a081..0ff612f 100644
--- a/src/typeagent/mcp/server.py
+++ b/src/typeagent/mcp/server.py
@@ -6,8 +6,7 @@
 
 import argparse
 from dataclasses import dataclass
-from pathlib import Path
-import sys
+import os
 import time
 from typing import Any
 
@@ -21,12 +20,6 @@
 # Enable coverage.py before local imports (a no-op unless COVERAGE_PROCESS_START is set).
 coverage.process_startup()
 
-# Add tools/ to path for util_testdata imports
-_REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
-sys.path.insert(0, str(_REPO_ROOT / "tools"))
-
-from util_testdata import EPISODE_53_INDEX  # type: ignore[import-not-found]
-
 from typeagent.aitools import embeddings, utils
 from typeagent.knowpro import answers, query, searchlang
 from typeagent.knowpro.answer_response_schema import AnswerResponse
@@ -36,6 +29,9 @@
 from typeagent.storage.memory.semrefindex import TermToSemanticRefIndex
 from typeagent.storage.utils import create_storage_provider
 
+# Example podcast index path for documentation and error messages
+_EXAMPLE_PODCAST_INDEX = "tests/testdata/Episode_53_AdrianTchaikovsky_index"
+
 
 class MCPTypeChatModel(typechat.TypeChatLanguageModel):
     """TypeChat language model that uses MCP sampling API instead of direct API calls."""
@@ -150,7 +146,9 @@ async def make_context(
         entities_top_k=50, topics_top_k=50, messages_top_k=None, chunking=None
     )
 
-    query_context = await load_podcast_index_or_database(settings, dbname)
+    query_context = await load_podcast_database_or_index(
+        settings, dbname, _podcast_index
+    )
 
     # Use MCP-based model instead of one that requires API keys
     model = MCPTypeChatModel(session)
@@ -169,22 +167,33 @@ async def make_context(
     return context
 
 
-async def load_podcast_index_or_database(
+async def load_podcast_database_or_index(
     settings: ConversationSettings,
     dbname: str | None = None,
+    podcast_index: str | None = None,
 ) -> query.QueryEvalContext[podcast.PodcastMessage, Any]:
-    if dbname is None:
-        conversation = await podcast.Podcast.read_from_file(EPISODE_53_INDEX, settings)
-    else:
+    if dbname is not None:
+        # Load from SQLite database
         conversation = await podcast.Podcast.create(settings)
+    elif podcast_index is not None:
+        # Load from JSON index files
+        conversation = await podcast.Podcast.read_from_file(podcast_index, settings)
+    else:
+        raise ValueError(
+            "Either --database or --podcast-index must be specified. "
+            "Use --podcast-index to specify the path to podcast index files "
+            f"(e.g., '{_EXAMPLE_PODCAST_INDEX}')."
+        )
     return query.QueryEvalContext(conversation)
 
 
 # Create an MCP server
 mcp = FastMCP("typagent")
 
-# Global variable to store database path (set via command-line argument)
+# Global variables to store command-line arguments
+# (no other straightforward way to pass to tool handlers)
 _dbname: str | None = None
+_podcast_index: str | None = None
 
 
 @dataclass
@@ -251,12 +260,49 @@ async def query_conversation(
         "--database",
         type=str,
         default=None,
-        help="Path to the SQLite database file (default: load from JSON file)",
+        help="Path to a SQLite database file with pre-indexed podcast data",
+    )
+    parser.add_argument(
+        "-p",
+        "--podcast-index",
+        type=str,
+        default=None,
+        help="Path to podcast index files (excluding '_data.json' suffix), "
+        f"e.g., '{_EXAMPLE_PODCAST_INDEX}'",
     )
     args = parser.parse_args()
 
-    # Store database path in global variable (no other straightforward way to pass to tool)
+    # Validate arguments
+    if args.database is None and args.podcast_index is None:
+        parser.error(
+            "Either --database or --podcast-index is required.\n"
+            "Example: python -m typeagent.mcp.server "
+            f"--podcast-index {_EXAMPLE_PODCAST_INDEX}"
+        )
+
+    if args.database is not None and args.podcast_index is not None:
+        parser.error("Cannot specify both --database and --podcast-index")
+
+    # Validate file existence
+    if args.database is not None and not os.path.exists(args.database):
+        parser.error(
+            f"Database file not found: {args.database}\n"
+            "Please provide a valid path to an existing SQLite database."
+        )
+
+    if args.podcast_index is not None:
+        data_file = args.podcast_index + "_data.json"
+        if not os.path.exists(data_file):
+            parser.error(
+                f"Podcast index file not found: {data_file}\n"
+                "Please provide a valid path to podcast index files "
+                "(without the '_data.json' suffix).\n"
+                f"Example: {_EXAMPLE_PODCAST_INDEX}"
+            )
+
+    # Store in global variables for tool handlers
     _dbname = args.database
+    _podcast_index = args.podcast_index
 
     # Use stdio transport for simplicity
     mcp.run(transport="stdio")
diff --git a/tools/ingest_podcast.py b/tools/ingest_podcast.py
index 5bdff25..6ff9cdc 100644
--- a/tools/ingest_podcast.py
+++ b/tools/ingest_podcast.py
@@ -2,8 +2,6 @@
 import asyncio
 import os
 
-from util_testdata import EPISODE_53_TRANSCRIPT  # type: ignore[attr-defined]
-
 from typeagent.aitools.utils import load_dotenv
 from typeagent.knowpro.convsettings import ConversationSettings
 from typeagent.podcasts.podcast_ingest import ingest_podcast
@@ -51,11 +49,16 @@ async def main():
     if args.database is not None and args.json_output is not None:
         raise SystemExit("Please use at most one of --database and --json-output")
     if args.transcript is None:
-        if os.path.exists(EPISODE_53_TRANSCRIPT):
-            args.transcript = EPISODE_53_TRANSCRIPT
-            print("Reading default transcript:", EPISODE_53_TRANSCRIPT)
-        else:
-            raise SystemExit("Please provide a transcript file to ingest")
+        raise SystemExit(
+            "Error: A transcript file is required.\n"
+            "Usage: python ingest_podcast.py <transcript_file>\n"
+            "Example: python ingest_podcast.py path/to/transcript.vtt"
+        )
+    if not os.path.exists(args.transcript):
+        raise SystemExit(
+            f"Error: Transcript file not found: {args.transcript}\n"
+            "Please verify the path exists and is accessible."
+        )
 
     load_dotenv()
 
diff --git a/tools/query.py b/tools/query.py
index c8b5db1..83e4339 100644
--- a/tools/query.py
+++ b/tools/query.py
@@ -29,13 +29,6 @@
 except ImportError:
     pass
 
-# fmt: off
-from util_testdata import (  # type: ignore[attr-defined]
-    EPISODE_53_ANSWERS, # type: ignore[import-not-found]
-    EPISODE_53_INDEX, # type: ignore[import-not-found]
-    EPISODE_53_SEARCH, # type: ignore[import-not-found]
-)
-
 import typechat
 
 from typeagent.aitools import embeddings, utils
@@ -65,11 +58,6 @@
 from typeagent.storage.sqlite.provider import SqliteStorageProvider
 from typeagent.storage.utils import create_storage_provider
 
-# fmt: on
-
-
-# fmt: on
-
 ### Classes ###
 
 
@@ -548,6 +536,23 @@ async def main():
     args = parser.parse_args()
     fill_in_debug_defaults(parser, args)
 
+    # Validate required podcast argument
+    if args.podcast is None and args.database is None:
+        raise SystemExit(
+            "Error: Either --podcast or --database is required.\n"
+            "Usage: python query.py --podcast <path_to_index>\n"
+            "   or: python query.py --database <path_to_database>\n"
+            "Example: python query.py --podcast tests/testdata/Episode_53_index"
+        )
+    if args.podcast is not None:
+        index_file = args.podcast + "_index.json"
+        if not os.path.exists(index_file):
+            raise SystemExit(
+                f"Error: Podcast index file not found: {index_file}\n"
+                "Please verify the path exists and is accessible.\n"
+                "Note: The path should exclude the '_index.json' suffix."
+            )
+
     if args.logfire:
         utils.setup_logfire()
 
@@ -940,21 +945,21 @@ def make_arg_parser(description: str) -> argparse.ArgumentParser:
     parser.add_argument(
         "--podcast",
         type=str,
-        default=EPISODE_53_INDEX,
+        default=None,
         help="Path to the podcast index files (excluding the '_index.json' suffix)",
     )
     explain_qa = "a list of questions and answers to test the full pipeline"
     parser.add_argument(
         "--qafile",
         type=str,
-        default=EPISODE_53_ANSWERS,
+        default=None,
         help=f"Path to the Answer_results.json file ({explain_qa})",
     )
     explain_sr = "a list of intermediate results from stages 1, 2 and 3"
     parser.add_argument(
         "--srfile",
         type=str,
-        default=EPISODE_53_SEARCH,
+        default=None,
         help=f"Path to the Search_results.json file ({explain_sr})",
     )
     parser.add_argument(
diff --git a/tools/util_testdata.py b/tools/util_testdata.py
deleted file mode 100644
index ee5ee18..0000000
--- a/tools/util_testdata.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-"""Utility to import testdata path constants from conftest.py.
-
-This module handles adding tests/test to sys.path so that conftest.py
-can be imported from non-test code (tools/, src/).
-
-Usage:
-    from util_testdata import EPISODE_53_INDEX, EPISODE_53_ANSWERS, ...
-"""
-
-from pathlib import Path
-import sys
-
-# Add tests/test to path for conftest imports
-_REPO_ROOT = Path(__file__).resolve().parent.parent
-sys.path.insert(0, str(_REPO_ROOT / "tests" / "test"))
-
-# Re-export all testdata constants from conftest
-from conftest import (  # type: ignore[import-not-found]  # noqa: E402
-    CONFUSE_A_CAT_VTT,
-    EPISODE_53_ANSWERS,
-    EPISODE_53_INDEX,
-    EPISODE_53_SEARCH,
-    EPISODE_53_TRANSCRIPT,
-    FAKE_PODCAST_TXT,
-    get_repo_root,
-    get_testdata_path,
-    has_testdata_file,
-    PARROT_SKETCH_VTT,
-)
-
-__all__ = [
-    "CONFUSE_A_CAT_VTT",
-    "EPISODE_53_ANSWERS",
-    "EPISODE_53_INDEX",
-    "EPISODE_53_SEARCH",
-    "EPISODE_53_TRANSCRIPT",
-    "FAKE_PODCAST_TXT",
-    "PARROT_SKETCH_VTT",
-    "get_repo_root",
-    "get_testdata_path",
-    "has_testdata_file",
-]

From 18c7c8dcd1b5ce5c7c916d12edafe042bf53fef2 Mon Sep 17 00:00:00 2001
From: Bernhard Merkle <bernhard.merkle@gmail.com>
Date: Fri, 2 Jan 2026 09:00:38 +0100
Subject: [PATCH 4/6] - moved all unit tests from tests/test to tests - adopted
 conftest.py to use only parent instaed of parent.parent

---
 tests/{test => }/conftest.py                              | 2 +-
 tests/{test => }/test_add_messages_with_indexing.py       | 0
 tests/{test => }/test_auth.py                             | 0
 tests/{test => }/test_collections.py                      | 0
 tests/{test => }/test_conversation_metadata.py            | 0
 tests/{test => }/test_demo.py                             | 0
 tests/{test => }/test_embedding_consistency.py            | 0
 tests/{test => }/test_embeddings.py                       | 0
 tests/{test => }/test_factory.py                          | 0
 tests/{test => }/test_incremental_index.py                | 0
 tests/{test => }/test_interfaces.py                       | 0
 tests/{test => }/test_knowledge.py                        | 0
 tests/{test => }/test_kplib.py                            | 0
 tests/{test => }/test_mcp_server.py                       | 0
 tests/{test => }/test_message_text_index_population.py    | 0
 tests/{test => }/test_message_text_index_serialization.py | 0
 tests/{test => }/test_messageindex.py                     | 0
 tests/{test => }/test_online.py                           | 0
 tests/{test => }/test_podcast_incremental.py              | 0
 tests/{test => }/test_podcasts.py                         | 0
 tests/{test => }/test_property_index_population.py        | 0
 tests/{test => }/test_propindex.py                        | 0
 tests/{test => }/test_query.py                            | 0
 tests/{test => }/test_query_method.py                     | 0
 tests/{test => }/test_related_terms_fast.py               | 0
 tests/{test => }/test_related_terms_index_population.py   | 0
 tests/{test => }/test_reltermsindex.py                    | 0
 tests/{test => }/test_searchlib.py                        | 0
 tests/{test => }/test_secindex.py                         | 0
 tests/{test => }/test_secindex_storage_integration.py     | 0
 tests/{test => }/test_semrefindex.py                      | 0
 tests/{test => }/test_serialization.py                    | 0
 tests/{test => }/test_sqlite_indexes.py                   | 0
 tests/{test => }/test_sqlitestore.py                      | 0
 tests/{test => }/test_storage_providers_unified.py        | 0
 tests/{test => }/test_timestampindex.py                   | 0
 tests/{test => }/test_transcripts.py                      | 0
 tests/{test => }/test_utils.py                            | 0
 tests/{test => }/test_vectorbase.py                       | 0
 39 files changed, 1 insertion(+), 1 deletion(-)
 rename tests/{test => }/conftest.py (99%)
 rename tests/{test => }/test_add_messages_with_indexing.py (100%)
 rename tests/{test => }/test_auth.py (100%)
 rename tests/{test => }/test_collections.py (100%)
 rename tests/{test => }/test_conversation_metadata.py (100%)
 rename tests/{test => }/test_demo.py (100%)
 rename tests/{test => }/test_embedding_consistency.py (100%)
 rename tests/{test => }/test_embeddings.py (100%)
 rename tests/{test => }/test_factory.py (100%)
 rename tests/{test => }/test_incremental_index.py (100%)
 rename tests/{test => }/test_interfaces.py (100%)
 rename tests/{test => }/test_knowledge.py (100%)
 rename tests/{test => }/test_kplib.py (100%)
 rename tests/{test => }/test_mcp_server.py (100%)
 rename tests/{test => }/test_message_text_index_population.py (100%)
 rename tests/{test => }/test_message_text_index_serialization.py (100%)
 rename tests/{test => }/test_messageindex.py (100%)
 rename tests/{test => }/test_online.py (100%)
 rename tests/{test => }/test_podcast_incremental.py (100%)
 rename tests/{test => }/test_podcasts.py (100%)
 rename tests/{test => }/test_property_index_population.py (100%)
 rename tests/{test => }/test_propindex.py (100%)
 rename tests/{test => }/test_query.py (100%)
 rename tests/{test => }/test_query_method.py (100%)
 rename tests/{test => }/test_related_terms_fast.py (100%)
 rename tests/{test => }/test_related_terms_index_population.py (100%)
 rename tests/{test => }/test_reltermsindex.py (100%)
 rename tests/{test => }/test_searchlib.py (100%)
 rename tests/{test => }/test_secindex.py (100%)
 rename tests/{test => }/test_secindex_storage_integration.py (100%)
 rename tests/{test => }/test_semrefindex.py (100%)
 rename tests/{test => }/test_serialization.py (100%)
 rename tests/{test => }/test_sqlite_indexes.py (100%)
 rename tests/{test => }/test_sqlitestore.py (100%)
 rename tests/{test => }/test_storage_providers_unified.py (100%)
 rename tests/{test => }/test_timestampindex.py (100%)
 rename tests/{test => }/test_transcripts.py (100%)
 rename tests/{test => }/test_utils.py (100%)
 rename tests/{test => }/test_vectorbase.py (100%)

diff --git a/tests/test/conftest.py b/tests/conftest.py
similarity index 99%
rename from tests/test/conftest.py
rename to tests/conftest.py
index b027016..3533e23 100644
--- a/tests/test/conftest.py
+++ b/tests/conftest.py
@@ -46,7 +46,7 @@
 
 # --- Testdata path utilities ---
 # Locate the tests directory relative to this file
-_TESTS_DIR = Path(__file__).resolve().parent.parent  # tests/test -> tests
+_TESTS_DIR = Path(__file__).resolve().parent  # tests/
 _TESTDATA_DIR = _TESTS_DIR / "testdata"
 _REPO_ROOT = _TESTS_DIR.parent
 
diff --git a/tests/test/test_add_messages_with_indexing.py b/tests/test_add_messages_with_indexing.py
similarity index 100%
rename from tests/test/test_add_messages_with_indexing.py
rename to tests/test_add_messages_with_indexing.py
diff --git a/tests/test/test_auth.py b/tests/test_auth.py
similarity index 100%
rename from tests/test/test_auth.py
rename to tests/test_auth.py
diff --git a/tests/test/test_collections.py b/tests/test_collections.py
similarity index 100%
rename from tests/test/test_collections.py
rename to tests/test_collections.py
diff --git a/tests/test/test_conversation_metadata.py b/tests/test_conversation_metadata.py
similarity index 100%
rename from tests/test/test_conversation_metadata.py
rename to tests/test_conversation_metadata.py
diff --git a/tests/test/test_demo.py b/tests/test_demo.py
similarity index 100%
rename from tests/test/test_demo.py
rename to tests/test_demo.py
diff --git a/tests/test/test_embedding_consistency.py b/tests/test_embedding_consistency.py
similarity index 100%
rename from tests/test/test_embedding_consistency.py
rename to tests/test_embedding_consistency.py
diff --git a/tests/test/test_embeddings.py b/tests/test_embeddings.py
similarity index 100%
rename from tests/test/test_embeddings.py
rename to tests/test_embeddings.py
diff --git a/tests/test/test_factory.py b/tests/test_factory.py
similarity index 100%
rename from tests/test/test_factory.py
rename to tests/test_factory.py
diff --git a/tests/test/test_incremental_index.py b/tests/test_incremental_index.py
similarity index 100%
rename from tests/test/test_incremental_index.py
rename to tests/test_incremental_index.py
diff --git a/tests/test/test_interfaces.py b/tests/test_interfaces.py
similarity index 100%
rename from tests/test/test_interfaces.py
rename to tests/test_interfaces.py
diff --git a/tests/test/test_knowledge.py b/tests/test_knowledge.py
similarity index 100%
rename from tests/test/test_knowledge.py
rename to tests/test_knowledge.py
diff --git a/tests/test/test_kplib.py b/tests/test_kplib.py
similarity index 100%
rename from tests/test/test_kplib.py
rename to tests/test_kplib.py
diff --git a/tests/test/test_mcp_server.py b/tests/test_mcp_server.py
similarity index 100%
rename from tests/test/test_mcp_server.py
rename to tests/test_mcp_server.py
diff --git a/tests/test/test_message_text_index_population.py b/tests/test_message_text_index_population.py
similarity index 100%
rename from tests/test/test_message_text_index_population.py
rename to tests/test_message_text_index_population.py
diff --git a/tests/test/test_message_text_index_serialization.py b/tests/test_message_text_index_serialization.py
similarity index 100%
rename from tests/test/test_message_text_index_serialization.py
rename to tests/test_message_text_index_serialization.py
diff --git a/tests/test/test_messageindex.py b/tests/test_messageindex.py
similarity index 100%
rename from tests/test/test_messageindex.py
rename to tests/test_messageindex.py
diff --git a/tests/test/test_online.py b/tests/test_online.py
similarity index 100%
rename from tests/test/test_online.py
rename to tests/test_online.py
diff --git a/tests/test/test_podcast_incremental.py b/tests/test_podcast_incremental.py
similarity index 100%
rename from tests/test/test_podcast_incremental.py
rename to tests/test_podcast_incremental.py
diff --git a/tests/test/test_podcasts.py b/tests/test_podcasts.py
similarity index 100%
rename from tests/test/test_podcasts.py
rename to tests/test_podcasts.py
diff --git a/tests/test/test_property_index_population.py b/tests/test_property_index_population.py
similarity index 100%
rename from tests/test/test_property_index_population.py
rename to tests/test_property_index_population.py
diff --git a/tests/test/test_propindex.py b/tests/test_propindex.py
similarity index 100%
rename from tests/test/test_propindex.py
rename to tests/test_propindex.py
diff --git a/tests/test/test_query.py b/tests/test_query.py
similarity index 100%
rename from tests/test/test_query.py
rename to tests/test_query.py
diff --git a/tests/test/test_query_method.py b/tests/test_query_method.py
similarity index 100%
rename from tests/test/test_query_method.py
rename to tests/test_query_method.py
diff --git a/tests/test/test_related_terms_fast.py b/tests/test_related_terms_fast.py
similarity index 100%
rename from tests/test/test_related_terms_fast.py
rename to tests/test_related_terms_fast.py
diff --git a/tests/test/test_related_terms_index_population.py b/tests/test_related_terms_index_population.py
similarity index 100%
rename from tests/test/test_related_terms_index_population.py
rename to tests/test_related_terms_index_population.py
diff --git a/tests/test/test_reltermsindex.py b/tests/test_reltermsindex.py
similarity index 100%
rename from tests/test/test_reltermsindex.py
rename to tests/test_reltermsindex.py
diff --git a/tests/test/test_searchlib.py b/tests/test_searchlib.py
similarity index 100%
rename from tests/test/test_searchlib.py
rename to tests/test_searchlib.py
diff --git a/tests/test/test_secindex.py b/tests/test_secindex.py
similarity index 100%
rename from tests/test/test_secindex.py
rename to tests/test_secindex.py
diff --git a/tests/test/test_secindex_storage_integration.py b/tests/test_secindex_storage_integration.py
similarity index 100%
rename from tests/test/test_secindex_storage_integration.py
rename to tests/test_secindex_storage_integration.py
diff --git a/tests/test/test_semrefindex.py b/tests/test_semrefindex.py
similarity index 100%
rename from tests/test/test_semrefindex.py
rename to tests/test_semrefindex.py
diff --git a/tests/test/test_serialization.py b/tests/test_serialization.py
similarity index 100%
rename from tests/test/test_serialization.py
rename to tests/test_serialization.py
diff --git a/tests/test/test_sqlite_indexes.py b/tests/test_sqlite_indexes.py
similarity index 100%
rename from tests/test/test_sqlite_indexes.py
rename to tests/test_sqlite_indexes.py
diff --git a/tests/test/test_sqlitestore.py b/tests/test_sqlitestore.py
similarity index 100%
rename from tests/test/test_sqlitestore.py
rename to tests/test_sqlitestore.py
diff --git a/tests/test/test_storage_providers_unified.py b/tests/test_storage_providers_unified.py
similarity index 100%
rename from tests/test/test_storage_providers_unified.py
rename to tests/test_storage_providers_unified.py
diff --git a/tests/test/test_timestampindex.py b/tests/test_timestampindex.py
similarity index 100%
rename from tests/test/test_timestampindex.py
rename to tests/test_timestampindex.py
diff --git a/tests/test/test_transcripts.py b/tests/test_transcripts.py
similarity index 100%
rename from tests/test/test_transcripts.py
rename to tests/test_transcripts.py
diff --git a/tests/test/test_utils.py b/tests/test_utils.py
similarity index 100%
rename from tests/test/test_utils.py
rename to tests/test_utils.py
diff --git a/tests/test/test_vectorbase.py b/tests/test_vectorbase.py
similarity index 100%
rename from tests/test/test_vectorbase.py
rename to tests/test_vectorbase.py

From 75a5f4973b51b659132e678d0279773b9b3dca68 Mon Sep 17 00:00:00 2001
From: Bernhard Merkle <bernhard.merkle@gmail.com>
Date: Fri, 2 Jan 2026 09:02:14 +0100
Subject: [PATCH 5/6] The fixture was creating a minimal environment with only
 API keys, but the subprocess needs the full environment (PATH, PYTHONPATH,
 system libraries, etc.) to import Python modules correctly.

Fix: Changed the fixture to inherit the full parent environment (dict(os.environ)) instead of building a minimal one.
---
 tests/test_mcp_server.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py
index fc825ab..9669e91 100644
--- a/tests/test_mcp_server.py
+++ b/tests/test_mcp_server.py
@@ -14,17 +14,21 @@
 from mcp.shared.context import RequestContext
 from mcp.types import CreateMessageRequestParams, CreateMessageResult, TextContent
 
+from conftest import EPISODE_53_INDEX
+
 
 @pytest.fixture
 def server_params() -> StdioServerParameters:
-    """Create MCP server parameters with minimal environment."""
-    env = {}
+    """Create MCP server parameters with environment inherited from parent process."""
+    # Start with the full environment - subprocess needs PATH, PYTHONPATH, etc.
+    env = dict(os.environ)
+    # Coverage support
     if "COVERAGE_PROCESS_START" in os.environ:
         env["COVERAGE_PROCESS_START"] = os.environ["COVERAGE_PROCESS_START"]
 
     return StdioServerParameters(
         command=sys.executable,
-        args=["-m", "typeagent.mcp.server"],
+        args=["-m", "typeagent.mcp.server", "--podcast-index", EPISODE_53_INDEX],
         env=env,
     )
 
@@ -90,17 +94,6 @@ async def test_mcp_server_query_conversation_slow(
     from mcp import ClientSession
     from mcp.client.stdio import stdio_client
 
-    # Pass through environment variables needed for authentication
-    # otherwise this test will fail in the CI on Windows only
-    if not (server_params.env) is None:
-        server_params.env.update(
-            {
-                k: v
-                for k, v in os.environ.items()
-                if k.startswith(("AZURE_", "OPENAI_")) or k in ("CREDENTIALS_JSON",)
-            }
-        )
-
     # Create client session and connect to server
     async with stdio_client(server_params) as (read, write):
         async with ClientSession(
@@ -133,6 +126,7 @@ async def test_mcp_server_query_conversation_slow(
             # Parse response (it should be JSON with success, answer, time_used)
             import json
 
+            print(f"Response text: {response_text}")
             try:
                 response_data = json.loads(response_text)
             except json.JSONDecodeError as e:

From 9c953198b44d19a65820b51c68e1fd0b523739df Mon Sep 17 00:00:00 2001
From: Bernhard Merkle <bernhard.merkle@gmail.com>
Date: Fri, 2 Jan 2026 23:29:51 +0100
Subject: [PATCH 6/6] - Add environment variable handling for authentication in
 test_mcp_server, or windows ci - removed debug output

---
 tests/test_mcp_server.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py
index 9669e91..03fd0e6 100644
--- a/tests/test_mcp_server.py
+++ b/tests/test_mcp_server.py
@@ -94,6 +94,17 @@ async def test_mcp_server_query_conversation_slow(
     from mcp import ClientSession
     from mcp.client.stdio import stdio_client
 
+    # Pass through environment variables needed for authentication
+    # otherwise this test will fail in the CI on Windows only
+    if not (server_params.env) is None:
+        server_params.env.update(
+            {
+                k: v
+                for k, v in os.environ.items()
+                if k.startswith(("AZURE_", "OPENAI_")) or k in ("CREDENTIALS_JSON",)
+            }
+        )
+
     # Create client session and connect to server
     async with stdio_client(server_params) as (read, write):
         async with ClientSession(
@@ -126,7 +137,6 @@ async def test_mcp_server_query_conversation_slow(
             # Parse response (it should be JSON with success, answer, time_used)
             import json
 
-            print(f"Response text: {response_text}")
             try:
                 response_data = json.loads(response_text)
             except json.JSONDecodeError as e: