From 7816281e73a35b382fc78c38330b4d7a4e481a02 Mon Sep 17 00:00:00 2001 From: Bernhard Merkle Date: Thu, 1 Jan 2026 15:36:49 +0100 Subject: [PATCH 1/5] 125-examples: move demos/sample/etc to examples: - moved demo to examples/demo - moved samples to examples/samples - moved gmail to tools/gmail - moved spec to docs/spec later we can delete/update outdated code/doc etc - adapted make and config files, ci, gitignore - adapted links in documentation - removed unused import in simple_query_demo.py --- .github/workflows/ci.yml | 4 ++-- .gitignore | 10 +++++----- AGENTS.md | 2 +- Makefile | 6 +++--- docs/demos.md | 2 +- docs/gmail.md | 2 +- {spec => docs/spec}/coverage.txt | 0 {spec => docs/spec}/indexes_overview.md | 0 {spec => docs/spec}/storage_future_extensions.md | 0 .../spec}/storage_immediate_implementation.md | 0 {spec => docs/spec}/storage_implementation_plan.md | 0 {spec => docs/spec}/storage_spec.md | 0 {demo => examples/demo}/README.md | 4 ++-- {demo => examples/demo}/ingest.py | 0 {demo => examples/demo}/query.py | 0 {demo => examples/demo}/testdata.txt | 0 examples/simple_query_demo.py | 12 +++++------- make.bat | 6 +++--- {gmail => tools/gmail}/gmail_dump.py | 0 19 files changed, 23 insertions(+), 25 deletions(-) rename {spec => docs/spec}/coverage.txt (100%) rename {spec => docs/spec}/indexes_overview.md (100%) rename {spec => docs/spec}/storage_future_extensions.md (100%) rename {spec => docs/spec}/storage_immediate_implementation.md (100%) rename {spec => docs/spec}/storage_implementation_plan.md (100%) rename {spec => docs/spec}/storage_spec.md (100%) rename {demo => examples/demo}/README.md (74%) rename {demo => examples/demo}/ingest.py (100%) rename {demo => examples/demo}/query.py (100%) rename {demo => examples/demo}/testdata.txt (100%) rename {gmail => tools/gmail}/gmail_dump.py (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 88e2112..fa67f4c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ 
-86,13 +86,13 @@ jobs: shell: bash if: matrix.task == 'check' run: | - uv run pyright src tests tools gmail + uv run pyright src tests examples - name: Run Format shell: bash if: matrix.task == 'format' run: | - uv run black -tpy312 -tpy313 -tpy314 src tests tools gmail demo --check + uv run black -tpy312 -tpy313 -tpy314 src tests tools examples --check - name: Login to Azure if: matrix.task == 'test' diff --git a/.gitignore b/.gitignore index 55acbaa..3fac9b7 100644 --- a/.gitignore +++ b/.gitignore @@ -30,13 +30,13 @@ pytest.local.ini # Evaluations /evals -/testdata/Episode_53_Answer_results.json -/testdata/Episode_53_Search_results.json +/examples/testdata/Episode_53_Answer_results.json +/examples/testdata/Episode_53_Search_results.json # Email demo -/gmail/client_secret.json -/gmail/token.json +/tools/gmail/client_secret.json +/tools/gmail/token.json *_dump/ # Monty Python demo -/testdata/MP +/examples/testdata/MP diff --git a/AGENTS.md b/AGENTS.md index 2ebf0a8..148180f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,7 +13,7 @@ When moving, copying or deleting files, use the git commands: `git mv`, `git cp` - Activate `.venv`: `make venv; source .venv/bin/activate` (run this only once) - To get API keys in ad-hoc code, call `typeagent.aitools.utils.load_dotenv()` - Use `pytest test` to run tests in test/ -- Use `pyright` to check type annotations in tools/, test/, typeagent/, gmail/ +- Use `pyright` to check type annotations in src/, tools/, tests/, examples/ - Ignore build/, dist/ - You can also use the pylance extension for type checking in VS Code - Use `make check` to type-check all files diff --git a/Makefile b/Makefile index c3382c4..957a0c3 100644 --- a/Makefile +++ b/Makefile @@ -8,12 +8,12 @@ all: venv format check test build .PHONY: format format: venv - .venv/bin/isort src tests tools gmail demo $(FLAGS) - .venv/bin/black -tpy312 -tpy313 -tpy314 src tests tools gmail demo $(FLAGS) + .venv/bin/isort src tests tools examples $(FLAGS) + .venv/bin/black 
-tpy312 -tpy313 -tpy314 src tests tools examples $(FLAGS) .PHONY: check check: venv - .venv/bin/pyright --pythonpath .venv/bin/python src tests tools gmail + .venv/bin/pyright --pythonpath .venv/bin/python src tests tools examples .PHONY: test test: venv diff --git a/docs/demos.md b/docs/demos.md index 037f6a2..bc0fa99 100644 --- a/docs/demos.md +++ b/docs/demos.md @@ -64,7 +64,7 @@ tool that we used for the Monty Python demo. ### How to use the Gmail API to download messages -In the `gmail/` folder you'll find a tool named `gmail_dump.py` which +In the `tools/gmail/` folder you'll find a tool named `gmail_dump.py` which will download any number of messages (default 50) using the Gmail API. In order to use the Gmail API, however, you have to create a (free) Google Cloud app and configure it appropriately. diff --git a/docs/gmail.md b/docs/gmail.md index d2ef5d8..0bd558c 100644 --- a/docs/gmail.md +++ b/docs/gmail.md @@ -1,6 +1,6 @@ # Extracting GMail Messages -There's a helper script in the repo under `gmail/`. +There's a helper script in the repo under `tools/gmail/`. It requires setting up and creating a Google API project. Until we have time to write this up, your best bet is to ask your favorite search engine or LLM-based chat bot for help. 
diff --git a/spec/coverage.txt b/docs/spec/coverage.txt similarity index 100% rename from spec/coverage.txt rename to docs/spec/coverage.txt diff --git a/spec/indexes_overview.md b/docs/spec/indexes_overview.md similarity index 100% rename from spec/indexes_overview.md rename to docs/spec/indexes_overview.md diff --git a/spec/storage_future_extensions.md b/docs/spec/storage_future_extensions.md similarity index 100% rename from spec/storage_future_extensions.md rename to docs/spec/storage_future_extensions.md diff --git a/spec/storage_immediate_implementation.md b/docs/spec/storage_immediate_implementation.md similarity index 100% rename from spec/storage_immediate_implementation.md rename to docs/spec/storage_immediate_implementation.md diff --git a/spec/storage_implementation_plan.md b/docs/spec/storage_implementation_plan.md similarity index 100% rename from spec/storage_implementation_plan.md rename to docs/spec/storage_implementation_plan.md diff --git a/spec/storage_spec.md b/docs/spec/storage_spec.md similarity index 100% rename from spec/storage_spec.md rename to docs/spec/storage_spec.md diff --git a/demo/README.md b/examples/demo/README.md similarity index 74% rename from demo/README.md rename to examples/demo/README.md index 7466c58..bda9f71 100644 --- a/demo/README.md +++ b/examples/demo/README.md @@ -1,7 +1,7 @@ # Demo scripts The files here are the scripts from -[Getting Started](../docs/getting-started.md). +[Getting Started](../../docs/getting-started.md). - [ingest.py](ingest.py): The ingestion script. - [query.py](query.py): The query script. @@ -9,4 +9,4 @@ The files here are the scripts from Note that for any of this to work you need to acquire an OpenAI API key and set some variables; see -[Environment Variables](../docs/env-vars.md). +[Environment Variables](../../docs/env-vars.md). 
diff --git a/demo/ingest.py b/examples/demo/ingest.py similarity index 100% rename from demo/ingest.py rename to examples/demo/ingest.py diff --git a/demo/query.py b/examples/demo/query.py similarity index 100% rename from demo/query.py rename to examples/demo/query.py diff --git a/demo/testdata.txt b/examples/demo/testdata.txt similarity index 100% rename from demo/testdata.txt rename to examples/demo/testdata.txt diff --git a/examples/simple_query_demo.py b/examples/simple_query_demo.py index 6025673..cbf54ed 100644 --- a/examples/simple_query_demo.py +++ b/examples/simple_query_demo.py @@ -14,9 +14,7 @@ import asyncio from typeagent import create_conversation -from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.aitools.utils import load_dotenv -from typeagent.knowpro.convsettings import ConversationSettings from typeagent.transcripts.transcript import TranscriptMessage, TranscriptMessageMeta @@ -44,9 +42,7 @@ async def main(): metadata=TranscriptMessageMeta(speaker="Instructor"), ), TranscriptMessage( - text_chunks=[ - "Python is a great language for beginners and experts alike." - ], + text_chunks=["Python is a great language for beginners and experts alike."], metadata=TranscriptMessageMeta(speaker="Instructor"), ), TranscriptMessage( @@ -64,8 +60,10 @@ async def main(): print("Adding messages and building indexes...") result = await conv.add_messages_with_indexing(messages) print(f"Conversation ready with {await conv.messages.size()} messages.") - print(f"Added {result.messages_added} messages, {result.semrefs_added} semantic refs") - + print( + f"Added {result.messages_added} messages, {result.semrefs_added} semantic refs" + ) + # Check indexes if conv.secondary_indexes: if conv.secondary_indexes.message_index: diff --git a/make.bat b/make.bat index d4a5dfb..fb4f960 100644 --- a/make.bat +++ b/make.bat @@ -26,14 +26,14 @@ goto help :format if not exist ".venv\" call make.bat venv echo Formatting code... 
-.venv\Scripts\isort src tests tools gmail demo -.venv\Scripts\black src tests tools gmail demo +.venv\Scripts\isort src tests tools examples +.venv\Scripts\black src tests tools examples goto end :check if not exist ".venv\" call make.bat venv echo Running type checks... -.venv\Scripts\pyright --pythonpath .venv\Scripts\python src tests tools gmail demo +.venv\Scripts\pyright --pythonpath .venv\Scripts\python src tests tools examples goto end :test diff --git a/gmail/gmail_dump.py b/tools/gmail/gmail_dump.py similarity index 100% rename from gmail/gmail_dump.py rename to tools/gmail/gmail_dump.py From 3e6f9db89dd7149242bcb6a54526363787d26dd8 Mon Sep 17 00:00:00 2001 From: Bernhard Merkle Date: Thu, 1 Jan 2026 15:53:51 +0100 Subject: [PATCH 2/5] tools shall also be checked --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fa67f4c..3f89c78 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,7 +86,7 @@ jobs: shell: bash if: matrix.task == 'check' run: | - uv run pyright src tests examples + uv run pyright src tests tools examples - name: Run Format shell: bash From 11f9f1e6a00c5e4f83a8edbdbfc57bf8040d281e Mon Sep 17 00:00:00 2001 From: Bernhard Merkle Date: Thu, 1 Jan 2026 19:02:21 +0100 Subject: [PATCH 3/5] fixed path --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 3fac9b7..ca4cb7b 100644 --- a/.gitignore +++ b/.gitignore @@ -30,8 +30,8 @@ pytest.local.ini # Evaluations /evals -/examples/testdata/Episode_53_Answer_results.json -/examples/testdata/Episode_53_Search_results.json +/tests/testdata/Episode_53_Answer_results.json +/tests/testdata/Episode_53_Search_results.json # Email demo /tools/gmail/client_secret.json From 4c32bff4aa967a9b1443724d3733eaae31b30541 Mon Sep 17 00:00:00 2001 From: Bernhard Merkle Date: Thu, 1 Jan 2026 19:13:09 +0100 Subject: [PATCH 4/5] fixed 
remaining paths after move of tests --- docs/demos.md | 10 +++++----- src/typeagent/mcp/server.py | 2 +- tests/test/test_transcripts.py | 10 +++++----- tools/query.py | 6 +++--- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/demos.md b/docs/demos.md index bc0fa99..8c61423 100644 --- a/docs/demos.md +++ b/docs/demos.md @@ -96,21 +96,21 @@ your `*.eml` files from -- every email provider has its own quirks. The podcast demo is actually the easiest to run: The "database" is included in the repo as -`testdata/Episode_53_AdrianTchaikovsky_index*`, +`tests/testdata/Episode_53_AdrianTchaikovsky_index*`, and this is in fact the default "database" used by `tools/query.py` when no `-d`/`--database` flag is given. -This "database" indexes `test/Episode_53_AdrianTchaikovsky.txt`. +This "database" indexes `tests/testdata/Episode_53_AdrianTchaikovsky.txt`. It was created by a one-off script that invoked -`typeagent/podcast/podcast_ingest/ingest_podcast()` +`src/typeagent/podcast/podcast_ingest/ingest_podcast()` and saved to two files by calling the `.ingest()` method on the -returned `typeagent/podcasts/podcast/Podcast` object. +returned `src/typeagent/podcasts/podcast/Podcast` object. Here's a brief sample session: ```sh $ python tools/query.py 1.318s -- Using Azure OpenAI -0.054s -- Loading podcast from 'testdata/Episode_53_AdrianTchaikovsky_index' +0.054s -- Loading podcast from 'tests/testdata/Episode_53_AdrianTchaikovsky_index' TypeAgent demo UI 0.2 (type 'q' to exit) TypeAgent> What did Kevin say to Adrian about science fiction? 
-------------------------------------------------- diff --git a/src/typeagent/mcp/server.py b/src/typeagent/mcp/server.py index 80b7c1b..0c106ec 100644 --- a/src/typeagent/mcp/server.py +++ b/src/typeagent/mcp/server.py @@ -167,7 +167,7 @@ async def load_podcast_index_or_database( ) -> query.QueryEvalContext[podcast.PodcastMessage, Any]: if dbname is None: conversation = await podcast.Podcast.read_from_file( - "testdata/Episode_53_AdrianTchaikovsky_index", settings + "tests/testdata/Episode_53_AdrianTchaikovsky_index", settings ) else: conversation = await podcast.Podcast.create(settings) diff --git a/tests/test/test_transcripts.py b/tests/test/test_transcripts.py index 114b831..83faa6f 100644 --- a/tests/test/test_transcripts.py +++ b/tests/test/test_transcripts.py @@ -67,11 +67,11 @@ def test_webvtt_timestamp_conversion(): @pytest.mark.skipif( - not os.path.exists("testdata/Confuse-A-Cat.vtt"), reason="Test VTT file not found" + not os.path.exists("tests/testdata/Confuse-A-Cat.vtt"), reason="Test VTT file not found" ) def test_get_transcript_info(): """Test getting basic information from a VTT file.""" - vtt_file = "testdata/Confuse-A-Cat.vtt" + vtt_file = "tests/testdata/Confuse-A-Cat.vtt" # Test duration duration = get_transcript_duration(vtt_file) @@ -92,7 +92,7 @@ def conversation_settings( @pytest.mark.skipif( - not os.path.exists("testdata/Confuse-A-Cat.vtt"), reason="Test VTT file not found" + not os.path.exists("tests/testdata/Confuse-A-Cat.vtt"), reason="Test VTT file not found" ) @pytest.mark.asyncio async def test_ingest_vtt_transcript(conversation_settings: ConversationSettings): @@ -106,7 +106,7 @@ async def test_ingest_vtt_transcript(conversation_settings: ConversationSettings from typeagent.storage.memory.semrefindex import TermToSemanticRefIndex from typeagent.transcripts.transcript_ingest import parse_voice_tags - vtt_file = "testdata/Confuse-A-Cat.vtt" + vtt_file = "tests/testdata/Confuse-A-Cat.vtt" # Use in-memory storage to avoid database 
cleanup issues settings = conversation_settings @@ -262,7 +262,7 @@ async def test_transcript_knowledge_extraction_slow( settings = ConversationSettings(embedding_model) # Parse first 5 captions from Parrot Sketch - vtt_file = "testdata/Parrot_Sketch.vtt" + vtt_file = "tests/testdata/Parrot_Sketch.vtt" if not os.path.exists(vtt_file): pytest.skip(f"Test file {vtt_file} not found") diff --git a/tools/query.py b/tools/query.py index b71af7b..d11f643 100644 --- a/tools/query.py +++ b/tools/query.py @@ -925,14 +925,14 @@ def make_arg_parser(description: str) -> argparse.ArgumentParser: ), ) - default_podcast_file = "testdata/Episode_53_AdrianTchaikovsky_index" + default_podcast_file = "tests/testdata/Episode_53_AdrianTchaikovsky_index" parser.add_argument( "--podcast", type=str, default=default_podcast_file, help="Path to the podcast index files (excluding the '_index.json' suffix)", ) - default_qafile = "testdata/Episode_53_Answer_results.json" + default_qafile = "tests/testdata/Episode_53_Answer_results.json" explain_qa = "a list of questions and answers to test the full pipeline" parser.add_argument( "--qafile", @@ -940,7 +940,7 @@ def make_arg_parser(description: str) -> argparse.ArgumentParser: default=default_qafile, help=f"Path to the Answer_results.json file ({explain_qa})", ) - default_srfile = "testdata/Episode_53_Search_results.json" + default_srfile = "tests/testdata/Episode_53_Search_results.json" explain_sr = "a list of intermediate results from stages 1, 2 and 3" parser.add_argument( "--srfile", From ee028f2299a09786303ba6276d7b534533065368 Mon Sep 17 00:00:00 2001 From: Bernhard Merkle Date: Thu, 1 Jan 2026 19:14:34 +0100 Subject: [PATCH 5/5] reformat --- tests/test/test_transcripts.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test/test_transcripts.py b/tests/test/test_transcripts.py index 83faa6f..2efef03 100644 --- a/tests/test/test_transcripts.py +++ b/tests/test/test_transcripts.py @@ -67,7 +67,8 @@ def 
test_webvtt_timestamp_conversion(): @pytest.mark.skipif( - not os.path.exists("tests/testdata/Confuse-A-Cat.vtt"), reason="Test VTT file not found" + not os.path.exists("tests/testdata/Confuse-A-Cat.vtt"), + reason="Test VTT file not found", ) def test_get_transcript_info(): """Test getting basic information from a VTT file.""" @@ -92,7 +93,8 @@ def conversation_settings( @pytest.mark.skipif( - not os.path.exists("tests/testdata/Confuse-A-Cat.vtt"), reason="Test VTT file not found" + not os.path.exists("tests/testdata/Confuse-A-Cat.vtt"), + reason="Test VTT file not found", ) @pytest.mark.asyncio async def test_ingest_vtt_transcript(conversation_settings: ConversationSettings):