diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 88e21128..3f89c78a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,13 +86,13 @@ jobs: shell: bash if: matrix.task == 'check' run: | - uv run pyright src tests tools gmail + uv run pyright src tests tools examples - name: Run Format shell: bash if: matrix.task == 'format' run: | - uv run black -tpy312 -tpy313 -tpy314 src tests tools gmail demo --check + uv run black -tpy312 -tpy313 -tpy314 src tests tools examples --check - name: Login to Azure if: matrix.task == 'test' diff --git a/.gitignore b/.gitignore index 55acbaab..ca4cb7b2 100644 --- a/.gitignore +++ b/.gitignore @@ -30,13 +30,13 @@ pytest.local.ini # Evaluations /evals -/testdata/Episode_53_Answer_results.json -/testdata/Episode_53_Search_results.json +/tests/testdata/Episode_53_Answer_results.json +/tests/testdata/Episode_53_Search_results.json # Email demo -/gmail/client_secret.json -/gmail/token.json +/tools/gmail/client_secret.json +/tools/gmail/token.json *_dump/ # Monty Python demo -/testdata/MP +/examples/testdata/MP diff --git a/AGENTS.md b/AGENTS.md index 2ebf0a85..148180fb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,7 +13,7 @@ When moving, copying or deleting files, use the git commands: `git mv`, `git cp` - Activate `.venv`: `make venv; source .venv/bin/activate` (run this only once) - To get API keys in ad-hoc code, call `typeagent.aitools.utils.load_dotenv()` - Use `pytest test` to run tests in test/ -- Use `pyright` to check type annotations in tools/, test/, typeagent/, gmail/ +- Use `pyright` to check type annotations in src/, tools/, tests/, examples/ - Ignore build/, dist/ - You can also use the pylance extension for type checking in VS Code - Use `make check` to type-check all files diff --git a/Makefile b/Makefile index c3382c49..957a0c34 100644 --- a/Makefile +++ b/Makefile @@ -8,12 +8,12 @@ all: venv format check test build .PHONY: format format: venv - .venv/bin/isort src tests tools gmail demo $(FLAGS) - .venv/bin/black -tpy312 -tpy313 -tpy314 src tests tools gmail demo $(FLAGS) + .venv/bin/isort src tests tools examples $(FLAGS) + .venv/bin/black -tpy312 -tpy313 -tpy314 src tests tools examples $(FLAGS) .PHONY: check check: venv - .venv/bin/pyright --pythonpath .venv/bin/python src tests tools gmail + .venv/bin/pyright --pythonpath .venv/bin/python src tests tools examples .PHONY: test test: venv diff --git a/docs/demos.md b/docs/demos.md index 037f6a2d..8c61423a 100644 --- a/docs/demos.md +++ b/docs/demos.md @@ -64,7 +64,7 @@ tool that we used for the Monty Python demo. ### How to use the Gmail API to download messages -In the `gmail/` folder you'll find a tool named `gmail_dump.py` which +In the `tools/gmail/` folder you'll find a tool named `gmail_dump.py` which will download any number of messages (default 50) using the Gmail API. In order to use the Gmail API, however, you have to create a (free) Google Cloud app and configure it appropriately. @@ -96,21 +96,21 @@ your `*.eml` files from -- every email provider has its own quirks. The podcast demo is actually the easiest to run: The "database" is included in the repo as -`testdata/Episode_53_AdrianTchaikovsky_index*`, +`tests/testdata/Episode_53_AdrianTchaikovsky_index*`, and this is in fact the default "database" used by `tools/query.py` when no `-d`/`--database` flag is given. -This "database" indexes `test/Episode_53_AdrianTchaikovsky.txt`. +This "database" indexes `tests/testdata/Episode_53_AdrianTchaikovsky.txt`. It was created by a one-off script that invoked -`typeagent/podcast/podcast_ingest/ingest_podcast()` +`src/typeagent/podcast/podcast_ingest/ingest_podcast()` and saved to two files by calling the `.ingest()` method on the -returned `typeagent/podcasts/podcast/Podcast` object. +returned `src/typeagent/podcasts/podcast/Podcast` object. Here's a brief sample session: ```sh $ python tools/query.py 1.318s -- Using Azure OpenAI -0.054s -- Loading podcast from 'testdata/Episode_53_AdrianTchaikovsky_index' +0.054s -- Loading podcast from 'tests/testdata/Episode_53_AdrianTchaikovsky_index' TypeAgent demo UI 0.2 (type 'q' to exit) TypeAgent> What did Kevin say to Adrian about science fiction? -------------------------------------------------- diff --git a/docs/gmail.md b/docs/gmail.md index d2ef5d83..0bd558c4 100644 --- a/docs/gmail.md +++ b/docs/gmail.md @@ -1,6 +1,6 @@ # Extracting GMail Messages -There's a helper script in the repo under `gmail/`. +There's a helper script in the repo under `tools/gmail/`. It requires setting up and creating a Google API project. Until we have time to write this up, your best bet is to ask your favorite search engine or LLM-based chat bot for help. diff --git a/spec/coverage.txt b/docs/spec/coverage.txt similarity index 100% rename from spec/coverage.txt rename to docs/spec/coverage.txt diff --git a/spec/indexes_overview.md b/docs/spec/indexes_overview.md similarity index 100% rename from spec/indexes_overview.md rename to docs/spec/indexes_overview.md diff --git a/spec/storage_future_extensions.md b/docs/spec/storage_future_extensions.md similarity index 100% rename from spec/storage_future_extensions.md rename to docs/spec/storage_future_extensions.md diff --git a/spec/storage_immediate_implementation.md b/docs/spec/storage_immediate_implementation.md similarity index 100% rename from spec/storage_immediate_implementation.md rename to docs/spec/storage_immediate_implementation.md diff --git a/spec/storage_implementation_plan.md b/docs/spec/storage_implementation_plan.md similarity index 100% rename from spec/storage_implementation_plan.md rename to docs/spec/storage_implementation_plan.md diff --git a/spec/storage_spec.md b/docs/spec/storage_spec.md similarity index 100% rename from spec/storage_spec.md rename to docs/spec/storage_spec.md diff --git a/demo/README.md b/examples/demo/README.md similarity index 74% rename from demo/README.md rename to examples/demo/README.md index 7466c580..bda9f71a 100644 --- a/demo/README.md +++ b/examples/demo/README.md @@ -1,7 +1,7 @@ # Demo scripts The files here are the scripts from -[Getting Started](../docs/getting-started.md). +[Getting Started](../../docs/getting-started.md). - [ingest.py](ingest.py): The ingestion script. - [query.py](query.py): The query script. @@ -9,4 +9,4 @@ The files here are the scripts from Note that for any of this to work you need to acquire an OpenAI API key and set some variables; see -[Environment Variables](../docs/env-vars.md). +[Environment Variables](../../docs/env-vars.md). diff --git a/demo/ingest.py b/examples/demo/ingest.py similarity index 100% rename from demo/ingest.py rename to examples/demo/ingest.py diff --git a/demo/query.py b/examples/demo/query.py similarity index 100% rename from demo/query.py rename to examples/demo/query.py diff --git a/demo/testdata.txt b/examples/demo/testdata.txt similarity index 100% rename from demo/testdata.txt rename to examples/demo/testdata.txt diff --git a/examples/simple_query_demo.py b/examples/simple_query_demo.py index 6025673e..cbf54edd 100644 --- a/examples/simple_query_demo.py +++ b/examples/simple_query_demo.py @@ -14,9 +14,7 @@ import asyncio from typeagent import create_conversation -from typeagent.aitools.embeddings import AsyncEmbeddingModel from typeagent.aitools.utils import load_dotenv -from typeagent.knowpro.convsettings import ConversationSettings from typeagent.transcripts.transcript import TranscriptMessage, TranscriptMessageMeta @@ -44,9 +42,7 @@ async def main(): metadata=TranscriptMessageMeta(speaker="Instructor"), ), TranscriptMessage( - text_chunks=[ - "Python is a great language for beginners and experts alike." - ], + text_chunks=["Python is a great language for beginners and experts alike."], metadata=TranscriptMessageMeta(speaker="Instructor"), ), TranscriptMessage( @@ -64,8 +60,10 @@ async def main(): print("Adding messages and building indexes...") result = await conv.add_messages_with_indexing(messages) print(f"Conversation ready with {await conv.messages.size()} messages.") - print(f"Added {result.messages_added} messages, {result.semrefs_added} semantic refs") - + print( + f"Added {result.messages_added} messages, {result.semrefs_added} semantic refs" + ) + # Check indexes if conv.secondary_indexes: if conv.secondary_indexes.message_index: diff --git a/make.bat b/make.bat index d4a5dfb2..fb4f960b 100644 --- a/make.bat +++ b/make.bat @@ -26,14 +26,14 @@ goto help :format if not exist ".venv\" call make.bat venv echo Formatting code... -.venv\Scripts\isort src tests tools gmail demo -.venv\Scripts\black src tests tools gmail demo +.venv\Scripts\isort src tests tools examples +.venv\Scripts\black src tests tools examples goto end :check if not exist ".venv\" call make.bat venv echo Running type checks... -.venv\Scripts\pyright --pythonpath .venv\Scripts\python src tests tools gmail demo +.venv\Scripts\pyright --pythonpath .venv\Scripts\python src tests tools examples goto end :test diff --git a/src/typeagent/mcp/server.py b/src/typeagent/mcp/server.py index 80b7c1b8..0c106ec7 100644 --- a/src/typeagent/mcp/server.py +++ b/src/typeagent/mcp/server.py @@ -167,7 +167,7 @@ async def load_podcast_index_or_database( ) -> query.QueryEvalContext[podcast.PodcastMessage, Any]: if dbname is None: conversation = await podcast.Podcast.read_from_file( - "testdata/Episode_53_AdrianTchaikovsky_index", settings + "tests/testdata/Episode_53_AdrianTchaikovsky_index", settings ) else: conversation = await podcast.Podcast.create(settings) diff --git a/tests/test/test_transcripts.py b/tests/test/test_transcripts.py index 114b8312..2efef03d 100644 --- a/tests/test/test_transcripts.py +++ b/tests/test/test_transcripts.py @@ -67,11 +67,12 @@ def test_webvtt_timestamp_conversion(): @pytest.mark.skipif( - not os.path.exists("testdata/Confuse-A-Cat.vtt"), reason="Test VTT file not found" + not os.path.exists("tests/testdata/Confuse-A-Cat.vtt"), + reason="Test VTT file not found", ) def test_get_transcript_info(): """Test getting basic information from a VTT file.""" - vtt_file = "testdata/Confuse-A-Cat.vtt" + vtt_file = "tests/testdata/Confuse-A-Cat.vtt" # Test duration duration = get_transcript_duration(vtt_file) @@ -92,7 +93,8 @@ def conversation_settings( @pytest.mark.skipif( - not os.path.exists("testdata/Confuse-A-Cat.vtt"), reason="Test VTT file not found" + not os.path.exists("tests/testdata/Confuse-A-Cat.vtt"), + reason="Test VTT file not found", ) @pytest.mark.asyncio async def test_ingest_vtt_transcript(conversation_settings: ConversationSettings): @@ -106,7 +108,7 @@ async def test_ingest_vtt_transcript(conversation_settings: ConversationSettings from typeagent.storage.memory.semrefindex import TermToSemanticRefIndex from typeagent.transcripts.transcript_ingest import parse_voice_tags - vtt_file = "testdata/Confuse-A-Cat.vtt" + vtt_file = "tests/testdata/Confuse-A-Cat.vtt" # Use in-memory storage to avoid database cleanup issues settings = conversation_settings @@ -262,7 +264,7 @@ async def test_transcript_knowledge_extraction_slow( settings = ConversationSettings(embedding_model) # Parse first 5 captions from Parrot Sketch - vtt_file = "testdata/Parrot_Sketch.vtt" + vtt_file = "tests/testdata/Parrot_Sketch.vtt" if not os.path.exists(vtt_file): pytest.skip(f"Test file {vtt_file} not found") diff --git a/gmail/gmail_dump.py b/tools/gmail/gmail_dump.py similarity index 100% rename from gmail/gmail_dump.py rename to tools/gmail/gmail_dump.py diff --git a/tools/query.py b/tools/query.py index b71af7b3..d11f643e 100644 --- a/tools/query.py +++ b/tools/query.py @@ -925,14 +925,14 @@ def make_arg_parser(description: str) -> argparse.ArgumentParser: ), ) - default_podcast_file = "testdata/Episode_53_AdrianTchaikovsky_index" + default_podcast_file = "tests/testdata/Episode_53_AdrianTchaikovsky_index" parser.add_argument( "--podcast", type=str, default=default_podcast_file, help="Path to the podcast index files (excluding the '_index.json' suffix)", ) - default_qafile = "testdata/Episode_53_Answer_results.json" + default_qafile = "tests/testdata/Episode_53_Answer_results.json" explain_qa = "a list of questions and answers to test the full pipeline" parser.add_argument( "--qafile", @@ -940,7 +940,7 @@ def make_arg_parser(description: str) -> argparse.ArgumentParser: default=default_qafile, help=f"Path to the Answer_results.json file ({explain_qa})", ) - default_srfile = "testdata/Episode_53_Search_results.json" + default_srfile = "tests/testdata/Episode_53_Search_results.json" explain_sr = "a list of intermediate results from stages 1, 2 and 3" parser.add_argument( "--srfile",