diff --git a/.github/workflows/ensure_clean.yml b/.github/workflows/ensure_clean.yml new file mode 100644 index 0000000..fdb2fa3 --- /dev/null +++ b/.github/workflows/ensure_clean.yml @@ -0,0 +1,55 @@ +name: Ensure files are formatted. + +on: [workflow_dispatch, push] + +jobs: + ensure_formatted: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Setup Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: ${{ env.POETRY_VERSION }} + + - name: Install Python + id: install-python + uses: actions/setup-python@v5 + with: + python-version-file: .python-version + cache: "poetry" + + - uses: actions/cache@v4 + with: + key: style-${{ hashFiles('**/poetry.lock') }} + path: ./.venv + restore-keys: | + style- + + - name: Install Python Dependencies + run: | + poetry install --sync + + - name: Normalize all files + run: | + source .venv/bin/activate + git add --renormalize . + + - name: Format all python files + run: | + source .venv/bin/activate + black `git ls-files '*.py'` + isort `git ls-files '*.py'` + + - name: Make sure poetry.lock is in sync + run: | + poetry check --lock + + - name: Check that no files were changed + run: | + source .venv/bin/activate + git --no-pager diff + git status --porcelain + [ -z "$(git status --porcelain)" ] diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..450178b --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12.7 \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..47e7cd1 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,207 @@ +# Contributing Guide + +We welcome all types of contributions to Supermat, including bug reports, feature requests, code improvements, documentation updates, and tests. Please follow the guidelines below to help us maintain a high-quality, collaborative project. 
+ +--- + +## Table of Contents + +- [Contributing Guide](#contributing-guide) + - [Table of Contents](#table-of-contents) + - [1. Development Setup](#1-development-setup) + - [2. Branch \& Commit Naming Conventions](#2-branch--commit-naming-conventions) + - [Branch Naming](#branch-naming) + - [Commit Messages](#commit-messages) + - [3. Issue Reporting Guidelines](#3-issue-reporting-guidelines) + - [4. Testing](#4-testing) + - [5. Documentation](#5-documentation) + - [6. Code Formatting](#6-code-formatting) + - [7. Code of Conduct](#7-code-of-conduct) + - [8. Final Steps](#8-final-steps) + +--- + +## 1. Development Setup + +To set up your local development environment: + +1. **Clone the Repository** + + ```bash + git clone https://github.com/SupermatAI/supermat.git + cd supermat + ``` + +2. **Install Dependencies with Poetry** + Ensure you have [Python Poetry](https://python-poetry.org/) installed. Then run: + + ```bash + poetry install --with=dev,docs,frontend --all-extras + ``` + +3. **Run the Application** + To see Supermat in action via the Gradio interface, run: + + ```bash + python -m supermat.gradio + ``` + +4. **Virtual Environment (Optional)** + Poetry automatically handles virtual environments. If needed, refer to Poetry’s documentation for managing virtual environments. + +--- + +## 2. Branch & Commit Naming Conventions + +We follow [Conventional Commits](https://www.conventionalcommits.org/) and semantic versioning principles for clarity and automation. This ensures that commit messages also help determine version bumps (e.g., breaking changes lead to major version increments, features to minor, fixes to patch). 
+ +### Branch Naming + +Branches should be named in the following format: + +``` +<type>/<issue-number>-<short-description> +``` + +- **`<type>`:** One of the following: + - `feat` (new feature) + - `fix` (bug fix) + - `docs` (documentation changes) + - `style` (formatting, no functional changes) + - `refactor` (code refactoring) + - `test` (adding or updating tests) + - `chore` (maintenance tasks) +- **`<issue-number>`:** Reference the related issue (if applicable). +- **`<short-description>`:** A brief, hyphen-separated summary. + +**Examples:** + +- `feat/123-add-user-auth` +- `fix/456-correct-api-endpoint` +- `docs/789-update-readme` + +### Commit Messages + +Commit messages should follow this structure: + +``` +<type>(<scope>): <description> +``` + +- **`<type>`:** Same as above (e.g., `feat`, `fix`). +- **`<scope>`:** (Optional) The module or area affected. +- **`<description>`:** A concise summary in the imperative mood (e.g., “add,” “fix,” “update”). + +**Examples:** + +- `feat(auth): add JWT-based user authentication` +- `fix(api): correct endpoint URL for data retrieval` +- `docs: update installation instructions` + +For breaking changes, either append an exclamation mark after the type or include a `BREAKING CHANGE:` footer: + +- `feat!: overhaul authentication system` +- Or: + + ```markdown + feat(auth): update user authentication + + BREAKING CHANGE: The authentication API has changed; please update your integration accordingly. + ``` + +--- + +## 3. Issue Reporting Guidelines + +When opening a new issue, please include the following information: + +- **Title:** A clear and descriptive title summarizing the problem. +- **Description:** A detailed explanation of the issue. +- **Steps to Reproduce:** Provide step-by-step instructions that allow us to reproduce the problem. +- **Expected vs. Actual Behavior:** Describe what you expected to happen and what actually occurred. +- **Environment Details:** Include OS, Python version, and any other relevant setup details. +- **Screenshots/Logs:** Attach screenshots or error logs if available. + +--- + +## 4. 
Testing + +We use [pytest](https://docs.pytest.org/) for running our test suite. + +- **Run Tests Locally:** + + Simply execute: + + ```bash + pytest + ``` + +- **Before Submitting a PR:** + Ensure that all tests pass. + +--- + +## 5. Documentation + +Our documentation is built with [MkDocs](https://www.mkdocs.org/). + +- **Viewing Documentation Locally:** + + Run: + + ```bash + mkdocs serve + ``` + + This command will start a local server so you can preview the docs as you work on them. +- **Contributing Changes:** + Please follow the structure in the `docs/` folder and update `SUMMARY.md` if you add new pages. + +--- + +## 6. Code Formatting + +We use [black](https://black.readthedocs.io/) and [isort](https://pycqa.github.io/isort/) to enforce a consistent code style. Configuration is managed in `pyproject.toml`. + +- **Format Code with Black:** + + ```bash + black . + ``` + +- **Sort Imports with isort:** + + ```bash + isort . + ``` + +Please run these tools on your changes before submitting a pull request. + +--- + +## 7. Code of Conduct + +To maintain a friendly and inclusive environment, we ask all contributors to follow our Code of Conduct. We are adopting the [Contributor Covenant v2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct/code_of_conduct.md) as our Code of Conduct. + +- **Location:** + A copy of the Code of Conduct should be added to the repository as `CODE_OF_CONDUCT.md`. +- **Expectations:** + All contributors are expected to treat others with respect and to refrain from harassment or exclusionary behavior. +- **Reporting:** + If you experience or witness any violations, please report them to the project maintainers via the contact details provided in the Code of Conduct. + +--- + +## 8. Final Steps + +- **Pull Requests:** + - Ensure that your branch name and commit messages adhere to the guidelines above. + - Run all tests and format your code before submission. 
+- **Review Process:** + Your pull request will undergo review and feedback. Please address any requested changes promptly. +- **Questions:** + If you have any questions about contributing, feel free to open an issue or join our discussions. + +Thank you for contributing to Supermat and helping us build a better project! + +--- diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 27a401a..0000000 --- a/requirements.txt +++ /dev/null @@ -1,244 +0,0 @@ -aiofiles==23.2.1 ; python_version >= "3.12" and python_version < "4.0" -aiohappyeyeballs==2.4.6 ; python_version >= "3.12" and python_version < "4.0" -aiohttp==3.11.12 ; python_version >= "3.12" and python_version < "4.0" -aiosignal==1.3.2 ; python_version >= "3.12" and python_version < "4.0" -alabaster==0.7.16 ; python_version >= "3.12" and python_version < "4.0" -annotated-types==0.7.0 ; python_version >= "3.12" and python_version < "4.0" -anthropic==0.45.2 ; python_version >= "3.12" and python_version < "4.0" -anyio==4.8.0 ; python_version >= "3.12" and python_version < "4.0" -appscript==1.3.0 ; python_version >= "3.12" and python_version < "4.0" and sys_platform == "darwin" -asgiref==3.8.1 ; python_version >= "3.12" and python_version < "4.0" -asttokens==3.0.0 ; python_version >= "3.12" and python_version < "4.0" -attrs==25.1.0 ; python_version >= "3.12" and python_version < "4.0" -audioop-lts==0.2.1 ; python_version >= "3.13" and python_version < "4.0" -babel==2.17.0 ; python_version >= "3.12" and python_version < "4.0" -backoff==2.2.1 ; python_version >= "3.12" and python_version < "4.0" -bcrypt==4.2.1 ; python_version >= "3.12" and python_version < "4.0" -beautifulsoup4==4.13.3 ; python_version >= "3.12" and python_version < "4.0" -blis==0.7.11 ; python_version >= "3.12" and python_version < "4.0" -build==1.2.2.post1 ; python_version >= "3.12" and python_version < "4.0" -cachetools==5.5.1 ; python_version >= "3.12" and python_version < "4.0" -catalogue==2.0.10 ; python_version >= 
"3.12" and python_version < "4.0" -certifi==2025.1.31 ; python_version >= "3.12" and python_version < "4.0" -chardet==5.2.0 ; python_version >= "3.12" and python_version < "4.0" -charset-normalizer==3.4.1 ; python_version >= "3.12" and python_version < "4.0" -chroma-hnswlib==0.7.6 ; python_version >= "3.12" and python_version < "4.0" -chromadb==0.5.20 ; python_version >= "3.12" and python_version < "4.0" -click==8.1.8 ; python_version >= "3.12" and python_version < "4.0" -cloudpathlib==0.20.0 ; python_version >= "3.12" and python_version < "4.0" -colorama==0.4.6 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Windows" or python_version >= "3.12" and python_version < "4.0" and os_name == "nt" or python_version >= "3.12" and python_version < "4.0" and sys_platform == "win32" -coloredlogs==15.0.1 ; python_version >= "3.12" and python_version < "4.0" -comm==0.2.2 ; python_version >= "3.12" and python_version < "4.0" -confection==0.1.5 ; python_version >= "3.12" and python_version < "4.0" -cymem==2.0.11 ; python_version >= "3.12" and python_version < "4.0" -dataclasses-json==0.6.7 ; python_version >= "3.12" and python_version < "4.0" -decorator==5.1.1 ; python_version >= "3.12" and python_version < "4.0" -deprecated==1.2.18 ; python_version >= "3.12" and python_version < "4.0" -distro==1.9.0 ; python_version >= "3.12" and python_version < "4.0" -docutils==0.20.1 ; python_version >= "3.12" and python_version < "4.0" -docx2pdf==0.1.8 ; python_version >= "3.12" and python_version < "4.0" -durationpy==0.9 ; python_version >= "3.12" and python_version < "4.0" -executing==2.2.0 ; python_version >= "3.12" and python_version < "4.0" -fastapi==0.115.8 ; python_version >= "3.12" and python_version < "4.0" -ffmpy==0.5.0 ; python_version >= "3.12" and python_version < "4.0" -filelock==3.17.0 ; python_version >= "3.12" and python_version < "4.0" -flatbuffers==25.1.24 ; python_version >= "3.12" and python_version < "4.0" -frozenlist==1.5.0 ; 
python_version >= "3.12" and python_version < "4.0" -fsspec==2025.2.0 ; python_version >= "3.12" and python_version < "4.0" -google-auth==2.38.0 ; python_version >= "3.12" and python_version < "4.0" -googleapis-common-protos==1.66.0 ; python_version >= "3.12" and python_version < "4.0" -gradio-client==1.7.0 ; python_version >= "3.12" and python_version < "4.0" -gradio==5.15.0 ; python_version >= "3.12" and python_version < "4.0" -greenlet==3.1.1 ; python_version >= "3.12" and python_version < "3.14" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") -grpcio==1.70.0 ; python_version >= "3.12" and python_version < "4.0" -h11==0.14.0 ; python_version >= "3.12" and python_version < "4.0" -httpcore==1.0.7 ; python_version >= "3.12" and python_version < "4.0" -httptools==0.6.4 ; python_version >= "3.12" and python_version < "4.0" -httpx-sse==0.4.0 ; python_version >= "3.12" and python_version < "4.0" -httpx==0.28.1 ; python_version >= "3.12" and python_version < "4.0" -huggingface-hub==0.28.1 ; python_version >= "3.12" and python_version < "4.0" -humanfriendly==10.0 ; python_version >= "3.12" and python_version < "4.0" -idna==3.10 ; python_version >= "3.12" and python_version < "4.0" -imagesize==1.4.1 ; python_version >= "3.12" and python_version < "4.0" -importlib-metadata==8.5.0 ; python_version >= "3.12" and python_version < "4.0" -importlib-resources==6.5.2 ; python_version >= "3.12" and python_version < "4.0" -ipython==8.32.0 ; python_version >= "3.12" and python_version < "4.0" -ipywidgets==8.1.5 ; python_version >= "3.12" and python_version < "4.0" -jedi==0.19.2 ; python_version >= "3.12" and python_version < "4.0" -jinja2==3.1.5 ; python_version >= "3.12" and python_version < "4.0" -jiter==0.8.2 ; python_version >= "3.12" and python_version < "4.0" -joblib==1.4.2 ; python_version >= "3.12" 
and python_version < "4.0" -jsonpatch==1.33 ; python_version >= "3.12" and python_version < "4.0" -jsonpointer==3.0.0 ; python_version >= "3.12" and python_version < "4.0" -jupyterlab-widgets==3.0.13 ; python_version >= "3.12" and python_version < "4.0" -kubernetes==32.0.0 ; python_version >= "3.12" and python_version < "4.0" -langchain-anthropic==0.3.7 ; python_version >= "3.12" and python_version < "4.0" -langchain-benchmarks @ git+https://github.com/legendof-selda/langchain-benchmarks.git@951d3712e2d4449d1555b11e07454ae3059586dd ; python_version >= "3.12" and python_version < "4.0" -langchain-chroma==0.1.4 ; python_version >= "3.12" and python_version < "4.0" -langchain-community==0.3.17 ; python_version >= "3.12" and python_version < "4.0" -langchain-core==0.3.34 ; python_version >= "3.12" and python_version < "4.0" -langchain-huggingface==0.1.2 ; python_version >= "3.12" and python_version < "4.0" -langchain-ollama==0.2.3 ; python_version >= "3.12" and python_version < "4.0" -langchain-openai==0.2.14 ; python_version >= "3.12" and python_version < "4.0" -langchain-text-splitters==0.3.6 ; python_version >= "3.12" and python_version < "4.0" -langchain==0.3.18 ; python_version >= "3.12" and python_version < "4.0" -langcodes==3.5.0 ; python_version >= "3.12" and python_version < "4.0" -langsmith==0.1.147 ; python_version >= "3.12" and python_version < "4.0" -language-data==1.3.0 ; python_version >= "3.12" and python_version < "4.0" -lxml==5.3.0 ; python_version >= "3.12" and python_version < "4.0" -marisa-trie==1.2.1 ; python_version >= "3.12" and python_version < "4.0" -markdown-it-py==3.0.0 ; python_version >= "3.12" and python_version < "4.0" -markupsafe==2.1.5 ; python_version >= "3.12" and python_version < "4.0" -marshmallow==3.26.1 ; python_version >= "3.12" and python_version < "4.0" -matplotlib-inline==0.1.7 ; python_version >= "3.12" and python_version < "4.0" -mdurl==0.1.2 ; python_version >= "3.12" and python_version < "4.0" -mmh3==5.1.0 ; 
python_version >= "3.12" and python_version < "4.0" -monotonic==1.6 ; python_version >= "3.12" and python_version < "4.0" -mpmath==1.3.0 ; python_version >= "3.12" and python_version < "4.0" -multidict==6.1.0 ; python_version >= "3.12" and python_version < "4.0" -murmurhash==1.0.12 ; python_version >= "3.12" and python_version < "4.0" -mypy-extensions==1.0.0 ; python_version >= "3.12" and python_version < "4.0" -networkx==3.4.2 ; python_version >= "3.12" and python_version < "4.0" -nltk==3.9.1 ; python_version >= "3.12" and python_version < "4.0" -numpy==1.26.4 ; python_version >= "3.12" and python_version < "4.0" -nvidia-cublas-cu12==12.4.5.8 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cuda-cupti-cu12==12.4.127 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cuda-nvrtc-cu12==12.4.127 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cuda-runtime-cu12==12.4.127 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cudnn-cu12==9.1.0.70 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cufft-cu12==11.2.1.3 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-curand-cu12==10.3.5.147 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cusolver-cu12==11.6.1.9 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-cusparse-cu12==12.3.1.170 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" 
-nvidia-cusparselt-cu12==0.6.2 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-nccl-cu12==2.21.5 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-nvjitlink-cu12==12.4.127 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -nvidia-nvtx-cu12==12.4.127 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -oauthlib==3.2.2 ; python_version >= "3.12" and python_version < "4.0" -ollama==0.4.7 ; python_version >= "3.12" and python_version < "4.0" -onnxruntime==1.20.1 ; python_version >= "3.12" and python_version < "4.0" -openai==1.61.1 ; python_version >= "3.12" and python_version < "4.0" -opentelemetry-api==1.30.0 ; python_version >= "3.12" and python_version < "4.0" -opentelemetry-exporter-otlp-proto-common==1.30.0 ; python_version >= "3.12" and python_version < "4.0" -opentelemetry-exporter-otlp-proto-grpc==1.30.0 ; python_version >= "3.12" and python_version < "4.0" -opentelemetry-instrumentation-asgi==0.51b0 ; python_version >= "3.12" and python_version < "4.0" -opentelemetry-instrumentation-fastapi==0.51b0 ; python_version >= "3.12" and python_version < "4.0" -opentelemetry-instrumentation==0.51b0 ; python_version >= "3.12" and python_version < "4.0" -opentelemetry-proto==1.30.0 ; python_version >= "3.12" and python_version < "4.0" -opentelemetry-sdk==1.30.0 ; python_version >= "3.12" and python_version < "4.0" -opentelemetry-semantic-conventions==0.51b0 ; python_version >= "3.12" and python_version < "4.0" -opentelemetry-util-http==0.51b0 ; python_version >= "3.12" and python_version < "4.0" -orjson==3.10.15 ; python_version >= "3.12" and python_version < "4.0" -overrides==7.7.0 ; python_version >= "3.12" and python_version < "4.0" -packaging==24.2 ; python_version >= "3.12" and 
python_version < "4.0" -pandas==2.2.3 ; python_version >= "3.12" and python_version < "4.0" -parso==0.8.4 ; python_version >= "3.12" and python_version < "4.0" -pdf2image==1.17.0 ; python_version >= "3.12" and python_version < "4.0" -pdfservices-sdk==4.1.0 ; python_version >= "3.12" and python_version < "4.0" -pexpect==4.9.0 ; python_version >= "3.12" and python_version < "4.0" and (sys_platform != "win32" and sys_platform != "emscripten") -pillow==10.4.0 ; python_version >= "3.12" and python_version < "4.0" -plum-dispatch==1.7.4 ; python_version >= "3.12" and python_version < "4.0" -posthog==3.11.0 ; python_version >= "3.12" and python_version < "4.0" -preshed==3.0.9 ; python_version >= "3.12" and python_version < "4.0" -prompt-toolkit==3.0.50 ; python_version >= "3.12" and python_version < "4.0" -propcache==0.2.1 ; python_version >= "3.12" and python_version < "4.0" -protobuf==5.29.3 ; python_version >= "3.12" and python_version < "4.0" -ptyprocess==0.7.0 ; python_version >= "3.12" and python_version < "4.0" and (sys_platform != "win32" and sys_platform != "emscripten") -pure-eval==0.2.3 ; python_version >= "3.12" and python_version < "4.0" -pyasn1-modules==0.4.1 ; python_version >= "3.12" and python_version < "4.0" -pyasn1==0.6.1 ; python_version >= "3.12" and python_version < "4.0" -pydantic-core==2.27.2 ; python_version >= "3.12" and python_version < "4.0" -pydantic-settings==2.7.1 ; python_version >= "3.12" and python_version < "4.0" -pydantic==2.10.6 ; python_version >= "3.12" and python_version < "4.0" -pydub==0.25.1 ; python_version >= "3.12" and python_version < "4.0" -pygments==2.19.1 ; python_version >= "3.12" and python_version < "4.0" -pymupdf==1.25.3 ; python_version >= "3.12" and python_version < "4.0" -pypika==0.48.9 ; python_version >= "3.12" and python_version < "4.0" -pyproject-hooks==1.2.0 ; python_version >= "3.12" and python_version < "4.0" -pyreadline3==3.5.4 ; python_version >= "3.12" and python_version < "4.0" and sys_platform == "win32" 
-python-dateutil==2.9.0.post0 ; python_version >= "3.12" and python_version < "4.0" -python-docx==1.1.2 ; python_version >= "3.12" and python_version < "4.0" -python-dotenv==1.0.1 ; python_version >= "3.12" and python_version < "4.0" -python-multipart==0.0.20 ; python_version >= "3.12" and python_version < "4.0" -python-pptx==0.6.23 ; python_version >= "3.12" and python_version < "4.0" -pytz==2025.1 ; python_version >= "3.12" and python_version < "4.0" -pywin32==308 ; python_version >= "3.12" and python_version < "4.0" and sys_platform == "win32" -pyyaml==6.0.2 ; python_version >= "3.12" and python_version < "4.0" -rake-nltk==1.0.6 ; python_version >= "3.12" and python_version < "4.0" -regex==2024.11.6 ; python_version >= "3.12" and python_version < "4.0" -reportlab==4.3.0 ; python_version >= "3.12" and python_version < "4.0" -requests-oauthlib==2.0.0 ; python_version >= "3.12" and python_version < "4.0" -requests-toolbelt==1.0.0 ; python_version >= "3.12" and python_version < "4.0" -requests==2.31.0 ; python_version >= "3.12" and python_version < "4.0" -rich==13.9.4 ; python_version >= "3.12" and python_version < "4.0" -rsa==4.9 ; python_version >= "3.12" and python_version < "4.0" -ruff==0.9.5 ; python_version >= "3.12" and python_version < "4.0" and sys_platform != "emscripten" -safehttpx==0.1.6 ; python_version >= "3.12" and python_version < "4.0" -safetensors==0.5.2 ; python_version >= "3.12" and python_version < "4.0" -scikit-learn==1.6.1 ; python_version >= "3.12" and python_version < "4.0" -scipy==1.15.1 ; python_version >= "3.12" and python_version < "4.0" -semantic-version==2.10.0 ; python_version >= "3.12" and python_version < "4.0" -sentence-transformers==2.7.0 ; python_version >= "3.12" and python_version < "4.0" -setuptools==69.5.1 ; python_version >= "3.12" and python_version < "4.0" -shellingham==1.5.4 ; python_version >= "3.12" and python_version < "4.0" -six==1.17.0 ; python_version >= "3.12" and python_version < "4.0" -smart-open==7.1.0 ; 
python_version >= "3.12" and python_version < "4.0" -sniffio==1.3.1 ; python_version >= "3.12" and python_version < "4.0" -snowballstemmer==2.2.0 ; python_version >= "3.12" and python_version < "4.0" -soupsieve==2.6 ; python_version >= "3.12" and python_version < "4.0" -spacy-legacy==3.0.12 ; python_version >= "3.12" and python_version < "4.0" -spacy-loggers==1.0.5 ; python_version >= "3.12" and python_version < "4.0" -spacy==3.7.5 ; python_version >= "3.12" and python_version < "4.0" -sphinx-rtd-theme==2.0.0 ; python_version >= "3.12" and python_version < "4.0" -sphinx==7.3.7 ; python_version >= "3.12" and python_version < "4.0" -sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.12" and python_version < "4.0" -sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.12" and python_version < "4.0" -sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.12" and python_version < "4.0" -sphinxcontrib-jquery==4.1 ; python_version >= "3.12" and python_version < "4.0" -sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.12" and python_version < "4.0" -sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.12" and python_version < "4.0" -sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.12" and python_version < "4.0" -spire-barcode==7.2.8 ; python_version >= "3.12" and python_version < "4.0" -spire-doc==12.12.0 ; python_version >= "3.12" and python_version < "4.0" -spire-ocr==1.9.0 ; python_version >= "3.12" and python_version < "4.0" -spire-office==9.12.1 ; python_version >= "3.12" and python_version < "4.0" -spire-pdf==10.12.1 ; python_version >= "3.12" and python_version < "4.0" -spire-presentation==9.12.1 ; python_version >= "3.12" and python_version < "4.0" -spire-xls==14.12.0 ; python_version >= "3.12" and python_version < "4.0" -sqlalchemy==2.0.38 ; python_version >= "3.12" and python_version < "4.0" -srsly==2.4.8 ; python_version >= "3.12" and python_version < "4.0" -stack-data==0.6.3 ; python_version >= "3.12" and python_version < "4.0" -starlette==0.45.3 ; 
python_version >= "3.12" and python_version < "4.0" -sympy==1.13.1 ; python_version >= "3.12" and python_version < "4.0" -tabulate==0.9.0 ; python_version >= "3.12" and python_version < "4.0" -tenacity==9.0.0 ; python_version >= "3.12" and python_version < "4.0" -thinc==8.2.5 ; python_version >= "3.12" and python_version < "4.0" -threadpoolctl==3.5.0 ; python_version >= "3.12" and python_version < "4.0" -tiktoken==0.8.0 ; python_version >= "3.12" and python_version < "4.0" -tokenizers==0.21.0 ; python_version >= "3.12" and python_version < "4.0" -tomlkit==0.13.2 ; python_version >= "3.12" and python_version < "4.0" -torch==2.6.0 ; python_version >= "3.12" and python_version < "4.0" -tqdm==4.67.1 ; python_version >= "3.12" and python_version < "4.0" -traitlets==5.14.3 ; python_version >= "3.12" and python_version < "4.0" -transformers==4.48.3 ; python_version >= "3.12" and python_version < "4.0" -triton==3.2.0 ; python_version >= "3.12" and python_version < "4.0" and platform_system == "Linux" and platform_machine == "x86_64" -typer==0.15.1 ; python_version >= "3.12" and python_version < "4.0" -typing-extensions==4.12.2 ; python_version >= "3.12" and python_version < "4.0" -typing-inspect==0.9.0 ; python_version >= "3.12" and python_version < "4.0" -tzdata==2025.1 ; python_version >= "3.12" and python_version < "4.0" -unicode==2.9 ; python_version >= "3.12" and python_version < "4.0" -unidecode==1.3.8 ; python_version >= "3.12" and python_version < "4.0" -urllib3==2.3.0 ; python_version >= "3.12" and python_version < "4.0" -uvicorn==0.34.0 ; python_version >= "3.12" and python_version < "4.0" -uvloop==0.21.0 ; python_version >= "3.12" and python_version < "4.0" and (sys_platform != "win32" and sys_platform != "cygwin") and platform_python_implementation != "PyPy" -wasabi==1.1.3 ; python_version >= "3.12" and python_version < "4.0" -watchfiles==1.0.4 ; python_version >= "3.12" and python_version < "4.0" -wcwidth==0.2.13 ; python_version >= "3.12" and python_version < 
"4.0" -weasel==0.4.1 ; python_version >= "3.12" and python_version < "4.0" -websocket-client==1.8.0 ; python_version >= "3.12" and python_version < "4.0" -websockets==14.2 ; python_version >= "3.12" and python_version < "4.0" -widgetsnbextension==4.0.13 ; python_version >= "3.12" and python_version < "4.0" -wrapt==1.17.2 ; python_version >= "3.12" and python_version < "4.0" -xlsxwriter==3.2.2 ; python_version >= "3.12" and python_version < "4.0" -yarl==1.18.3 ; python_version >= "3.12" and python_version < "4.0" -zipp==3.21.0 ; python_version >= "3.12" and python_version < "4.0" diff --git a/supermat/evaluation/evalution.py b/supermat/evaluation/evalution.py index 3549c6f..f52a419 100644 --- a/supermat/evaluation/evalution.py +++ b/supermat/evaluation/evalution.py @@ -19,14 +19,12 @@ ) from langchain_core.runnables.base import Runnable from langchain_huggingface import HuggingFaceEmbeddings -from tqdm.auto import tqdm from supermat.core.models.parsed_document import ParsedDocumentType from supermat.core.parser import FileProcessor from supermat.langchain.bindings import SupermatRetriever - def get_docs(pdf_files: list[Path]) -> ParsedDocumentType: parsed_files = Parallel(n_jobs=-1)(delayed(FileProcessor.parse_file)(path) for path in pdf_files) if TYPE_CHECKING: @@ -43,7 +41,7 @@ def get_docs(pdf_files: list[Path]) -> ParsedDocumentType: return documents -def get_retriever(documents: ParsedDocumentType,collection_name:str) -> SupermatRetriever: +def get_retriever(documents: ParsedDocumentType, collection_name: str) -> SupermatRetriever: retriever = SupermatRetriever( parsed_docs=documents, vector_store=Chroma( @@ -59,7 +57,7 @@ def get_retriever(documents: ParsedDocumentType,collection_name:str) -> Supermat def get_qa_chain(retriever: SupermatRetriever, template: str, llm_model: BaseChatModel) -> Runnable: qa_chain = ( - RunnableLambda(lambda x: x["Question"]) + RunnableLambda(lambda x: x["Question"]) # pyright: ignore[reportIndexIssue] | 
RunnableParallel({"context": retriever, "Question": RunnablePassthrough()}) | ChatPromptTemplate.from_template(template) | llm_model @@ -79,7 +77,7 @@ def calculate_metrics(llm_model: BaseChatModel, evaluators: list, datset_path: P ) test_run = run_without_langsmith( path_or_token_id=datset_path.as_posix(), - llm_or_chain_factory=qa_chain, # was qachain2 + llm_or_chain_factory=qa_chain, # pyright: ignore[reportArgumentType] evaluation=eval_config, verbose=True, concurrency_level=10,