Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions evaluation/openrag_eval/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# OpenRAG Evaluation Environment Variables

# OpenRAG API Key (required)
# This is your API key for authenticating with the OpenRAG instance.
# Format: orag_your_api_key_here
#
# To obtain an API key:
# 1. Navigate to your OpenRAG instance (e.g., http://localhost:3000)
# 2. Go to Settings or API Keys section
# 3. Generate a new API key
# 4. Copy the key and paste it below
OPENRAG_API_KEY=

# OpenRAG URL (optional)
# Base URL of your OpenRAG instance
# Default: http://localhost:3000
# OPENRAG_URL=http://localhost:3000
203 changes: 203 additions & 0 deletions evaluation/openrag_eval/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. This project takes
# the more nuclear option and ignores the entire .idea folder.
.idea/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# IDEs
.vscode/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

# Specific to project
mlflow.db
*milvus.db*
output*/

# From unitxt
inference_engine_cache

# Cache directories
cache/

# Task logs
logs/

src/openrag_eval/data_loaders/ait_qa_pdf/documents/

# RagWorkbench data directory
ragworkbench_data/
79 changes: 79 additions & 0 deletions evaluation/openrag_eval/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# openrag-eval

OpenRAG Evaluation Tool - an evaluation framework for OpenRAG.

## Installation

### Prerequisites

Install [uv](https://docs.astral.sh/uv/):
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```

### Setup

```bash
cd openrag/evaluation/openrag_eval

# Create virtual environment
uv venv

# Install with dependencies
uv sync
```

## Usage

Run the main evaluation script:

```bash
uv run python -m openrag_eval.evaluate
```


## Development

For development, install with development dependencies:

```bash
# Install with dev dependencies
uv sync --extra dev

# Run tests with uv
uv run pytest

# Format code with uv
uv run black src/ tests/

# Lint code with uv
uv run ruff check src/ tests/

# Type check with uv
uv run mypy src/
```

## Structure

The project is organized as follows:

```
openrag_eval/
├── src/openrag_eval/ # Main package
│ ├── pipelines/ # Ingest and inference pipelines
│ ├── boards/ # Evaluation board configurations
│ └── evaluate.py # Main evaluation script
└── tests/ # Tests
```

The main components are:

### Pipelines (`src/openrag_eval/pipelines/`)
Contains implementations of RAG pipelines:
- **`ingest.py`**: A RagWorkbench ingestion pipeline implemented with the OpenRAG SDK
- **`inference.py`**: A RagWorkbench inference pipeline implemented with the OpenRAG SDK

### Boards (`src/openrag_eval/boards/`)
Contains board configurations for evaluation experiments:
- **`table_rich/`**: A definition for RAG experiments over table-rich documents.

69 changes: 69 additions & 0 deletions evaluation/openrag_eval/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Build configuration: the package is built with the setuptools backend.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=61.0",
    "wheel",
]

# Core package metadata for the OpenRAG evaluation tool.
[project]
name = "openrag-eval"
version = "0.1.0"
description = "OpenRAG Evaluation Tool"
readme = "README.md"
# Installers refuse to install this package on interpreters older than 3.13.
requires-python = ">=3.13"
license = {text = "Apache-2.0"}

# Only advertise interpreter versions that `requires-python` actually permits;
# the 3.11 and 3.12 classifiers were removed because those versions cannot
# install the package.
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.13",
]

dependencies = [
    "openrag-sdk",
    # Direct reference cloned over SSH: installing needs git plus SSH
    # credentials that can read this GitHub repository.
    "ragworkbench @ git+ssh://git@github.com/IBM/RagWorkbench.git",
    "pydantic>=2.0.0",
    "python-dotenv>=1.0.0",
    "openai>=2.0.0",
]

# Extras. `dev` bundles the test, formatting, lint and type-check tooling
# (installed with `uv sync --extra dev`).
[project.optional-dependencies]
dev = [
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
    "black>=26.1.0",
    "ruff>=0.14.14",
    "mypy>=1.14.0",
]

# src-layout: the root ("") package namespace is located in the `src` directory.
[tool.setuptools]
package-dir = { "" = "src" }

# Package discovery: search `src` and keep only `openrag_eval` and its subpackages.
[tool.setuptools.packages.find]
include = ["openrag_eval*"]
where = ["src"]

# Black formatter settings.
[tool.black]
line-length = 88
# Keep in step with `requires-python = ">=3.13"` in [project]; the previous
# py311 target described an interpreter the package cannot be installed on.
target-version = ["py313"]

# Ruff linter settings shared by all rule groups.
[tool.ruff]
line-length = 88
# Keep in step with `requires-python = ">=3.13"` in [project]; the previous
# py311 target did not match the minimum supported interpreter.
target-version = "py313"
src = ["src"]

# Enabled rule families and the individual rules opted out of.
[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # Pyflakes
    "I",   # isort
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
ignore = [
    "E501",  # line too long
    "B008",  # function call in argument defaults
]

# Package __init__ modules import names only to re-export them, so unused
# imports (F401) are expected there.
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]

# pytest configuration.
[tool.pytest.ini_options]
# Verbose output, no output capture, and unregistered markers are errors.
addopts = "-v -s --strict-markers"
# Where tests live and what is put on the import path for them.
testpaths = ["tests"]
pythonpath = ["src"]
# Collection naming patterns.
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
6 changes: 6 additions & 0 deletions evaluation/openrag_eval/src/openrag_eval/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""OpenRAG Evaluation Tool."""

# Import the boards subpackage eagerly so it is reachable as
# `openrag_eval.boards` right after `import openrag_eval`.
from openrag_eval import boards

# Package version. pyproject.toml declares the same value separately, so the
# two must be updated together.
__version__ = "0.1.0"
# Names exported by `from openrag_eval import *`.
__all__ = ["boards"]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Boards package for OpenRAG evaluation."""
Loading
Loading