Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions build-hooks/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,14 @@
import enum
import functools
import re
from typing import (
TYPE_CHECKING,
Callable,
Generic,
MutableSequence,
Optional,
Type,
TypeVar,
overload,
)
from typing import TYPE_CHECKING
from typing import Callable
from typing import Generic
from typing import MutableSequence
from typing import Optional
from typing import Type
from typing import TypeVar
from typing import overload

try:
from typing import Self
Expand Down
12 changes: 10 additions & 2 deletions demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,16 @@
" SELECT * FROM read_csv('{file_path}', delim='\\t', header=false, {bed_columns})\n",
" \"\"\")\n",
"\n",
"print(\"Features A:\", conn.execute(\"SELECT COUNT(*) FROM features_a\").fetchone()[0], \"intervals\")\n",
"print(\"Features B:\", conn.execute(\"SELECT COUNT(*) FROM features_b\").fetchone()[0], \"intervals\")\n",
"print(\n",
" \"Features A:\",\n",
" conn.execute(\"SELECT COUNT(*) FROM features_a\").fetchone()[0],\n",
" \"intervals\",\n",
")\n",
"print(\n",
" \"Features B:\",\n",
" conn.execute(\"SELECT COUNT(*) FROM features_b\").fetchone()[0],\n",
" \"intervals\",\n",
")\n",
"conn.execute(\"SELECT * FROM features_a LIMIT 5\").fetchdf()"
]
},
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,4 @@
# },
"repository_url": "https://github.com/abdenlab/giql",
"use_repository_button": True,
}
}
4 changes: 1 addition & 3 deletions src/giql/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,7 @@ def _split_named_and_positional(args):
positional_args = []
for arg in args:
if isinstance(arg, (exp.PropertyEQ, exp.Kwarg)):
param_name = (
arg.this.name if hasattr(arg.this, "name") else str(arg.this)
)
param_name = arg.this.name if hasattr(arg.this, "name") else str(arg.this)
kwargs[param_name.lower()] = arg.expression
else:
positional_args.append(arg)
Expand Down
29 changes: 23 additions & 6 deletions src/giql/mcp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,21 @@
"description": "Find the k-nearest genomic features to a reference position",
"syntax": "CROSS JOIN LATERAL NEAREST(table, reference=interval, k=5)",
"parameters": [
{"name": "target_table", "description": "Table to search for nearest features"},
{
"name": "target_table",
"description": "Table to search for nearest features",
},
{"name": "reference", "description": "Reference position or column"},
{"name": "k", "description": "Number of nearest neighbors (default: 1)"},
{"name": "max_distance", "description": "Maximum distance threshold (optional)"},
{
"name": "max_distance",
"description": "Maximum distance threshold (optional)",
},
{"name": "stranded", "description": "Same-strand only (default: false)"},
{"name": "signed", "description": "Return signed distances (default: false)"},
{
"name": "signed",
"description": "Return signed distances (default: false)",
},
],
"returns": "Rows from target table with distance column",
"example": "SELECT * FROM peaks CROSS JOIN LATERAL NEAREST(genes, reference=peaks.interval, k=3) AS nearest",
Expand All @@ -91,7 +100,10 @@
"syntax": "CLUSTER(interval) AS cluster_id",
"parameters": [
{"name": "interval", "description": "Genomic column to cluster"},
{"name": "distance", "description": "Max gap to consider same cluster (default: 0)"},
{
"name": "distance",
"description": "Max gap to consider same cluster (default: 0)",
},
{"name": "stranded", "description": "Cluster by strand (default: false)"},
],
"returns": "Integer cluster ID",
Expand Down Expand Up @@ -333,7 +345,10 @@ def explain_operator(name: str) -> dict[str, Any]:
name_upper = name.upper().strip()

if name_upper not in OPERATORS:
return {"error": f"Unknown operator: {name}", "available": list(OPERATORS.keys())}
return {
"error": f"Unknown operator: {name}",
"available": list(OPERATORS.keys()),
}

op = OPERATORS[name_upper]

Expand All @@ -346,7 +361,9 @@ def explain_operator(name: str) -> dict[str, Any]:
pattern = rf"^{name_upper}\n[~=\-]+\n(.*?)(?=\n[A-Z]+\n[~=\-]+|\Z)"
match = re.search(pattern, content, re.MULTILINE | re.DOTALL)
if match:
full_docs = f"{name_upper}\n{'=' * len(name_upper)}\n{match.group(1).strip()}"
full_docs = (
f"{name_upper}\n{'=' * len(name_upper)}\n{match.group(1).strip()}"
)

return {
"name": name_upper,
Expand Down
2 changes: 1 addition & 1 deletion src/giql/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class Table:
coordinate_system="1based",
interval_type="closed",
),
]
],
)
"""

Expand Down
4 changes: 2 additions & 2 deletions src/giql/transpile.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def transpile(

sql = transpile(
"SELECT * FROM peaks WHERE interval INTERSECTS 'chr1:1000-2000'",
tables=["peaks"]
tables=["peaks"],
)

Custom table configuration::
Expand All @@ -92,7 +92,7 @@ def transpile(
start_col="start",
end_col="end",
)
]
],
)
"""
# Build tables container
Expand Down
8 changes: 6 additions & 2 deletions tests/generators/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,9 @@ def test_giqlnearest_sql_closed_intervals(self):
tables = Tables()
tables.register("genes_closed", Table("genes_closed", interval_type="closed"))

sql = "SELECT * FROM NEAREST(genes_closed, reference := 'chr1:1000-2000', k := 3)"
sql = (
"SELECT * FROM NEAREST(genes_closed, reference := 'chr1:1000-2000', k := 3)"
)
ast = parse_one(sql, dialect=GIQLDialect)

generator = BaseGIQLGenerator(tables=tables)
Expand Down Expand Up @@ -988,7 +990,9 @@ def test_giqlnearest_sql_target_not_in_tables(self, tables_with_peaks_and_genes)
WHEN giqlnearest_sql is called
THEN ValueError is raised listing available tables.
"""
sql = "SELECT * FROM NEAREST(unknown_table, reference := 'chr1:1000-2000', k := 3)"
sql = (
"SELECT * FROM NEAREST(unknown_table, reference := 'chr1:1000-2000', k := 3)"
)
ast = parse_one(sql, dialect=GIQLDialect)

generator = BaseGIQLGenerator(tables=tables_with_peaks_and_genes)
Expand Down
Empty file added tests/integration/__init__.py
Empty file.
1 change: 1 addition & 0 deletions tests/integration/bedtools/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Bedtools integration tests for GIQL operator correctness."""
57 changes: 57 additions & 0 deletions tests/integration/bedtools/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Pytest fixtures for bedtools integration tests."""

import shutil

import pytest

from giql import transpile

# Optional dependencies: pytest.importorskip skips instead of failing when
# the module cannot be imported. The duckdb module object is kept because the
# fixtures below call duckdb.connect().
duckdb = pytest.importorskip("duckdb")
pytest.importorskip("pybedtools")

# Skip when no `bedtools` executable can be located on PATH.
if not shutil.which("bedtools"):
pytest.skip(
"bedtools binary not found in PATH",
allow_module_level=True,
)

# NOTE(review): pytest documents `pytestmark` as a module-level mark for test
# modules; confirm it has the intended effect when set in a conftest.py.
pytestmark = pytest.mark.integration

# Imported after the skip checks above, hence the E402 (import not at top of
# file) suppression.
from .utils.duckdb_loader import load_intervals # noqa: E402


@pytest.fixture(scope="function")
def duckdb_connection():
    """Yield a fresh in-memory DuckDB connection and close it afterwards.

    The fixture is function-scoped, so every test receives its own
    ``:memory:`` database and nothing is shared between tests.
    """
    connection = duckdb.connect(":memory:")
    yield connection
    # Teardown: runs once the requesting test has finished.
    connection.close()


@pytest.fixture(scope="function")
def giql_query(duckdb_connection):
    """Return a callable that loads tables, transpiles GIQL, and runs it.

    The returned helper takes the GIQL query text, a keyword-only ``tables``
    list that is forwarded to ``transpile``, and one keyword argument per
    table to populate (table name -> iterable of objects exposing
    ``to_tuple()``). It returns the rows from ``fetchall()``.

    Usage::

        result = giql_query(
            "SELECT * FROM t WHERE interval INTERSECTS 'chr1:1-100'",
            tables=["t"],
            t=[GenomicInterval("chr1", 50, 150, "x", 0, "+")],
        )
    """

    def _load_and_execute(query: str, *, tables: list[str], **table_data):
        # Load every supplied table into the per-test connection first.
        for table_name, records in table_data.items():
            rows = [record.to_tuple() for record in records]
            load_intervals(duckdb_connection, table_name, rows)

        # Translate the GIQL text to SQL, then execute it.
        translated = transpile(query, tables=tables)
        cursor = duckdb_connection.execute(translated)
        return cursor.fetchall()

    return _load_and_execute
Loading
Loading