Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 195 additions & 0 deletions apps/api/src/evograph/api/routes/species.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
"""Browse species endpoint with filtering and pagination."""

from fastapi import APIRouter, Depends, Query
from sqlalchemy import func, or_
from sqlalchemy.orm import Session

from evograph.api.schemas.taxa import SpeciesBrowsePage, SpeciesSummary
from evograph.db.models import Edge, NodeMedia, Sequence, Taxon
from evograph.db.session import get_db

router = APIRouter(tags=["species"])


@router.get("/species", response_model=SpeciesBrowsePage)
def browse_species(
    offset: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=100),
    has_sequences: bool | None = Query(None, description="Filter to species with/without COI sequences"),
    has_edges: bool | None = Query(None, description="Filter to species with/without MI edges"),
    is_extinct: bool | None = Query(None, description="Filter by extinct status"),
    clade: int | None = Query(None, description="Filter to descendants of this ott_id"),
    sort: str = Query("name", pattern="^(name|edges)$", description="Sort by name or edge count"),
    db: Session = Depends(get_db),
) -> SpeciesBrowsePage:
    """Browse species-rank taxa with optional filters, sorting and pagination.

    Args:
        offset: Number of matching species to skip (>= 0).
        limit: Maximum number of species to return (1-100).
        has_sequences: ``True`` keeps only species that have a canonical
            sequence, ``False`` keeps only species without one, ``None``
            disables the filter.
        has_edges: ``True`` keeps only species that are the source or
            destination of at least one edge, ``False`` keeps only species
            with none, ``None`` disables the filter.
        is_extinct: When set, keeps only species whose ``is_extinct`` flag
            equals this value.
        clade: When set, keeps only species whose ``lineage`` contains this
            ott_id.
        sort: ``"name"`` orders by name; ``"edges"`` orders by descending
            edge count, then name. Ties are always broken by ``ott_id``.
        db: Database session injected by FastAPI.

    Returns:
        One page of species summaries together with the total number of
        matching species and the ``offset``/``limit`` that were applied.
    """
    # Base filter: only species-rank taxa
    filters = [Taxon.rank == "species"]

    if is_extinct is not None:
        filters.append(Taxon.is_extinct == is_extinct)

    if clade is not None:
        # Clade membership: the species' lineage array contains the ott_id.
        filters.append(Taxon.lineage.any(clade))

    # Correlated EXISTS clauses for sequence/edge presence. Both are always
    # built (even when unused) so the sequence of db.query() calls stays fixed.
    has_seq_subq = (
        db.query(Sequence.ott_id)
        .filter(Sequence.ott_id == Taxon.ott_id, Sequence.is_canonical.is_(True))
        .correlate(Taxon)
        .exists()
    )
    has_edge_subq = (
        db.query(Edge.src_ott_id)
        .filter(
            or_(
                Edge.src_ott_id == Taxon.ott_id,
                Edge.dst_ott_id == Taxon.ott_id,
            )
        )
        .correlate(Taxon)
        .exists()
    )

    if has_sequences is True:
        filters.append(has_seq_subq)
    elif has_sequences is False:
        filters.append(~has_seq_subq)

    if has_edges is True:
        filters.append(has_edge_subq)
    elif has_edges is False:
        filters.append(~has_edge_subq)

    # Total number of matches across all pages.
    total = db.query(func.count(Taxon.ott_id)).filter(*filters).scalar() or 0

    base = db.query(Taxon).filter(*filters)

    # Taxon.ott_id is appended as the last sort key in both branches: names
    # (and edge counts) can tie, and OFFSET/LIMIT over a non-total order may
    # repeat or skip rows between pages.
    # NOTE(review): assumes ott_id is unique per taxon - confirm against the model.
    if sort == "edges":
        # Correlated scalar subquery: number of edges touching each species.
        edge_count_sq = (
            db.query(func.count())
            .filter(
                or_(
                    Edge.src_ott_id == Taxon.ott_id,
                    Edge.dst_ott_id == Taxon.ott_id,
                )
            )
            .correlate(Taxon)
            .scalar_subquery()
        )
        base = base.order_by(edge_count_sq.desc(), Taxon.name, Taxon.ott_id)
    else:
        base = base.order_by(Taxon.name, Taxon.ott_id)

    rows = base.offset(offset).limit(limit).all()

    if not rows:
        return SpeciesBrowsePage(items=[], total=total, offset=offset, limit=limit)

    ott_ids = [t.ott_id for t in rows]

    # Batch: images (if a species has several media rows, the last one wins)
    media_rows = (
        db.query(NodeMedia.ott_id, NodeMedia.image_url)
        .filter(NodeMedia.ott_id.in_(ott_ids))
        .all()
    )
    images: dict[int, str] = {ott: url for ott, url in media_rows}

    # Batch: which species on this page have a canonical sequence
    seq_rows = (
        db.query(Sequence.ott_id)
        .filter(Sequence.ott_id.in_(ott_ids), Sequence.is_canonical.is_(True))
        .all()
    )
    seq_ott_ids: set[int] = {r[0] for r in seq_rows}

    # Batch: edge counts per species, summing appearances as source and as
    # destination (source side queried first, then destination side).
    edge_counts: dict[int, int] = {}
    for endpoint in (Edge.src_ott_id, Edge.dst_ott_id):
        per_endpoint = (
            db.query(endpoint, func.count())
            .filter(endpoint.in_(ott_ids))
            .group_by(endpoint)
            .all()
        )
        for ott, cnt in per_endpoint:
            edge_counts[ott] = edge_counts.get(ott, 0) + cnt

    # Batch: family and order names, resolved from the lineage arrays.
    all_ancestor_ids: set[int] = {
        anc_id for t in rows if t.lineage for anc_id in t.lineage
    }

    # Fetch only family/order ancestors in one query: ott_id -> (name, rank)
    ancestor_map: dict[int, tuple[str, str]] = {}
    if all_ancestor_ids:
        ancestor_rows = (
            db.query(Taxon.ott_id, Taxon.name, Taxon.rank)
            .filter(
                Taxon.ott_id.in_(all_ancestor_ids),
                Taxon.rank.in_(["family", "order"]),
            )
            .all()
        )
        ancestor_map = {ott: (name, rank) for ott, name, rank in ancestor_rows}

    # Per-species family/order lookup. If a lineage holds more than one
    # ancestor of the same rank, the one appearing last in the array wins.
    species_family: dict[int, str] = {}
    species_order: dict[int, str] = {}
    for t in rows:
        if not t.lineage:
            continue
        for anc_id in t.lineage:
            info = ancestor_map.get(anc_id)
            if not info:
                continue
            anc_name, anc_rank = info
            if anc_rank == "family":
                species_family[t.ott_id] = anc_name
            elif anc_rank == "order":
                species_order[t.ott_id] = anc_name

    items = [
        SpeciesSummary(
            ott_id=t.ott_id,
            name=t.name,
            rank=t.rank,
            image_url=images.get(t.ott_id),
            is_extinct=t.is_extinct,
            has_sequence=t.ott_id in seq_ott_ids,
            edge_count=edge_counts.get(t.ott_id, 0),
            family_name=species_family.get(t.ott_id),
            order_name=species_order.get(t.ott_id),
        )
        for t in rows
    ]
    return SpeciesBrowsePage(items=items, total=total, offset=offset, limit=limit)
21 changes: 18 additions & 3 deletions apps/api/src/evograph/api/routes/taxa.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Taxon detail endpoint with paginated children."""

from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import func, text
from sqlalchemy import case, func, text
from sqlalchemy.orm import Session

from evograph.api.schemas.taxa import ChildrenPage, TaxonDetail, TaxonSummary
Expand All @@ -12,6 +12,20 @@

_INLINE_CHILDREN_LIMIT = 100

# Children are ordered by taxonomic rank before name, so broad groupings
# (orders, families) are listed ahead of species and subspecies instead of
# everything being mixed alphabetically. A rank's position in this tuple is
# its sort key; any rank not listed sorts after all of them.
_RANKS_BY_PRIORITY = (
    "class",
    "order",
    "family",
    "subfamily",
    "genus",
    "species",
    "subspecies",
)
_RANK_SORT_ORDER = case(
    *[
        (Taxon.rank == rank_name, position)
        for position, rank_name in enumerate(_RANKS_BY_PRIORITY)
    ],
    else_=len(_RANKS_BY_PRIORITY),
)


def _fetch_lineage(db: Session, ott_id: int) -> list[TaxonSummary]:
"""Fetch full lineage (root → ... → parent) using a recursive CTE.
Expand Down Expand Up @@ -62,10 +76,11 @@ def get_taxon(
) or 0

# Get children (limited for inline display)
# Sort by rank importance so orders/families appear before species/subspecies
children = (
db.query(Taxon)
.filter(Taxon.parent_ott_id == ott_id)
.order_by(Taxon.name)
.order_by(_RANK_SORT_ORDER, Taxon.name)
.limit(_INLINE_CHILDREN_LIMIT)
.all()
)
Expand Down Expand Up @@ -159,7 +174,7 @@ def get_children(
children = (
db.query(Taxon)
.filter(Taxon.parent_ott_id == ott_id)
.order_by(Taxon.name)
.order_by(_RANK_SORT_ORDER, Taxon.name)
.offset(offset)
.limit(limit)
.all()
Expand Down
19 changes: 19 additions & 0 deletions apps/api/src/evograph/api/schemas/taxa.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,22 @@ class SearchPage(BaseModel):
items: list[TaxonSummary]
total: int
limit: int


class SpeciesSummary(BaseModel):
    # One species entry in a SpeciesBrowsePage. Only ott_id, name and rank
    # are required; every other field has a default.
    ott_id: int
    name: str
    rank: str
    # URL of an image for the species; None when no image is available.
    image_url: str | None = None
    # None when the extinct status is not known.
    is_extinct: bool | None = None
    # Whether the species has a sequence (the browse endpoint sets this from
    # canonical sequences only).
    has_sequence: bool = False
    # Number of edges attached to the species (the browse endpoint counts
    # edges where the species is either the source or the destination).
    edge_count: int = 0
    # Names of the family / order the species belongs to; None when unresolved.
    family_name: str | None = None
    order_name: str | None = None


class SpeciesBrowsePage(BaseModel):
    # One page of results from the species browse endpoint.
    # The species on this page.
    items: list[SpeciesSummary]
    # Number of species matching the filters across all pages, not len(items).
    total: int
    # The offset and limit that were applied to produce this page.
    offset: int
    limit: int
limit: int
3 changes: 2 additions & 1 deletion apps/api/src/evograph/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from fastapi.middleware.gzip import GZipMiddleware
from sqlalchemy import text

from evograph.api.routes import graph, jobs, search, sequences, stats, taxa
from evograph.api.routes import graph, jobs, search, sequences, species, stats, taxa
from evograph.db.session import SessionLocal, engine
from evograph.logging_config import configure_logging
from evograph.middleware.rate_limit import RateLimitMiddleware
Expand Down Expand Up @@ -62,6 +62,7 @@ async def lifespan(app: FastAPI):
app.include_router(taxa.router, prefix="/v1")
app.include_router(graph.router, prefix="/v1")
app.include_router(sequences.router, prefix="/v1")
app.include_router(species.router, prefix="/v1")
app.include_router(stats.router, prefix="/v1")
app.include_router(jobs.router, prefix="/v1")

Expand Down
10 changes: 10 additions & 0 deletions apps/api/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,16 @@ def group_by(self, *args, **kwargs):
def select_from(self, *args, **kwargs):
    """Accept and ignore any arguments; return self so call chains continue."""
    return self

def correlate(self, *args, **kwargs):
    """Accept and ignore any arguments; return self so call chains continue.

    Presumably stands in for SQLAlchemy's ``Query.correlate`` in tests -
    confirm against the enclosing mock class.
    """
    return self

def scalar_subquery(self):
    """Return self, so the mock itself serves as the scalar subquery object."""
    return self

def desc(self):
    """Support ORDER BY ... DESC on scalar subqueries.

    Returns self unchanged; no ordering is actually applied by this mock.
    """
    return self

def exists(self):
    """Return an exists clause marker for use in outer query.

    A new ``MockExistsClause`` is created on every call.
    """
    return MockExistsClause()
Expand Down
Loading
Loading