marcoloco23 · marcoloco23 · Mar 1, 2026 · Mar 1, 2026 · Mar 1, 2026
diff --git a/apps/api/src/evograph/api/routes/species.py b/apps/api/src/evograph/api/routes/species.py
@@ -0,0 +1,195 @@
+"""Browse species endpoint with filtering and pagination."""
+
+from fastapi import APIRouter, Depends, Query
+from sqlalchemy import func, or_
+from sqlalchemy.orm import Session
+
+from evograph.api.schemas.taxa import SpeciesBrowsePage, SpeciesSummary
+from evograph.db.models import Edge, NodeMedia, Sequence, Taxon
+from evograph.db.session import get_db
+
+router = APIRouter(tags=["species"])
+
+
+@router.get("/species", response_model=SpeciesBrowsePage)
+def browse_species(
+    offset: int = Query(0, ge=0),
+    limit: int = Query(50, ge=1, le=100),
+    has_sequences: bool | None = Query(None, description="Filter to species with/without COI sequences"),
+    has_edges: bool | None = Query(None, description="Filter to species with/without MI edges"),
+    is_extinct: bool | None = Query(None, description="Filter by extinct status"),
+    clade: int | None = Query(None, description="Filter to descendants of this ott_id"),
+    sort: str = Query("name", pattern="^(name|edges)$", description="Sort by name or edge count"),
+    db: Session = Depends(get_db),
+) -> SpeciesBrowsePage:
+    """Browse species with optional filters.
+
+    Supports filtering by sequence availability, MI edge presence,
+    extinct status, and clade membership. Paginated with offset/limit.
+    \f
+    Text after the form feed is left out of the OpenAPI description by FastAPI.
+
+    Args:
+        offset: Number of matching rows to skip.
+        limit: Maximum number of rows to return (1-100).
+        has_sequences: True keeps species with a canonical sequence, False
+            keeps species without one, None disables the filter.
+        has_edges: True keeps species that are the source or destination of
+            at least one edge, False keeps species with none.
+        is_extinct: When set, keep only species whose flag equals it.
+        clade: When set, keep only species whose lineage contains this id.
+        sort: "name" (alphabetical) or "edges" (highest edge count first).
+        db: SQLAlchemy session injected through ``get_db``.
+
+    Returns:
+        The requested page plus ``total``, the pre-pagination match count.
+    """
+    # Base filter: only species-rank taxa
+    filters = [Taxon.rank == "species"]
+
+    if is_extinct is not None:
+        filters.append(Taxon.is_extinct == is_extinct)
+
+    # Clade membership: the clade's ott_id must appear in the lineage array
+    if clade is not None:
+        filters.append(Taxon.lineage.any(clade))
+
+    # Correlated EXISTS clauses, evaluated against each outer Taxon row
+    has_seq_subq = (
+        db.query(Sequence.ott_id)
+        .filter(Sequence.ott_id == Taxon.ott_id, Sequence.is_canonical.is_(True))
+        .correlate(Taxon)
+        .exists()
+    )
+
+    has_edge_subq = (
+        db.query(Edge.src_ott_id)
+        .filter(
+            or_(
+                Edge.src_ott_id == Taxon.ott_id,
+                Edge.dst_ott_id == Taxon.ott_id,
+            )
+        )
+        .correlate(Taxon)
+        .exists()
+    )
+
+    if has_sequences is not None:
+        filters.append(has_seq_subq if has_sequences else ~has_seq_subq)
+
+    if has_edges is not None:
+        filters.append(has_edge_subq if has_edges else ~has_edge_subq)
+
+    # Count total matching
+    total = db.query(func.count(Taxon.ott_id)).filter(*filters).scalar() or 0
+
+    base = db.query(Taxon).filter(*filters)
+
+    # ott_id closes every ORDER BY: nothing here guarantees unique names, and
+    # without a unique final key rows that tie may come back in a different
+    # order on each query, letting offset/limit pages repeat or skip species
+    # (assumes ott_id is unique per taxon, as its use as a lookup key implies).
+    if sort == "edges":
+        # Correlated edge count per species
+        edge_count_sq = (
+            db.query(func.count())
+            .filter(
+                or_(
+                    Edge.src_ott_id == Taxon.ott_id,
+                    Edge.dst_ott_id == Taxon.ott_id,
+                )
+            )
+            .correlate(Taxon)
+            .scalar_subquery()
+        )
+        base = base.order_by(edge_count_sq.desc(), Taxon.name, Taxon.ott_id)
+    else:
+        base = base.order_by(Taxon.name, Taxon.ott_id)
+
+    rows = base.offset(offset).limit(limit).all()
+
+    if not rows:
+        return SpeciesBrowsePage(items=[], total=total, offset=offset, limit=limit)
+
+    ott_ids = [t.ott_id for t in rows]
+
+    # Batch: images
+    media_rows = (
+        db.query(NodeMedia.ott_id, NodeMedia.image_url)
+        .filter(NodeMedia.ott_id.in_(ott_ids))
+        .all()
+    )
+    images: dict[int, str] = {ott: url for ott, url in media_rows}
+
+    # Batch: which have canonical sequences
+    seq_rows = (
+        db.query(Sequence.ott_id)
+        .filter(Sequence.ott_id.in_(ott_ids), Sequence.is_canonical.is_(True))
+        .all()
+    )
+    seq_ott_ids: set[int] = {r[0] for r in seq_rows}
+
+    # Batch: edge counts per species (edges as source plus edges as destination)
+    edge_counts: dict[int, int] = {}
+    for endpoint in (Edge.src_ott_id, Edge.dst_ott_id):
+        endpoint_counts = (
+            db.query(endpoint, func.count())
+            .filter(endpoint.in_(ott_ids))
+            .group_by(endpoint)
+            .all()
+        )
+        for ott, cnt in endpoint_counts:
+            edge_counts[ott] = edge_counts.get(ott, 0) + cnt
+
+    # Batch: family and order names from lineage arrays
+    all_ancestor_ids: set[int] = set()
+    for t in rows:
+        if t.lineage:
+            all_ancestor_ids.update(t.lineage)
+
+    # Fetch only family/order ancestors in one query
+    ancestor_map: dict[int, tuple[str, str]] = {}  # ott_id -> (name, rank)
+    if all_ancestor_ids:
+        ancestor_rows = (
+            db.query(Taxon.ott_id, Taxon.name, Taxon.rank)
+            .filter(
+                Taxon.ott_id.in_(all_ancestor_ids),
+                Taxon.rank.in_(["family", "order"]),
+            )
+            .all()
+        )
+        ancestor_map = {ott: (name, rank) for ott, name, rank in ancestor_rows}
+
+    # Build per-species family/order lookup
+    species_family: dict[int, str] = {}
+    species_order: dict[int, str] = {}
+    for t in rows:
+        for anc_id in t.lineage or ():
+            info = ancestor_map.get(anc_id)
+            if info is None:
+                continue
+            anc_name, anc_rank = info
+            if anc_rank == "family":
+                species_family[t.ott_id] = anc_name
+            elif anc_rank == "order":
+                species_order[t.ott_id] = anc_name
+
+    return SpeciesBrowsePage(
+        items=[
+            SpeciesSummary(
+                ott_id=t.ott_id,
+                name=t.name,
+                rank=t.rank,
+                image_url=images.get(t.ott_id),
+                is_extinct=t.is_extinct,
+                has_sequence=t.ott_id in seq_ott_ids,
+                edge_count=edge_counts.get(t.ott_id, 0),
+                family_name=species_family.get(t.ott_id),
+                order_name=species_order.get(t.ott_id),
+            )
+            for t in rows
+        ],
+        total=total,
+        offset=offset,
+        limit=limit,
+    )
diff --git a/apps/api/src/evograph/api/routes/taxa.py b/apps/api/src/evograph/api/routes/taxa.py
@@ -1,7 +1,7 @@
 """Taxon detail endpoint with paginated children."""
 
 from fastapi import APIRouter, Depends, HTTPException, Query
-from sqlalchemy import func, text
+from sqlalchemy import case, func, text
 from sqlalchemy.orm import Session
 
 from evograph.api.schemas.taxa import ChildrenPage, TaxonDetail, TaxonSummary
@@ -12,6 +12,20 @@
 
 _INLINE_CHILDREN_LIMIT = 100
 
+# Children are ordered coarsest rank first, so drilling into the tree opens
+# on the broad groupings (orders, families) instead of an alphabetical jumble
+# of species and subspecies. Each listed rank maps to its position in the
+# tuple below; any rank that is not listed falls through to 7 and sorts last.
+_RANK_SORT_ORDER = case(
+    *(
+        (Taxon.rank == rank, position)
+        for position, rank in enumerate(
+            ("class", "order", "family", "subfamily", "genus", "species", "subspecies")
+        )
+    ),
+    else_=7,
+)
+
 
 def _fetch_lineage(db: Session, ott_id: int) -> list[TaxonSummary]:
     """Fetch full lineage (root → ... → parent) using a recursive CTE.
@@ -62,10 +76,11 @@ def get_taxon(
     ) or 0
 
     # Get children (limited for inline display)
+    # Sort by rank importance so orders/families appear before species/subspecies
     children = (
         db.query(Taxon)
         .filter(Taxon.parent_ott_id == ott_id)
-        .order_by(Taxon.name)
+        .order_by(_RANK_SORT_ORDER, Taxon.name)
         .limit(_INLINE_CHILDREN_LIMIT)
         .all()
     )
@@ -159,7 +174,7 @@ def get_children(
     children = (
         db.query(Taxon)
         .filter(Taxon.parent_ott_id == ott_id)
-        .order_by(Taxon.name)
+        .order_by(_RANK_SORT_ORDER, Taxon.name)
         .offset(offset)
         .limit(limit)
         .all()

diff --git a/apps/api/src/evograph/api/schemas/taxa.py b/apps/api/src/evograph/api/schemas/taxa.py
@@ -37,3 +37,22 @@ class SearchPage(BaseModel):
     items: list[TaxonSummary]
     total: int
     limit: int
+
+
+class SpeciesSummary(BaseModel):  # one row of the species browse listing
+    ott_id: int  # taxon identifier; the key used for the media/sequence/edge lookups
+    name: str
+    rank: str  # the browse endpoint only selects taxa whose rank is "species"
+    image_url: str | None = None  # NodeMedia.image_url for this ott_id, when one exists
+    is_extinct: bool | None = None  # copied from the taxon row; None is allowed
+    has_sequence: bool = False  # True when a canonical Sequence row exists for the taxon
+    edge_count: int = 0  # edges with this taxon as source plus edges with it as destination
+    family_name: str | None = None  # name of the "family"-rank taxon found in the lineage
+    order_name: str | None = None  # name of the "order"-rank taxon found in the lineage
+
+
+class SpeciesBrowsePage(BaseModel):  # paginated response of the species browse endpoint
+    items: list[SpeciesSummary]  # rows of the requested page; empty when nothing is in range
+    total: int  # count of all rows matching the filters, ignoring offset/limit
+    offset: int  # echo of the offset the page was requested with
+    limit: int  # echo of the limit the page was requested with
diff --git a/apps/api/src/evograph/main.py b/apps/api/src/evograph/main.py
@@ -6,7 +6,7 @@
 from fastapi.middleware.gzip import GZipMiddleware
 from sqlalchemy import text
 
-from evograph.api.routes import graph, jobs, search, sequences, stats, taxa
+from evograph.api.routes import graph, jobs, search, sequences, species, stats, taxa
 from evograph.db.session import SessionLocal, engine
 from evograph.logging_config import configure_logging
 from evograph.middleware.rate_limit import RateLimitMiddleware
@@ -62,6 +62,7 @@ async def lifespan(app: FastAPI):
 app.include_router(taxa.router, prefix="/v1")
 app.include_router(graph.router, prefix="/v1")
 app.include_router(sequences.router, prefix="/v1")
+app.include_router(species.router, prefix="/v1")
 app.include_router(stats.router, prefix="/v1")
 app.include_router(jobs.router, prefix="/v1")
 

diff --git a/apps/api/tests/conftest.py b/apps/api/tests/conftest.py
@@ -113,6 +113,16 @@ def group_by(self, *args, **kwargs):
     def select_from(self, *args, **kwargs):
         return self
 
+    def correlate(self, *args, **kwargs):  # arguments are accepted and ignored
+        return self  # hand back the same mock so the chained query calls continue
+
+    def scalar_subquery(self):  # no real subquery is built in the mock
+        return self  # the mock stands in for the subquery, so .desc() stays callable
+
+    def desc(self):
+        """Support ORDER BY ... DESC on scalar subqueries; returns the mock itself."""
+        return self
+
     def exists(self):
         """Return an exists clause marker for use in outer query."""
         return MockExistsClause()