diff --git a/apps/api/src/evograph/api/routes/species.py b/apps/api/src/evograph/api/routes/species.py
new file mode 100644
index 0000000..0c487b1
--- /dev/null
+++ b/apps/api/src/evograph/api/routes/species.py
@@ -0,0 +1,195 @@
+"""Browse species endpoint with filtering and pagination."""
+
+from fastapi import APIRouter, Depends, Query
+from sqlalchemy import func, or_
+from sqlalchemy.orm import Session
+
+from evograph.api.schemas.taxa import SpeciesBrowsePage, SpeciesSummary
+from evograph.db.models import Edge, NodeMedia, Sequence, Taxon
+from evograph.db.session import get_db
+
+router = APIRouter(tags=["species"])
+
+
+@router.get("/species", response_model=SpeciesBrowsePage)
+def browse_species(
+    offset: int = Query(0, ge=0),
+    limit: int = Query(50, ge=1, le=100),
+    has_sequences: bool | None = Query(None, description="Filter to species with/without COI sequences"),
+    has_edges: bool | None = Query(None, description="Filter to species with/without MI edges"),
+    is_extinct: bool | None = Query(None, description="Filter by extinct status"),
+    clade: int | None = Query(None, description="Filter to descendants of this ott_id"),
+    sort: str = Query("name", pattern="^(name|edges)$", description="Sort by name or edge count"),
+    db: Session = Depends(get_db),
+) -> SpeciesBrowsePage:
+    """Browse species with optional filters.
+
+    Supports filtering by sequence availability, MI edge presence,
+    extinct status, and clade membership. Paginated with offset/limit.
+
+    Args:
+        offset: Number of matching species to skip.
+        limit: Maximum number of species to return (1-100).
+        has_sequences: When set, keep only species that do (True) or do
+            not (False) have a canonical sequence.
+        has_edges: When set, keep only species that do (True) or do not
+            (False) appear as either endpoint of an edge.
+        is_extinct: When set, keep only species whose extinct flag
+            equals this value.
+        clade: When set, keep only species whose lineage contains this
+            ott_id.
+        sort: "name" orders by name; "edges" orders by descending edge
+            count, then name.
+        db: Database session.
+
+    Returns:
+        The requested page of species summaries together with the total
+        number of species matching the filters (independent of paging).
+    """
+    # Base filter: only species-rank taxa
+    filters = [Taxon.rank == "species"]
+
+    if is_extinct is not None:
+        filters.append(Taxon.is_extinct == is_extinct)
+
+    # Clade membership: the requested ott_id must appear in the lineage array.
+    if clade is not None:
+        filters.append(Taxon.lineage.any(clade))
+
+    # An edge "touches" a taxon when the taxon is either of its endpoints.
+    # The same predicate drives the has_edges filter and the edge-count sort.
+    touches_taxon = or_(
+        Edge.src_ott_id == Taxon.ott_id,
+        Edge.dst_ott_id == Taxon.ott_id,
+    )
+
+    # Correlated EXISTS subqueries for sequence/edge presence
+    has_seq_subq = (
+        db.query(Sequence.ott_id)
+        .filter(Sequence.ott_id == Taxon.ott_id, Sequence.is_canonical.is_(True))
+        .correlate(Taxon)
+        .exists()
+    )
+    has_edge_subq = (
+        db.query(Edge.src_ott_id)
+        .filter(touches_taxon)
+        .correlate(Taxon)
+        .exists()
+    )
+
+    if has_sequences is not None:
+        filters.append(has_seq_subq if has_sequences else ~has_seq_subq)
+    if has_edges is not None:
+        filters.append(has_edge_subq if has_edges else ~has_edge_subq)
+
+    # Count total matching
+    total = (
+        db.query(func.count(Taxon.ott_id))
+        .filter(*filters)
+        .scalar()
+    ) or 0
+
+    # Build query for fetching rows
+    base = db.query(Taxon).filter(*filters)
+
+    # Sort order. Taxon.ott_id is always the last key: offset/limit paging
+    # is only stable when the ordering is total, and names (or edge counts)
+    # alone can tie, which would let rows repeat or go missing across pages.
+    # (Assumes ott_id is unique per taxon; it is used as the row key below.)
+    if sort == "edges":
+        edge_count_sq = (
+            db.query(func.count())
+            .filter(touches_taxon)
+            .correlate(Taxon)
+            .scalar_subquery()
+        )
+        base = base.order_by(edge_count_sq.desc(), Taxon.name, Taxon.ott_id)
+    else:
+        base = base.order_by(Taxon.name, Taxon.ott_id)
+
+    # Fetch page
+    rows = base.offset(offset).limit(limit).all()
+
+    if not rows:
+        return SpeciesBrowsePage(items=[], total=total, offset=offset, limit=limit)
+
+    ott_ids = [t.ott_id for t in rows]
+
+    # Batch: images (if a taxon has several media rows, the last one read wins)
+    media_rows = (
+        db.query(NodeMedia.ott_id, NodeMedia.image_url)
+        .filter(NodeMedia.ott_id.in_(ott_ids))
+        .all()
+    )
+    images: dict[int, str] = {ott: url for ott, url in media_rows}
+
+    # Batch: which have canonical sequences
+    seq_rows = (
+        db.query(Sequence.ott_id)
+        .filter(Sequence.ott_id.in_(ott_ids), Sequence.is_canonical.is_(True))
+        .all()
+    )
+    seq_ott_ids: set[int] = {r[0] for r in seq_rows}
+
+    # Batch: edge counts per species, summed over both endpoint columns.
+    # NOTE(review): an edge whose src and dst are the same taxon is counted
+    # twice here but only once by the sort subquery above.
+    edge_counts: dict[int, int] = {}
+    src_counts = (
+        db.query(Edge.src_ott_id, func.count())
+        .filter(Edge.src_ott_id.in_(ott_ids))
+        .group_by(Edge.src_ott_id)
+        .all()
+    )
+    for ott, cnt in src_counts:
+        edge_counts[ott] = edge_counts.get(ott, 0) + cnt
+    dst_counts = (
+        db.query(Edge.dst_ott_id, func.count())
+        .filter(Edge.dst_ott_id.in_(ott_ids))
+        .group_by(Edge.dst_ott_id)
+        .all()
+    )
+    for ott, cnt in dst_counts:
+        edge_counts[ott] = edge_counts.get(ott, 0) + cnt
+
+    # Batch: family and order names from lineage arrays.
+    # Collect every ancestor ott_id referenced by the page's lineages.
+    all_ancestor_ids: set[int] = set()
+    for t in rows:
+        if t.lineage:
+            all_ancestor_ids.update(t.lineage)
+
+    # Fetch only family/order ancestors in one query
+    ancestor_map: dict[int, tuple[str, str]] = {}  # ott_id -> (name, rank)
+    if all_ancestor_ids:
+        ancestor_rows = (
+            db.query(Taxon.ott_id, Taxon.name, Taxon.rank)
+            .filter(
+                Taxon.ott_id.in_(all_ancestor_ids),
+                Taxon.rank.in_(["family", "order"]),
+            )
+            .all()
+        )
+        ancestor_map = {ott: (name, rank) for ott, name, rank in ancestor_rows}
+
+    # Build per-species family/order lookup. If a lineage holds more than one
+    # ancestor of the same rank, the one appearing last in the array wins.
+    species_family: dict[int, str] = {}
+    species_order: dict[int, str] = {}
+    for t in rows:
+        if not t.lineage:
+            continue
+        for anc_id in t.lineage:
+            info = ancestor_map.get(anc_id)
+            if info is None:
+                continue
+            anc_name, anc_rank = info
+            if anc_rank == "family":
+                species_family[t.ott_id] = anc_name
+            elif anc_rank == "order":
+                species_order[t.ott_id] = anc_name
+
+    return SpeciesBrowsePage(
+        items=[
+            SpeciesSummary(
+                ott_id=t.ott_id,
+                name=t.name,
+                rank=t.rank,
+                image_url=images.get(t.ott_id),
+                is_extinct=t.is_extinct,
+                has_sequence=t.ott_id in seq_ott_ids,
+                edge_count=edge_counts.get(t.ott_id, 0),
+                family_name=species_family.get(t.ott_id),
+                order_name=species_order.get(t.ott_id),
+            )
+            for t in rows
+        ],
+        total=total,
+        offset=offset,
+        limit=limit,
+    )
diff --git a/apps/api/src/evograph/api/routes/taxa.py b/apps/api/src/evograph/api/routes/taxa.py
index 0d1a502..f43979f 100644
--- a/apps/api/src/evograph/api/routes/taxa.py
+++ b/apps/api/src/evograph/api/routes/taxa.py
@@ -1,7 +1,7 @@
"""Taxon detail endpoint with paginated children."""
from fastapi import APIRouter, Depends, HTTPException, Query
-from sqlalchemy import func, text
+from sqlalchemy import case, func, text
from sqlalchemy.orm import Session
from evograph.api.schemas.taxa import ChildrenPage, TaxonDetail, TaxonSummary
@@ -12,6 +12,20 @@
_INLINE_CHILDREN_LIMIT = 100
+# Sort key for a taxon's children: broader ranks get smaller keys, so
+# groupings such as orders and families are listed ahead of species and
+# subspecies instead of everything being interleaved alphabetically.
+_RANK_SORT_ORDER = case(
+    *(
+        (Taxon.rank == rank_name, position)
+        for position, rank_name in enumerate(
+            ("class", "order", "family", "subfamily", "genus", "species", "subspecies")
+        )
+    ),
+    # Any rank not listed above sorts after all of the known ranks.
+    else_=7,
+)
+
def _fetch_lineage(db: Session, ott_id: int) -> list[TaxonSummary]:
"""Fetch full lineage (root → ... → parent) using a recursive CTE.
@@ -62,10 +76,11 @@ def get_taxon(
) or 0
# Get children (limited for inline display)
+ # Sort by rank importance so orders/families appear before species/subspecies
children = (
db.query(Taxon)
.filter(Taxon.parent_ott_id == ott_id)
- .order_by(Taxon.name)
+ .order_by(_RANK_SORT_ORDER, Taxon.name)
.limit(_INLINE_CHILDREN_LIMIT)
.all()
)
@@ -159,7 +174,7 @@ def get_children(
children = (
db.query(Taxon)
.filter(Taxon.parent_ott_id == ott_id)
- .order_by(Taxon.name)
+ .order_by(_RANK_SORT_ORDER, Taxon.name)
.offset(offset)
.limit(limit)
.all()
diff --git a/apps/api/src/evograph/api/schemas/taxa.py b/apps/api/src/evograph/api/schemas/taxa.py
index cc6293b..aa9b1d5 100644
--- a/apps/api/src/evograph/api/schemas/taxa.py
+++ b/apps/api/src/evograph/api/schemas/taxa.py
@@ -37,3 +37,22 @@ class SearchPage(BaseModel):
items: list[TaxonSummary]
total: int
limit: int
+
+
+class SpeciesSummary(BaseModel):
+ """One species entry in the page returned by the browse-species endpoint."""
+
+ # Identifier, name and rank copied from the taxon row.
+ ott_id: int
+ name: str
+ rank: str
+ # Image URL from the taxon's media row; None when no media row exists.
+ image_url: str | None = None
+ is_extinct: bool | None = None
+ # True when the taxon has a canonical sequence.
+ has_sequence: bool = False
+ # Number of edges in which the taxon is the source or the destination.
+ edge_count: int = 0
+ # Names of the family-/order-rank ancestors found in the taxon's lineage;
+ # None when the lineage holds no ancestor of that rank.
+ family_name: str | None = None
+ order_name: str | None = None
+
+
+class SpeciesBrowsePage(BaseModel):
+ """One page of browse-species results plus the paging values used."""
+
+ # Species on this page (at most `limit` entries).
+ items: list[SpeciesSummary]
+ # Number of species matching the filters across all pages.
+ total: int
+ # Offset and limit echoed back from the request.
+ offset: int
+ limit: int
diff --git a/apps/api/src/evograph/main.py b/apps/api/src/evograph/main.py
index ba82bc4..4689a73 100644
--- a/apps/api/src/evograph/main.py
+++ b/apps/api/src/evograph/main.py
@@ -6,7 +6,7 @@
from fastapi.middleware.gzip import GZipMiddleware
from sqlalchemy import text
-from evograph.api.routes import graph, jobs, search, sequences, stats, taxa
+from evograph.api.routes import graph, jobs, search, sequences, species, stats, taxa
from evograph.db.session import SessionLocal, engine
from evograph.logging_config import configure_logging
from evograph.middleware.rate_limit import RateLimitMiddleware
@@ -62,6 +62,7 @@ async def lifespan(app: FastAPI):
app.include_router(taxa.router, prefix="/v1")
app.include_router(graph.router, prefix="/v1")
app.include_router(sequences.router, prefix="/v1")
+app.include_router(species.router, prefix="/v1")
app.include_router(stats.router, prefix="/v1")
app.include_router(jobs.router, prefix="/v1")
diff --git a/apps/api/tests/conftest.py b/apps/api/tests/conftest.py
index e51eeb5..932c6a1 100644
--- a/apps/api/tests/conftest.py
+++ b/apps/api/tests/conftest.py
@@ -113,6 +113,16 @@ def group_by(self, *args, **kwargs):
def select_from(self, *args, **kwargs):
return self
+ def correlate(self, *args, **kwargs):
+ """Accept and ignore correlate() arguments; return self so calls chain."""
+ return self
+
+ def scalar_subquery(self):
+ """Return self in place of a scalar subquery so calls keep chaining."""
+ return self
+
+ def desc(self):
+ """Support ORDER BY ... DESC on scalar subqueries.
+
+ Returns self unchanged; no ordering is applied by this method.
+ """
+ return self
+
def exists(self):
"""Return an exists clause marker for use in outer query."""
return MockExistsClause()
diff --git a/apps/api/tests/test_species.py b/apps/api/tests/test_species.py
new file mode 100644
index 0000000..57e380c
--- /dev/null
+++ b/apps/api/tests/test_species.py
@@ -0,0 +1,148 @@
+"""Tests for the /v1/species endpoint."""
+
+from evograph.db.models import Edge, Sequence, Taxon
+from tests.conftest import _make_taxon
+
+
+class TestBrowseSpecies:
+    """Endpoint tests for ``GET /v1/species`` using the ``mock_db`` fixture."""
+
+    @staticmethod
+    def _seed_raven(mock_db):
+        """Register Corvus corax as the only Taxon row."""
+        mock_db.set(Taxon, [_make_taxon(700118, "Corvus corax", "species")])
+
+    def test_returns_species_list(self, client, mock_db):
+        taxa = [
+            _make_taxon(700118, "Corvus corax", "species"),
+            _make_taxon(893498, "Corvus corone", "species"),
+        ]
+        mock_db.set(Taxon, taxa)
+
+        response = client.get("/v1/species")
+        assert response.status_code == 200
+
+        payload = response.json()
+        assert len(payload["items"]) == 2
+        first = payload["items"][0]
+        assert first["name"] == "Corvus corax"
+        assert first["ott_id"] == 700118
+        assert first["rank"] == "species"
+
+    def test_returns_empty_for_no_species(self, client, mock_db):
+        mock_db.set(Taxon, [])
+
+        response = client.get("/v1/species")
+        assert response.status_code == 200
+        payload = response.json()
+        assert payload["items"] == []
+        assert payload["total"] == 0
+
+    def test_response_includes_pagination_fields(self, client, mock_db):
+        self._seed_raven(mock_db)
+
+        response = client.get("/v1/species", params={"offset": 10, "limit": 25})
+        assert response.status_code == 200
+        payload = response.json()
+        assert payload["offset"] == 10
+        assert payload["limit"] == 25
+        assert "total" in payload
+
+    def test_species_includes_has_sequence_field(self, client, mock_db):
+        self._seed_raven(mock_db)
+        mock_db.set(Sequence, [])
+
+        response = client.get("/v1/species")
+        assert response.status_code == 200
+        species = response.json()["items"][0]
+        assert "has_sequence" in species
+        assert isinstance(species["has_sequence"], bool)
+
+    def test_species_includes_edge_count_field(self, client, mock_db):
+        self._seed_raven(mock_db)
+        mock_db.set(Edge, [])
+
+        response = client.get("/v1/species")
+        assert response.status_code == 200
+        species = response.json()["items"][0]
+        assert "edge_count" in species
+        assert isinstance(species["edge_count"], int)
+
+    def test_accepts_has_sequences_filter(self, client, mock_db):
+        self._seed_raven(mock_db)
+
+        response = client.get("/v1/species", params={"has_sequences": "true"})
+        assert response.status_code == 200
+
+    def test_accepts_has_edges_filter(self, client, mock_db):
+        self._seed_raven(mock_db)
+
+        response = client.get("/v1/species", params={"has_edges": "true"})
+        assert response.status_code == 200
+
+    def test_accepts_is_extinct_filter(self, client, mock_db):
+        self._seed_raven(mock_db)
+
+        response = client.get("/v1/species", params={"is_extinct": "false"})
+        assert response.status_code == 200
+
+    def test_accepts_sort_param(self, client, mock_db):
+        self._seed_raven(mock_db)
+
+        # Both values allowed by the sort pattern must be accepted.
+        for sort_key in ("edges", "name"):
+            response = client.get("/v1/species", params={"sort": sort_key})
+            assert response.status_code == 200
+
+    def test_rejects_invalid_sort_param(self, client, mock_db):
+        response = client.get("/v1/species", params={"sort": "invalid"})
+        assert response.status_code == 422
+
+    def test_limit_validation(self, client, mock_db):
+        # One value below and one above the accepted 1..100 range.
+        for bad_limit in (0, 101):
+            response = client.get("/v1/species", params={"limit": bad_limit})
+            assert response.status_code == 422
+
+    def test_species_image_url_defaults_to_null(self, client, mock_db):
+        self._seed_raven(mock_db)
+
+        response = client.get("/v1/species")
+        assert response.status_code == 200
+        species = response.json()["items"][0]
+        assert "image_url" in species
+        assert species["image_url"] is None
+
+    def test_species_fields_match_schema(self, client, mock_db):
+        self._seed_raven(mock_db)
+
+        payload = client.get("/v1/species").json()
+        page_fields = {"items", "total", "offset", "limit"}
+        assert page_fields <= set(payload.keys())
+
+        item_fields = {
+            "ott_id", "name", "rank", "has_sequence", "edge_count",
+            "family_name", "order_name",
+        }
+        assert item_fields <= set(payload["items"][0].keys())
+
+    def test_species_taxonomy_defaults_to_null(self, client, mock_db):
+        self._seed_raven(mock_db)
+
+        species = client.get("/v1/species").json()["items"][0]
+        # Without lineage data, family/order default to null
+        assert species["family_name"] is None
+        assert species["order_name"] is None
diff --git a/apps/web/src/__tests__/BrowsePage.test.tsx b/apps/web/src/__tests__/BrowsePage.test.tsx
new file mode 100644
index 0000000..49da9f6
--- /dev/null
+++ b/apps/web/src/__tests__/BrowsePage.test.tsx
@@ -0,0 +1,203 @@
+import { render, screen, waitFor, fireEvent } from "@testing-library/react";
+import BrowsePage from "../app/browse/page";
+
+// Canned single-page browse response with two species: the first has an
+// image, a sequence and 12 edges; the second has none of these.
+// Presumably what the mocked browseSpecies resolves with (its use lies
+// outside the visible part of this file).
+const mockSpeciesData = {
+ items: [
+ {
+ ott_id: 700118,
+ name: "Corvus corax",
+ rank: "species",
+ image_url: "https://example.com/corax.jpg",
+ is_extinct: false,
+ has_sequence: true,
+ edge_count: 12,
+ family_name: "Corvidae",
+ order_name: "Passeriformes",
+ },
+ {
+ ott_id: 893498,
+ name: "Corvus corone",
+ rank: "species",
+ image_url: null,
+ is_extinct: false,
+ has_sequence: false,
+ edge_count: 0,
+ family_name: "Corvidae",
+ order_name: "Passeriformes",
+ },
+ ],
+ total: 2,
+ offset: 0,
+ limit: 50,
+};
+
+// Replace the API module so browseSpecies is a jest mock function.
+jest.mock("../lib/api", () => ({
+ browseSpecies: jest.fn(),
+}));
+
+// Stand-in for next/link that renders a plain anchor carrying the href.
+jest.mock("next/link", () => {
+  return function MockLink({
+    children,
+    href,
+  }: {
+    children: React.ReactNode;
+    href: string;
+  }) {
+    // A component must return an element, not the object literal
+    // `{children}`; `href` was otherwise unused.
+    return <a href={href}>{children}</a>;
+  };
+});
+
+import { browseSpecies } from "../lib/api";
+const mockBrowseSpecies = browseSpecies as jest.MockedFunction
+ Explore species in the database. Filter to those with COI sequences or
+ MI similarity edges to find species with genetic data.
+
+ )}
+
Browse Species
+