From 1a95f7b58b81c1294409f063be2eb1bff9d02c3b Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 1 Mar 2026 14:43:32 +0000 Subject: [PATCH] Add MI metrics visibility across the app The app was computing MI (mutual information) metrics but not surfacing them to users. This change makes MI similarity data visible and actionable throughout the UI, enabling users to answer the core question: "How does genetic similarity compare to taxonomy?" Backend: - Add mi_norm and align_len to GraphEdge schema (was only distance) - Add shared_rank to NeighborOut (computed from lineage arrays) - MI network deduplication now preserves mi_norm and align_len - Neighbors endpoint batch-fetches lineage ranks for taxonomy coherence Frontend: - NeighborCard shows actual NMI% instead of misleading relative distance - Neighbor cards show shared taxonomic rank badge (genus/family/order) - Taxonomic coherence summary bar above neighbors section - Graph page MI stats panel (avg/median/range NMI, distribution buckets) - Sigma graph edge hover tooltip showing NMI%, distance, alignment cols - TypeScript types updated for new GraphEdge and NeighborOut fields Tests: 122 API tests + 86 frontend tests passing (5 new tests added) https://claude.ai/code/session_015FCj2G9CHpnsJqHcF3n8aw --- apps/api/src/evograph/api/routes/graph.py | 56 ++++++++- apps/api/src/evograph/api/schemas/graph.py | 4 + apps/api/tests/test_graph.py | 22 +++- apps/web/src/__tests__/GraphPage.test.tsx | 17 ++- .../src/__tests__/TaxonDetailPage.test.tsx | 37 ++++++ apps/web/src/app/globals.css | 107 ++++++++++++++++++ apps/web/src/app/graph/page.tsx | 63 ++++++++++- apps/web/src/app/taxa/[ottId]/page.tsx | 75 +++++++++--- apps/web/src/components/GraphViewSigma.tsx | 58 +++++++++- apps/web/src/lib/types.ts | 4 + 10 files changed, 410 insertions(+), 33 deletions(-) diff --git a/apps/api/src/evograph/api/routes/graph.py b/apps/api/src/evograph/api/routes/graph.py index 67b843f..06ac325 100644 --- a/apps/api/src/evograph/api/routes/graph.py +++ b/apps/api/src/evograph/api/routes/graph.py @@ -94,6 +94,8 @@ def get_subtree_graph( dst=e.dst_ott_id, kind="mi", distance=e.distance, + mi_norm=e.mi_norm, + align_len=e.align_len, ) ) @@ -153,15 +155,15 @@ def get_mi_network( # Build MI edges — deduplicate to undirected (keep the one with lower distance # when both A->B and B->A exist, otherwise keep the single direction) - seen_pairs: dict[tuple[int, int], float] = {} + seen_pairs: dict[tuple[int, int], tuple[float, float, int]] = {} for e in all_edges: pair = (min(e.src_ott_id, e.dst_ott_id), max(e.src_ott_id, e.dst_ott_id)) - if pair not in seen_pairs or e.distance < seen_pairs[pair]: - seen_pairs[pair] = e.distance + if pair not in seen_pairs or e.distance < seen_pairs[pair][0]: + seen_pairs[pair] = (e.distance, e.mi_norm, e.align_len) mi_edges = [ - GraphEdge(src=a, dst=b, kind="mi", distance=dist) - for (a, b), dist in seen_pairs.items() + GraphEdge(src=a, dst=b, kind="mi", distance=dist, mi_norm=nmi, align_len=alen) + for (a, b), (dist, nmi, alen) in seen_pairs.items() ] # Add taxonomy edges: connect species to their parent genus/family @@ -202,6 +204,27 @@ def get_mi_network( return result +def _find_shared_rank( + src_lineage: list[int] | None, + dst_lineage: list[int] | None, + rank_lookup: dict[int, str], +) -> str | None: + """Find the deepest shared taxonomic rank between two taxa. + + Lineage arrays run from root -> parent (not including self). + Walk dst lineage from deepest to shallowest to find the first common ancestor. + """ + if not src_lineage or not dst_lineage: + return None + + src_set = set(src_lineage) + for ott_id in reversed(dst_lineage): + if ott_id in src_set: + return rank_lookup.get(ott_id) + + return None + + @router.get("/graph/neighbors/{ott_id}", response_model=list[NeighborOut]) def get_neighbors( ott_id: int, @@ -211,7 +234,8 @@ def get_neighbors( """Get k nearest MI-neighbors for a taxon. Query Edge table where src_ott_id = ott_id, order by distance, limit k. - Join with Taxon to get name/rank. + Join with Taxon to get name/rank. Computes shared taxonomic rank + using lineage arrays to show taxonomy-vs-similarity coherence. """ taxon = db.query(Taxon).filter(Taxon.ott_id == ott_id).first() if taxon is None: @@ -226,6 +250,24 @@ def get_neighbors( .all() ) + # Collect all lineage ott_ids to batch-lookup ranks + all_lineage_ids: set[int] = set() + src_lineage = taxon.lineage or [] + all_lineage_ids.update(src_lineage) + for _e, t in rows: + if t.lineage: + all_lineage_ids.update(t.lineage) + + # Batch-fetch ranks for all lineage ancestors + rank_lookup: dict[int, str] = {} + if all_lineage_ids: + ancestor_rows = ( + db.query(Taxon.ott_id, Taxon.rank) + .filter(Taxon.ott_id.in_(all_lineage_ids)) + .all() + ) + rank_lookup = {ott: rank for ott, rank in ancestor_rows} + return [ NeighborOut( ott_id=t.ott_id, @@ -233,6 +275,8 @@ def get_neighbors( rank=t.rank, distance=e.distance, mi_norm=e.mi_norm, + align_len=e.align_len, + shared_rank=_find_shared_rank(src_lineage, t.lineage, rank_lookup), ) for e, t in rows ] diff --git a/apps/api/src/evograph/api/schemas/graph.py b/apps/api/src/evograph/api/schemas/graph.py index f51b02c..f6b9c8a 100644 --- a/apps/api/src/evograph/api/schemas/graph.py +++ b/apps/api/src/evograph/api/schemas/graph.py @@ -11,6 +11,8 @@ class GraphEdge(BaseModel): dst: int kind: str # "taxonomy" | "mi" distance: float | None = None + mi_norm: float | None = None + align_len: int | None = None class GraphResponse(BaseModel): nodes: list[Node] @@ -22,3 +24,5 @@ class NeighborOut(BaseModel): rank: str distance: float mi_norm: float + align_len: int + shared_rank: str | None = None diff --git a/apps/api/tests/test_graph.py b/apps/api/tests/test_graph.py index bbeab39..0134a8f 100644 --- a/apps/api/tests/test_graph.py +++ b/apps/api/tests/test_graph.py @@ -100,13 +100,15 @@ def test_mi_network_edge_schema(self, client, mock_db): assert "src" in e assert "dst" in e assert "distance" in e + assert "mi_norm" in e + assert "align_len" in e class TestNeighbors: def test_neighbors_returns_sorted(self, client, mock_db): taxon = _make_taxon(700118, "Corvus corax", "species") corone = _make_taxon(893498, "Corvus corone", "species") - edge = _make_edge(700118, 893498, distance=0.15, mi_norm=0.85) + edge = _make_edge(700118, 893498, distance=0.15, mi_norm=0.85, align_len=542) mock_db.set(Taxon, [taxon]) mock_db.set((Edge, Taxon), [(edge, corone)]) @@ -119,6 +121,7 @@ def test_neighbors_returns_sorted(self, client, mock_db): assert data[0]["ott_id"] == 893498 assert data[0]["distance"] == 0.15 assert data[0]["mi_norm"] == 0.85 + assert data[0]["align_len"] == 542 def test_neighbors_not_found(self, client, mock_db): mock_db.set(Taxon, []) @@ -158,4 +161,19 @@ def test_neighbor_schema(self, client, mock_db): resp = client.get("/v1/graph/neighbors/700118") item = resp.json()[0] - assert set(item.keys()) == {"ott_id", "name", "rank", "distance", "mi_norm"} + assert set(item.keys()) == { + "ott_id", "name", "rank", "distance", "mi_norm", + "align_len", "shared_rank", + } + + def test_neighbor_shared_rank_null_without_lineage(self, client, mock_db): + taxon = _make_taxon(700118, "Corvus corax", "species") + corone = _make_taxon(893498, "Corvus corone", "species") + edge = _make_edge(700118, 893498, distance=0.15, mi_norm=0.85) + + mock_db.set(Taxon, [taxon]) + mock_db.set((Edge, Taxon), [(edge, corone)]) + + resp = client.get("/v1/graph/neighbors/700118") + data = resp.json() + assert data[0]["shared_rank"] is None diff --git a/apps/web/src/__tests__/GraphPage.test.tsx b/apps/web/src/__tests__/GraphPage.test.tsx index 226f94f..46004b5 100644 --- a/apps/web/src/__tests__/GraphPage.test.tsx +++ b/apps/web/src/__tests__/GraphPage.test.tsx @@ -24,9 +24,9 @@ const mockGraph = { { ott_id: 3, name: "Corvus", rank: "genus", image_url: null }, ], edges: [ - { src: 1, dst: 2, kind: "mi" as const, distance: 0.15 }, - { src: 3, dst: 1, kind: "taxonomy" as const, distance: null }, - { src: 3, dst: 2, kind: "taxonomy" as const, distance: null }, + { src: 1, dst: 2, kind: "mi" as const, distance: 0.15, mi_norm: 0.85, align_len: 600 }, + { src: 3, dst: 1, kind: "taxonomy" as const, distance: null, mi_norm: null, align_len: null }, + { src: 3, dst: 2, kind: "taxonomy" as const, distance: null, mi_norm: null, align_len: null }, ], }; @@ -113,4 +113,15 @@ describe("GraphPage", () => { render(); expect(screen.queryByPlaceholderText("Search nodes...")).not.toBeInTheDocument(); }); + + it("shows MI metrics summary after loading", async () => { + mockGetMiNetwork.mockResolvedValue(mockGraph); + render(); + + await waitFor(() => { + expect(screen.getByText("Avg NMI")).toBeInTheDocument(); + expect(screen.getByText("Median")).toBeInTheDocument(); + expect(screen.getByText("Range")).toBeInTheDocument(); + }); + }); }); diff --git a/apps/web/src/__tests__/TaxonDetailPage.test.tsx b/apps/web/src/__tests__/TaxonDetailPage.test.tsx index cf046a5..7c846f1 100644 --- a/apps/web/src/__tests__/TaxonDetailPage.test.tsx +++ b/apps/web/src/__tests__/TaxonDetailPage.test.tsx @@ -129,4 +129,41 @@ describe("TaxonDetailPage", () => { expect(screen.getByText(/Network error/)).toBeInTheDocument(); }); }); + + it("renders MI neighbors with NMI similarity", async () => { + const neighbors = [ + { + ott_id: 100001, + name: "Pica pica", + rank: "species", + distance: 0.15, + mi_norm: 0.85, + align_len: 542, + shared_rank: "family", + }, + ]; + (getNeighbors as jest.Mock).mockResolvedValue(neighbors); + render(); + await waitFor(() => { + expect(screen.getByText(/85% NMI/)).toBeInTheDocument(); + expect(screen.getByText(/542 cols/)).toBeInTheDocument(); + expect(screen.getByText("Pica pica")).toBeInTheDocument(); + }); + }); + + it("shows taxonomic coherence summary for neighbors", async () => { + const neighbors = [ + { ott_id: 1, name: "Species A", rank: "species", distance: 0.1, mi_norm: 0.9, align_len: 600, shared_rank: "genus" }, + { ott_id: 2, name: "Species B", rank: "species", distance: 0.2, mi_norm: 0.8, align_len: 550, shared_rank: "family" }, + { ott_id: 3, name: "Species C", rank: "species", distance: 0.4, mi_norm: 0.6, align_len: 500, shared_rank: "order" }, + ]; + (getNeighbors as jest.Mock).mockResolvedValue(neighbors); + render(); + await waitFor(() => { + expect(screen.getByText("Taxonomic coherence:")).toBeInTheDocument(); + expect(screen.getByText("1 same genus")).toBeInTheDocument(); + expect(screen.getByText("1 same family")).toBeInTheDocument(); + expect(screen.getByText("1 cross-family")).toBeInTheDocument(); + }); + }); }); diff --git a/apps/web/src/app/globals.css b/apps/web/src/app/globals.css index 34a5ea3..fb2bf91 100644 --- a/apps/web/src/app/globals.css +++ b/apps/web/src/app/globals.css @@ -211,6 +211,82 @@ input:focus { color: #555; } +/* ── Edge tooltip ────────────────────────────────── */ + +.graph-edge-tooltip { + position: absolute; + z-index: 1000; + background: rgba(15, 18, 28, 0.95); + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 0.5rem 0.75rem; + pointer-events: none; + max-width: 320px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.4); +} + +.graph-edge-tooltip-names { + display: flex; + gap: 0.4rem; + align-items: center; + font-size: 0.8rem; + margin-bottom: 0.35rem; + color: var(--fg); +} + +.graph-edge-tooltip-metrics { + display: flex; + gap: 0.75rem; + font-size: 0.75rem; + color: #aaa; + font-variant-numeric: tabular-nums; +} + +.graph-edge-tooltip-metrics strong { + color: #2a9d8f; +} + +/* ── MI stats bar ────────────────────────────────── */ + +.mi-stats-bar { + display: flex; + flex-wrap: wrap; + gap: 1rem; + align-items: center; + padding: 0.5rem 0.85rem; + margin-bottom: 0.5rem; + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: var(--radius); + font-size: 0.8rem; +} + +.mi-stats-item { + display: flex; + flex-direction: column; + align-items: center; + gap: 0.1rem; +} + +.mi-stats-label { + font-size: 0.65rem; + color: #888; + text-transform: uppercase; + letter-spacing: 0.03em; +} + +.mi-stats-value { + font-weight: 600; + font-variant-numeric: tabular-nums; + color: var(--fg); +} + +.mi-stats-sep { + width: 1px; + height: 28px; + background: var(--border); +} + /* ── Breadcrumbs ──────────────────────────────────── */ .breadcrumbs { @@ -567,6 +643,37 @@ input:focus { white-space: nowrap; } +.neighbor-meta { + font-size: 0.7rem; + color: #666; + font-variant-numeric: tabular-nums; + white-space: nowrap; +} + +.neighbor-shared-rank { + font-size: 0.65rem; + font-weight: 600; + padding: 0.1rem 0.4rem; + border-radius: 3px; + white-space: nowrap; + flex-shrink: 0; + text-transform: capitalize; +} + +.neighbor-coherence { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; + align-items: center; + font-size: 0.8rem; + color: #888; + margin-bottom: 0.75rem; + padding: 0.5rem 0.75rem; + background: var(--bg-card); + border: 1px solid var(--border); + border-radius: var(--radius); +} + /* ── Sequence viewer ─────────────────────────────── */ .sequence-card { diff --git a/apps/web/src/app/graph/page.tsx b/apps/web/src/app/graph/page.tsx index b7d17b8..be272f4 100644 --- a/apps/web/src/app/graph/page.tsx +++ b/apps/web/src/app/graph/page.tsx @@ -92,13 +92,37 @@ export default function GraphPage() { .finally(() => setLoading(false)); }, []); + const miEdges = graph ? graph.edges.filter((e) => e.kind === "mi") : []; + const miCount = miEdges.length; + const speciesCount = graph ? graph.nodes.length : 0; + + // Compute MI metrics summary + const miStats = useMemo(() => { + if (miEdges.length === 0) return null; + const nmiValues = miEdges + .map((e) => e.mi_norm) + .filter((v): v is number => v != null); + if (nmiValues.length === 0) return null; + + nmiValues.sort((a, b) => a - b); + const sum = nmiValues.reduce((a, b) => a + b, 0); + const avg = sum / nmiValues.length; + const median = nmiValues[Math.floor(nmiValues.length / 2)]; + const min = nmiValues[0]; + const max = nmiValues[nmiValues.length - 1]; + + // Distribution buckets for NMI similarity + const highSim = nmiValues.filter((v) => v >= 0.7).length; + const medSim = nmiValues.filter((v) => v >= 0.4 && v < 0.7).length; + const lowSim = nmiValues.filter((v) => v < 0.4).length; + + return { avg, median, min, max, highSim, medSim, lowSim, total: nmiValues.length }; + }, [miEdges]); + if (error) { return
Failed to load graph: {error}
; } - const miCount = graph ? graph.edges.filter((e) => e.kind === "mi").length : 0; - const speciesCount = graph ? graph.nodes.length : 0; - return (

@@ -108,7 +132,7 @@ export default function GraphPage() {

Species with COI barcodes connected by mutual information similarity. - Closer species have thicker, brighter edges. Hover to highlight, click to view details. + Closer species have thicker, brighter edges. Hover edges for MI metrics, click nodes to view details.

{graph && !loading && ( @@ -121,6 +145,37 @@ export default function GraphPage() { )}
+ {/* MI Metrics Summary */} + {miStats && !loading && ( +
+
+ Avg NMI + {Math.round(miStats.avg * 100)}% +
+
+ Median + {Math.round(miStats.median * 100)}% +
+
+ Range + {Math.round(miStats.min * 100)}–{Math.round(miStats.max * 100)}% +
+ +
+ High (≥70%) + {miStats.highSim} +
+
+ Medium (40–70%) + {miStats.medSim} +
+
+ Low (<40%) + {miStats.lowSim} +
+
+ )} + {loading ? ( ) : graph ? ( diff --git a/apps/web/src/app/taxa/[ottId]/page.tsx b/apps/web/src/app/taxa/[ottId]/page.tsx index 4d1e265..6b5af9d 100644 --- a/apps/web/src/app/taxa/[ottId]/page.tsx +++ b/apps/web/src/app/taxa/[ottId]/page.tsx @@ -98,25 +98,48 @@ function StatsBar({ items }: { items: TaxonSummary[] }) { ); } +// ── shared rank colors ────────────────────────────── +const SHARED_RANK_COLORS: Record = { + genus: "#81c784", + family: "#fff176", + subfamily: "#dce775", + order: "#ffb74d", + class: "#e57373", +}; + // ── neighbor card ─────────────────────────────────── -function NeighborCard({ neighbor, maxDist }: { neighbor: NeighborOut; maxDist: number }) { - // Similarity: 1 = identical, 0 = maximally distant - const similarity = Math.max(0, 1 - neighbor.distance / maxDist); - const pct = Math.round(similarity * 100); - // Color: green (similar) → orange (distant) - const hue = Math.round(similarity * 120); // 120=green, 0=red +function NeighborCard({ neighbor }: { neighbor: NeighborOut }) { + // Use actual NMI (normalized mutual information) as similarity percentage + const nmiPct = Math.round(neighbor.mi_norm * 100); + // Color: green (high NMI) → red (low NMI) + const hue = Math.round(neighbor.mi_norm * 120); // 120=green, 0=red const barColor = `hsl(${hue}, 70%, 50%)`; return ( -
+
- - {neighbor.name} - - - {pct}% similar - +
+ + {neighbor.name} + + {neighbor.shared_rank && ( + + {neighbor.shared_rank} + + )} +
+
+ + {nmiPct}% NMI + + + {neighbor.align_len} cols + +
); @@ -179,9 +202,9 @@ export default function TaxonDetailPage() { const showGraph = hasMiEdges && neighbors.length > 0; const grouped = groupByRank(allChildren); const hasMoreChildren = allChildren.length < taxon.total_children; - const neighborMaxDist = neighbors.length > 0 - ? Math.max(...neighbors.map((n) => n.distance)) * 1.1 // 10% headroom - : 1; + // Count how many neighbors share genus vs family for the summary + const genusCount = neighbors.filter((n) => n.shared_rank === "genus").length; + const familyCount = neighbors.filter((n) => n.shared_rank === "family" || n.shared_rank === "subfamily").length; return (
@@ -322,9 +345,27 @@ export default function TaxonDetailPage() { {neighbors.length > 0 && (

MI Neighbors ({neighbors.length})

+
+ Taxonomic coherence: + {genusCount > 0 && ( + + {genusCount} same genus + + )} + {familyCount > 0 && ( + + {familyCount} same family + + )} + {neighbors.length - genusCount - familyCount > 0 && ( + + {neighbors.length - genusCount - familyCount} cross-family + + )} +
{neighbors.map((n) => ( - + ))}
diff --git a/apps/web/src/components/GraphViewSigma.tsx b/apps/web/src/components/GraphViewSigma.tsx index a432680..cb81390 100644 --- a/apps/web/src/components/GraphViewSigma.tsx +++ b/apps/web/src/components/GraphViewSigma.tsx @@ -1,6 +1,6 @@ "use client"; -import { useEffect, useRef, useCallback } from "react"; +import { useEffect, useRef, useCallback, useState } from "react"; import { useRouter } from "next/navigation"; import Graph from "graphology"; import Sigma from "sigma"; @@ -93,6 +93,8 @@ function buildGraph(data: GraphResponse, mode: "force" | "radial"): Graph { origColor: color, origSize: size, distance: dist, + miNorm: e.mi_norm, + alignLen: e.align_len, }); } else if (mode === "radial") { // Only show taxonomy edges in tree mode, very faint @@ -155,6 +157,15 @@ export default function GraphViewSigma({ const graphRef = useRef(null); const router = useRouter(); const hoveredRef = useRef(null); + const [edgeTooltip, setEdgeTooltip] = useState<{ + x: number; + y: number; + srcName: string; + dstName: string; + miNorm: number; + distance: number; + alignLen: number | null; + } | null>(null); /** Highlight a node's neighborhood, dim everything else */ const focusNode = useCallback((g: Graph, nodeId: string | null) => { @@ -202,6 +213,7 @@ export default function GraphViewSigma({ allowInvalidContainer: true, renderLabels: true, renderEdgeLabels: false, + enableEdgeEvents: true, defaultNodeColor: "#b5a7d5", defaultEdgeColor: blendWithBg(42, 157, 143, 0.08), labelColor: { color: "#9a958a" }, @@ -254,6 +266,28 @@ export default function GraphViewSigma({ if (onNodeDoubleClick) onNodeDoubleClick(Number(node)); }); + // Edge hover → show MI metrics tooltip + sigma.on("enterEdge", ({ edge, event }) => { + const attr = g.getEdgeAttributes(edge); + if (attr.kind !== "mi") return; + const src = g.source(edge); + const dst = g.target(edge); + const srcName = g.getNodeAttribute(src, "label") as string; + const dstName = g.getNodeAttribute(dst, "label") as string; + setEdgeTooltip({ + x: (event as unknown as { x: number }).x, + y: (event as unknown as { y: number }).y, + srcName, + dstName, + miNorm: attr.miNorm as number ?? 0, + distance: attr.distance as number ?? 0, + alignLen: attr.alignLen as number | null ?? null, + }); + }); + sigma.on("leaveEdge", () => { + setEdgeTooltip(null); + }); + return () => { ro.disconnect(); sigma.kill(); @@ -285,6 +319,28 @@ export default function GraphViewSigma({ className="graph-sigma-container" style={{ height }} /> + {edgeTooltip && ( +
+
+ {edgeTooltip.srcName} + + {edgeTooltip.dstName} +
+
+ NMI: {Math.round(edgeTooltip.miNorm * 100)}% + Distance: {edgeTooltip.distance.toFixed(3)} + {edgeTooltip.alignLen && ( + Alignment: {edgeTooltip.alignLen} cols + )} +
+
+ )}
{LEGEND_RANKS.map((rank) => ( diff --git a/apps/web/src/lib/types.ts b/apps/web/src/lib/types.ts index 1d23b68..e862c6a 100644 --- a/apps/web/src/lib/types.ts +++ b/apps/web/src/lib/types.ts @@ -64,6 +64,8 @@ export interface GraphEdge { dst: number; kind: "taxonomy" | "mi"; distance: number | null; + mi_norm: number | null; + align_len: number | null; } export interface GraphResponse { @@ -77,6 +79,8 @@ export interface NeighborOut { rank: string; distance: number; mi_norm: number; + align_len: number; + shared_rank: string | null; } export interface StatsResponse {