Skip to content

Commit 99ee456

Browse files
committed
Add smoke test for validated 10x renal dataset
1 parent 588a124 commit 99ee456

File tree

2 files changed

+159
-0
lines changed

2 files changed

+159
-0
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@ Validation summary from a local download of the public bundle:
5151
- In the downloaded bundle used for validation, `metrics_summary.csv` reports `num_cells_detected=465545`,
5252
and pyXenium reproduced that value from both supported matrix backends.
5353

54+
An executable smoke-test example is included in
55+
`examples/smoke_test_10x_renal_ffpe_protein.py`.
56+
57+
```bash
58+
python examples/smoke_test_10x_renal_ffpe_protein.py \
59+
"Y:/long/10X_datasets/Xenium/Xenium_Renal/Xenium_V1_Human_Kidney_FFPE_Protein"
60+
```
61+
5462
Quick Start
5563
-----------
5664

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
from __future__ import annotations
2+
3+
import argparse
4+
import json
5+
import os
6+
import sys
7+
from pathlib import Path
8+
9+
import pandas as pd
10+
11+
try:
12+
from pyXenium.datasets import RENAL_FFPE_PROTEIN_10X_DATASET
13+
from pyXenium.io.xenium_gene_protein_loader import load_xenium_gene_protein
14+
except ModuleNotFoundError:
15+
repo_src = Path(__file__).resolve().parents[1] / "src"
16+
if str(repo_src) not in sys.path:
17+
sys.path.insert(0, str(repo_src))
18+
from pyXenium.datasets import RENAL_FFPE_PROTEIN_10X_DATASET
19+
from pyXenium.io.xenium_gene_protein_loader import load_xenium_gene_protein
20+
21+
22+
# Machine-specific fallback location of the dataset. parse_args() falls back to
# it only when neither a positional path nor the PYXENIUM_DATASET_PATH
# environment variable is supplied.
DEFAULT_DATASET_PATH = (
    r"Y:\long\10X_datasets\Xenium\Xenium_Renal\Xenium_V1_Human_Kidney_FFPE_Protein"
)

# Reference values that validate_summary() compares the loaded dataset against.
EXPECTED_CELLS = 465545
EXPECTED_RNA_FEATURES = 405
EXPECTED_PROTEIN_MARKERS = 27
28+
29+
30+
def build_summary(base_path: str, prefer: str) -> dict:
    """Load the dataset and collect the facts the smoke test reports.

    Args:
        base_path: Directory of the Xenium dataset; forwarded to
            ``load_xenium_gene_protein`` and also searched for
            ``metrics_summary.csv``.
        prefer: Matrix backend preference forwarded to the loader.

    Returns:
        A JSON-serialisable dict with the dataset title/URL, the inputs used,
        observation/feature/protein-marker counts, the ``nnz`` of ``adata.X``
        (0 when ``X`` exposes no ``nnz`` attribute), presence flags for
        ``obsm['spatial']`` and ``obs['cluster']``, the sorted ``obsm`` keys,
        and ``metrics_summary_num_cells_detected`` (``None`` when the metrics
        file, its column, or its first value is unavailable).
    """
    adata = load_xenium_gene_protein(base_path=base_path, prefer=prefer)

    protein = adata.obsm.get("protein")
    protein_shape = getattr(protein, "shape", None)
    # A missing entry or one without a second axis counts as zero markers
    # instead of raising IndexError on shape[1].
    if protein_shape is not None and len(protein_shape) > 1:
        protein_markers = int(protein_shape[1])
    else:
        protein_markers = 0

    summary = {
        "dataset_title": RENAL_FFPE_PROTEIN_10X_DATASET.title,
        "dataset_url": RENAL_FFPE_PROTEIN_10X_DATASET.url,
        "base_path": base_path,
        "prefer": prefer,
        "n_cells": int(adata.n_obs),
        "n_rna_features": int(adata.n_vars),
        "n_protein_markers": protein_markers,
        "x_nnz": int(getattr(adata.X, "nnz", 0)),
        "has_spatial": "spatial" in adata.obsm,
        "has_cluster": "cluster" in adata.obs.columns,
        "obsm_keys": sorted(adata.obsm.keys()),
        "metrics_summary_num_cells_detected": None,
    }

    metrics_path = Path(base_path) / "metrics_summary.csv"
    if metrics_path.is_file():
        metrics = pd.read_csv(metrics_path)
        if "num_cells_detected" in metrics.columns and not metrics.empty:
            # Positional access does not depend on the index labels, and a
            # blank cell (NaN) is reported as "unknown" rather than crashing
            # int().
            detected = metrics["num_cells_detected"].iloc[0]
            if pd.notna(detected):
                summary["metrics_summary_num_cells_detected"] = int(detected)

    return summary
59+
60+
61+
def validate_summary(summary: dict) -> list[str]:
    """Return a human-readable message for every deviation from the reference.

    Args:
        summary: Dict shaped like the return value of ``build_summary``.

    Returns:
        A list of issue strings; empty when the counts match the ``EXPECTED_*``
        constants, both presence flags are true, and the cell count agrees
        with ``metrics_summary.csv`` (when that value is available).
    """
    problems: list[str] = []

    # Exact-count checks: (summary key, reference value, noun used in message).
    count_checks = (
        ("n_cells", EXPECTED_CELLS, "cells"),
        ("n_rna_features", EXPECTED_RNA_FEATURES, "RNA features"),
        ("n_protein_markers", EXPECTED_PROTEIN_MARKERS, "protein markers"),
    )
    for key, expected, noun in count_checks:
        observed = summary[key]
        if observed != expected:
            problems.append(f"Expected {expected} {noun}, observed {observed}.")

    # Presence checks: (summary flag, message emitted when the flag is false).
    presence_checks = (
        ("has_spatial", "Expected adata.obsm['spatial'] to be present."),
        ("has_cluster", "Expected adata.obs['cluster'] to be present."),
    )
    for flag, message in presence_checks:
        if not summary[flag]:
            problems.append(message)

    # Cross-check against the vendor metrics file only when a value was read.
    reported = summary["metrics_summary_num_cells_detected"]
    if reported is not None:
        loaded = summary["n_cells"]
        if reported != loaded:
            problems.append(
                f"metrics_summary.csv reports {reported} detected cells, "
                f"but pyXenium loaded {loaded} cells."
            )

    return problems
87+
88+
89+
def parse_args() -> argparse.Namespace:
    """Parse the smoke test's command-line options from ``sys.argv``.

    Returns:
        Namespace with ``base_path`` (positional, optional), ``prefer``,
        ``allow_mismatch`` and ``output_json``.
    """
    description = (
        "Smoke-test pyXenium on the official 10x Genomics FFPE Human Renal Cell Carcinoma "
        "RNA + Protein Xenium dataset."
    )
    # The environment variable takes precedence over the built-in default path.
    fallback_path = os.environ.get("PYXENIUM_DATASET_PATH", DEFAULT_DATASET_PATH)

    cli = argparse.ArgumentParser(description=description)
    cli.add_argument(
        "base_path",
        nargs="?",
        default=fallback_path,
        help=(
            "Local path to the Xenium dataset directory. Defaults to the "
            "PYXENIUM_DATASET_PATH environment variable or the validated local path."
        ),
    )
    cli.add_argument(
        "--prefer",
        choices=("auto", "zarr", "h5", "mex"),
        default="auto",
        help="Preferred matrix backend passed to load_xenium_gene_protein().",
    )
    cli.add_argument(
        "--allow-mismatch",
        action="store_true",
        help="Print the summary even if the observed values differ from the validated reference.",
    )
    cli.add_argument(
        "--output-json",
        default=None,
        help="Optional path to write the summary JSON.",
    )

    namespace = cli.parse_args()
    return namespace
122+
123+
124+
def main() -> int:
    """Run the smoke test and report the outcome as JSON.

    Loads the dataset, validates it against the reference values, prints the
    combined payload to stdout and, when ``--output-json`` is given, writes
    the same text (plus a trailing newline) to that file.

    Returns:
        Process exit status: 1 when validation issues were found and
        ``--allow-mismatch`` was not passed, otherwise 0.
    """
    args = parse_args()
    summary = build_summary(base_path=args.base_path, prefer=args.prefer)
    issues = validate_summary(summary)

    payload = {
        "summary": summary,
        "validated_reference": {
            "expected_cells": EXPECTED_CELLS,
            "expected_rna_features": EXPECTED_RNA_FEATURES,
            "expected_protein_markers": EXPECTED_PROTEIN_MARKERS,
        },
        "issues": issues,
    }

    rendered = json.dumps(payload, indent=2)
    print(rendered)

    if args.output_json:
        output_path = Path(args.output_json)
        # Create the destination directory first so that a missing parent does
        # not abort the run after the dataset has already been loaded.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(rendered + "\n", encoding="utf-8")

    if issues and not args.allow_mismatch:
        return 1
    return 0
148+
149+
150+
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)