Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/atdata_app/frontend/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ async def dataset_detail(request: Request, did: str, rkey: str):
if len(parts) == 3:
schema_did, _, schema_rkey = parts

# Fetch the referenced schema for inline display of format/annotation info
schema_info = None
if schema_did and schema_rkey:
schema_row = await query_get_schema(pool, schema_did, schema_rkey)
if schema_row:
schema_info = row_to_schema(schema_row)

# Fetch labels pointing to this dataset
dataset_uri = entry["uri"]
label_rows = await query_labels_for_dataset(pool, dataset_uri)
Expand All @@ -117,6 +124,7 @@ async def dataset_detail(request: Request, did: str, rkey: str):
"entry": entry,
"schema_did": schema_did,
"schema_rkey": schema_rkey,
"schema_info": schema_info,
"labels": labels,
},
)
Expand Down
14 changes: 14 additions & 0 deletions src/atdata_app/frontend/templates/dataset.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,20 @@ <h2>Details</h2>
<tr><th>License</th><td>{{ entry.license }}</td></tr>
{% endif %}
<tr><th>Schema</th><td><a href="/schema/{{ schema_did }}/{{ schema_rkey }}">{{ entry.schemaRef }}</a></td></tr>
{% if schema_info %}
{% if schema_info.arrayFormat is defined %}
<tr><th>Array Format</th><td>{{ schema_info.get("arrayFormatLabel", schema_info.arrayFormat) }}</td></tr>
{% endif %}
{% if schema_info.dtype is defined %}
<tr><th>Data Type</th><td><code>{{ schema_info.dtype }}</code></td></tr>
{% endif %}
{% if schema_info.shape is defined %}
<tr><th>Shape</th><td><code>{{ schema_info.shape | join(" × ") }}</code></td></tr>
{% endif %}
{% if schema_info.dimensionNames is defined %}
<tr><th>Dimensions</th><td>{{ schema_info.dimensionNames | join(", ") }}</td></tr>
{% endif %}
{% endif %}
{% if entry.size %}
<tr>
<th>Size</th>
Expand Down
3 changes: 2 additions & 1 deletion src/atdata_app/frontend/templates/profile.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,14 @@ <h3><a href="/dataset/{{ entry.did }}/{{ entry.rkey }}">{{ entry.name }}</a></h3
<h2>Schemas</h2>
{% if schemas %}
<table>
<thead><tr><th>Name</th><th>Version</th><th>Type</th></tr></thead>
<thead><tr><th>Name</th><th>Version</th><th>Type</th><th>Format</th></tr></thead>
<tbody>
{% for s in schemas %}
<tr>
<td><a href="/schema/{{ s.did }}/{{ s.rkey }}">{{ s.name }}</a></td>
<td>{{ s.version }}</td>
<td>{{ s.schemaType }}</td>
<td>{{ s.get("arrayFormatLabel", "") }}</td>
</tr>
{% endfor %}
</tbody>
Expand Down
12 changes: 12 additions & 0 deletions src/atdata_app/frontend/templates/schema.html
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ <h2>Details</h2>
<tbody>
<tr><th>AT-URI</th><td><code>{{ schema.uri }}</code></td></tr>
<tr><th>Type</th><td>{{ schema.schemaType }}</td></tr>
{% if schema.arrayFormat is defined %}
<tr><th>Array Format</th><td>{{ schema.get("arrayFormatLabel", schema.arrayFormat) }}</td></tr>
{% endif %}
{% if schema.dtype is defined %}
<tr><th>Data Type</th><td><code>{{ schema.dtype }}</code></td></tr>
{% endif %}
{% if schema.shape is defined %}
<tr><th>Shape</th><td><code>{{ schema.shape | join(" × ") }}</code></td></tr>
{% endif %}
{% if schema.dimensionNames is defined %}
<tr><th>Dimensions</th><td>{{ schema.dimensionNames | join(", ") }}</td></tr>
{% endif %}
<tr><th>Version</th><td>{{ schema.version }}</td></tr>
<tr><th>Created</th><td>{{ schema.createdAt }}</td></tr>
</tbody>
Expand Down
3 changes: 2 additions & 1 deletion src/atdata_app/frontend/templates/schemas.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ <h1>Schemas</h1>
{% if schemas %}
<table>
<thead>
<tr><th>Name</th><th>Version</th><th>Type</th><th>Description</th><th>Publisher</th></tr>
<tr><th>Name</th><th>Version</th><th>Type</th><th>Format</th><th>Description</th><th>Publisher</th></tr>
</thead>
<tbody>
{% for s in schemas %}
<tr>
<td><a href="/schema/{{ s.did }}/{{ s.rkey }}">{{ s.name }}</a></td>
<td>{{ s.version }}</td>
<td>{{ s.schemaType }}</td>
<td>{{ s.get("arrayFormatLabel", "") }}</td>
<td>{{ s.get("description", "") }}</td>
<td><a href="/profile/{{ s.did }}">{{ s.did[:20] }}…</a></td>
</tr>
Expand Down
8 changes: 6 additions & 2 deletions src/atdata_app/mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ async def server_lifespan(server: FastMCP) -> AsyncIterator[ServerContext]:
"ATProto AppView for the science.alt.dataset namespace. "
"Use these tools to discover and query scientific datasets, "
"schemas, and lenses (bidirectional schema transforms) published "
"on the AT Protocol network."
"on the AT Protocol network. "
"Schemas may specify an arrayFormat (numpyBytes, parquetBytes, "
"sparseBytes, structuredBytes, arrowTensor, safetensors) and "
"ndarray annotations (dtype, shape, dimensionNames)."
),
lifespan=server_lifespan,
)
Expand Down Expand Up @@ -127,7 +130,8 @@ async def get_schema(ctx: Ctx, uri: str) -> dict[str, Any]:
uri: AT-URI of the schema (e.g. at://did:plc:abc/science.alt.dataset.schema/my.schema@1.0.0).

Returns:
Full schema record including name, version, type, schema body, and description.
Full schema record including name, version, type, schema body, description,
and (when present) arrayFormat, dtype, shape, and dimensionNames.
"""
sc = _get_ctx(ctx)
did, _collection, rkey = parse_at_uri(uri)
Expand Down
39 changes: 39 additions & 0 deletions src/atdata_app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,32 @@
from pydantic import BaseModel


# ---------------------------------------------------------------------------
# Known array format tokens (atdata-lexicon#21)
# ---------------------------------------------------------------------------

#: Human-friendly display names for array format tokens.
#:
#: This mapping is the single source of truth for the recognised tokens:
#: ``KNOWN_ARRAY_FORMATS`` is derived from its keys so the two can never
#: drift apart when a format is added or removed.
ARRAY_FORMAT_LABELS: dict[str, str] = {
    # Original formats
    "numpyBytes": "NumPy ndarray",
    "parquetBytes": "Parquet",
    # New formats
    "sparseBytes": "Sparse matrix (CSR/CSC/COO)",
    "structuredBytes": "NumPy structured array",
    "arrowTensor": "Arrow tensor IPC",
    "safetensors": "Safetensors",
}

#: Every array format token that has a display label.
KNOWN_ARRAY_FORMATS: set[str] = set(ARRAY_FORMAT_LABELS)


# ---------------------------------------------------------------------------
# AT-URI parsing
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -119,6 +145,19 @@ def row_to_schema(row) -> dict[str, Any]:
}
if row["description"]:
d["description"] = row["description"]

# Surface array format and ndarray v1.1.0 annotation fields for display
array_format = schema_body.get("arrayFormat")
if array_format:
d["arrayFormat"] = array_format
d["arrayFormatLabel"] = ARRAY_FORMAT_LABELS.get(array_format, array_format)
if schema_body.get("dtype"):
d["dtype"] = schema_body["dtype"]
if schema_body.get("shape"):
d["shape"] = schema_body["shape"]
if schema_body.get("dimensionNames"):
d["dimensionNames"] = schema_body["dimensionNames"]

return d


Expand Down
97 changes: 95 additions & 2 deletions tests/test_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def _make_schema_row(
did: str = "did:plc:test123",
rkey: str = "test@1.0.0",
name: str = "TestSchema",
schema_body: str | dict = '{"type": "object"}',
) -> dict:
return {
"did": did,
Expand All @@ -49,7 +50,7 @@ def _make_schema_row(
"name": name,
"version": "1.0.0",
"schema_type": "jsonSchema",
"schema_body": '{"type": "object"}',
"schema_body": schema_body,
"description": "A test schema",
"metadata": None,
"created_at": "2025-01-01T00:00:00Z",
Expand Down Expand Up @@ -140,10 +141,12 @@ async def test_home_search(mock_search):

@pytest.mark.asyncio
@patch("atdata_app.frontend.routes.query_labels_for_dataset", new_callable=AsyncMock)
@patch("atdata_app.frontend.routes.query_get_schema", new_callable=AsyncMock)
@patch("atdata_app.frontend.routes.query_get_entry", new_callable=AsyncMock)
async def test_dataset_detail(mock_get, mock_labels):
async def test_dataset_detail(mock_get, mock_schema, mock_labels):
pool, _conn = _mock_pool()
mock_get.return_value = _make_entry_row()
mock_schema.return_value = _make_schema_row()
mock_labels.return_value = [_make_label_row()]
app = _make_app(pool)
transport = ASGITransport(app=app)
Expand Down Expand Up @@ -260,6 +263,96 @@ async def test_about(mock_counts):
assert "did:web:localhost%3A8000" in resp.text


# ---------------------------------------------------------------------------
# Schema detail — array format & ndarray annotations
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
@patch("atdata_app.frontend.routes.query_get_schema", new_callable=AsyncMock)
async def test_schema_detail_array_format(mock_get):
    """Schema page shows the format label and ndarray annotation values.

    The mocked schema row carries ``arrayFormat``, ``dtype``, ``shape`` and
    ``dimensionNames`` in its body; the rendered page must contain a
    representative fragment of each.
    """
    db_pool, _ = _mock_pool()
    annotated_body = {
        "arrayFormat": "sparseBytes",
        "dtype": "float32",
        "shape": [100, 200],
        "dimensionNames": ["samples", "features"],
    }
    mock_get.return_value = _make_schema_row(schema_body=annotated_body)

    asgi = ASGITransport(app=_make_app(db_pool))
    async with AsyncClient(transport=asgi, base_url="http://test") as http:
        response = await http.get("/schema/did:plc:test123/test@1.0.0")

    assert response.status_code == 200
    page = response.text
    # One fragment per surfaced field: label, dtype, shape entry, dimension name.
    for fragment in ("Sparse matrix", "float32", "100", "samples"):
        assert fragment in page


@pytest.mark.asyncio
@patch("atdata_app.frontend.routes.query_get_schema", new_callable=AsyncMock)
async def test_schema_detail_no_array_format(mock_get):
    """The default fixture schema row must not produce format/dtype rows."""
    db_pool, _ = _mock_pool()
    mock_get.return_value = _make_schema_row()

    asgi = ASGITransport(app=_make_app(db_pool))
    async with AsyncClient(transport=asgi, base_url="http://test") as http:
        response = await http.get("/schema/did:plc:test123/test@1.0.0")

    assert response.status_code == 200
    page = response.text
    # Neither optional row heading may appear for a schema lacking the fields.
    for row_heading in ("Array Format", "Data Type"):
        assert row_heading not in page


# ---------------------------------------------------------------------------
# Dataset detail — schema format info
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
@patch("atdata_app.frontend.routes.query_labels_for_dataset", new_callable=AsyncMock)
@patch("atdata_app.frontend.routes.query_get_schema", new_callable=AsyncMock)
@patch("atdata_app.frontend.routes.query_get_entry", new_callable=AsyncMock)
async def test_dataset_detail_with_schema_format(mock_entry, mock_schema, mock_labels):
    """Dataset page shows format label and dtype taken from the mocked schema."""
    db_pool, _ = _mock_pool()

    # Patches are applied bottom-up, so the mocks arrive as entry, schema, labels.
    mock_entry.return_value = _make_entry_row()
    mock_schema.return_value = _make_schema_row(
        did="did:plc:test",
        rkey="test@1.0.0",
        schema_body={"arrayFormat": "numpyBytes", "dtype": "float64"},
    )
    mock_labels.return_value = []

    asgi = ASGITransport(app=_make_app(db_pool))
    async with AsyncClient(transport=asgi, base_url="http://test") as http:
        response = await http.get("/dataset/did:plc:test123/3xyz")

    assert response.status_code == 200
    page = response.text
    assert "NumPy ndarray" in page
    assert "float64" in page


# ---------------------------------------------------------------------------
# Schemas list — format column
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
@patch("atdata_app.frontend.routes.query_list_schemas", new_callable=AsyncMock)
async def test_schemas_list_shows_format(mock_list):
    """Schemas listing includes the display label for a row's array format."""
    db_pool, _ = _mock_pool()
    safetensors_row = _make_schema_row(schema_body={"arrayFormat": "safetensors"})
    mock_list.return_value = [safetensors_row]

    asgi = ASGITransport(app=_make_app(db_pool))
    async with AsyncClient(transport=asgi, base_url="http://test") as http:
        response = await http.get("/schemas")

    assert response.status_code == 200
    assert "Safetensors" in response.text


# ---------------------------------------------------------------------------
# Static files
# ---------------------------------------------------------------------------
Expand Down
Loading