From 18614874d55875cdaa28f07e1adf5b351adc49b8 Mon Sep 17 00:00:00 2001 From: Zio Gabber <78922322+Gabrymi93@users.noreply.github.com> Date: Sun, 29 Mar 2026 10:32:32 +0100 Subject: [PATCH] add istat-sdmx-explorer skill --- README.md | 1 + evals/README.md | 1 + evals/istat-sdmx-explorer/checks.md | 26 +++ evals/istat-sdmx-explorer/prompts.csv | 9 ++ evals/istat-sdmx-explorer/rubric.schema.json | 22 +++ skills/istat-sdmx-explorer/SKILL.md | 161 +++++++++++++++++++ 6 files changed, 220 insertions(+) create mode 100644 evals/istat-sdmx-explorer/checks.md create mode 100644 evals/istat-sdmx-explorer/prompts.csv create mode 100644 evals/istat-sdmx-explorer/rubric.schema.json create mode 100644 skills/istat-sdmx-explorer/SKILL.md diff --git a/README.md b/README.md index f913029..a12f728 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ During installation you'll be asked: | Skill | Description | Category | Eval | |---|---|---|---| | [ipa](skills/ipa/) | Look up PEC addresses and contacts for Italian public administrations via the IPA registry | Italy / PA | — | +| [istat-sdmx-explorer](skills/istat-sdmx-explorer/) | Explore ISTAT SDMX dataflows step by step before querying or downloading data | Italy / Statistics | — | | [open-data-quality](skills/open-data-quality/) | Validate open data quality for CSV files and CKAN datasets; produces severity-ranked reports with a quality score | Open Data | — | | [openalex](skills/openalex/) | Query OpenAlex API for scholarly works, authors, and PDF retrieval | Research | [🟡 78/100](evals/openalex/) | diff --git a/evals/README.md b/evals/README.md index 6cb1467..90b857d 100644 --- a/evals/README.md +++ b/evals/README.md @@ -6,6 +6,7 @@ Evaluation results for each skill in the collection. 
| Skill | Prompts | Last run | Score | Status | |---|---|---|---|---| +| [istat-sdmx-explorer](istat-sdmx-explorer/) | 8 | — | — | — | | [openalex](openalex/) | 8 | 2026-02-14 | 78/100 | 🟡 | ## How to add evals for a skill diff --git a/evals/istat-sdmx-explorer/checks.md b/evals/istat-sdmx-explorer/checks.md new file mode 100644 index 0000000..620c3a8 --- /dev/null +++ b/evals/istat-sdmx-explorer/checks.md @@ -0,0 +1,26 @@ +# Deterministic Checks - istat-sdmx-explorer + +## Trigger checks + +- [ ] Skill activates on explicit invocations about ISTAT SDMX exploration +- [ ] Skill activates on implicit prompts about finding a dataflow, decoding codes, or inspecting ISTAT dimensions +- [ ] Skill does NOT activate on generic open data requests unrelated to ISTAT SDMX + +## Process checks + +- [ ] Starts from dataflow discovery before assuming a dataflow ID +- [ ] Inspects structure before suggesting filters +- [ ] Uses constraints as the main anti-error step before sampling data +- [ ] Decodes codelists when coded dimensions matter for the task +- [ ] Fetches only a small sample, not a full download + +## Output checks + +- [ ] Output is structured with practical sections such as Dataflow, Structure, Valid filters, Sample, and Next step +- [ ] The response explains what the user can do next after exploration + +## Pitfall checks + +- [ ] Does NOT invent SDMX codes +- [ ] Goes back to discovery if the chosen dataflow looks wrong +- [ ] Does NOT treat a sample as final analysis diff --git a/evals/istat-sdmx-explorer/prompts.csv b/evals/istat-sdmx-explorer/prompts.csv new file mode 100644 index 0000000..5a4dd7f --- /dev/null +++ b/evals/istat-sdmx-explorer/prompts.csv @@ -0,0 +1,9 @@ +id,should_trigger,prompt +test-01,true,"Use the $istat-sdmx-explorer skill to find an ISTAT dataflow about employment" +test-02,true,"I need to understand the dimensions of an ISTAT SDMX flow before querying it" +test-03,true,"Which values are valid for the territorial dimension in this ISTAT 
dataset?" +test-04,true,"Decode these ISTAT SDMX codes before I try to build filters" +test-05,true,"Show me a small sample of an ISTAT SDMX dataflow, but first help me understand the structure" +test-06,false,"Validate the quality of this CKAN dataset" +test-07,false,"Download all rows from this ISTAT dataset into CSV immediately" +test-08,false,"Find open government papers on OpenAlex" diff --git a/evals/istat-sdmx-explorer/rubric.schema.json b/evals/istat-sdmx-explorer/rubric.schema.json new file mode 100644 index 0000000..2f41390 --- /dev/null +++ b/evals/istat-sdmx-explorer/rubric.schema.json @@ -0,0 +1,22 @@ +{ + "type": "object", + "properties": { + "overall_pass": { "type": "boolean" }, + "score": { "type": "integer", "minimum": 0, "maximum": 100 }, + "checks": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { "type": "string" }, + "pass": { "type": "boolean" }, + "notes": { "type": "string" } + }, + "required": ["id", "pass", "notes"], + "additionalProperties": false + } + } + }, + "required": ["overall_pass", "score", "checks"], + "additionalProperties": false +} diff --git a/skills/istat-sdmx-explorer/SKILL.md b/skills/istat-sdmx-explorer/SKILL.md new file mode 100644 index 0000000..5846d73 --- /dev/null +++ b/skills/istat-sdmx-explorer/SKILL.md @@ -0,0 +1,161 @@ +--- +name: istat-sdmx-explorer +description: Explore ISTAT SDMX datasets step by step before downloading data. Use when the user wants to find an ISTAT dataflow, understand its dimensions, see valid filter values, decode codelists, or fetch a small sample without already knowing SDMX codes. +license: CC BY-SA 4.0 (Creative Commons Attribution-ShareAlike 4.0 International) +--- + +# ISTAT SDMX Explorer + +Use this skill to explore ISTAT SDMX dataflows safely and efficiently before attempting full downloads or pipeline ingestion. 
+ +It works with any SDMX-aware toolchain that lets you: + +- discover dataflows +- inspect structure +- inspect valid constraints +- decode codelists +- fetch a very small sample + +The goal is not to fetch a huge dataset immediately. The goal is to: + +1. identify the right dataflow +2. understand the structure +3. inspect valid values for the dimensions +4. decode the relevant codelists +5. fetch only a small sample to confirm the shape + +## Definition of done + +A task is complete when: + +- the correct ISTAT dataflow has been identified +- the main dimensions are known +- valid filter values are available for the dimensions that matter +- at least one important codelist has been decoded when needed +- a small sample has been fetched successfully +- the response explains what the user can do next + +## Preferred workflow + +### Step 1 - Discover the dataflow + +Start with a keyword search on dataflows. + +Use a dataflow listing tool first. Search with simple topic keywords in Italian when possible. + +Good examples: + +- `occupazione` +- `popolazione` +- `reddito` +- `salute` +- `mobilità` + +Do not guess the dataflow ID if you can discover it first. + +### Step 2 - Inspect the structure + +Once a candidate dataflow is selected: + +- inspect the structure +- list the dimensions +- identify which dimensions look territorial, temporal, categorical, or measure-like + +At this stage, explain the shape in plain language: + +- what one row or observation likely represents +- which dimensions are likely mandatory filters +- whether time is explicit + +### Step 3 - Get valid constraints + +Before trying to build a query, inspect the valid values for each dimension. + +This is the most important anti-error step. + +Look for: + +- territorial dimensions such as `REF_AREA` +- frequency dimensions such as `FREQ` +- time-related dimensions +- topic-specific dimensions + +If a dimension has many values, summarize the count first and only expand the relevant ones. 
+ +### Step 4 - Decode codelists when needed + +If a dimension uses coded values, decode the linked codelist before suggesting filters. + +Prioritize: + +- territorial codelists +- frequency codelists +- topic codelists relevant to the user question + +When possible, provide both: + +- code +- human label + +If the codelist is hierarchical, mention parent-child relations when they matter. + +### Step 5 - Fetch only a small sample + +Only after the previous steps: + +- fetch a small sample +- confirm that the sample matches the expected structure + +Use a very small limit. +The sample is for validation, not for final analysis. + +### Step 6 - Explain the next move + +End with a short practical conclusion. For example: + +- ready to build a filtered request +- ready for a source-check +- ready for pipeline ingestion +- not ready because the dimensions are still ambiguous + +## Output style + +Keep the output structured and practical. + +Recommended sections: + +- `Dataflow` +- `Structure` +- `Valid filters` +- `Decoded codelists` +- `Sample` +- `Next step` + +## Rules + +- Do not start with a full data download +- Do not invent SDMX codes +- Do not assume territorial codes without decoding or constraints +- Prefer Italian labels when available +- If the sample endpoint is slow or incomplete, say so clearly +- If the dataflow looks wrong, go back to discovery instead of forcing the workflow + +## Typical user intents + +Use this skill when the user says things like: + +- "find an ISTAT dataset about employment" +- "what dimensions does this ISTAT flow have?" +- "which values are valid for this ISTAT dimension?" +- "decode these ISTAT codes" +- "show me a sample of this ISTAT dataflow" + +## Notes + +This skill is about exploration and validation. + +It does not replace: + +- a pipeline connector +- a bulk ingestion workflow +- a public analysis