diff --git a/doc/release-notes/12014-croissant-1.1.md b/doc/release-notes/12014-croissant-1.1.md new file mode 100644 index 00000000000..44c796c593d --- /dev/null +++ b/doc/release-notes/12014-croissant-1.1.md @@ -0,0 +1,25 @@ +### Croissant 1.1 (Summary Statistics) + +The Croissant metadata export format has been updated from version 1.0 to 1.1. + +Summary statistics (mean, min, max, etc.) are now included for tabular files that were successfully ingested. + +You can download an example Croissant file from the [Supported Metadata Export Formats](https://guides.dataverse.org/en/latest/user/dataset-management.html#supported-metadata-export-formats) section of the guides. + +Minor backward-incompatible changes were made, which are noted below. + +See #12014 and #12214. + +## Backward Incompatible Changes + +Generally speaking, see the [API Changelog](https://guides.dataverse.org/en/latest/api/changelog.html) for a list of backward-incompatible API changes. + +Minor changes in the `croissant` format are noted in the [API changelog](https://guides.dataverse.org/en/latest/api/changelog.html). + +## Upgrade Instructions + +1. Re-export metadata export formats + + We re-export because the Croissant format was updated. + + `curl http://localhost:8080/api/admin/metadata/reExportAll` diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 1b49a0982f4..8c55cc33883 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -7,6 +7,11 @@ This API changelog is experimental and we would love feedback on its usefulness. :local: :depth: 1 +v6.11 +----- + +- The Croissant :ref:`metadata export format ` has been updated from version 1.0 to 1.1, which is reflected in the ``conformsTo`` property. ``@vocab`` and ``sc`` properties now use "http" as `recommended `_. The unused ``wd`` property has been dropped. 
+ v6.9 ---- diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 8836961bae1..9c389ef4be3 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -28,7 +28,7 @@ Supported Metadata Export Formats Once a dataset has been published, its metadata can be exported in a variety of other metadata standards and formats, which help make datasets more :doc:`discoverable ` and usable in other systems, such as other data repositories. On each dataset page's metadata tab, the following exports are available: -- Croissant +- Croissant (example: :download:`max-croissant.json <../../../../src/test/resources/croissant/max/expected/max-croissant.json>`) - Dublin Core - DDI (Data Documentation Initiative Codebook 2.5) - DDI HTML Codebook (A more human-readable, HTML version of the DDI Codebook 2.5 metadata export) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/croissant/CroissantExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/croissant/CroissantExportUtil.java index 9ff2bba121d..e9ba39eeba5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/croissant/CroissantExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/croissant/CroissantExportUtil.java @@ -31,7 +31,7 @@ public static void exportDataset( { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -46,6 +46,7 @@ public static void exportDataset( "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -69,12 +70,11 @@ public static void exportDataset( "repeated": "cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - "sc": "https://schema.org/", + 
"sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" } } """; @@ -84,7 +84,7 @@ public static void exportDataset( } job.add("@type", "sc:Dataset"); - job.add("conformsTo", "http://mlcommons.org/croissant/1.0"); + job.add("conformsTo", "http://mlcommons.org/croissant/1.1"); JsonObject datasetJson = dataProvider.getDatasetJson(); @@ -261,6 +261,13 @@ public static void exportDataset( int varQuantity = dataTableObject.getInt("varQuantity"); // Unused int caseQuantity = dataTableObject.getInt("caseQuantity"); + recordSetContent.add( + "cr:annotation", + Json.createObjectBuilder() + .add("@type", "cr:Field") + .add("name", fileId.toString() + "/count") + .add("value", caseQuantity) + .add("dataType", "http://www.wikidata.org/entity/Q4049983")); JsonArray dataVariables = dataTableObject.getJsonArray("dataVariables"); JsonArrayBuilder fieldSetArray = Json.createArrayBuilder(); for (JsonValue dataVariableValue : dataVariables) { @@ -278,6 +285,8 @@ public static void exportDataset( dataVariableObject.getString("variableFormatType"); String variableIntervalType = dataVariableObject.getString("variableIntervalType"); + JsonObject variableSummaryStatistics = + dataVariableObject.getJsonObject("summaryStatistics"); String dataType = null; /** * There are only two variableFormatType types on the Dataverse side: @@ -293,7 +302,129 @@ public static void exportDataset( default: break; } - fieldSetArray.add( + JsonArrayBuilder annotationsBuilder = Json.createArrayBuilder(); + if (variableSummaryStatistics != null) { + // Same order as upstream: MEAN, MEDN, MODE, MIN, MAX, STDEV, VALD, INVD + annotationsBuilder + .add( + Json.createObjectBuilder() + // We're aware that an @id of + // "data/stata13-auto.dta/price/mean" + // looks nice but won't validate if there's + // whitespace in the filename. 
+ // We've asked for guidance here: + // https://github.com/mlcommons/croissant/issues/639#issuecomment-3792179493 + .add( + "@id", + fileId.toString() + + "/" + + variableName + // The spec gives "mean" as an + // example but we'll use + // ArithmeticMean from + // https://rdf-vocabulary.ddialliance.org/ddi-cv/SummaryStatisticType/2.1.2/SummaryStatisticType.html + + "/ArithmeticMean") + .add( + "value", + variableSummaryStatistics.getString( + "mean")) + .add("dataType", "ddi-stats:7975ed0")) + .add( + Json.createObjectBuilder() + .add( + "@id", + fileId.toString() + + "/" + + variableName + + "/Median") + .add( + "value", + variableSummaryStatistics.getString( + "medn")) + .add("dataType", "ddi-stats:66851a3") + .add("equivalentProperty", "sc:median")) + .add( + Json.createObjectBuilder() + .add( + "@id", + fileId.toString() + + "/" + + variableName + + "/Mode") + .add( + "value", + variableSummaryStatistics.getString( + "mode")) + .add("dataType", "ddi-stats:650be61")) + .add( + Json.createObjectBuilder() + .add( + "@id", + fileId.toString() + + "/" + + variableName + + "/Minimum") + .add( + "value", + variableSummaryStatistics.getString( + "min")) + .add("dataType", "ddi-stats:a1d0ec6") + .add("equivalentProperty", "sc:minValue")) + .add( + Json.createObjectBuilder() + .add( + "@id", + fileId.toString() + + "/" + + variableName + + "/Maximum") + .add( + "value", + variableSummaryStatistics.getString( + "max")) + .add("dataType", "ddi-stats:8321e79") + .add("equivalentProperty", "sc:maxValue")) + .add( + Json.createObjectBuilder() + .add( + "@id", + fileId.toString() + + "/" + + variableName + + "/StandardDeviation") + .add( + "value", + variableSummaryStatistics.getString( + "stdev")) + .add("dataType", "ddi-stats:690ab50")) + .add( + Json.createObjectBuilder() + .add( + "@id", + fileId.toString() + + "/" + + variableName + + "/ValidCases") + .add( + "value", + variableSummaryStatistics.getString( + "vald")) + .add("dataType", "ddi-stats:c646dd8")) + .add( + 
Json.createObjectBuilder() + .add( + "@id", + fileId.toString() + + "/" + + variableName + + "/InvalidCases") + .add( + "value", + variableSummaryStatistics.getString( + "invd")) + .add("dataType", "ddi-stats:6459c62")); + } + JsonObjectBuilder fieldBuilder = Json.createObjectBuilder() .add("@type", "cr:Field") .add("name", variableName) @@ -312,7 +443,12 @@ public static void exportDataset( Json.createObjectBuilder() .add( "column", - variableName)))); + variableName))); + JsonArray annotations = annotationsBuilder.build(); + if (!annotations.isEmpty()) { + fieldBuilder.add("annotation", annotations); + } + fieldSetArray.add(fieldBuilder); } recordSetContent.add("field", fieldSetArray); recordSet.add(recordSetContent); diff --git a/src/test/resources/croissant/cars/expected/cars-croissant.json b/src/test/resources/croissant/cars/expected/cars-croissant.json index a9c0d48b217..a4f14cdde93 100644 --- a/src/test/resources/croissant/cars/expected/cars-croissant.json +++ b/src/test/resources/croissant/cars/expected/cars-croissant.json @@ -1,7 +1,7 @@ { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -16,6 +16,7 @@ "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -39,15 +40,14 @@ "repeated": "cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - "sc": "https://schema.org/", + "sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" }, "@type": "sc:Dataset", - "conformsTo": "http://mlcommons.org/croissant/1.0", + "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "Cars", "url": 
"https://doi.org/10.5072/FK2/CY7BWA", "creator": [ @@ -115,6 +115,12 @@ "recordSet": [ { "@type": "cr:RecordSet", + "cr:annotation": { + "@type": "cr:Field", + "name": "data/stata13-auto.dta/count", + "value": 74, + "dataType": "http://www.wikidata.org/entity/Q4049983" + }, "field": [ { "@type": "cr:Field", @@ -144,7 +150,52 @@ "extract": { "column": "price" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/price/ArithmeticMean", + "value": "6165.256756756757", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data/stata13-auto.dta/price/Median", + "value": "5006.5", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data/stata13-auto.dta/price/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/price/Minimum", + "value": "3291.0", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/price/Maximum", + "value": "15906.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/price/StandardDeviation", + "value": "2949.4958847689186", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/price/ValidCases", + "value": "74.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data/stata13-auto.dta/price/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -159,7 +210,52 @@ "extract": { "column": "mpg" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/mpg/ArithmeticMean", + "value": "21.2972972972973", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data/stata13-auto.dta/mpg/Median", + "value": "20.0", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data/stata13-auto.dta/mpg/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/mpg/Minimum", + "value": "12.0", + "dataType": 
"ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/mpg/Maximum", + "value": "41.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/mpg/StandardDeviation", + "value": "5.785503209735141", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/mpg/ValidCases", + "value": "74.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data/stata13-auto.dta/mpg/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -174,7 +270,52 @@ "extract": { "column": "rep78" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/rep78/ArithmeticMean", + "value": "3.4057971014492754", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data/stata13-auto.dta/rep78/Median", + "value": "3.0", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data/stata13-auto.dta/rep78/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/rep78/Minimum", + "value": "1.0", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/rep78/Maximum", + "value": "5.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/rep78/StandardDeviation", + "value": "0.989932270109041", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/rep78/ValidCases", + "value": "69.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data/stata13-auto.dta/rep78/InvalidCases", + "value": "5.0", + "dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -189,7 +330,52 @@ "extract": { "column": "headroom" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/headroom/ArithmeticMean", + "value": "2.993243243243243", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": 
"data/stata13-auto.dta/headroom/Median", + "value": "3.0", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data/stata13-auto.dta/headroom/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/headroom/Minimum", + "value": "1.5", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/headroom/Maximum", + "value": "5.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/headroom/StandardDeviation", + "value": "0.845994766828771", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/headroom/ValidCases", + "value": "74.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data/stata13-auto.dta/headroom/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -204,7 +390,52 @@ "extract": { "column": "trunk" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/trunk/ArithmeticMean", + "value": "13.756756756756756", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data/stata13-auto.dta/trunk/Median", + "value": "14.0", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data/stata13-auto.dta/trunk/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/trunk/Minimum", + "value": "5.0", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/trunk/Maximum", + "value": "23.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/trunk/StandardDeviation", + "value": "4.277404189173201", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/trunk/ValidCases", + "value": "74.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": 
"data/stata13-auto.dta/trunk/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -219,7 +450,52 @@ "extract": { "column": "weight" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/weight/ArithmeticMean", + "value": "3019.459459459459", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data/stata13-auto.dta/weight/Median", + "value": "3190.0", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data/stata13-auto.dta/weight/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/weight/Minimum", + "value": "1760.0", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/weight/Maximum", + "value": "4840.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/weight/StandardDeviation", + "value": "777.1935671373664", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/weight/ValidCases", + "value": "74.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data/stata13-auto.dta/weight/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -234,7 +510,52 @@ "extract": { "column": "length" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/length/ArithmeticMean", + "value": "187.93243243243245", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data/stata13-auto.dta/length/Median", + "value": "192.5", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data/stata13-auto.dta/length/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/length/Minimum", + "value": "142.0", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/length/Maximum", + "value": "233.0", + "dataType": 
"ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/length/StandardDeviation", + "value": "22.266339902021585", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/length/ValidCases", + "value": "74.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data/stata13-auto.dta/length/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -249,7 +570,52 @@ "extract": { "column": "turn" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/turn/ArithmeticMean", + "value": "39.648648648648646", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data/stata13-auto.dta/turn/Median", + "value": "40.0", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data/stata13-auto.dta/turn/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/turn/Minimum", + "value": "31.0", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/turn/Maximum", + "value": "51.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/turn/StandardDeviation", + "value": "4.399353727233908", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/turn/ValidCases", + "value": "74.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data/stata13-auto.dta/turn/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -264,7 +630,52 @@ "extract": { "column": "displacement" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/displacement/ArithmeticMean", + "value": "197.2972972972973", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data/stata13-auto.dta/displacement/Median", + "value": "196.0", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": 
"data/stata13-auto.dta/displacement/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/displacement/Minimum", + "value": "79.0", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/displacement/Maximum", + "value": "425.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/displacement/StandardDeviation", + "value": "91.83721896440396", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/displacement/ValidCases", + "value": "74.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data/stata13-auto.dta/displacement/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -279,7 +690,52 @@ "extract": { "column": "gear_ratio" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/gear_ratio/ArithmeticMean", + "value": "3.0148648667979883", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data/stata13-auto.dta/gear_ratio/Median", + "value": "2.9550000429153442", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data/stata13-auto.dta/gear_ratio/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/gear_ratio/Minimum", + "value": "2.190000057220459", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/gear_ratio/Maximum", + "value": "3.890000104904175", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/gear_ratio/StandardDeviation", + "value": "0.45628709670763035", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/gear_ratio/ValidCases", + "value": "74.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data/stata13-auto.dta/gear_ratio/InvalidCases", + "value": "0.0", + 
"dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -294,7 +750,52 @@ "extract": { "column": "foreign" } - } + }, + "annotation": [ + { + "@id": "data/stata13-auto.dta/foreign/ArithmeticMean", + "value": "0.2972972972972975", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data/stata13-auto.dta/foreign/Median", + "value": "0.0", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data/stata13-auto.dta/foreign/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data/stata13-auto.dta/foreign/Minimum", + "value": "0.0", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data/stata13-auto.dta/foreign/Maximum", + "value": "1.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data/stata13-auto.dta/foreign/StandardDeviation", + "value": "0.46018845840901884", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data/stata13-auto.dta/foreign/ValidCases", + "value": "74.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data/stata13-auto.dta/foreign/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] } ] } diff --git a/src/test/resources/croissant/cars/expected/cars-croissantSlim.json b/src/test/resources/croissant/cars/expected/cars-croissantSlim.json index 392ddd3a5dd..258f39862c0 100644 --- a/src/test/resources/croissant/cars/expected/cars-croissantSlim.json +++ b/src/test/resources/croissant/cars/expected/cars-croissantSlim.json @@ -1,7 +1,7 @@ { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -16,6 +16,7 @@ "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -39,15 +40,14 @@ "repeated": 
"cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - "sc": "https://schema.org/", + "sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" }, "@type": "sc:Dataset", - "conformsTo": "http://mlcommons.org/croissant/1.0", + "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "Cars", "url": "https://doi.org/10.5072/FK2/CY7BWA", "creator": [ diff --git a/src/test/resources/croissant/draft/expected/draft-croissant.json b/src/test/resources/croissant/draft/expected/draft-croissant.json index b2065f79195..0f1a23b65c1 100644 --- a/src/test/resources/croissant/draft/expected/draft-croissant.json +++ b/src/test/resources/croissant/draft/expected/draft-croissant.json @@ -1,7 +1,7 @@ { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -16,6 +16,7 @@ "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -39,15 +40,14 @@ "repeated": "cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - "sc": "https://schema.org/", + "sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" }, "@type": "sc:Dataset", - "conformsTo": "http://mlcommons.org/croissant/1.0", + "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "Draft Dataset", "url": "https://doi.org/10.5072/FK2/OO7TEP", "creator": [ diff --git a/src/test/resources/croissant/draft/expected/draft-croissantSlim.json b/src/test/resources/croissant/draft/expected/draft-croissantSlim.json index 
30eabda4c9c..e59549906be 100644 --- a/src/test/resources/croissant/draft/expected/draft-croissantSlim.json +++ b/src/test/resources/croissant/draft/expected/draft-croissantSlim.json @@ -1,7 +1,7 @@ { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -16,6 +16,7 @@ "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -39,15 +40,14 @@ "repeated": "cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - "sc": "https://schema.org/", + "sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" }, "@type": "sc:Dataset", - "conformsTo": "http://mlcommons.org/croissant/1.0", + "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "Draft Dataset", "url": "https://doi.org/10.5072/FK2/OO7TEP", "creator": [ diff --git a/src/test/resources/croissant/junk/expected/junk-croissant.json b/src/test/resources/croissant/junk/expected/junk-croissant.json index b02bed5694e..d2da01ddd33 100644 --- a/src/test/resources/croissant/junk/expected/junk-croissant.json +++ b/src/test/resources/croissant/junk/expected/junk-croissant.json @@ -1,7 +1,7 @@ { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -16,6 +16,7 @@ "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -39,15 +40,14 @@ "repeated": "cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - 
"sc": "https://schema.org/", + "sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" }, "@type": "sc:Dataset", - "conformsTo": "http://mlcommons.org/croissant/1.0", + "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "</script><script>alert(666)</script>", "url": "https://doi.org/10.5072/FK2/0CNXUJ", "creator": [ diff --git a/src/test/resources/croissant/max/expected/max-croissant.json b/src/test/resources/croissant/max/expected/max-croissant.json index bf1941c7289..46535f95ccb 100644 --- a/src/test/resources/croissant/max/expected/max-croissant.json +++ b/src/test/resources/croissant/max/expected/max-croissant.json @@ -1,7 +1,7 @@ { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -16,6 +16,7 @@ "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -39,15 +40,14 @@ "repeated": "cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - "sc": "https://schema.org/", + "sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" }, "@type": "sc:Dataset", - "conformsTo": "http://mlcommons.org/croissant/1.0", + "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "Max Schema.org", "url": "https://doi.org/10.5072/FK2/VQTYHD", "creator": [ @@ -144,6 +144,12 @@ "recordSet": [ { "@type": "cr:RecordSet", + "cr:annotation": { + "@type": "cr:Field", + "name": "data.tsv/count", + "value": 3, + "dataType": "http://www.wikidata.org/entity/Q4049983" + }, "field": [ { 
"@type": "cr:Field", @@ -173,7 +179,52 @@ "extract": { "column": "bar" } - } + }, + "annotation": [ + { + "@id": "data.tsv/bar/ArithmeticMean", + "value": "2.0", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data.tsv/bar/Median", + "value": "2.0", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data.tsv/bar/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data.tsv/bar/Minimum", + "value": "1.0", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data.tsv/bar/Maximum", + "value": "3.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data.tsv/bar/StandardDeviation", + "value": "1.0", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data.tsv/bar/ValidCases", + "value": "3.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data.tsv/bar/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] }, { "@type": "cr:Field", @@ -188,7 +239,52 @@ "extract": { "column": "baz" } - } + }, + "annotation": [ + { + "@id": "data.tsv/baz/ArithmeticMean", + "value": "20.0", + "dataType": "ddi-stats:7975ed0" + }, + { + "@id": "data.tsv/baz/Median", + "value": "20.0", + "dataType": "ddi-stats:66851a3", + "equivalentProperty": "sc:median" + }, + { + "@id": "data.tsv/baz/Mode", + "value": ".", + "dataType": "ddi-stats:650be61" + }, + { + "@id": "data.tsv/baz/Minimum", + "value": "10.0", + "dataType": "ddi-stats:a1d0ec6", + "equivalentProperty": "sc:minValue" + }, + { + "@id": "data.tsv/baz/Maximum", + "value": "30.0", + "dataType": "ddi-stats:8321e79", + "equivalentProperty": "sc:maxValue" + }, + { + "@id": "data.tsv/baz/StandardDeviation", + "value": "10.0", + "dataType": "ddi-stats:690ab50" + }, + { + "@id": "data.tsv/baz/ValidCases", + "value": "3.0", + "dataType": "ddi-stats:c646dd8" + }, + { + "@id": "data.tsv/baz/InvalidCases", + "value": "0.0", + "dataType": "ddi-stats:6459c62" + } + ] } ] } diff 
--git a/src/test/resources/croissant/max/expected/max-croissantSlim.json b/src/test/resources/croissant/max/expected/max-croissantSlim.json index fa3d632838e..58bca2ea064 100644 --- a/src/test/resources/croissant/max/expected/max-croissantSlim.json +++ b/src/test/resources/croissant/max/expected/max-croissantSlim.json @@ -1,7 +1,7 @@ { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -16,6 +16,7 @@ "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -39,15 +40,14 @@ "repeated": "cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - "sc": "https://schema.org/", + "sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" }, "@type": "sc:Dataset", - "conformsTo": "http://mlcommons.org/croissant/1.0", + "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "Max Schema.org", "url": "https://doi.org/10.5072/FK2/VQTYHD", "creator": [ diff --git a/src/test/resources/croissant/minimal/expected/minimal-croissant.json b/src/test/resources/croissant/minimal/expected/minimal-croissant.json index 7c47afc1485..4a50dde63b1 100644 --- a/src/test/resources/croissant/minimal/expected/minimal-croissant.json +++ b/src/test/resources/croissant/minimal/expected/minimal-croissant.json @@ -1,7 +1,7 @@ { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -16,6 +16,7 @@ "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": 
"http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -39,15 +40,14 @@ "repeated": "cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - "sc": "https://schema.org/", + "sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" }, "@type": "sc:Dataset", - "conformsTo": "http://mlcommons.org/croissant/1.0", + "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "Minimal", "url": "https://doi.org/10.5072/FK2/4C0JYC", "creator": [ diff --git a/src/test/resources/croissant/restricted/expected/restricted-croissant.json b/src/test/resources/croissant/restricted/expected/restricted-croissant.json index 19d970d1bbb..d658a0982f4 100644 --- a/src/test/resources/croissant/restricted/expected/restricted-croissant.json +++ b/src/test/resources/croissant/restricted/expected/restricted-croissant.json @@ -1,7 +1,7 @@ { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -16,6 +16,7 @@ "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -39,15 +40,14 @@ "repeated": "cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - "sc": "https://schema.org/", + "sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" }, "@type": "sc:Dataset", - "conformsTo": "http://mlcommons.org/croissant/1.0", + "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "Cars", "url": 
"https://doi.org/10.5072/FK2/CY7BWA", "creator": [ diff --git a/src/test/resources/croissant/restricted/expected/restricted-croissantSlim.json b/src/test/resources/croissant/restricted/expected/restricted-croissantSlim.json index 392ddd3a5dd..258f39862c0 100644 --- a/src/test/resources/croissant/restricted/expected/restricted-croissantSlim.json +++ b/src/test/resources/croissant/restricted/expected/restricted-croissantSlim.json @@ -1,7 +1,7 @@ { "@context": { "@language": "en", - "@vocab": "https://schema.org/", + "@vocab": "http://schema.org/", "citeAs": "cr:citeAs", "column": "cr:column", "conformsTo": "dct:conformsTo", @@ -16,6 +16,7 @@ "@type": "@vocab" }, "dct": "http://purl.org/dc/terms/", + "ddi-stats": "http://rdf-vocabulary.ddialliance.org/cv/SummaryStatisticType/2.1.2/", "examples": { "@id": "cr:examples", "@type": "@json" @@ -39,15 +40,14 @@ "repeated": "cr:repeated", "replace": "cr:replace", "samplingRate": "cr:samplingRate", - "sc": "https://schema.org/", + "sc": "http://schema.org/", "separator": "cr:separator", "source": "cr:source", "subField": "cr:subField", - "transform": "cr:transform", - "wd": "https://www.wikidata.org/wiki/" + "transform": "cr:transform" }, "@type": "sc:Dataset", - "conformsTo": "http://mlcommons.org/croissant/1.0", + "conformsTo": "http://mlcommons.org/croissant/1.1", "name": "Cars", "url": "https://doi.org/10.5072/FK2/CY7BWA", "creator": [