From c830614e7d9373158a171591296908363388421e Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Tue, 4 Nov 2025 11:33:16 +0100 Subject: [PATCH 1/9] BugFix: controlled vocab values validation --- .../harvard/iq/dataverse/DatasetVersion.java | 14 +++-- .../DatasetVersionValidationTest.java | 55 +++++++++++++++++++ 2 files changed, 65 insertions(+), 4 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/DatasetVersionValidationTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index cfd4c886bf3..52c5224f886 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1788,11 +1788,18 @@ private void removeEmptyValues() { private void removeEmptyValues(DatasetField dsf) { if (dsf.getDatasetFieldType().isPrimitive()) { // primitive + final boolean isControlledVocabulary = dsf.getDatasetFieldType().isControlledVocabulary(); final Iterator i = dsf.getDatasetFieldValues().iterator(); while (i.hasNext()) { - final String v = i.next().getValue(); + final DatasetFieldValue fieldValue = i.next(); + final String v = fieldValue.getValue(); if (StringUtils.isBlank(v) || DatasetField.NA_VALUE.equals(v)) { i.remove(); + continue; + } + if (isControlledVocabulary && dsf.getDatasetFieldType().getControlledVocabularyValue(v) == null) { + // Drop placeholders that never matched an allowed vocabulary value. + i.remove(); } } } else { @@ -1800,9 +1807,8 @@ private void removeEmptyValues(DatasetField dsf) { } } - public Set validate() { - Set returnSet = new HashSet<>(); - + public Set> validate() { + Set> returnSet = new HashSet<>(); for (DatasetField dsf : this.getFlatDatasetFields()) { dsf.setValidationMessage(null); // clear out any existing validation message diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetVersionValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetVersionValidationTest.java new file mode 100644 index 00000000000..74ccb44d9ad --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetVersionValidationTest.java @@ -0,0 +1,55 @@ +package edu.harvard.iq.dataverse; + +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertFalse; + +class DatasetVersionValidationTest { + + @Test + void requiredControlledVocabularyWithEmptyValueRemainsInvalid() { + DatasetFieldType subjectType = new DatasetFieldType(); + subjectType.setName("subject"); + subjectType.setAllowControlledVocabulary(true); + subjectType.setRequired(true); + subjectType.setAllowMultiples(false); + subjectType.setFieldType(DatasetFieldType.FieldType.TEXT); + subjectType.setControlledVocabularyValues(new ArrayList<>()); + subjectType.setChildDatasetFieldTypes(new ArrayList<>()); + + MetadataBlock citation = new MetadataBlock(); + citation.setName("citation"); + citation.setDisplayName("Citation"); + citation.setDatasetFieldTypes(List.of(subjectType)); + subjectType.setMetadataBlock(citation); + + Dataverse dataverse = new Dataverse(); + dataverse.setMetadataBlockRoot(true); + dataverse.setMetadataBlocks(List.of(citation)); + + Dataset dataset = new Dataset(); + dataset.setOwner(dataverse); + + DatasetVersion version = new DatasetVersion(); + version.setVersionState(DatasetVersion.VersionState.DRAFT); + version.setDataset(dataset); + dataset.getVersions().add(version); + + DatasetField subjectField = new DatasetField(); + subjectField.setDatasetFieldType(subjectType); + subjectField.setDatasetFieldValues(new ArrayList<>()); + subjectField.setControlledVocabularyValues(new ArrayList<>()); + + DatasetFieldValue orphanValue = new DatasetFieldValue(subjectField); + orphanValue.setValue("__placeholder__"); + subjectField.getDatasetFieldValues().add(orphanValue); + + version.setDatasetFields(new ArrayList<>(List.of(subjectField))); + + assertFalse(version.isValid(), + "Controlled vocabulary fields without a selected term should keep dataset version invalid"); + } +} From 6d4e043ce90e57bb2a1c3c97119e323c0ef0e8c0 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Tue, 4 Nov 2025 13:14:21 +0100 Subject: [PATCH 2/9] fixed build issue --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 52c5224f886..4d2d9e00472 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1807,8 +1807,8 @@ private void removeEmptyValues(DatasetField dsf) { } } - public Set> validate() { - Set> returnSet = new HashSet<>(); + public Set validate() { + Set returnSet = new HashSet<>(); for (DatasetField dsf : this.getFlatDatasetFields()) { dsf.setValidationMessage(null); // clear out any existing validation message From 2a939ff1216901087e94664b93b43057e8565742 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Tue, 4 Nov 2025 13:49:06 +0100 Subject: [PATCH 3/9] Improved the detection of incomplete metadata when saving a dataset in UI (JSF) --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index b41e8d4ac35..19adcbc35ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3981,8 +3981,7 @@ public String save() { dataset.setOwner(ownerId != null ? dataverseService.find(ownerId) : null); } // Validate - Set constraintViolations = workingVersion.validate(); - if (!constraintViolations.isEmpty()) { + if (!workingVersion.isValid()) { FacesContext.getCurrentInstance().validationFailed(); return ""; } From 1159bfd89e02a6b082929f1650f3d20352e6cc90 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Fri, 28 Nov 2025 09:27:45 +0100 Subject: [PATCH 4/9] revert removing non-existing vocab values --- .../java/edu/harvard/iq/dataverse/DatasetVersion.java | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 4d2d9e00472..cfd4c886bf3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1788,18 +1788,11 @@ private void removeEmptyValues() { private void removeEmptyValues(DatasetField dsf) { if (dsf.getDatasetFieldType().isPrimitive()) { // primitive - final boolean isControlledVocabulary = dsf.getDatasetFieldType().isControlledVocabulary(); final Iterator i = dsf.getDatasetFieldValues().iterator(); while (i.hasNext()) { - final DatasetFieldValue fieldValue = i.next(); - final String v = fieldValue.getValue(); + final String v = i.next().getValue(); if (StringUtils.isBlank(v) || DatasetField.NA_VALUE.equals(v)) { i.remove(); - continue; - } - if (isControlledVocabulary && dsf.getDatasetFieldType().getControlledVocabularyValue(v) == null) { - // Drop placeholders that never matched an allowed vocabulary value. - i.remove(); } } } else { @@ -1810,6 +1803,7 @@ private void removeEmptyValues(DatasetField dsf) { public Set validate() { Set returnSet = new HashSet<>(); + for (DatasetField dsf : this.getFlatDatasetFields()) { dsf.setValidationMessage(null); // clear out any existing validation message Set> constraintViolations = validator.validate(dsf); From fd10dd120c4fe177e4496f21b2287239f3fbd044 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Fri, 28 Nov 2025 09:43:02 +0100 Subject: [PATCH 5/9] removed the unit test --- .../DatasetVersionValidationTest.java | 55 ------------------- 1 file changed, 55 deletions(-) delete mode 100644 src/test/java/edu/harvard/iq/dataverse/DatasetVersionValidationTest.java diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetVersionValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetVersionValidationTest.java deleted file mode 100644 index 74ccb44d9ad..00000000000 --- a/src/test/java/edu/harvard/iq/dataverse/DatasetVersionValidationTest.java +++ /dev/null @@ -1,55 +0,0 @@ -package edu.harvard.iq.dataverse; - -import org.junit.jupiter.api.Test; - -import java.util.ArrayList; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertFalse; - -class DatasetVersionValidationTest { - - @Test - void requiredControlledVocabularyWithEmptyValueRemainsInvalid() { - DatasetFieldType subjectType = new DatasetFieldType(); - subjectType.setName("subject"); - subjectType.setAllowControlledVocabulary(true); - subjectType.setRequired(true); - subjectType.setAllowMultiples(false); - subjectType.setFieldType(DatasetFieldType.FieldType.TEXT); - subjectType.setControlledVocabularyValues(new ArrayList<>()); - subjectType.setChildDatasetFieldTypes(new ArrayList<>()); - - MetadataBlock citation = new MetadataBlock(); - citation.setName("citation"); - citation.setDisplayName("Citation"); - citation.setDatasetFieldTypes(List.of(subjectType)); - subjectType.setMetadataBlock(citation); - - Dataverse dataverse = new Dataverse(); - dataverse.setMetadataBlockRoot(true); - dataverse.setMetadataBlocks(List.of(citation)); - - Dataset dataset = new Dataset(); - dataset.setOwner(dataverse); - - DatasetVersion version = new DatasetVersion(); - version.setVersionState(DatasetVersion.VersionState.DRAFT); - version.setDataset(dataset); - dataset.getVersions().add(version); - - DatasetField subjectField = new DatasetField(); - subjectField.setDatasetFieldType(subjectType); - subjectField.setDatasetFieldValues(new ArrayList<>()); - subjectField.setControlledVocabularyValues(new ArrayList<>()); - - DatasetFieldValue orphanValue = new DatasetFieldValue(subjectField); - orphanValue.setValue("__placeholder__"); - subjectField.getDatasetFieldValues().add(orphanValue); - - version.setDatasetFields(new ArrayList<>(List.of(subjectField))); - - assertFalse(version.isValid(), - "Controlled vocabulary fields without a selected term should keep dataset version invalid"); - } -} From c548e477a890161b23de31d3acd061055417c9e6 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Fri, 28 Nov 2025 09:52:05 +0100 Subject: [PATCH 6/9] added validation call for working version in DatasetPage --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index c9a60cff77d..0563d7d64cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -4005,6 +4005,7 @@ public String save() { dataset.setOwner(ownerId != null ? dataverseService.find(ownerId) : null); } // Validate + workingVersion.validate(); // and validation messages to dataset fields if (!workingVersion.isValid()) { FacesContext.getCurrentInstance().validationFailed(); return ""; From f51c670c0e077544cbda7e4805e7b5c7fe933569 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Fri, 28 Nov 2025 09:56:37 +0100 Subject: [PATCH 7/9] typo fix --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0563d7d64cd..5168c03a675 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -4005,7 +4005,7 @@ public String save() { dataset.setOwner(ownerId != null ? dataverseService.find(ownerId) : null); } // Validate - workingVersion.validate(); // and validation messages to dataset fields + workingVersion.validate(); // add validation messages to dataset fields if (!workingVersion.isValid()) { FacesContext.getCurrentInstance().validationFailed(); return ""; From 09c55cea18e48ef1836f81b9121e4909d7360c4a Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Sat, 17 Jan 2026 00:52:16 +0100 Subject: [PATCH 8/9] Improve CVOC value validation by preventing N/A population in controlled vocabulary fields --- .../engine/command/impl/AbstractDatasetCommand.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java index 15faaad5d52..55ec6c555fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java @@ -110,10 +110,14 @@ protected void validateOrDie(DatasetVersion dsv, Boolean lenient) throws Command Set constraintViolations = dsv.validate(); if (!constraintViolations.isEmpty()) { if (lenient) { - // populate invalid fields with N/A + // populate invalid primitive fields with N/A + // Note: controlled vocabulary fields should NOT get N/A values in datasetfieldvalue, + // as this creates an inconsistent state where the CV field appears valid but is empty. + // See https://github.com/IQSS/dataverse/issues/11900 constraintViolations.stream() .filter(cv -> cv.getRootBean() instanceof DatasetField) .map(cv -> ((DatasetField) cv.getRootBean())) + .filter(f -> !f.getDatasetFieldType().isControlledVocabulary()) .forEach(f -> f.setSingleValue(DatasetField.NA_VALUE)); } else { From 08bfa304f40408a29a480914d64940e8e1972f1a Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Sat, 17 Jan 2026 00:52:40 +0100 Subject: [PATCH 9/9] Enhance CVOC value validation to ensure actual controlled vocabulary values are selected, preventing invalid N/A placeholders. --- .../iq/dataverse/DatasetFieldValidator.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java index 6d3fda2812d..595786ab758 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java @@ -59,9 +59,25 @@ public boolean isValid(DatasetField value, ConstraintValidatorContext context) { } // if value is not primitive or not empty - if (!dsfType.isPrimitive() || !StringUtils.isBlank(value.getValue())) { + // For controlled vocabulary fields, check that actual CV values are selected, + // not just that datasetFieldValues contains something (which might be an invalid N/A placeholder) + // See https://github.com/IQSS/dataverse/issues/11900 + if (!dsfType.isPrimitive()) { return true; } + + if (dsfType.isControlledVocabulary()) { + // For CV fields, check if there are actual controlled vocabulary values selected + if (value.getControlledVocabularyValues() != null && !value.getControlledVocabularyValues().isEmpty()) { + return true; + } + // If no CV values, fall through to required field check below + } else { + // For non-CV primitive fields, check if value is not blank + if (!StringUtils.isBlank(value.getValue())) { + return true; + } + } if (value.isRequired()) { String errorMessage = null;