From 584c89662815d348cab42599392317bedb4e0b14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herminio=20Garc=C3=ADa=20Gonz=C3=A1lez?= Date: Thu, 18 Sep 2025 11:40:47 +0200 Subject: [PATCH 1/3] Fixed a bug preventing the correct processing of CSV files through the stdin. Fixes #235. --- .../shexml/helper/SourceHelper.scala | 2 +- .../herminiogarcia/shexml/FilmsStdin.scala | 2 +- ...eratorExpressionWithCSVFromStdinTest.scala | 122 ++++++++++++++++++ 3 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 src/test/scala/com/herminiogarcia/shexml/MultipleElementIteratorExpressionWithCSVFromStdinTest.scala diff --git a/src/main/scala/com/herminiogarcia/shexml/helper/SourceHelper.scala b/src/main/scala/com/herminiogarcia/shexml/helper/SourceHelper.scala index 39ac25d..6bea6de 100644 --- a/src/main/scala/com/herminiogarcia/shexml/helper/SourceHelper.scala +++ b/src/main/scala/com/herminiogarcia/shexml/helper/SourceHelper.scala @@ -33,7 +33,7 @@ class SourceHelper { } def getStdinContents(): LoadedSource = { - LoadedSource(scala.io.Source.stdin.getLines().mkString, "-") + LoadedSource(scala.io.Source.stdin.mkString, "-") } } diff --git a/src/test/scala/com/herminiogarcia/shexml/FilmsStdin.scala b/src/test/scala/com/herminiogarcia/shexml/FilmsStdin.scala index 8cd12ad..8a49325 100644 --- a/src/test/scala/com/herminiogarcia/shexml/FilmsStdin.scala +++ b/src/test/scala/com/herminiogarcia/shexml/FilmsStdin.scala @@ -10,7 +10,7 @@ import java.io.ByteArrayInputStream class FilmsStdin extends AnyFunSuite with Matchers with RDFStatementCreator - with BeforeAndAfter with ParallelConfigInferenceDatatypesNormaliseURIsFixture{ + with BeforeAndAfter with ParallelConfigInferenceDatatypesNormaliseURIsFixture { private val example = """ diff --git a/src/test/scala/com/herminiogarcia/shexml/MultipleElementIteratorExpressionWithCSVFromStdinTest.scala b/src/test/scala/com/herminiogarcia/shexml/MultipleElementIteratorExpressionWithCSVFromStdinTest.scala new file mode 100644 index 
0000000..5b777c1 --- /dev/null +++ b/src/test/scala/com/herminiogarcia/shexml/MultipleElementIteratorExpressionWithCSVFromStdinTest.scala @@ -0,0 +1,122 @@ +package com.herminiogarcia.shexml + +import com.herminiogarcia.shexml.helper.SourceHelper +import org.apache.jena.datatypes.xsd.XSDDatatype +import org.apache.jena.rdf.model.Model +import org.scalatest.ConfigMap +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.must.Matchers + +import java.io.ByteArrayInputStream + +/** Regression test for issue #235: runs a mapping with XML, JSON and CSV iterators while the films CSV content is served through stdin, then checks the triples generated for films 1 to 6 and the total triple count. */ class MultipleElementIteratorExpressionWithCSVFromStdinTest extends AnyFunSuite + with Matchers with RDFStatementCreator + with ParallelConfigInferenceDatatypesNormaliseURIsFixture { + + private val example = + """ + |PREFIX : + |PREFIX xs: + |SOURCE films_xml_file + |SOURCE films_json_file + |SOURCE films_csv_file + |ITERATOR film_xml { + | FIELD id <@id> + | FIELD name + | FIELD year + | FIELD country + | FIELD directors + |} + |ITERATOR film_json { + | FIELD id + | FIELD name + | FIELD year + | FIELD country + | FIELD directors + |} + |ITERATOR film_csv { + | FIELD id + | FIELD name + | FIELD year + | FIELD country + | FIELD directors + |} + |EXPRESSION films + | + |:Films :[films.id] { + | :type :Film ; + | :name [films.name] @en ; + | :year [films.year] xs:gYear ; + | :country [films.country] ; + | :director [films.directors] ; + |} + """.stripMargin + + private var output: Model = _ + private val prefix = "http://example.com/" + + override def beforeAll(configMap: ConfigMap): Unit = { + super.beforeAll(configMap) + val originalStdin = System.in /* remember the real stdin; System.setIn(System.in) after the swap would be a no-op */ + val stream = new ByteArrayInputStream(new SourceHelper().getURLContent("https://rawgit.com/herminiogg/ShExML/enhancement-%239/src/test/resources/films.csv").fileContent.getBytes()) + System.setIn(stream) /* the downloaded CSV is now what the mapping reads from stdin */ + try { output = mappingLauncher.launchMapping(example).getDefaultModel } + finally { System.setIn(originalStdin) } /* restore stdin even if the mapping fails */ + } + + test("Shape 1 is translated correctly") { + assert(output.contains(createStatement(prefix, "1", "type", "Film"))) + 
assert(output.contains(createStatementWithLiteral(prefix, "1", "name", "Dunkirk", "en"))) + assert(output.contains(createStatementWithLiteral(prefix, "1", "year", "2017", XSDDatatype.XSDgYear))) + assert(output.contains(createStatementWithLiteral(prefix, "1", "country", "USA", XSDDatatype.XSDstring))) + assert(output.contains(createStatementWithLiteral(prefix, "1", "director", "Christopher Nolan", XSDDatatype.XSDstring))) + } + + test("Shape 2 is translated correctly") { + assert(output.contains(createStatement(prefix, "2", "type", "Film"))) + assert(output.contains(createStatementWithLiteral(prefix, "2", "name", "Interstellar", "en"))) + assert(output.contains(createStatementWithLiteral(prefix, "2", "year", "2014", XSDDatatype.XSDgYear))) + assert(output.contains(createStatementWithLiteral(prefix, "2", "country", "USA", XSDDatatype.XSDstring))) + assert(output.contains(createStatementWithLiteral(prefix, "2", "director", "Christopher Nolan", XSDDatatype.XSDstring))) + assert(output.contains(createStatementWithLiteral(prefix, "2", "director", "Jonathan Nolan", XSDDatatype.XSDstring))) + } + + test("Shape 3 is translated correctly") { + assert(output.contains(createStatement(prefix, "3", "type", "Film"))) + assert(output.contains(createStatementWithLiteral(prefix, "3", "name", "Inception", "en"))) + assert(output.contains(createStatementWithLiteral(prefix, "3", "year", "2010", XSDDatatype.XSDgYear))) + assert(output.contains(createStatementWithLiteral(prefix, "3", "country", "USA", XSDDatatype.XSDstring))) + assert(output.contains(createStatementWithLiteral(prefix, "3", "director", "Christopher Nolan", XSDDatatype.XSDstring))) + } + + test("Shape 4 is translated correctly") { + assert(output.contains(createStatement(prefix, "4", "type", "Film"))) + assert(output.contains(createStatementWithLiteral(prefix, "4", "name", "The Prestige", "en"))) + assert(output.contains(createStatementWithLiteral(prefix, "4", "year", "2006", XSDDatatype.XSDgYear))) + 
assert(output.contains(createStatementWithLiteral(prefix, "4", "country", "USA", XSDDatatype.XSDstring))) + assert(output.contains(createStatementWithLiteral(prefix, "4", "director", "Christopher Nolan", XSDDatatype.XSDstring))) + assert(output.contains(createStatementWithLiteral(prefix, "4", "director", "Jonathan Nolan", XSDDatatype.XSDstring))) + } + + test("Shape 5 is translated correctly") { + assert(output.contains(createStatement(prefix, "5", "type", "Film"))) + assert(output.contains(createStatementWithLiteral(prefix, "5", "name", "Memento", "en"))) + assert(output.contains(createStatementWithLiteral(prefix, "5", "year", "2000", XSDDatatype.XSDgYear))) + assert(output.contains(createStatementWithLiteral(prefix, "5", "country", "USA", XSDDatatype.XSDstring))) + assert(output.contains(createStatementWithLiteral(prefix, "5", "director", "Christopher Nolan", XSDDatatype.XSDstring))) + } + + test("Shape 6 is translated correctly") { + assert(output.contains(createStatement(prefix, "6", "type", "Film"))) + assert(output.contains(createStatementWithLiteral(prefix, "6", "name", "Insomnia", "en"))) + assert(output.contains(createStatementWithLiteral(prefix, "6", "year", "2002", XSDDatatype.XSDgYear))) + assert(output.contains(createStatementWithLiteral(prefix, "6", "country", "USA", XSDDatatype.XSDstring))) + assert(output.contains(createStatementWithLiteral(prefix, "6", "director", "Christopher Nolan", XSDDatatype.XSDstring))) + } + + test("No additional triples are generated") { + val triplesCount = 32 + assert(output.size() == triplesCount) + } + +} From bb2516d2e0669d310bc070ba20e79b23602104d2 Mon Sep 17 00:00:00 2001 From: herminiogg <2806005+herminiogg@users.noreply.github.com> Date: Thu, 18 Sep 2025 09:41:21 +0000 Subject: [PATCH 2/3] Pulled latest version of the ShExML CodeMeta file --- codemeta.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/codemeta.json b/codemeta.json index 6db2cf3..8f405ce 100644 --- a/codemeta.json +++ 
b/codemeta.json @@ -135,18 +135,18 @@ "identifier" : "https://orcid.org/0000-0003-0765-7390" }, "dateCreated" : "2018-02-22", - "dateModified" : "2025-07-08", + "dateModified" : "2025-09-10", "description" : "A heterogeneous data mapping language based on Shape Expressions", "developmentStatus" : "active", "downloadUrl" : "https://api.github.com/repos/herminiogg/ShExML/downloads", - "identifier" : "https://doi.org/10.5281/zenodo.15837379", + "identifier" : "https://doi.org/10.5281/zenodo.17092549", "license" : "https://api.github.com/licenses/mit", "name" : "ShExML", "programmingLanguage" : "Scala", - "releaseNotes" : "## What's Changed\r\n* Fixed a bug which prevented the generation of a subject based on a condition applied down the hierarchy.\r\n* Added the possibility to get the iteration index as part of a new builtin functions mechanism.\r\n* Iterators and fields can now be placed in any order.\r\n* Allowed the option to use dashes in the variables names.\r\n* Added the possibility to pass autoincrement ids as arguments of a function.\r\n* Added an option to precompile the ShExML input (`-pc` in the CLI) to generate a single version with all the imported files incorporated and check the input for syntactic and grammatical errors. (This should facilitate debugging in these kinds of files.)\r\n* Improved the CLI help message, removing unnecessary nesting of options and grouping them by categories.\r\n* The databases needed for some tests can now be set up locally using Docker (thanks @mikesname for this contribution).\r\n\r\n**Full Changelog**: https://github.com/herminiogg/ShExML/compare/v0.5.3...v0.5.4", + "releaseNotes" : "## What's Changed\r\n- Added a parallelisation option in the RDF conversion. 
You can decide which parts of the execution you want to run in parallel and the number of threads to be used (or let the engine decide based on your hardware specs).\r\n- Stdin can be used as input for the mapping rules or as an input source.\r\n- Some minor fixes and stability improvements.\r\n\r\n**Full Changelog**: https://github.com/herminiogg/ShExML/compare/v0.5.4...v0.6.0", "runtimePlatform" : "JVM", - "softwareRequirements" : [ "http://example.org/jena-shacl", "http://example.org/Saxon-HE", "http://example.org/logback-classic", "http://example.org/jena-core", "http://example.org/scala-compiler", "http://example.org/mysql-connector-java", "http://example.org/stringdistance_2.13", "http://example.org/rmlmapper", "http://example.org/jena-base", "http://example.org/srdf_3", "http://example.org/mssql-jdbc", "http://example.org/json-path", "http://example.org/picocli", "http://example.org/postgresql", "http://example.org/scala3-library_3", "http://example.org/shex_3", "http://example.org/scala-logging_3", "http://example.org/jena-arq", "http://example.org/srdf4j_3", "http://example.org/slf4j-nop", "http://example.org/antlr4", "http://example.org/mariadb-java-client", "http://example.org/sqlite-jdbc", "http://example.org/scalatest_3", "http://example.org/scala-csv_2.13", "http://example.org/scala-reflect" ], - "version" : "0.5.4", + "softwareRequirements" : [ "http://example.org/jena-shacl", "http://example.org/Saxon-HE", "http://example.org/logback-classic", "http://example.org/jena-core", "http://example.org/scala-compiler", "http://example.org/mysql-connector-java", "http://example.org/stringdistance_2.13", "http://example.org/rmlmapper", "http://example.org/jena-base", "http://example.org/srdf_3", "http://example.org/scala-parallel-collections_3", "http://example.org/mssql-jdbc", "http://example.org/json-path", "http://example.org/picocli", "http://example.org/postgresql", "http://example.org/scala3-library_3", "http://example.org/shex_3", 
"http://example.org/scala-logging_3", "http://example.org/jena-arq", "http://example.org/srdf4j_3", "http://example.org/slf4j-nop", "http://example.org/antlr4", "http://example.org/mariadb-java-client", "http://example.org/sqlite-jdbc", "http://example.org/scalatest_3", "http://example.org/scala-csv_2.13", "http://example.org/scala-reflect" ], + "version" : "0.6.0", "issueTracker" : "https://api.github.com/repos/herminiogg/ShExML/issues", "referencePublication" : "https://doi.org/10.7717/peerj-cs.318" } From fa4d2f8dbc2c7b6ff7904abd38984538c24222f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herminio=20Garc=C3=ADa=20Gonz=C3=A1lez?= Date: Wed, 1 Oct 2025 15:20:17 +0200 Subject: [PATCH 3/3] Version 0.6.1 --- build.sbt | 2 +- src/main/scala/com/herminiogarcia/shexml/Main.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sbt b/build.sbt index fd77a43..5a0ce40 100644 --- a/build.sbt +++ b/build.sbt @@ -4,7 +4,7 @@ lazy val shexml = project .in(file(".")) .settings( name := "ShExML", - version := "0.6.0", + version := "0.6.1", scalaVersion := "3.3.6", crossScalaVersions := Seq("2.12.20", "2.13.16", "3.3.6"), libraryDependencies ++= Seq( diff --git a/src/main/scala/com/herminiogarcia/shexml/Main.scala b/src/main/scala/com/herminiogarcia/shexml/Main.scala index 5e99613..22b56ba 100644 --- a/src/main/scala/com/herminiogarcia/shexml/Main.scala +++ b/src/main/scala/com/herminiogarcia/shexml/Main.scala @@ -20,7 +20,7 @@ object Main { } } -@Command(name = "ShExML", version = Array("v0.6.0"), +@Command(name = "ShExML", version = Array("v0.6.1"), mixinStandardHelpOptions = true, sortOptions = false, description = Array("Map and merge heterogeneous data sources with a Shape Expressions based syntax"))