diff --git a/bundles/io.github.linkedfactory.core/src/main/java/io/github/linkedfactory/core/kvin/util/CsvFormatParser.java b/bundles/io.github.linkedfactory.core/src/main/java/io/github/linkedfactory/core/kvin/util/CsvFormatParser.java index 2537d5a5..b4bdbc55 100644 --- a/bundles/io.github.linkedfactory.core/src/main/java/io/github/linkedfactory/core/kvin/util/CsvFormatParser.java +++ b/bundles/io.github.linkedfactory.core/src/main/java/io/github/linkedfactory/core/kvin/util/CsvFormatParser.java @@ -36,6 +36,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; +import java.util.InputMismatchException; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -49,6 +50,12 @@ public class CsvFormatParser { protected char separator; protected URI context = Kvin.DEFAULT_CONTEXT; + // RFC 3986 path characters + private static final Pattern URI_REGEX = Pattern.compile( + "^(?:[a-zA-Z][a-zA-Z0-9+.-]*:[^\\s]+" + + "|[a-zA-Z0-9._~!$&'()*+,;=:@/\\-]+)$" + ); + public CsvFormatParser(URI base, char separator, InputStream content) throws IOException { this.base = base; this.separator = separator; @@ -77,15 +84,22 @@ URI toUri(String uriOrName) { if (uriOrName == null) { uriOrName = ""; } - URI uri; + URI uri = null; + String uriString = null; if (uriOrName.startsWith("<") && uriOrName.endsWith(">")) { - uri = URIs.createURI(uriOrName.substring(1, uriOrName.length() - 1)); + uriString = uriOrName.substring(1, uriOrName.length() - 1); } else if (uriOrName.isEmpty()) { uri = base; } else { - uri = URIs.createURI(uriOrName); + uriString = uriOrName; + } + if (uriString != null) { + if (!URI_REGEX.matcher(uriString).matches()) { + throw new InputMismatchException(String.format("Invalid URI: %s.", uriString)); + } + uri = URIs.createURI(uriString); if (uri.isRelative()) { - uri = base.appendLocalPart(uriOrName); + uri = base.appendLocalPart(uriString); } } return uri; diff --git a/bundles/io.github.linkedfactory.core/src/test/java/io/github/linkedfactory/core/kvin/util/CsvFormatParserTest.java b/bundles/io.github.linkedfactory.core/src/test/java/io/github/linkedfactory/core/kvin/util/CsvFormatParserTest.java index fabeb950..9d0fccb0 100644 --- a/bundles/io.github.linkedfactory.core/src/test/java/io/github/linkedfactory/core/kvin/util/CsvFormatParserTest.java +++ b/bundles/io.github.linkedfactory.core/src/test/java/io/github/linkedfactory/core/kvin/util/CsvFormatParserTest.java @@ -18,9 +18,11 @@ import io.github.linkedfactory.core.kvin.KvinTuple; import net.enilink.commons.iterator.IExtendedIterator; import net.enilink.komma.core.URIs; +import org.junit.Assert; import org.junit.Test; import java.io.IOException; +import java.util.InputMismatchException; import java.util.List; import static org.junit.Assert.assertEquals; @@ -59,4 +61,17 @@ public void shouldParseCsv2() throws IOException { assertEquals(36, count); } } + + @Test + public void shouldFail() throws IOException { + try { + CsvFormatParser csvParser = new CsvFormatParser(URIs.createURI("urn:base:"), ';', + getClass().getResourceAsStream("/CsvFormatParserTestWrongContent.csv")); + csvParser.parse(); + } catch (InputMismatchException e) { + Assert.assertEquals("Invalid URI: WKZ ID.", e.getMessage()); + return; + } + Assert.fail("Expected Invalid URI: WKZ ID."); + } } diff --git a/bundles/io.github.linkedfactory.core/src/test/resources/CsvFormatParserTestWrongContent.csv b/bundles/io.github.linkedfactory.core/src/test/resources/CsvFormatParserTestWrongContent.csv new file mode 100644 index 00000000..b16679e8 --- /dev/null +++ b/bundles/io.github.linkedfactory.core/src/test/resources/CsvFormatParserTestWrongContent.csv @@ -0,0 +1,3 @@ +Zeit;Hub; WKZ ID; Stufenkennung; Sachnummer; Hubweg; Pleuel1; Pleuel2; Pleuel3; Pleuel4; ZK1; ZK2; ZK3; ZK4; ZK5; ZK6; Sp1_SB2; Sp1_SB1; Sp1_GB2; Sp1_GB1; Sp2_SB2; Sp2_SB1; Sp2_GB2; Sp2_GB1; Sp3_SB2; Sp3_SB1; Sp3_GB2; Sp3_GB1; Sp4_SB2; Sp4_SB1; Sp4_GB2; Sp4_GB1; Sp5_SB2; Sp5_SB1; Sp5_GB2; Sp5_GB1; Sp6_SB2; Sp6_SB1; Sp6_GB2; Sp6_GB1; Sp7_SB2; Sp7_SB1; Sp7_GB2; Sp7_GB1; Sp8_SB2; Sp8_SB1; Sp8_GB2; Sp8_GB1; Sp01_Spannkraft; Sp01_Loesestellung; Sp01_Spannhub; Sp01_Parkposition; Sp01_AmWerkzeug; Sp02_Spannkraft; Sp02_Loesestellung; Sp02_Spannhub; Sp02_Parkposition; Sp02_AmWerkzeug; Sp03_Spannkraft; Sp03_Loesestellung; Sp03_Spannhub; Sp03_Parkposition; Sp03_AmWerkzeug; Sp04_Spannkraft; Sp04_Loesestellung; Sp04_Spannhub; Sp04_Parkposition; Sp04_AmWerkzeug; Sp05_Spannkraft; Sp05_Loesestellung; Sp05_Spannhub; Sp05_Parkposition; Sp05_AmWerkzeug; Sp06_Spannkraft; Sp06_Loesestellung; Sp06_Spannhub; Sp06_Parkposition; Sp06_AmWerkzeug; Sp07_Spannkraft; Sp07_Loesestellung; Sp07_Spannhub; Sp07_Parkposition; Sp07_AmWerkzeug; Sp08_Spannkraft; Sp08_Loesestellung; Sp08_Spannhub; Sp08_Parkposition; Sp08_AmWerkzeug; Kurbelwinkel +13351561201838;15820365;48;1;0;1038.89;8800.0;8800.0;8800.0;9200.0;0;8635;24952;0;12995;0;1331;1430;1601;1840;1465;1757;1547;1615;1327;1474;1791;1749;1175;1342;1654;1908;1408;1600;1834;1425;1225;1211;1890;1863;1225;1360;1664;1869;1348;3991;1517;1736;1; 0; 1; 0; 1;1; 0; 1; 0; 1;1; 0; 1; 0; 1;1; 0; 1; 0; 1;1; 0; 1; 0; 1;0; 0; 1; 0; 1;0; 0; 1; 0; 1;0; 0; 1; 0; 1; 54.5 +13351561201839;15820365;48;1;0;1038.89;8800.0;8800.0;8800.0;9200.0;0;8635;24952;0;12995;0;1330;1431;1601;1842;1465;1757;1547;1615;1327;1474;1791;1749;1175;1342;1654;1908;1402;1600;1839;1430;1225;1211;1890;1863;1225;1360;1664;1869;1348;3991;1517;1736;1; 0; 1; 0; 1;1; 0; 1; 0; 1;1; 0; 1; 0; 1;1; 0; 1; 0; 1;1; 0; 1; 0; 1;0; 0; 1; 0; 1;0; 0; 1; 0; 1;0; 0; 1; 0; 1; 54.5 \ No newline at end of file diff --git a/bundles/io.github.linkedfactory.service/src/main/scala/io/github/linkedfactory/service/util/JsonFormatParser.scala b/bundles/io.github.linkedfactory.service/src/main/scala/io/github/linkedfactory/service/util/JsonFormatParser.scala index 01776d44..788d718f 100644 --- a/bundles/io.github.linkedfactory.service/src/main/scala/io/github/linkedfactory/service/util/JsonFormatParser.scala +++ b/bundles/io.github.linkedfactory.service/src/main/scala/io/github/linkedfactory/service/util/JsonFormatParser.scala @@ -30,6 +30,14 @@ import javax.xml.datatype.DatatypeFactory object JsonFormatParser extends Loggable { val dtFactoryLocal = new ThreadLocal[DatatypeFactory] + // RFC 3986 path characters: + // ALPHA / DIGIT / "-" / "." / "_" / "~" / "!" / "$" / "&" / "'" / + // "(" / ")" / "*" / "+" / "," / ";" / "=" / ":" / "@" / "/" + private val uriRegex = ( + """^(?:[a-zA-Z][a-zA-Z0-9+.-]*:[^\s]+""" + // absolute URI (scheme + rest, no spaces) + """|[a-zA-Z0-9._~!$&'()*+,;=:@/\-]+)$""" // relative URI (RFC3986 path chars only) + ).r + def datatypeFactory = { var factory = dtFactoryLocal.get if (factory == null) { @@ -55,9 +63,12 @@ object JsonFormatParser extends Loggable { } def objectToRecord(o: JObject): Record = o.obj.foldLeft(Record.NULL) { case (e, field) => - val property = resolveUri(field.name, activeContexts) - parseValue(field.value) match { - case Full(value) => e.append(new Record(property, value)) + resolveUri(field.name, activeContexts) match { + case Full(property) => + parseValue(field.value) match { + case Full(value) => e.append(new Record(property, value)) + case _ => e + } case _ => e } } @@ -124,59 +135,78 @@ object JsonFormatParser extends Loggable { } } - def resolveUri(uri: String, contexts: List[JValue]): URI = { + def getCandidateUri(uri: String, contexts: List[JValue]): String = { uri.split(":") match { // is a URI with scheme - case Array(_, suf, _*) if suf.startsWith("//") => URIs.createURI(uri) + case Array(_, suf, _*) if suf.startsWith("//") => uri // may be a CURIE case Array(pref, _*) => contexts match { case first :: rest => first \ pref match { case JString(s) => - val sufPref = resolveUri(s, contexts).toString + val sufPref = getCandidateUri(s, contexts) if (pref.length < uri.length) - URIs.createURI(sufPref.concat(uri.substring(pref.length + 1))) + sufPref.concat(uri.substring(pref.length + 1)) else - URIs.createURI(sufPref.concat(uri.substring(pref.length))) - case _ => resolveUri(uri, rest) + sufPref.concat(uri.substring(pref.length)) + case _ => getCandidateUri(uri, rest) } // no prefix defined, just use item as URI - case Nil => { - val result = URIs.createURI(uri) - if (result.isRelative) result.resolve(rootItem) else result - } + case Nil => uri } } } + + def resolveUri(uri: String, contexts: List[JValue]): Box[URI] = { + val returnedUri: String = getCandidateUri(uri, contexts) + if (uriRegex.pattern.matcher(returnedUri).matches) { + val result = URIs.createURI(returnedUri) + if (result.isRelative) Full(result.resolve(rootItem)) else Full(result) + } else { + Failure(s"Invalid URI: $returnedUri") + } + + } + json match { // [ { "time" : 123, "seqNr" : 2, "value" : 1.3 } ] case JArray(values) => parseProperty(rootItem, URIs.createURI("value"), values, currentTime) // { "item" : { "property1" : [ { "time" : 123, "seqNr" : 2, "value" : 1.3 } ], "property2" : [ { "time" : 123, "seqNr" : 5, "value" : 3.2 } ] } } - case JObject(fields) => fields.flatMap { + case JObject(fields) => fields.map[Box[List[KvinTuple]]] { case JField(item, itemData) if item.equals("@context") => activeContexts = itemData :: activeContexts; None // "item" : { ... } case JField(item, itemData) if !item.equals("@context") => // resolve relative URIs - var itemUri = resolveUri(item, activeContexts) - if (itemUri.lastSegment == "") itemUri = itemUri.trimSegments(1) - itemData match { - // "property1" : [{ ... }] - case JObject(props) => - props.map { - case JField(prop, propData) => - // support single and multiple values - val values = propData match { - case JArray(values) => values - case other => List(other) - } - parseProperty(itemUri, resolveUri(prop, activeContexts), values, currentTime) + resolveUri(item, activeContexts) match { + case Full(uri) => + var itemUri = uri + if (itemUri.lastSegment == "") itemUri = itemUri.trimSegments(1) + itemData match { + // "property1" : [{ ... }] + case JObject(props) => + props.map[Box[List[KvinTuple]]] { + case JField(prop, propData) => + // support single and multiple values + val values: List[JValue] = propData match { + case JArray(vs) => vs + case other => List(other) + } + resolveUri(prop, activeContexts) match { + case Full(propUri) => + parseProperty(itemUri, propUri, values, currentTime) + case e: Failure => + e: Box[List[KvinTuple]] + } + }.foldLeft(Full(Nil): Box[List[KvinTuple]])(collectErrors) + case _ => Failure("Invalid data: Expected an object with property keys.") } - case _ => Failure("Invalid data: Expected an object with property keys.") + case e: Failure => e: Box[List[KvinTuple]] } - }.foldRight(Empty: Box[List[KvinTuple]])(collectErrors _) + + }.foldLeft(Full(Nil): Box[List[KvinTuple]])(collectErrors _) case _ => Failure("Invalid data") } } diff --git a/bundles/io.github.linkedfactory.service/src/test/scala/io/github/linkedfactory/service/test/JsonFormatParserTest.scala b/bundles/io.github.linkedfactory.service/src/test/scala/io/github/linkedfactory/service/test/JsonFormatParserTest.scala index ae5c9bbe..e8f3f246 100644 --- a/bundles/io.github.linkedfactory.service/src/test/scala/io/github/linkedfactory/service/test/JsonFormatParserTest.scala +++ b/bundles/io.github.linkedfactory.service/src/test/scala/io/github/linkedfactory/service/test/JsonFormatParserTest.scala @@ -18,10 +18,11 @@ package io.github.linkedfactory.service.test import io.github.linkedfactory.core.kvin.{Kvin, KvinTuple, Record} import io.github.linkedfactory.service.util.JsonFormatParser import net.enilink.komma.core.URIs -import net.liftweb.common.Full +import net.liftweb.common.{Failure, Full} import net.liftweb.json.JsonAST._ import net.liftweb.json.JsonDSL._ import org.junit.{Assert, Test} +import org.scalatest.Assertions.fail class JsonFormatParserTest { @@ -97,4 +98,29 @@ class JsonFormatParserTest { )) Assert.assertEquals(expected, parsed) } + + @Test + def shouldFail(): Unit = { + val context = Kvin.DEFAULT_CONTEXT + val time = System.currentTimeMillis + val root = URIs.createURI("http://example.root") + val itemError: JValue = + ("some item" -> ("http://example.org/property" -> "val")) + var result = JsonFormatParser.parseItem(root, context, itemError, time) + assert(result.isInstanceOf[Failure]) + result match { + case Failure(msg, _, _) => + assert(msg.contains("Invalid URI: some item")) + case _ => fail("Expected Invalid URI: some item, got " + result) + } + + val propertyError = ("item" -> ("http://example.org/my property" -> "val")) + result = JsonFormatParser.parseItem(root, context, propertyError, time) + assert(result.isInstanceOf[Failure]) + result match { + case Failure(msg, _, _) => + assert(msg.contains("Invalid URI: http://example.org/my property")) + case _ => fail("Expected Invalid URI: http://example.org/my property, got " + result) + } + } } \ No newline at end of file