Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
steps:
- uses: actions/setup-go@v3
with:
go-version: 1.18
go-version: 1.21

- name: Checkout code
uses: actions/checkout@v2
Expand Down Expand Up @@ -53,7 +53,7 @@ jobs:
uses: actions/checkout@v3
- uses: actions/setup-go@v3
with:
go-version: 1.18
go-version: 1.21
- uses: actions/cache@v3
with:
path: |
Expand All @@ -65,7 +65,7 @@ jobs:
- uses: zencargo/github-action-go-mod-tidy@v1
with:
path: .
go-version: 1.18
go-version: 1.21

test:
runs-on: ubuntu-latest
Expand All @@ -75,7 +75,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.18
go-version: 1.21
- name: Set up Cache
uses: actions/cache@v3
with:
Expand Down
7 changes: 4 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
module github.com/piprate/json-gold

go 1.18
go 1.21

require (
github.com/pquerna/cachecontrol v0.0.0-20180517163645-1555304b9b35
github.com/stretchr/testify v1.8.3
github.com/cayleygraph/quad v1.3.0
github.com/pquerna/cachecontrol v0.2.0
github.com/stretchr/testify v1.9.0
)

require (
Expand Down
14 changes: 10 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
github.com/cayleygraph/quad v1.3.0 h1:xg7HOLWWPgvZ4CcvzEpfCwq42L8mzYUR+8V0jtYoBzc=
github.com/cayleygraph/quad v1.3.0/go.mod h1:NadtM7uMm78FskmX++XiOOrNvgkq0E1KvvhQdMseMz4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pquerna/cachecontrol v0.0.0-20180517163645-1555304b9b35 h1:J9b7z+QKAmPf4YLrFg6oQUotqHQeUNWwkvo7jZp1GLU=
github.com/pquerna/cachecontrol v0.0.0-20180517163645-1555304b9b35/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA=
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/pquerna/cachecontrol v0.2.0 h1:vBXSNuE5MYP9IJ5kjsdo8uq+w41jSPgvba2DEnkRx9k=
github.com/pquerna/cachecontrol v0.2.0/go.mod h1:NrUG3Z7Rdu85UNR3vm7SOsl1nFIeSiQnrHV5K9mBcUI=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
150 changes: 51 additions & 99 deletions ld/serialize_nquads.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ import (
"bytes"
"fmt"
"io"
"regexp"
"strings"

"github.com/cayleygraph/quad"
"github.com/cayleygraph/quad/nquads"
)

// NQuadRDFSerializer parses and serializes N-Quads.
Expand Down Expand Up @@ -108,15 +110,6 @@ func toNQuad(triple *Quad, graphName string) string {
return quad
}

// unescape decodes the N-Quads escape sequences \\, \", \n, \r and \t in str
// and returns the decoded text.
//
// The string is decoded in a single left-to-right pass so that the output of
// one escape is never re-interpreted as the start of another. (Decoding with
// successive whole-string replacements turns an escaped backslash followed by
// 'n', i.e. `\\n`, into a newline instead of the two characters `\n`.)
//
// A backslash that does not introduce one of the sequences above, including a
// trailing lone backslash, is copied through unchanged.
func unescape(str string) string {
	// Fast path: nothing to decode, avoid allocating a copy.
	if !strings.Contains(str, "\\") {
		return str
	}

	var out strings.Builder
	out.Grow(len(str))

	// Byte-wise iteration is safe for UTF-8 input: the backslash and every
	// recognised escape letter are ASCII, all other bytes are copied verbatim.
	for i := 0; i < len(str); i++ {
		c := str[i]
		if c != '\\' || i+1 == len(str) {
			out.WriteByte(c)
			continue
		}

		switch str[i+1] {
		case '\\':
			out.WriteByte('\\')
		case '"':
			out.WriteByte('"')
		case 'n':
			out.WriteByte('\n')
		case 'r':
			out.WriteByte('\r')
		case 't':
			out.WriteByte('\t')
		default:
			// Unknown escape: keep the backslash and let the following
			// byte be handled by the next iteration.
			out.WriteByte(c)
			continue
		}
		i++ // the escape letter has been consumed as well
	}
	return out.String()
}

func escape(str string) string {
str = strings.ReplaceAll(str, "\\", "\\\\")
str = strings.ReplaceAll(str, "\"", "\\\"")
Expand All @@ -126,66 +119,6 @@ func escape(str string) string {
return str
}

// Regular-expression fragments from which the N-Quads line matchers below are
// assembled. The character-class fragments (pnCharsBase, pnCharsU, pnChars)
// are meant to be placed inside a [...] class.
const (
	// wso matches optional horizontal whitespace.
	wso = "[ \\t]*"
	// iri matches <scheme:rest> and captures the text between the brackets.
	iri = "(?:<([^:]+:[^>]*)>)"

	// https://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL

	// pnCharsBase mirrors the PN_CHARS_BASE production, including the
	// supplementary-plane range, which needs the 8-digit \U escape in Go.
	pnCharsBase = "A-Z" + "a-z" +
		"\u00C0-\u00D6" +
		"\u00D8-\u00F6" +
		"\u00F8-\u02FF" +
		"\u0370-\u037D" +
		"\u037F-\u1FFF" +
		"\u200C-\u200D" +
		"\u2070-\u218F" +
		"\u2C00-\u2FEF" +
		"\u3001-\uD7FF" +
		"\uF900-\uFDCF" +
		"\uFDF0-\uFFFD" +
		"\U00010000-\U000EFFFF"

	pnCharsU = pnCharsBase + "_"

	pnChars = pnCharsU +
		"0-9" +
		"-" +
		"\u00B7" +
		"\u0300-\u036F" +
		"\u203F-\u2040"

	// blankNodeLabel captures a whole blank node label including the "_:"
	// prefix: one leading character from pnCharsU or a digit, optionally
	// followed by characters from pnChars or '.', where the final character
	// must not be '.'.
	blankNodeLabel = "(_:" +
		"(?:[" + pnCharsU + "0-9])" +
		"(?:(?:[" + pnChars + ".])*(?:[" + pnChars + "]))?" +
		")"

	bnode = blankNodeLabel

	// plain captures the content of a double-quoted literal; a backslash
	// always consumes the character that follows it.
	plain    = "\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\""
	datatype = "(?:\\^\\^" + iri + ")"
	language = "(?:@([a-z]+(?:-[a-zA-Z0-9]+)*))"
	literal  = "(?:" + plain + "(?:" + datatype + "|" + language + ")?)"
	// ws matches mandatory horizontal whitespace.
	ws = "[ \\t]+"

	subject  = "(?:" + iri + "|" + bnode + ")" + ws
	property = iri + ws
	object   = "(?:" + iri + "|" + bnode + "|" + literal + ")" + wso
	// graph matches the terminating '.', optionally preceded by a graph label.
	graph = "(?:\\.|(?:(?:" + iri + "|" + bnode + ")" + wso + "\\.))"
)

// regexEmpty matches a line that consists solely of spaces and tabs.
var regexEmpty = regexp.MustCompile("^" + wso + "$")

// regexQuad matches one complete N-Quads statement. Its capture groups are,
// in order: 1 subject IRI, 2 subject blank node, 3 predicate IRI, 4 object
// IRI, 5 object blank node, 6 literal value, 7 literal datatype IRI,
// 8 literal language tag, 9 graph IRI, 10 graph blank node.
var regexQuad = regexp.MustCompile("^" + wso + subject + property + object + graph + wso + "$") //nolint:gocritic

type lineScanner interface {
Bytes() []byte
Scan() bool
Expand Down Expand Up @@ -253,53 +186,63 @@ func ParseNQuadsFrom(o interface{}) (*RDFDataset, error) {
lineNumber++

// skip empty lines
if regexEmpty.Match(line) {
if isEmpty(line) {
continue
}

// parse quad
if !regexQuad.Match(line) {
return nil, NewJsonLdError(SyntaxError, fmt.Errorf("error while parsing N-Quads; invalid quad. line: %d", lineNumber))
q, err := nquads.ParseRaw(string(line))
if err != nil {
return nil, NewJsonLdError(SyntaxError, fmt.Errorf("error while parsing N-Quads; invalid quad. line: %d. reason: %w", lineNumber, err))
}
match := regexQuad.FindStringSubmatch(string(line))

// get subject
var subject Node
if match[1] != "" {
subject = NewIRI(unescape(match[1]))
} else {
subject = NewBlankNode(unescape(match[2]))
switch v := q.Subject.(type) {
case quad.IRI:
subject = NewIRI(string(v))
case quad.BNode:
subject = NewBlankNode(v.String())
default:
return nil, fmt.Errorf("invalid subject: %s", q.Subject.String())
}

// get predicate
predicate := NewIRI(unescape(match[3]))
var predicate Node
if iri, ok := q.Predicate.(quad.IRI); ok {
predicate = NewIRI(string(iri))
} else {
return nil, fmt.Errorf("invalid predicate: %s", q.Predicate.String())
}

// get object
var object Node
if match[4] != "" {
object = NewIRI(unescape(match[4]))
} else if match[5] != "" {
object = NewBlankNode(unescape(match[5]))
} else {
language := unescape(match[8])
var datatype string
if match[7] != "" {
datatype = unescape(match[7])
} else if match[8] != "" {
datatype = RDFLangString
} else {
datatype = XSDString
}
unescaped := unescape(match[6])
object = NewLiteral(unescaped, datatype, language)
switch obj := q.Object.(type) {
case quad.IRI:
object = NewIRI(string(obj))
case quad.BNode:
object = NewBlankNode(obj.String())
case quad.TypedString:
object = NewLiteral(string(obj.Value), string(obj.Type), "")
case quad.LangString:
object = NewLiteral(string(obj.Value), RDFLangString, obj.Lang)
case quad.String:
object = NewLiteral(string(obj), XSDString, "")
default:
return nil, fmt.Errorf("invalid object: %s", q.Object.String())
}

// get graph name ('@default' is used for the default graph)
name := "@default"
if match[9] != "" {
name = unescape(match[9])
} else if match[10] != "" {
name = unescape(match[10])
if label := q.Label; label != nil {
switch label := label.(type) {
case quad.IRI:
name = string(label)
case quad.BNode:
name = label.String()
default:
return nil, fmt.Errorf("invalid label: %s", q.Label.String())
}
}

triple := NewQuad(subject, predicate, object, name)
Expand Down Expand Up @@ -331,3 +274,12 @@ func ParseNQuadsFrom(o interface{}) (*RDFDataset, error) {
// ParseNQuads hands the given string to ParseNQuadsFrom and returns the
// resulting dataset and error unchanged.
func ParseNQuads(input string) (*RDFDataset, error) {
	dataset, err := ParseNQuadsFrom(input)
	return dataset, err
}

// isEmpty reports whether line contains nothing but spaces and tabs.
// A nil or zero-length slice counts as empty.
func isEmpty(line []byte) bool {
	for i := 0; i < len(line); i++ {
		switch line[i] {
		case ' ', '\t':
			// still blank, keep scanning
		default:
			return false
		}
	}
	return true
}
51 changes: 51 additions & 0 deletions ld/serialize_nquads_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright 2026 Siemens AG
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ld

import (
"testing"
)

// benchInput is the N-Quads document fed to the parser benchmark. It mixes
// IRI and blank-node subjects and objects, plain, language-tagged and typed
// literals, literals containing backslash escapes, and statements with IRI
// and blank-node graph labels, separated by blank lines.
var benchInput = `
<http://example.org/subject> <http://example.org/predicate> <http://example.org/object> .

_:b0 <http://example.org/predicate> <http://example.org/object> .

<http://example.org/subject> <http://example.org/predicate> _:b0 .

<http://example.org/subject> <http://example.org/predicate> "literal value" .

<http://example.org/subject> <http://example.org/predicate> "Hello World"@en .

<http://example.org/subject> <http://example.org/predicate> "42"^^<http://www.w3.org/2001/XMLSchema#integer> .

<http://example.org/subject> <http://example.org/predicate> "Line 1\\nLine 2\\tTabbed" .

<http://example.org/subject> <http://example.org/predicate> <http://example.org/object> <http://example.org/graph> .

<http://example.org/subject> <http://example.org/predicate> <http://example.org/object> _:graph .

<http://example.org/subject> <http://example.org/predicate> "Quote: \"nested\" and backslash: \\\\" .
`

// BenchmarkParseNQuadsFrom repeatedly parses benchInput through ParseNQuads,
// reporting allocations, and aborts the benchmark on the first parse error.
func BenchmarkParseNQuadsFrom(b *testing.B) {
	b.ReportAllocs()
	for remaining := b.N; remaining > 0; remaining-- {
		if _, parseErr := ParseNQuads(benchInput); parseErr != nil {
			b.Fatalf("failed to parse benchInput: %s", parseErr)
		}
	}
}
Loading