diff --git a/Project.toml b/Project.toml
index a254394..80b4dec 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "JSON3"
 uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
 authors = ["Jacob Quinn "]
-version = "1.9.5"
+version = "1.10.0"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
diff --git a/README.md b/README.md
index 272f868..14c1456 100644
--- a/README.md
+++ b/README.md
@@ -55,3 +55,41 @@ using StructTypes
 JSON3.@generatetypes json_string_sample
 JSON3.read(json_string, JSONTypes.Root)
 ```
+
+#### Non-allocating interface
+
+```julia
+json_string = """
+{
+    "a": 1,
+    "b": [1,2,3,5,8],
+    "c": {
+        "a": ["2"]
+    }
+}
+"""
+reader = JSON3.Reader()
+obj = JSON3.parse!(reader, json_string, JSON3.JSONObject) # returns a JSON3.JSONObject
+
+# method 1
+obj["a", Int64] # 1
+# method 2
+cursor = JSON3.findcursor(obj, "a") # JSON3.Cursor
+item = obj[cursor, Int64] # 1
+# method 3
+field = first(obj)
+JSON3.key(field) # "a", SubString
+JSON3.value(field, Int64) # 1
+
+# Array
+collect(obj["b", JSON3.JSONArray{Int64}]) # [1,2,3,5,8]
+# Object
+obj["c", JSON3.JSONObject] # JSON3.JSONObject
+
+# Data is accessed by iterating over the fields.
+# When fields are ordered, it is possible to access them sequentially:
+cursor = JSON3.next(obj) # cursor on the first field ("a")
+cursor2 = JSON3.findcursor(obj, "b", cursor) # searches the fields after cursor
+obj[cursor2] # obj["b"]
+
+```
diff --git a/benchmarks/lowlevel.jl b/benchmarks/lowlevel.jl
new file mode 100644
index 0000000..a20ad71
--- /dev/null
+++ b/benchmarks/lowlevel.jl
@@ -0,0 +1,39 @@
+using JSON3, BenchmarkTools
+
+str = """
+{
+    "a": 1,
+    "b" :2.1,
+    "c":
+     "3",
+    "d": [1,2,3,5,8],
+    "e": [
+        1,"23",
+        [4,5]],
+    "f": {
+        "a": ["2"],
+        "b": 1
+    }
+}
+"""
+
+reader = JSON3.Reader()
+obj = JSON3.parse!(reader, str, JSON3.JSONObject)
+
+function f0(str)
+    x = JSON3.read(str)
+    x["f"]["b"]
+end
+
+function f1(reader, str)
+    obj = JSON3.parse!(reader, str, JSON3.JSONObject)
+    obj["f", JSON3.JSONObject]["b", Int64]
+end
+
+function f2(obj)
+    obj["f", JSON3.JSONObject]["b", Int64]
+end
+
+@benchmark f0($str)
+@benchmark f1($reader, $str)
+@benchmark f2($obj)
diff --git a/src/JSON3.jl b/src/JSON3.jl
index 645f740..8d4c4b1 100644
--- a/src/JSON3.jl
+++ b/src/JSON3.jl
@@ -48,7 +48,7 @@ function populateinds!(x::Object)
         key = getvalue(Symbol, buf, tape, tapeidx, t)
         tapeidx += 2
         inds[key] = tapeidx
-        @inbounds tapeidx += gettapelen(Any, tape[tapeidx])
+        @inbounds tapeidx += gettapelen(tape[tapeidx])
         i += 1
     end
     return
@@ -64,7 +64,7 @@ function populateinds!(x::Array)
     i = 1
     while i <= len
         @inbounds inds[i] = tapeidx
-        @inbounds tapeidx += gettapelen(Any, tape[tapeidx])
+        @inbounds tapeidx += gettapelen(tape[tapeidx])
         i += 1
     end
     return
@@ -78,7 +78,7 @@ end
     tapeidx += 2
     @inbounds t = tape[tapeidx]
     x = Pair{Symbol, Any}(key, getvalue(Any, getbuf(obj), tape, tapeidx, t))
-    tapeidx += gettapelen(Any, t)
+    tapeidx += gettapelen(t)
     return x, (i + 1, tapeidx)
 end
 
@@ -149,7 +149,7 @@ function Base.iterate(arr::Array{T}, (i, tapeidx)=(1, 3)) where {T}
     tape = gettape(arr)
     @inbounds t = tape[tapeidx]
     val = getvalue(T, getbuf(arr), tape, tapeidx, t)
-    tapeidx += gettapelen(T, t)
+    tapeidx += gettapelen(t)
     return val, (i + 1, tapeidx)
 end
 
@@ -171,5 +171,6 @@ include("structs.jl")
 include("write.jl")
 include("pretty.jl")
 include("gentypes.jl")
+include("read_tape.jl")
 
 end # module
diff --git a/src/read_tape.jl b/src/read_tape.jl
new file mode 100644
index 0000000..8af388f
--- /dev/null
+++ b/src/read_tape.jl
@@ -0,0 +1,333 @@
+"""
+    Cursor
+
+Index of an entry in the tape of a `Reader`.
+"""
+struct Cursor
+    cursor::Int
+end
+Base.convert(::Type{T}, x::Cursor) where {T<:Integer} = convert(T, x.cursor)
+Base.convert(::Type{Cursor}, x::T) where {T<:Integer} = Cursor(x)
+Base.:+(c::Cursor, i::Integer) = Cursor(c.cursor+i)
+Base.:-(c::Cursor, i::Integer) = Cursor(c.cursor-i)
+Base.to_index(c::Cursor) = c.cursor
+Base.isless(c1::Cursor, c2::Cursor) = isless(c1.cursor, c2.cursor)
+
+"""
+    Reader()
+
+Reusable tape buffer for `parse!`. Each `parse!` call on a `Reader` rewrites its
+tape, so items obtained from an earlier parse must not be used afterwards.
+"""
+struct Reader
+    tape::Vector{UInt64}
+end
+
+Reader() = Reader(
+    Vector{UInt64}(),
+)
+
+# Untyped handle on the tape entry at `cursor`; convert it with `as(item, T)`.
+struct JSONItem{S}
+    parser::Reader
+    str::S
+    cursor::Cursor
+end
+
+gettape(x::JSONItem) = x.parser.tape
+
+"""
+    parse!(parser::Reader, str; jsonlines=false, kw...)
+    parse!(parser::Reader, str, ::Type{T}; kw...)
+
+Parse `str` into the tape of `parser` and return the root `JSONItem`, or
+`as(root, T)` when a target type `T` is given. Keyword arguments are forwarded
+to the tape reader.
+"""
+function parse!(parser::Reader, str; jsonlines::Bool=false, kw...)
+    buf = codeunits(str)
+    tape = parser.tape
+    len = length(buf)
+    tapesize = len < 1000 ? len + 4 : div(len, 10)
+    resize!(tape, tapesize)
+    pos = 1
+    b = getbyte(buf, pos)
+    if jsonlines
+        pos, tapeidx = jsonlines!(buf, pos, len, b, tape, Int64(1); kw...)
+    else
+        pos, tapeidx = read!(buf, pos, len, b, tape, Int64(1), Any; kw...)
+    end
+    JSONItem(parser, str, Cursor(1))
+end
+
+function parse!(parser::Reader, str, ::Type{T}; kw...) where {T}
+    item = parse!(parser, str; kw...)
+    as(item, T)
+end
+
+@inline function gettypemask(x::JSONItem)
+    @inbounds gettape(x)[x.cursor] & TYPEMASK
+end
+
+# Type predicates on the tape entry an item points at.
+empty(x::JSONItem) = gettypemask(x) == EMPTY
+isany(x::JSONItem) = gettypemask(x) == ANY
+isobject(x::JSONItem) = gettypemask(x) == OBJECT
+isarray(x::JSONItem) = gettypemask(x) == ARRAY
+isstring(x::JSONItem) = gettypemask(x) == STRING
+isint(x::JSONItem) = gettypemask(x) == INT
+isfloat(x::JSONItem) = gettypemask(x) == FLOAT
+isbool(x::JSONItem) = gettypemask(x) == BOOL
+isnull(x::JSONItem) = gettypemask(x) == NULL
+isintfloat(x::JSONItem) = gettypemask(x) == (INT | FLOAT)
+nonnull(x::JSONItem) = gettypemask(x) & ~NULL
+
+# One entry of a `JSONObject`; `cursor` points at the key, the value is 2 slots later.
+struct JSONField{S}
+    parser::Reader
+    str::S
+    cursor::Cursor
+end
+
+# Key of the field, as returned by `asstring`.
+@inline function key(field::JSONField)
+    JSONItem(
+        field.parser,
+        field.str,
+        field.cursor
+    ) |> asstring
+end
+
+# Value of the field: an untyped `JSONItem`, or `as(item, T)` when `T` is given.
+@inline function value(field::JSONField)
+    JSONItem(
+        field.parser,
+        field.str,
+        field.cursor + 2
+    )
+end
+
+@inline function value(field::JSONField, ::Type{T}) where {T}
+    as(value(field), T)
+end
+
+"""
+    JSONObject
+
+View over a JSON object stored in the tape of a `Reader`. Fields are visited in
+tape order with `next`/`iterate`, and `obj[key, T]` is a linear search.
+`cursor` points at the first key and `maxcursor` at the last slot of the object.
+"""
+struct JSONObject{S}
+    parser::Reader
+    str::S
+    cursor::Cursor
+    nfields::Int
+    maxcursor::Cursor
+end
+
+@inline Base.length(x::JSONObject) = x.nfields
+@inline Base.isempty(x::JSONObject) = length(x) == 0
+
+# Cursor of the field after `cursor` (the first field when `cursor` is `nothing`),
+# or `nothing` when there is none. `cursor` must point at a field key.
+@inline function next(x::JSONObject, cursor::Union{Nothing,Cursor}=nothing)
+    if isnothing(cursor)
+        isempty(x) ? nothing : x.cursor
+    else
+        cursor += 2 # jumping over the key
+        u = x.parser.tape[cursor]
+        cursor += isobject(u) | isarray(u) ? getnontypemask(u) : 2
+        cursor > x.maxcursor ? nothing : cursor
+    end
+end
+
+"""
+    findcursor(x::JSONObject, key, start=nothing)
+    tryfindcursor(x::JSONObject, key, default, start=nothing)
+
+Return the cursor of the value stored under `key`, searching the fields that
+follow `start` (every field when `start` is `nothing`). `start` is a cursor on a
+field key, as returned by `next`, not a value cursor. When the key is missing,
+`findcursor` throws a `KeyError` and `tryfindcursor` returns `default`.
+"""
+function tryfindcursor(x::JSONObject, key_::AbstractString, default, start::Union{Nothing, Cursor}=nothing)
+    cursor = next(x, start)
+    while cursor !== nothing
+        field = getpair(x, cursor)
+        key(field) == key_ && return value(field).cursor
+        cursor = next(x, cursor)
+    end
+    default
+end
+
+function findcursor(x::JSONObject, key_::AbstractString, start::Union{Nothing, Cursor}=nothing)
+    cursor = next(x, start)
+    while cursor !== nothing
+        field = getpair(x, cursor)
+        key(field) == key_ && return value(field).cursor
+        cursor = next(x, cursor)
+    end
+    throw(KeyError(key_)) # this allows for type stability optimizations
+end
+
+# An empty object has no first field: report exhaustion instead of building a
+# field from a `nothing` cursor.
+function Base.iterate(x::JSONObject)
+    cursor = next(x)
+    isnothing(cursor) ? nothing : (getpair(x, cursor), cursor)
+end
+
+function Base.iterate(x::JSONObject, cursor::Cursor)
+    cursor = next(x, cursor)
+    isnothing(cursor) ? nothing : (getpair(x, cursor), cursor)
+end
+
+@inline function getpair(x::JSONObject, cursor::Cursor)
+    JSONField(
+        x.parser,
+        x.str,
+        cursor,
+    )
+end
+
+# `obj[key, T]` looks the key up; `obj[cursor, T]` reads the value at a value cursor.
+function Base.getindex(x::JSONObject, key::AbstractString, ::Type{T}=Any, start::Union{Nothing, Cursor}=nothing) where {T}
+    cursor = findcursor(x, key, start)
+    x[cursor, T]
+end
+
+@inline function Base.getindex(x::JSONObject, cursor_onvalue::Cursor, ::Type{T}=Any) where {T}
+    item = JSONItem(
+        x.parser,
+        x.str,
+        cursor_onvalue,
+    )
+    as(item, T)
+end
+
+"""
+    JSONArray{T}
+
+View over a JSON array stored in the tape of a `Reader`; elements are read with
+`as(item, T)`.
+"""
+struct JSONArray{T,S}
+    parser::Reader
+    str::S
+    cursor::Cursor
+    nfields::Int
+    maxcursor::Cursor
+end
+
+@inline Base.length(x::JSONArray) = x.nfields
+@inline Base.isempty(x::JSONArray) = length(x) == 0
+@inline Base.eltype(::JSONArray{T}) where {T} = T
+@inline Base.eltype(::JSONArray{Any}) = JSONItem
+
+@inline function next(x::JSONArray)
+    isempty(x) ? nothing : x.cursor
+end
+
+@inline function next(x::JSONArray{T}, cursor::Cursor) where {T}
+    if issmalltype(geteltype(T))
+        cursor += 2 # fixed stride: the tape is not read
+    else
+        @inbounds u = x.parser.tape[cursor]
+        cursor += isobject(u) | isarray(u) ? getnontypemask(u) : 2
+    end
+    cursor > x.maxcursor ? nothing : cursor
+end
+
+function Base.iterate(x::JSONArray)
+    cursor = next(x)
+    isnothing(cursor) ? nothing : (x[cursor], cursor)
+end
+
+function Base.iterate(x::JSONArray{T}, cursor::Cursor) where {T}
+    cursor = next(x, cursor)
+    isnothing(cursor) ? nothing : (x[cursor], cursor)
+end
+
+@inline function Base.getindex(x::JSONArray{T}, cursor::Cursor) where {T}
+    item = JSONItem(
+        x.parser,
+        x.str,
+        cursor,
+    )
+    as(item, T)
+end
+
+# `as(item, T)` converts an item to `T`; the `as*` helpers assert the tape type first.
+@inline as(item::JSONItem, ::Type{Any}) = item
+@inline as(item::JSONItem, ::Type{T}) where {T<:AbstractString} = asstring(item)
+@inline as(item::JSONItem, ::Type{T}) where {T<:Integer} = convert(T, asint(item))
+@inline as(item::JSONItem, ::Type{T}) where {T<:AbstractFloat} = convert(T, asfloat(item))
+@inline as(item::JSONItem, ::Type{Bool}) = asbool(item)
+@inline as(item::JSONItem, ::Type{<:JSONArray})= asarray(item, Any)
+@inline as(item::JSONItem, ::Type{<:JSONArray{T}}) where {T} = asarray(item, T)
+@inline as(item::JSONItem, ::Type{<:JSONObject}) = asobject(item)
+
+@inline function asobject(x::JSONItem)
+    @assert isobject(x)
+    tape = gettape(x)
+    # The tape length and the field count use all non-type bits of their slot
+    # (see `object`/`eltypelen`); `getlen` would keep only the low 16 bits.
+    @inbounds JSONObject(
+        x.parser,
+        x.str,
+        x.cursor+2,
+        getnontypemask(tape[x.cursor+1]),
+        x.cursor + getnontypemask(tape[x.cursor])-1,
+    )
+end
+
+@inline function asarray(x::JSONItem, ::Type{T}) where {T}
+    @assert isarray(x)
+    tape = gettape(x)
+    # type = tape[x.cursor+1] |> gettypemask
+    # todo: make this work
+    # @assert (type == EMPTY) || T==Any || (type == geteltype(T))
+    # Same slot layout as objects: read both lengths with `getnontypemask`.
+    @inbounds JSONArray{T, typeof(x.str)}(
+        x.parser,
+        x.str,
+        x.cursor+2,
+        getnontypemask(tape[x.cursor+1]),
+        x.cursor + getnontypemask(tape[x.cursor])-1,
+    )
+end
+
+# NOTE(review): the tape offsets are used directly as string indices and the result
+# is a raw slice of `str` -- confirm the behaviour for non-ASCII and escaped strings.
+@inline function asstring(x::JSONItem)
+    @assert isstring(x)
+    tape = gettape(x)
+    @inbounds len = getlen(tape[x.cursor])-1
+    @inbounds offset = Int64(tape[x.cursor+1])
+    @inbounds SubString(x.str, offset:(offset+len))
+end
+
+@inline function asint(x::JSONItem)
+    @assert isint(x)
+    tape = gettape(x)
+    @inbounds Core.bitcast(Int64, tape[x.cursor+1])
+end
+
+@inline function asfloat(x::JSONItem)
+    tape = gettape(x)
+    @inbounds u = tape[x.cursor+1]
+    if isint(x)
+        # todo: remove this branch by not converting the float into an integer in the tape!
+        Float64(Core.bitcast(Int64, u))
+    else
+        @assert isfloat(x)
+        @inbounds Core.bitcast(Float64, tape[x.cursor+1])
+    end
+end
+
+@inline function asbool(x::JSONItem)
+    @assert isbool(x)
+    tape = gettape(x)
+    @inbounds getnontypemask(tape[x.cursor]) == UInt64(1)
+end
diff --git a/src/utils.jl b/src/utils.jl
index 1f35901..263c627 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -67,7 +67,7 @@ isnull(x::UInt64) = (x & TYPEMASK) == NULL
 isintfloat(x::UInt64) = (x & TYPEMASK) == (INT | FLOAT)
 nonnull(x::UInt64) = (x & TYPEMASK) & ~NULL
 
-function geteltype(T)
+function geteltype(T::UInt64)
     if empty(T); return Union{}
     elseif isany(T); return Any
     elseif isobject(T); return Object
@@ -82,6 +82,15 @@ function geteltype(T)
     end
 end
 
+geteltype(::Type{Any}) = ANY
+geteltype(::Type{T}) where {T} = ANY
+geteltype(::Type{Nothing}) = NULL
+geteltype(::Type{Union{Nothing, T}}) where {T} = (NULL | geteltype(T))
+geteltype(::Type{<:AbstractString}) = STRING
+geteltype(::Type{<:Integer}) = INT
+geteltype(::Type{<:AbstractFloat}) = FLOAT
+geteltype(::Type{Bool}) = BOOL
+
 object(tapelen) = OBJECT | Core.bitcast(UInt64, tapelen)
 array(tapelen) = ARRAY | Core.bitcast(UInt64, tapelen)
 eltypelen(T, len) = T | Core.bitcast(UInt64, len)
@@ -115,12 +124,10 @@ getnontypemask(x::UInt64) = Core.bitcast(Int64, x & ~TYPEMASK)
 getpos(x::UInt64) = Core.bitcast(Int64, getnontypemask(x) >> 16)
 getlen(x::UInt64) = Core.bitcast(Int64, x & 0x000000000000ffff)
 
-gettapelen(T, x::UInt64) = ifelse(isobject(x) | isarray(x), getnontypemask(x), 2)
+gettapelen(x::UInt64) = ifelse(isobject(x) | isarray(x), getnontypemask(x), 2)
 gettapelen(::Union{Type{Int64}, Type{Float64}, Type{Bool}, Type{Nothing}}) = 2
 
-regularstride(T) = false
-regularstride(::Union{Type{Int64}, Type{Float64}, Type{Bool}, Type{Nothing}}) = true
-regularstride(::Type{Union{Int64, Float64}}) = true
+issmalltype(x::UInt64) = x & (EMPTY | NULL | INT | FLOAT | BOOL | STRING) == gettypemask(x)
 
 function getvalue(::Type{Object}, buf, tape, tapeidx, t)
     x = Object(buf, Base.unsafe_view(tape, tapeidx:tapeidx + getnontypemask(t)), Dict{Symbol, Int}())
diff --git a/test/reader.jl b/test/reader.jl
new file mode 100644
index 0000000..9d99d15
--- /dev/null
+++ b/test/reader.jl
@@ -0,0 +1,121 @@
+using JSON3: JSONObject, JSONArray, JSONField, JSONItem, key, value, Reader, parse!, next, getpair
+
+@testset "JSONReader" begin
+    @testset "JSONReader: vector INT" begin
+        str = """
+        {
+            "a": [1,2,3,5,8]
+        }
+        """
+
+        reader = Reader()
+        obj = parse!(reader, str, JSONObject)
+
+        field = first(obj)
+        @test field isa JSONField
+        @test key(field) == "a"
+        @test value(field) isa JSONItem
+        @test value(field, JSONArray) isa JSONArray
+
+        v = obj["a"]
+        @test v isa JSONItem
+
+        v = obj["a", JSONArray]
+        @test first(v) isa JSONItem
+        @test eltype(v) == JSONItem
+        @test [JSON3.as(x, Int64) for x in v] == [1,2,3,5,8]
+
+        v = obj["a", JSONArray{Int64}]
+        @test collect(v) == [1,2,3,5,8]
+        @test eltype(v) == Int64
+    end
+
+    @testset "JSONReader: vector INT|FLOAT" begin
+        str = """
+        {
+            "a": [1.0,2.5]
+        }
+        """
+
+        reader = Reader()
+        obj = parse!(reader, str, JSONObject)
+
+        v = obj["a", JSONArray{Float64}]
+        @test collect(v) == [1.0, 2.5]
+    end
+
+    @testset "JSONReader: Object" begin
+        str = """
+        {
+            "a": 1,
+            "b" :2.1,
+            "c":
+             "3",
+            "d": [1,2,3,5,8],
+            "e": [
+                1,"23",
+                [4,5]],
+            "f": {
+                "a": ["2"],
+                "b": 1
+            }
+        }
+        """
+
+        reader = Reader()
+        obj_ = parse!(reader, str)
+        @test obj_ isa JSONItem
+        obj = parse!(reader, str, JSONObject)
+        @test obj isa JSONObject
+        @test JSON3.as(obj_, JSONObject) == obj
+
+        @test [key(field) for field in obj] == ["a", "b", "c", "d", "e", "f"]
+
+        cursor1 = next(obj)
+        field = getpair(obj, cursor1)
+        k, v1, v2 = key(field), value(field, Int), value(field, Float64)
+        v3 = obj["a", Int]
+        v4 = obj["a", Float64]
+        @test k == "a"
+        @test_throws AssertionError value(field, JSONArray)
+        @test (v1, v2, v3, v4) == (1,1,1.0,1.0)
+
+        cursor2 = next(obj, cursor1) # cursor on the field
+        field = getpair(obj, cursor2)
+        @test field isa JSONField
+        @test key(field) == "b"
+        v1 = value(field, Float64)
+
+        cursor2_value = JSON3.findcursor(obj, "b") # cursor on the value
+        @test_throws KeyError JSON3.findcursor(obj, "b_")
+        @test JSON3.tryfindcursor(obj, "b_", nothing) === nothing
+        v2 = obj[cursor2_value, Float64]
+        @test (v1, v2) == (2.1, 2.1)
+
+        @test obj["c", AbstractString] == "3"
+
+        v = obj["d", JSONArray{Int64}]
+        @test collect(v) == [1,2,3,5,8]
+
+        v = obj["e", JSONArray]
+        @test v == obj["e", JSONArray{Any}]
+        @test length(v) == 3
+        vs = collect(v)
+        @test JSON3.as(vs[1], Int64) == 1
+        @test JSON3.as(vs[2], AbstractString) == "23"
+        @test JSON3.as(vs[3], JSONArray{Int64}) |> collect == [4,5]
+
+        v = obj["f", JSONObject]
+        @test length(v) == 2
+        @test collect(v["a", JSONArray{AbstractString}]) == ["2"]
+        @test v["b", Int64] == 1
+    end
+
+    @testset "JSONReader: empty object" begin
+        reader = Reader()
+        obj = parse!(reader, "{}", JSONObject)
+        @test isempty(obj)
+        @test iterate(obj) === nothing
+        @test_throws KeyError JSON3.findcursor(obj, "a")
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index ed9f860..cf76a16 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,6 @@
 using Test, JSON3, StructTypes, UUIDs, Dates
 
+
 @static if Base.VERSION < v"1.5"
     ismutable(o::T) where {T} = T.mutable
 end
@@ -834,9 +835,6 @@ x = Dict(:hey=>1)
 @test StructTypes.construct(Dict{Symbol, Any}, x) == x
 
 @test JSON3.gettapelen(Int64) == 2
-@test JSON3.regularstride(Missing) == false
-@test JSON3.regularstride(Int64) == true
-@test JSON3.regularstride(Union{Int64, Float64}) == true
 @test JSON3.getvalue(Nothing, [], [], 1, 2) === nothing
 @test JSON3.defaultminimum(nothing) == 4
 @test JSON3.defaultminimum(Int64) == 16
@@ -1019,5 +1017,7 @@ x = System(duration=3600.0)
 
 include("gentypes.jl")
 include("stringnumber.jl")
+include("reader.jl")
 
 end # @testset "JSON3"
+