diff --git a/README.md b/README.md index e28e2e8f..447881fa 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,16 @@ The options for decode are: will ensure that the parsed object only contains a single entry containing the last value seen. This mirrors the parsing beahvior of virtually every other JSON parser. +* `{labels, Label}` - Controls whether object keys are returned as binaries or atoms. `Label` + accepts the following options: + * `binary` - the default, return all keys as binaries. + * `atom` - convert all keys to atoms. Use with caution: atoms are never garbage collected, so decoding untrusted input can exhaust the atom table. + Creating atoms from a NIF with UTF-8 characters is only supported since OTP-26. + Attempting to decode JSON keys that contain characters outside 7-bit ASCII will result in atoms with unexpected content + when the NIF has been compiled for an older OTP release. + * `existing_atom` - convert keys to atoms; the atoms must already exist or the decode will fail. + * `attempt_atom` - try to convert keys to existing atoms, + return the key as a binary if the atom does not exist. * `copy_strings` - Normally, when strings are decoded, they are created as sub-binaries of the input data. 
With some workloads, this leads to an undesirable bloating of memory: Strings in the decode @@ -91,8 +101,8 @@ The options for encode are: Data Format ----------- - Erlang JSON Erlang - ========================================================================== + Erlang JSON Erlang Notes + ====================================================================================== null -> null -> null true -> true -> true @@ -107,11 +117,17 @@ Data Format {[]} -> {} -> {[]} {[{foo, bar}]} -> {"foo": "bar"} -> {[{<<"foo">>, <<"bar">>}]} {[{<<"foo">>, <<"bar">>}]} -> {"foo": "bar"} -> {[{<<"foo">>, <<"bar">>}]} - #{<<"foo">> => <<"bar">>} -> {"foo": "bar"} -> #{<<"foo">> => <<"bar">>} + #{<<"foo">> => <<"bar">>} -> {"foo": "bar"} -> #{<<"foo">> => <<"bar">>} (1) + {[{<<"foo">>, <<"bar">>}]} -> {"foo": "bar"} -> {[{foo, <<"bar">>}]} (2) + #{<<"foo">> => <<"bar">>} -> {"foo": "bar"} -> #{foo => <<"bar">>} (1, 2) + +Note 1: This entry is only valid for VMs that support the `maps` data type + (i.e., 17.0 and newer), and client code must pass the `return_maps` + option to `jiffy:decode/2`. + +Note 2: This entry is only valid if the atom `foo` already exists and client code passes the + `{labels, attempt_atom}` option to `jiffy:decode/2` (`{labels, existing_atom}` and `{labels, atom}` also return atom keys). -N.B. The last entry in this table is only valid for VM's that support -the `maps` data type (i.e., 17.0 and newer) and client code must pass -the `return_maps` option to `jiffy:decode/2`. 
Improvements over EEP0018 ------------------------- diff --git a/c_src/decoder.c b/c_src/decoder.c index 8f78117c..de5cb1cb 100644 --- a/c_src/decoder.c +++ b/c_src/decoder.c @@ -55,6 +55,7 @@ typedef struct { int return_trailer; int dedupe_keys; int copy_strings; + js_labels labels; ERL_NIF_TERM null_term; unsigned char* p; @@ -85,6 +86,7 @@ dec_new(ErlNifEnv* env) d->return_trailer = 0; d->dedupe_keys = 0; d->copy_strings = 0; + d->labels = jsl_binary; d->null_term = d->atoms->atom_null; d->p = NULL; @@ -651,6 +653,8 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) ERL_NIF_TERM tmp_argv[5]; ERL_NIF_TERM opts; ERL_NIF_TERM val; + const ERL_NIF_TERM *tuple; + int arity; if(argc != 2) { return enif_make_badarg(env); @@ -695,6 +699,20 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) d->null_term = d->atoms->atom_nil; } else if(get_null_term(env, val, &(d->null_term))) { continue; + } else if(enif_get_tuple(env, val, &arity, &tuple) + && arity == 2 + && enif_is_identical(tuple[0], d->atoms->atom_labels)) { + if (enif_is_identical(tuple[1], d->atoms->atom_binary)) { + d->labels = jsl_binary; + } else if (enif_is_identical(tuple[1], d->atoms->atom_atom)) { + d->labels = jsl_atom; + } else if (enif_is_identical(tuple[1], d->atoms->atom_existing_atom)) { + d->labels = jsl_existing_atom; + } else if (enif_is_identical(tuple[1], d->atoms->atom_attempt_atom)) { + d->labels = jsl_attempt_atom; + } else { + return enif_make_badarg(env); + } } else { return enif_make_badarg(env); } @@ -980,7 +998,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) } dec_pop_assert(d, st_value); if(!make_object(env, curr, &val, - d->return_maps, d->dedupe_keys)) { + d->return_maps, d->dedupe_keys, d->labels)) { ret = dec_error(d, "internal_object_error"); goto done; } diff --git a/c_src/encoder.c b/c_src/encoder.c index 4cfb3533..c82551a2 100644 --- a/c_src/encoder.c +++ b/c_src/encoder.c @@ -257,21 +257,14 @@ enc_special_character(Encoder* 
e, int val) { } } +#if (ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION < 17) + static int -enc_atom(Encoder* e, ERL_NIF_TERM val) +enc_latin1(Encoder* e, unsigned char *data, size_t size) { static const int MAX_ESCAPE_LEN = 12; - unsigned char data[512]; - - size_t size; int i; - if(!enif_get_atom(e->env, val, (char*)data, 512, ERL_NIF_LATIN1)) { - return 0; - } - - size = strlen((const char*)data); - /* Reserve space for the first quotation mark and most of the output. */ if(!enc_ensure(e, size + MAX_ESCAPE_LEN + 1)) { return 0; @@ -311,27 +304,20 @@ enc_atom(Encoder* e, ERL_NIF_TERM val) e->count++; return 1; + } +#endif + static int -enc_string(Encoder* e, ERL_NIF_TERM val) +enc_utf8(Encoder* e, unsigned char *data, size_t size) { static const int MAX_ESCAPE_LEN = 12; - ErlNifBinary bin; - - unsigned char* data; - size_t size; int esc_len; int ulen; int uval; int i; - if(!enif_inspect_binary(e->env, val, &bin)) { - return 0; - } - - data = bin.data; - size = bin.size; /* Reserve space for the first quotation mark and most of the output. 
*/ if(!enc_ensure(e, size + MAX_ESCAPE_LEN + 1)) { @@ -386,6 +372,42 @@ enc_string(Encoder* e, ERL_NIF_TERM val) return 1; } +static int +enc_atom(Encoder* e, ERL_NIF_TERM val) +{ + unsigned char data[1024]; + +#if (ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION < 17) + + if(!enif_get_atom(e->env, val, (char*)data, sizeof(data), ERL_NIF_LATIN1)) { + return 0; + } + + return enc_latin1(e, data, strlen((const char*)data)); + +#else + + if(!enif_get_atom(e->env, val, (char*)data, sizeof(data), ERL_NIF_UTF8)) { + return 0; + } + + return enc_utf8(e, data, strlen((const char*)data)); + +#endif +} + +static int +enc_string(Encoder* e, ERL_NIF_TERM val) +{ + ErlNifBinary bin; + + if(!enif_inspect_binary(e->env, val, &bin)) { + return 0; + } + + return enc_utf8(e, bin.data, bin.size); +} + static inline int enc_object_key(ErlNifEnv *env, Encoder* e, ERL_NIF_TERM val) { diff --git a/c_src/jiffy.c b/c_src/jiffy.c index dfca7c70..29d42a1b 100644 --- a/c_src/jiffy.c +++ b/c_src/jiffy.c @@ -35,6 +35,11 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes"); st->atom_dedupe_keys = make_atom(env, "dedupe_keys"); st->atom_copy_strings = make_atom(env, "copy_strings"); + st->atom_labels = make_atom(env, "labels"); + st->atom_binary = make_atom(env, "binary"); + st->atom_atom = make_atom(env, "atom"); + st->atom_existing_atom = make_atom(env, "existing_atom"); + st->atom_attempt_atom = make_atom(env, "attempt_atom"); // Markers used in encoding st->ref_object = make_atom(env, "$object_ref$"); diff --git a/c_src/jiffy.h b/c_src/jiffy.h index 9c97945d..a5d14626 100644 --- a/c_src/jiffy.h +++ b/c_src/jiffy.h @@ -44,6 +44,11 @@ typedef struct { ERL_NIF_TERM atom_escape_forward_slashes; ERL_NIF_TERM atom_dedupe_keys; ERL_NIF_TERM atom_copy_strings; + ERL_NIF_TERM atom_labels; + ERL_NIF_TERM atom_binary; + ERL_NIF_TERM atom_atom; + ERL_NIF_TERM atom_existing_atom; + ERL_NIF_TERM atom_attempt_atom; 
ERL_NIF_TERM ref_object; ERL_NIF_TERM ref_array; @@ -52,6 +57,13 @@ typedef struct { ErlNifResourceType* res_enc; } jiffy_st; +typedef enum { + jsl_binary=0, + jsl_atom, + jsl_existing_atom, + jsl_attempt_atom +} js_labels; + ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name); ERL_NIF_TERM make_ok(jiffy_st* st, ErlNifEnv* env, ERL_NIF_TERM data); ERL_NIF_TERM make_error(jiffy_st* st, ErlNifEnv* env, const char* error); @@ -72,7 +84,7 @@ void dec_destroy(ErlNifEnv* env, void* obj); void enc_destroy(ErlNifEnv* env, void* obj); int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, - int ret_map, int dedupe_keys); + int ret_map, int dedupe_keys, js_labels labels); int int_from_hex(const unsigned char* p); int int_to_hex(int val, unsigned char* p); @@ -85,4 +97,9 @@ int unicode_from_pair(int hi, int lo); int unicode_uescape(int c, unsigned char* buf); int double_to_shortest(unsigned char *buf, size_t size, size_t* len, double val); +#if (ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION < 17) +/* ERL_NIF_UTF8 was introduce in OTP-26, fall back to LATIN1 for older versions */ +#define ERL_NIF_UTF8 ERL_NIF_LATIN1 +#endif + #endif // Included JIFFY_H diff --git a/c_src/objects.cc b/c_src/objects.cc index 1a16699c..47f8e151 100644 --- a/c_src/objects.cc +++ b/c_src/objects.cc @@ -17,12 +17,63 @@ BEGIN_C +#include "jiffy.h" + +#if (ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION < 17) + +/* enif_make_new_atom was introduce in OTP-26 */ + +static int enif_make_new_atom_len(ErlNifEnv *env, const char *name, size_t len, + ERL_NIF_TERM *atom, ErlNifCharEncoding encoding) +{ + *atom = enif_make_atom_len(env, name, len); + return !enif_is_exception(env, *atom); +} + +#endif + +static int +make_key(ErlNifEnv* env, ERL_NIF_TERM key, ERL_NIF_TERM *deckey, js_labels labels) { + ErlNifBinary bin; + + switch (labels) { + case jsl_binary: + *deckey = key; + return 1; + + case jsl_atom: + if (!enif_inspect_binary(env, key, &bin)) + return 0; + if 
(enif_make_existing_atom_len(env, (char *)bin.data, bin.size, deckey, ERL_NIF_UTF8)) { + return 1; + } + if (enif_make_new_atom_len(env, (char *)bin.data, bin.size, deckey, ERL_NIF_UTF8)) { + return 1; + } + return 0; + + case jsl_existing_atom: + if (!enif_inspect_binary(env, key, &bin)) + return 0; + return enif_make_existing_atom_len(env, (char *)bin.data, bin.size, deckey, ERL_NIF_UTF8); + + case jsl_attempt_atom: + if (!enif_inspect_binary(env, key, &bin)) + return 0; + if (!enif_make_existing_atom_len(env, (char *)bin.data, bin.size, deckey, ERL_NIF_UTF8)) + *deckey = key; + return 1; + } + + return 0; +} + int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, - int ret_map, int dedupe_keys) + int ret_map, int dedupe_keys, js_labels labels) { ERL_NIF_TERM ret; - ERL_NIF_TERM key; + ERL_NIF_TERM key, deckey; ERL_NIF_TERM val; std::set seen; @@ -37,8 +88,10 @@ make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, if(!enif_get_list_cell(env, pairs, &key, &pairs)) { assert(0 == 1 && "Unbalanced object pairs."); } - if(!enif_get_map_value(env, ret, key, &old_val)) { - if(!enif_make_map_put(env, ret, key, val, &ret)) { + if (!make_key(env, key, &deckey, labels)) + return 0; + if(!enif_get_map_value(env, ret, deckey, &old_val)) { + if(!enif_make_map_put(env, ret, deckey, val, &ret)) { return 0; } } @@ -53,6 +106,8 @@ make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, if(!enif_get_list_cell(env, pairs, &key, &pairs)) { assert(0 == 1 && "Unbalanced object pairs."); } + if (!make_key(env, key, &deckey, labels)) + return 0; if(dedupe_keys) { ErlNifBinary bin; if(!enif_inspect_binary(env, key, &bin)) { @@ -61,11 +116,11 @@ make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, std::string skey((char*) bin.data, bin.size); if(seen.count(skey) == 0) { seen.insert(skey); - val = enif_make_tuple2(env, key, val); + val = enif_make_tuple2(env, deckey, val); ret = enif_make_list_cell(env, val, ret); } } else { - val = 
enif_make_tuple2(env, key, val); + val = enif_make_tuple2(env, deckey, val); ret = enif_make_list_cell(env, val, ret); } } diff --git a/c_src/util.c b/c_src/util.c index f6dcb68c..89aa0888 100644 --- a/c_src/util.c +++ b/c_src/util.c @@ -8,7 +8,7 @@ ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name) { ERL_NIF_TERM ret; - if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) { + if(enif_make_existing_atom(env, name, &ret, ERL_NIF_UTF8)) { return ret; } return enif_make_atom(env, name); diff --git a/src/jiffy.erl b/src/jiffy.erl index 6af15b5f..d1cf8b9f 100644 --- a/src/jiffy.erl +++ b/src/jiffy.erl @@ -41,6 +41,10 @@ | return_trailer | dedupe_keys | copy_strings + | {labels, binary} + | {labels, atom} + | {labels, attempt_atom} + | {labels, existing_atom} | {null_term, any()} | {bytes_per_iter, non_neg_integer()} | {bytes_per_red, non_neg_integer()}. diff --git a/test/jiffy_18_atom_tests.erl b/test/jiffy_18_atom_tests.erl new file mode 100644 index 00000000..6e0e6057 --- /dev/null +++ b/test/jiffy_18_atom_tests.erl @@ -0,0 +1,38 @@ +% This file is part of Jiffy released under the MIT license. +% See the LICENSE file for more information. + +-module(jiffy_18_atom_tests). + +-include_lib("eunit/include/eunit.hrl"). +-include("jiffy_util.hrl"). + +attempt_atom_test() -> + Opts = [{labels, atom}], + K1 = mk_key(), + K2 = mk_key(), + Json = enc({[{K1, 1}, {K2, 2}, {foo, 3}]}), + {Props} = dec(Json, Opts), + ?_assertEqual(3, length(Props)), + [?_assertEqual(true, is_atom(K)) || {K, _V} <- Props]. + +attempt_atom_dedupe_keys_test() -> + Opts = [{labels, atom}, dedupe_keys], + K1 = mk_key(), + Json = enc({[{K1, 1}, {K1, 2}]}), + {[{K, V}]} = dec(Json, Opts), + ?_assertEqual(true, is_atom(K)), + ?_assertEqual(K1, atom_to_binary(K)), + ?_assertEqual(V, 2). + +-ifndef(JIFFY_NO_MAPS). 
+
+attempt_atom_map_test() ->
+    Opts = [{labels, atom}, return_maps],
+    K1 = mk_key(),
+    K2 = mk_key(),
+    Json = enc({[{K1, 1}, {K2, 2}, {foo, 3}]}),
+    Map = dec(Json, Opts),
+    ?assertEqual(3, map_size(Map)), % immediate form; a ?_assert* object would never run here
+    lists:foreach(fun(K) -> ?assert(is_atom(K)) end, maps:keys(Map)).
+
+-endif.
diff --git a/test/jiffy_19_existing_atom_tests.erl b/test/jiffy_19_existing_atom_tests.erl
new file mode 100644
index 00000000..89fd594a
--- /dev/null
+++ b/test/jiffy_19_existing_atom_tests.erl
@@ -0,0 +1,62 @@
+% This file is part of Jiffy released under the MIT license.
+% See the LICENSE file for more information.
+
+-module(jiffy_19_existing_atom_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+-include("jiffy_util.hrl").
+
+% Tests for the `{labels, existing_atom}` decode option.
+%
+% The plain *_test/0 functions use the immediate ?assert* macros: a ?_assert*
+% macro only builds a test object, which is never run when it is merely the
+% return value of a non-generator test function.
+
+% A key whose atom already exists is decoded to that atom.
+existing_atom_test() ->
+    Opts = [{labels, existing_atom}],
+    Json = {[{key_is_atom, 1}]},
+    Data = enc(Json),
+    ?assertEqual(Json, jiffy:decode(Data, Opts)).
+
+% Decoding fails when the key has no existing atom.
+% NOTE(review): assumes jiffy:decode/2 raises an `error`-class exception; the
+% earlier `exit` expectation was never actually executed - confirm.
+existing_atom_no_atom_test() ->
+    Opts = [{labels, existing_atom}],
+    Json = {[{<<"key_is_no_atom">>, 1}]},
+    Data = enc(Json),
+    ?assertError(_, jiffy:decode(Data, Opts)).
+
+% With dedupe_keys, a repeated key yields one atom-keyed entry with the last value.
+existing_atom_dedupe_keys_test_() ->
+    Opts = [{labels, existing_atom}, dedupe_keys],
+    Cases = [
+        % Basic test
+        {
+            {[{<<"foo">>, 1}, {<<"foo">>, 2}]},
+            {[{foo, 2}]}
+        }
+    ],
+    {"Test _dedupe_keys", lists:map(fun({Data, Result}) ->
+        Json = jiffy:encode(Data),
+        ?_assertEqual(Result, jiffy:decode(Json, Opts))
+    end, Cases)}.
+
+-ifndef(JIFFY_NO_MAPS).
+
+% Map-returning variant of existing_atom_test/0.
+existing_atom_map_test() ->
+    Opts = [{labels, existing_atom}, return_maps],
+    Json = #{key_is_atom => 1},
+    Data = enc(Json),
+    ?assertEqual(Json, jiffy:decode(Data, Opts)).
+
+% Map-returning variant of existing_atom_no_atom_test/0.
+existing_atom_map_no_atom_test() ->
+    Opts = [{labels, existing_atom}, return_maps],
+    Json = #{<<"key_is_no_atom">> => 1},
+    Data = enc(Json),
+    ?assertError(_, jiffy:decode(Data, Opts)).
+
+-endif.
diff --git a/test/jiffy_20_attempt_atom_tests.erl b/test/jiffy_20_attempt_atom_tests.erl
new file mode 100644
index 00000000..c266a290
--- /dev/null
+++ b/test/jiffy_20_attempt_atom_tests.erl
@@ -0,0 +1,60 @@
+% This file is part of Jiffy released under the MIT license.
+% See the LICENSE file for more information.
+
+-module(jiffy_20_attempt_atom_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+
+% Keys whose atom already exists decode to atoms; other keys stay binaries.
+attempt_atom_test_() ->
+    % References the atom, presumably so that it exists before decoding.
+    _ = key_is_atom,
+    DecodeOpts = [{labels, attempt_atom}],
+    Expectations = [
+        {<<"{\"key_no_atom\":1}">>, {[{<<"key_no_atom">>, 1}]}},
+        {<<"{\"key_is_atom\":1}">>, {[{key_is_atom, 1}]}}
+    ],
+    Tests = [
+        ?_assertEqual(Expected, jiffy:decode(Input, DecodeOpts))
+     || {Input, Expected} <- Expectations
+    ],
+    {"Test attempt_atom", Tests}.
+
+% A repeated key yields one atom-keyed entry carrying the last value.
+attempt_atom_dedupe_keys_test_() ->
+    _ = foo,
+    DecodeOpts = [{labels, attempt_atom}, dedupe_keys],
+    Expectations = [
+        % Basic test
+        {
+            {[{<<"foo">>, 1}, {<<"foo">>, 2}]},
+            {[{foo, 2}]}
+        }
+    ],
+    Tests = [
+        begin
+            % Encode while the test list is built; only the decode is deferred.
+            Json = jiffy:encode(Term),
+            ?_assertEqual(Expected, jiffy:decode(Json, DecodeOpts))
+        end
+     || {Term, Expected} <- Expectations
+    ],
+    {"Test _dedupe_keys", Tests}.
+
+-ifndef(JIFFY_NO_MAPS).
+
+% Map-returning variant of attempt_atom_test_/0.
+attempt_atom_map_test_() ->
+    _ = key_is_atom,
+    DecodeOpts = [{labels, attempt_atom}, return_maps],
+    Expectations = [
+        {<<"{\"key_no_atom\":1}">>, #{<<"key_no_atom">> => 1}},
+        {<<"{\"key_is_atom\":1}">>, #{key_is_atom => 1}}
+    ],
+    Tests = [
+        ?_assertEqual(Expected, jiffy:decode(Input, DecodeOpts))
+     || {Input, Expected} <- Expectations
+    ],
+    {"Test attempt_atom_map", Tests}.
+
+-endif.
diff --git a/test/jiffy_util.hrl b/test/jiffy_util.hrl
index 983f7e3b..a651d83b 100644
--- a/test/jiffy_util.hrl
+++ b/test/jiffy_util.hrl
@@ -45,3 +45,8 @@ cases_path(Suffix) ->
     end,
     Path = "test/cases",
     filename:join([Prefix, Path, Suffix]).
+
+% Returns a binary of the form <<"key-N">>, where N is a fresh positive unique integer.
+mk_key() ->
+    Suffix = erlang:integer_to_binary(erlang:unique_integer([positive])),
+    <<"key-", Suffix/binary>>.