Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@ The options for decode are:
will ensure that the parsed object only contains a single entry
containing the last value seen. This mirrors the parsing behavior
of virtually every other JSON parser.
* `{labels, Label}` - Controls if keys are returned as atoms. `Label`
accepts the following options:
* `binary` - the default, return all keys as binaries.
* `atom` - convert all keys to atoms. Use with caution, this can exhaust the atom space.
Creating atoms from a NIF with UTF-8 characters is only supported since OTP-26.
Decoding JSON keys that contain non-7-bit-ASCII characters will result in atoms with garbled content
when the NIF has been compiled for an older OTP release.
* `existing_atom` - convert keys to atoms; the atoms must already exist or the decode will fail.
* `attempt_atom` - try to convert keys to existing atoms,
return key as binary if the atom does not exist.
* `copy_strings` - Normally, when strings are decoded, they are
created as sub-binaries of the input data. With some workloads, this
leads to an undesirable bloating of memory: Strings in the decode
Expand Down Expand Up @@ -91,8 +101,8 @@ The options for encode are:
Data Format
-----------

Erlang JSON Erlang
==========================================================================
Erlang JSON Erlang Notes
======================================================================================

null -> null -> null
true -> true -> true
Expand All @@ -107,11 +117,17 @@ Data Format
{[]} -> {} -> {[]}
{[{foo, bar}]} -> {"foo": "bar"} -> {[{<<"foo">>, <<"bar">>}]}
{[{<<"foo">>, <<"bar">>}]} -> {"foo": "bar"} -> {[{<<"foo">>, <<"bar">>}]}
#{<<"foo">> => <<"bar">>} -> {"foo": "bar"} -> #{<<"foo">> => <<"bar">>}
#{<<"foo">> => <<"bar">>} -> {"foo": "bar"} -> #{<<"foo">> => <<"bar">>} (1)
{[{<<"foo">>, <<"bar">>}]} -> {"foo": "bar"} -> {[{foo, <<"bar">>}]} (2)
#{<<"foo">> => <<"bar">>} -> {"foo": "bar"} -> #{foo => <<"bar">>} (1, 2)

Note 1: This entry is only valid for VM's that support the `maps` data type
(i.e., 17.0 and newer) and client code must pass the `return_maps`
option to `jiffy:decode/2`.

Note 2: This entry is only valid if the atom already exists and the client code
passes the `{labels, attempt_atom}` option to `jiffy:decode/2`.

N.B. The last entry in this table is only valid for VM's that support
the `maps` data type (i.e., 17.0 and newer) and client code must pass
the `return_maps` option to `jiffy:decode/2`.

Improvements over EEP0018
-------------------------
Expand Down
20 changes: 19 additions & 1 deletion c_src/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ typedef struct {
int return_trailer;
int dedupe_keys;
int copy_strings;
js_labels labels;
ERL_NIF_TERM null_term;

unsigned char* p;
Expand Down Expand Up @@ -85,6 +86,7 @@ dec_new(ErlNifEnv* env)
d->return_trailer = 0;
d->dedupe_keys = 0;
d->copy_strings = 0;
d->labels = jsl_binary;
d->null_term = d->atoms->atom_null;

d->p = NULL;
Expand Down Expand Up @@ -651,6 +653,8 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
ERL_NIF_TERM tmp_argv[5];
ERL_NIF_TERM opts;
ERL_NIF_TERM val;
const ERL_NIF_TERM *tuple;
int arity;

if(argc != 2) {
return enif_make_badarg(env);
Expand Down Expand Up @@ -695,6 +699,20 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
d->null_term = d->atoms->atom_nil;
} else if(get_null_term(env, val, &(d->null_term))) {
continue;
} else if(enif_get_tuple(env, val, &arity, &tuple)
&& arity == 2
&& enif_is_identical(tuple[0], d->atoms->atom_labels)) {
if (enif_is_identical(tuple[1], d->atoms->atom_binary)) {
d->labels = jsl_binary;
} else if (enif_is_identical(tuple[1], d->atoms->atom_atom)) {
d->labels = jsl_atom;
} else if (enif_is_identical(tuple[1], d->atoms->atom_existing_atom)) {
d->labels = jsl_existing_atom;
} else if (enif_is_identical(tuple[1], d->atoms->atom_attempt_atom)) {
d->labels = jsl_attempt_atom;
} else {
return enif_make_badarg(env);
}
} else {
return enif_make_badarg(env);
}
Expand Down Expand Up @@ -980,7 +998,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
}
dec_pop_assert(d, st_value);
if(!make_object(env, curr, &val,
d->return_maps, d->dedupe_keys)) {
d->return_maps, d->dedupe_keys, d->labels)) {
ret = dec_error(d, "internal_object_error");
goto done;
}
Expand Down
64 changes: 43 additions & 21 deletions c_src/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,21 +257,14 @@ enc_special_character(Encoder* e, int val) {
}
}

#if (ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION < 17)

static int
enc_atom(Encoder* e, ERL_NIF_TERM val)
enc_latin1(Encoder* e, unsigned char *data, size_t size)
{
static const int MAX_ESCAPE_LEN = 12;
unsigned char data[512];

size_t size;
int i;

if(!enif_get_atom(e->env, val, (char*)data, 512, ERL_NIF_LATIN1)) {
return 0;
}

size = strlen((const char*)data);

/* Reserve space for the first quotation mark and most of the output. */
if(!enc_ensure(e, size + MAX_ESCAPE_LEN + 1)) {
return 0;
Expand Down Expand Up @@ -311,27 +304,20 @@ enc_atom(Encoder* e, ERL_NIF_TERM val)
e->count++;

return 1;

}

#endif

static int
enc_string(Encoder* e, ERL_NIF_TERM val)
enc_utf8(Encoder* e, unsigned char *data, size_t size)
{
static const int MAX_ESCAPE_LEN = 12;
ErlNifBinary bin;

unsigned char* data;
size_t size;
int esc_len;
int ulen;
int uval;
int i;

if(!enif_inspect_binary(e->env, val, &bin)) {
return 0;
}

data = bin.data;
size = bin.size;

/* Reserve space for the first quotation mark and most of the output. */
if(!enc_ensure(e, size + MAX_ESCAPE_LEN + 1)) {
Expand Down Expand Up @@ -386,6 +372,42 @@ enc_string(Encoder* e, ERL_NIF_TERM val)
return 1;
}

/*
 * Encode the atom `val` as a JSON string.
 *
 * The atom's text is copied into a 1024-byte stack buffer and handed to
 * enc_latin1() when built against NIF versions older than 2.17, or to
 * enc_utf8() otherwise.
 *
 * Returns 0 if `val` cannot be read as an atom into the buffer; otherwise
 * returns whatever the selected string encoder returns.
 *
 * The length passed on is derived from enif_get_atom()'s return value (the
 * number of bytes written, including the terminating NUL) rather than from
 * strlen(), so an atom whose text contains an embedded NUL byte is not
 * silently truncated, and the buffer is not scanned a second time.
 */
static int
enc_atom(Encoder* e, ERL_NIF_TERM val)
{
    /* NOTE(review): presumably sized for 255 code points of up to 4 UTF-8
     * bytes each plus the terminator -- confirm against the atom limits. */
    unsigned char data[1024];
    int written;

#if (ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION < 17)

    written = enif_get_atom(e->env, val, (char*)data, sizeof(data), ERL_NIF_LATIN1);
    if(written <= 0) {
        return 0;
    }

    /* Drop the terminating NUL from the byte count. */
    return enc_latin1(e, data, (size_t)(written - 1));

#else

    written = enif_get_atom(e->env, val, (char*)data, sizeof(data), ERL_NIF_UTF8);
    if(written <= 0) {
        return 0;
    }

    /* Drop the terminating NUL from the byte count. */
    return enc_utf8(e, data, (size_t)(written - 1));

#endif
}

/*
 * Encode the binary term `val` as a JSON string.
 *
 * Returns 0 when `val` cannot be inspected as a binary; otherwise forwards
 * the binary's bytes to enc_utf8() and returns its result.
 */
static int
enc_string(Encoder* e, ERL_NIF_TERM val)
{
    ErlNifBinary payload;
    const int is_binary = enif_inspect_binary(e->env, val, &payload);

    return is_binary ? enc_utf8(e, payload.data, payload.size) : 0;
}

static inline int
enc_object_key(ErlNifEnv *env, Encoder* e, ERL_NIF_TERM val)
{
Expand Down
5 changes: 5 additions & 0 deletions c_src/jiffy.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes");
st->atom_dedupe_keys = make_atom(env, "dedupe_keys");
st->atom_copy_strings = make_atom(env, "copy_strings");
st->atom_labels = make_atom(env, "labels");
st->atom_binary = make_atom(env, "binary");
st->atom_atom = make_atom(env, "atom");
st->atom_existing_atom = make_atom(env, "existing_atom");
st->atom_attempt_atom = make_atom(env, "attempt_atom");

// Markers used in encoding
st->ref_object = make_atom(env, "$object_ref$");
Expand Down
19 changes: 18 additions & 1 deletion c_src/jiffy.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ typedef struct {
ERL_NIF_TERM atom_escape_forward_slashes;
ERL_NIF_TERM atom_dedupe_keys;
ERL_NIF_TERM atom_copy_strings;
ERL_NIF_TERM atom_labels;
ERL_NIF_TERM atom_binary;
ERL_NIF_TERM atom_atom;
ERL_NIF_TERM atom_existing_atom;
ERL_NIF_TERM atom_attempt_atom;

ERL_NIF_TERM ref_object;
ERL_NIF_TERM ref_array;
Expand All @@ -52,6 +57,13 @@ typedef struct {
ErlNifResourceType* res_enc;
} jiffy_st;

/* Selects how the keys of decoded JSON objects are represented. */
typedef enum {
    jsl_binary        = 0, /* keep every key as the binary it was parsed as */
    jsl_atom          = 1, /* convert every key to an atom, creating it if needed */
    jsl_existing_atom = 2, /* convert to an already existing atom, fail otherwise */
    jsl_attempt_atom  = 3  /* use an existing atom if there is one, else keep the binary */
} js_labels;

ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name);
ERL_NIF_TERM make_ok(jiffy_st* st, ErlNifEnv* env, ERL_NIF_TERM data);
ERL_NIF_TERM make_error(jiffy_st* st, ErlNifEnv* env, const char* error);
Expand All @@ -72,7 +84,7 @@ void dec_destroy(ErlNifEnv* env, void* obj);
void enc_destroy(ErlNifEnv* env, void* obj);

int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
int ret_map, int dedupe_keys);
int ret_map, int dedupe_keys, js_labels labels);

int int_from_hex(const unsigned char* p);
int int_to_hex(int val, unsigned char* p);
Expand All @@ -85,4 +97,9 @@ int unicode_from_pair(int hi, int lo);
int unicode_uescape(int c, unsigned char* buf);
int double_to_shortest(unsigned char *buf, size_t size, size_t* len, double val);

#if (ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION < 17)
/* ERL_NIF_UTF8 was introduced in OTP-26, fall back to LATIN1 for older versions */
#define ERL_NIF_UTF8 ERL_NIF_LATIN1
#endif

#endif // Included JIFFY_H
67 changes: 61 additions & 6 deletions c_src/objects.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,63 @@

BEGIN_C

#include "jiffy.h"

#if (ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION < 17)

/* enif_make_new_atom_len was introduced in OTP-26 */

/*
 * Compatibility shim for NIF versions that do not provide
 * enif_make_new_atom_len().
 *
 * Creates an atom from the `len` bytes at `name` via enif_make_atom_len()
 * and stores it in `*atom`. The `encoding` argument is accepted only for
 * signature compatibility and is ignored.
 *
 * Returns non-zero on success and 0 on failure. Names longer than the
 * 255-character atom limit are rejected up front: enif_make_atom_len()
 * would otherwise raise a badarg exception in `env`, whereas the function
 * this shim stands in for only reports failure through its return value.
 */
static int enif_make_new_atom_len(ErlNifEnv *env, const char *name, size_t len,
                                  ERL_NIF_TERM *atom, ErlNifCharEncoding encoding)
{
    (void)encoding; /* intentionally unused by this fallback */

    if (len > 255) {
        return 0;
    }

    *atom = enif_make_atom_len(env, name, len);
    return !enif_is_exception(env, *atom);
}

#endif

/*
 * Convert a decoded object key into the representation selected by `labels`.
 *
 *   jsl_binary        - `*deckey` is `key` itself.
 *   jsl_atom          - `*deckey` is the atom named by `key`, looked up first
 *                       and created if the lookup fails.
 *   jsl_existing_atom - `*deckey` is the existing atom named by `key`; the
 *                       call fails if there is none.
 *   jsl_attempt_atom  - `*deckey` is the existing atom named by `key`, or
 *                       `key` itself when there is none.
 *
 * Returns non-zero on success and 0 on failure (including when `key` is not
 * a binary in any of the atom modes, or `labels` is not a known value).
 */
static int
make_key(ErlNifEnv* env, ERL_NIF_TERM key, ERL_NIF_TERM *deckey, js_labels labels) {
    ErlNifBinary raw;
    const char *name;
    int found;

    /* Binary labels never look at the key's contents. */
    if (labels == jsl_binary) {
        *deckey = key;
        return 1;
    }

    /* Anything that is not one of the atom modes is rejected outright. */
    if (labels != jsl_atom
            && labels != jsl_existing_atom
            && labels != jsl_attempt_atom) {
        return 0;
    }

    if (!enif_inspect_binary(env, key, &raw)) {
        return 0;
    }

    /* Every atom mode begins with a lookup of an already existing atom. */
    name = (const char *)raw.data;
    found = enif_make_existing_atom_len(env, name, raw.size, deckey, ERL_NIF_UTF8);

    if (labels == jsl_existing_atom) {
        return found;
    }

    if (labels == jsl_attempt_atom) {
        if (!found) {
            *deckey = key;
        }
        return 1;
    }

    /* jsl_atom: create the atom when the lookup did not find one. */
    if (found) {
        return 1;
    }
    return enif_make_new_atom_len(env, name, raw.size, deckey, ERL_NIF_UTF8) ? 1 : 0;
}

int
make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
int ret_map, int dedupe_keys)
int ret_map, int dedupe_keys, js_labels labels)
{
ERL_NIF_TERM ret;
ERL_NIF_TERM key;
ERL_NIF_TERM key, deckey;
ERL_NIF_TERM val;

std::set<std::string> seen;
Expand All @@ -37,8 +88,10 @@ make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
assert(0 == 1 && "Unbalanced object pairs.");
}
if(!enif_get_map_value(env, ret, key, &old_val)) {
if(!enif_make_map_put(env, ret, key, val, &ret)) {
if (!make_key(env, key, &deckey, labels))
return 0;
if(!enif_get_map_value(env, ret, deckey, &old_val)) {
if(!enif_make_map_put(env, ret, deckey, val, &ret)) {
return 0;
}
}
Expand All @@ -53,6 +106,8 @@ make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
assert(0 == 1 && "Unbalanced object pairs.");
}
if (!make_key(env, key, &deckey, labels))
return 0;
if(dedupe_keys) {
ErlNifBinary bin;
if(!enif_inspect_binary(env, key, &bin)) {
Expand All @@ -61,11 +116,11 @@ make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
std::string skey((char*) bin.data, bin.size);
if(seen.count(skey) == 0) {
seen.insert(skey);
val = enif_make_tuple2(env, key, val);
val = enif_make_tuple2(env, deckey, val);
ret = enif_make_list_cell(env, val, ret);
}
} else {
val = enif_make_tuple2(env, key, val);
val = enif_make_tuple2(env, deckey, val);
ret = enif_make_list_cell(env, val, ret);
}
}
Expand Down
2 changes: 1 addition & 1 deletion c_src/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ERL_NIF_TERM
make_atom(ErlNifEnv* env, const char* name)
{
ERL_NIF_TERM ret;
if(enif_make_existing_atom(env, name, &ret, ERL_NIF_LATIN1)) {
if(enif_make_existing_atom(env, name, &ret, ERL_NIF_UTF8)) {
return ret;
}
return enif_make_atom(env, name);
Expand Down
4 changes: 4 additions & 0 deletions src/jiffy.erl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@
| return_trailer
| dedupe_keys
| copy_strings
| {labels, binary}
| {labels, atom}
| {labels, attempt_atom}
| {labels, existing_atom}
| {null_term, any()}
| {bytes_per_iter, non_neg_integer()}
| {bytes_per_red, non_neg_integer()}.
Expand Down
Loading