From 1909194cf8bbaf5943b740078d44922a5eba7dbf Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Thu, 26 Jun 2025 08:53:39 +0200 Subject: [PATCH 01/11] Upgrade CRoaring from v4.1.1 to v4.3.5 --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 20249 ++++++++++++++++--------------- pyroaring/roaring.h | 245 +- 3 files changed, 10906 insertions(+), 9590 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index fc2e1e7..ae31b3f 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.1.1" \ No newline at end of file +__croaring_version__ = "v4.3.5" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index 21489cd..a914df2 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-07-30T19:32:00Z +// Created by amalgamation.sh on 2025-06-05T04:01:50Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -1494,7 +1494,7 @@ bool array_container_validate(const array_container_t *v, const char **reason); * Return the serialized size in bytes of a container having cardinality "card". */ static inline int32_t array_container_serialized_size_in_bytes(int32_t card) { - return card * 2 + 2; + return card * sizeof(uint16_t); } /** @@ -6758,8 +6758,8 @@ void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); * chunks _differ_. This means that if there are two entries with different * high 48 bits, then there is only one inner node containing the common key * prefix, and two leaves. - * * Intrusive leaves: the leaf struct is included in user values. This removes - * a layer of indirection. + * * Mostly pointer-free: nodes are referred to by index rather than pointer, + * so that the structure can be deserialized with a backing buffer. */ // Fixed length of keys in the ART. 
All keys are assumed to be of this length. @@ -6772,25 +6772,33 @@ namespace internal { #endif typedef uint8_t art_key_chunk_t; -typedef struct art_node_s art_node_t; + +// Internal node reference type. Contains the node typecode in the low 8 bits, +// and the index in the relevant node array in the high 48 bits. Has a value of +// CROARING_ART_NULL_REF when pointing to a non-existent node. +typedef uint64_t art_ref_t; + +typedef void art_node_t; /** - * Wrapper to allow an empty tree. + * The ART is empty when root is a null ref. + * + * Each node type has its own dynamic array of node structs, indexed by + * art_ref_t. The arrays are expanded as needed, and shrink only when + * `shrink_to_fit` is called. */ typedef struct art_s { - art_node_t *root; + art_ref_t root; + + // Indexed by node typecode, thus 1 larger than they need to be for + // convenience. `first_free` indicates the index where the first free node + // lives, which may be equal to the capacity. + uint64_t first_free[6]; + uint64_t capacities[6]; + art_node_t *nodes[6]; } art_t; -/** - * Values inserted into the tree have to be cast-able to art_val_t. This - * improves performance by reducing indirection. - * - * NOTE: Value pointers must be unique! This is because each value struct - * contains the key corresponding to the value. - */ -typedef struct art_val_s { - art_key_chunk_t key[ART_KEY_BYTES]; -} art_val_t; +typedef uint64_t art_val_t; /** * Compares two keys, returns their relative order: @@ -6802,14 +6810,21 @@ int art_compare_keys(const art_key_chunk_t key1[], const art_key_chunk_t key2[]); /** - * Inserts the given key and value. + * Initializes the ART. + */ +void art_init_cleared(art_t *art); + +/** + * Inserts the given key and value. Returns a pointer to the value inserted, + * valid as long as the ART is not modified. 
*/ -void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val); +art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val); /** - * Returns the value erased, NULL if not found. + * Returns true if a value was erased. Sets `*erased_val` to the value erased, + * if any. */ -art_val_t *art_erase(art_t *art, const art_key_chunk_t *key); +bool art_erase(art_t *art, const art_key_chunk_t *key, art_val_t *erased_val); /** * Returns the value associated with the given key, NULL if not found. @@ -6822,42 +6837,39 @@ art_val_t *art_find(const art_t *art, const art_key_chunk_t *key); bool art_is_empty(const art_t *art); /** - * Frees the nodes of the ART except the values, which the user is expected to - * free. + * Frees the contents of the ART. Should not be called when using + * `art_frozen_view`. */ void art_free(art_t *art); -/** - * Returns the size in bytes of the ART. Includes size of pointers to values, - * but not the values themselves. - */ -size_t art_size_in_bytes(const art_t *art); - /** * Prints the ART using printf, useful for debugging. */ void art_printf(const art_t *art); /** - * Callback for validating the value stored in a leaf. + * Callback for validating the value stored in a leaf. `context` is a + * user-provided value passed to the callback without modification. * * Should return true if the value is valid, false otherwise * If false is returned, `*reason` should be set to a static string describing * the reason for the failure. */ -typedef bool (*art_validate_cb_t)(const art_val_t *val, const char **reason); +typedef bool (*art_validate_cb_t)(const art_val_t val, const char **reason, + void *context); /** - * Validate the ART tree, ensuring it is internally consistent. + * Validate the ART tree, ensuring it is internally consistent. `context` is a + * user-provided value passed to the callback without modification.
*/ bool art_internal_validate(const art_t *art, const char **reason, - art_validate_cb_t validate_cb); + art_validate_cb_t validate_cb, void *context); /** * ART-internal iterator bookkeeping. Users should treat this as an opaque type. */ typedef struct art_iterator_frame_s { - art_node_t *node; + art_ref_t ref; uint8_t index_in_node; } art_iterator_frame_t; @@ -6869,6 +6881,8 @@ typedef struct art_iterator_s { art_key_chunk_t key[ART_KEY_BYTES]; art_val_t *value; + art_t *art; + uint8_t depth; // Key depth uint8_t frame; // Node depth @@ -6882,19 +6896,19 @@ typedef struct art_iterator_s { * depending on `first`. The iterator is not valid if there are no entries in * the ART. */ -art_iterator_t art_init_iterator(const art_t *art, bool first); +art_iterator_t art_init_iterator(art_t *art, bool first); /** * Returns an initialized iterator positioned at a key equal to or greater than * the given key, if it exists. */ -art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key); +art_iterator_t art_lower_bound(art_t *art, const art_key_chunk_t *key); /** * Returns an initialized iterator positioned at a key greater than the given * key, if it exists. */ -art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key); +art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key); /** * The following iterator movement functions return true if a new entry was @@ -6913,14 +6927,49 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, /** * Insert the value and positions the iterator at the key. */ -void art_iterator_insert(art_t *art, art_iterator_t *iterator, - const art_key_chunk_t *key, art_val_t *val); +void art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, + art_val_t val); /** * Erase the value pointed at by the iterator. Moves the iterator to the next - * leaf. Returns the value erased or NULL if nothing was erased. + * leaf. + * Returns true if a value was erased. 
Sets `*erased_val` to the value erased, + * if any. + */ +bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val); + +/** + * Shrinks the internal arrays in the ART to remove any unused elements. Returns + * the number of bytes freed. + */ +size_t art_shrink_to_fit(art_t *art); + +/** + * Returns true if the ART has no unused elements. + */ +bool art_is_shrunken(const art_t *art); + +/** + * Returns the serialized size in bytes. + * Requires `art_shrink_to_fit` to be called first. + */ +size_t art_size_in_bytes(const art_t *art); + +/** + * Serializes the ART and returns the number of bytes written. Returns 0 on + * error. Requires `art_shrink_to_fit` to be called first. + */ +size_t art_serialize(const art_t *art, char *buf); + +/** + * Deserializes the ART from a serialized buffer, reading up to `maxbytes` + * bytes. Returns 0 on error. Requires `buf` to be 8 byte aligned. + * + * An ART deserialized in this way should only be used in a readonly context. + * The underlying buffer must not be freed before the ART. `art_free` should + * not be called on the ART deserialized in this way.
*/ -art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator); +size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art); #ifdef __cplusplus } // extern "C" @@ -9102,37 +9151,36 @@ CROARING_UNTARGET_AVX512 #endif/* end file src/array_util.c */ /* begin file src/art/art.c */ #include +#include #include #include -#define CROARING_ART_NODE4_TYPE 0 -#define CROARING_ART_NODE16_TYPE 1 -#define CROARING_ART_NODE48_TYPE 2 -#define CROARING_ART_NODE256_TYPE 3 -#define CROARING_ART_NUM_TYPES 4 +#define CROARING_ART_NULL_REF 0 + +#define CROARING_ART_LEAF_TYPE 1 +#define CROARING_ART_NODE4_TYPE 2 +#define CROARING_ART_NODE16_TYPE 3 +#define CROARING_ART_NODE48_TYPE 4 +#define CROARING_ART_NODE256_TYPE 5 + +#define CROARING_ART_MIN_TYPE CROARING_ART_LEAF_TYPE +#define CROARING_ART_MAX_TYPE CROARING_ART_NODE256_TYPE // Node48 placeholder value to indicate no child is present at this key index. #define CROARING_ART_NODE48_EMPTY_VAL 48 +#define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) -// We use the least significant bit of node pointers to indicate whether a node -// is a leaf or an inner node. This is never surfaced to the user. -// -// Using pointer tagging to indicate leaves not only saves a bit of memory by -// sparing the typecode, but also allows us to use an intrusive leaf struct. -// Using an intrusive leaf struct leaves leaf allocation up to the user. Upon -// deallocation of the ART, we know not to free the leaves without having to -// dereference the leaf pointers. -// -// All internal operations on leaves should use CROARING_CAST_LEAF before using -// the leaf. The only places that use CROARING_SET_LEAF are locations where a -// field is directly assigned to a leaf pointer. After using CROARING_SET_LEAF, -// the leaf should be treated as a node of unknown type. 
-#define CROARING_IS_LEAF(p) (((uintptr_t)(p) & 1)) -#define CROARING_SET_LEAF(p) ((art_node_t *)((uintptr_t)(p) | 1)) -#define CROARING_CAST_LEAF(p) ((art_leaf_t *)((void *)((uintptr_t)(p) & ~1))) +#define CROARING_ART_ALIGN_BUF(buf, alignment) \ + (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1))) -#define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) +// Gives the byte difference needed to align the current buffer to the +// alignment, relative to the start of the buffer. +#define CROARING_ART_ALIGN_SIZE_RELATIVE(buf_cur, buf_start, alignment) \ + ((((ptrdiff_t)((buf_cur) - (buf_start)) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1))) - \ + (ptrdiff_t)((buf_cur) - (buf_start))) #ifdef __cplusplus extern "C" { @@ -9142,30 +9190,20 @@ namespace internal { typedef uint8_t art_typecode_t; -// Aliasing with a "leaf" naming so that its purpose is clearer in the context -// of the trie internals. -typedef art_val_t art_leaf_t; - -typedef struct art_internal_validate_s { - const char **reason; - art_validate_cb_t validate_cb; - - int depth; - art_key_chunk_t current_key[ART_KEY_BYTES]; -} art_internal_validate_t; - -// Set the reason message, and return false for convenience. -static inline bool art_validate_fail(const art_internal_validate_t *validate, - const char *msg) { - *validate->reason = msg; - return false; -} +typedef struct art_leaf_s { + union { + struct { + art_key_chunk_t key[ART_KEY_BYTES]; + art_val_t val; + }; + uint64_t next_free; + }; +} art_leaf_t; // Inner node, with prefix. // // We use a fixed-length array as a pointer would be larger than the array. typedef struct art_inner_node_s { - art_typecode_t typecode; uint8_t prefix_size; uint8_t prefix[ART_KEY_BYTES - 1]; } art_inner_node_t; @@ -9174,119 +9212,223 @@ typedef struct art_inner_node_s { // Node4: key[i] corresponds with children[i]. Keys are sorted. 
typedef struct art_node4_s { - art_inner_node_t base; - uint8_t count; - uint8_t keys[4]; - art_node_t *children[4]; + union { + struct { + art_inner_node_t base; + uint8_t count; + uint8_t keys[4]; + art_ref_t children[4]; + }; + uint64_t next_free; + }; } art_node4_t; // Node16: key[i] corresponds with children[i]. Keys are sorted. typedef struct art_node16_s { - art_inner_node_t base; - uint8_t count; - uint8_t keys[16]; - art_node_t *children[16]; + union { + struct { + art_inner_node_t base; + uint8_t count; + uint8_t keys[16]; + art_ref_t children[16]; + }; + uint64_t next_free; + }; } art_node16_t; // Node48: key[i] corresponds with children[key[i]] if key[i] != // CROARING_ART_NODE48_EMPTY_VAL. Keys are naturally sorted due to direct // indexing. typedef struct art_node48_s { - art_inner_node_t base; - uint8_t count; - // Bitset where the ith bit is set if children[i] is available - // Because there are at most 48 children, only the bottom 48 bits are used. - uint64_t available_children; - uint8_t keys[256]; - art_node_t *children[48]; + union { + struct { + art_inner_node_t base; + uint8_t count; + // Bitset where the ith bit is set if children[i] is available + // Because there are at most 48 children, only the bottom 48 bits + // are used. + uint64_t available_children; + uint8_t keys[256]; + art_ref_t children[48]; + }; + uint64_t next_free; + }; } art_node48_t; // Node256: children[i] is directly indexed by key chunk. A child is present if // children[i] != NULL. typedef struct art_node256_s { - art_inner_node_t base; - uint16_t count; - art_node_t *children[256]; + union { + struct { + art_inner_node_t base; + uint16_t count; + art_ref_t children[256]; + }; + uint64_t next_free; + }; } art_node256_t; +// Size of each node type, indexed by typecode for convenience. 
+static const size_t ART_NODE_SIZES[] = { + 0, + sizeof(art_leaf_t), + sizeof(art_node4_t), + sizeof(art_node16_t), + sizeof(art_node48_t), + sizeof(art_node256_t), +}; + // Helper struct to refer to a child within a node at a specific index. typedef struct art_indexed_child_s { - art_node_t *child; + art_ref_t child; uint8_t index; art_key_chunk_t key_chunk; } art_indexed_child_t; -static inline bool art_is_leaf(const art_node_t *node) { - return CROARING_IS_LEAF(node); +typedef struct art_internal_validate_s { + const char **reason; + art_validate_cb_t validate_cb; + void *context; + + int depth; + art_key_chunk_t current_key[ART_KEY_BYTES]; +} art_internal_validate_t; + +// Set the reason message, and return false for convenience. +static inline bool art_validate_fail(const art_internal_validate_t *validate, + const char *msg) { + *validate->reason = msg; + return false; } -static void art_leaf_populate(art_leaf_t *leaf, const art_key_chunk_t key[]) { - memcpy(leaf->key, key, ART_KEY_BYTES); +static inline art_ref_t art_to_ref(uint64_t index, art_typecode_t typecode) { + return ((art_ref_t)index) << 16 | typecode; +} + +static inline uint64_t art_ref_index(art_ref_t ref) { + return ((uint64_t)ref) >> 16; +} + +static inline art_typecode_t art_ref_typecode(art_ref_t ref) { + return (art_typecode_t)ref; +} + +/** + * Gets a pointer to a node from its reference. The pointer only remains valid + * under non-mutating operations. If any mutating operations occur, this + * function should be called again to get a valid pointer to the node. 
+ */ +static art_node_t *art_deref(const art_t *art, art_ref_t ref) { + assert(ref != CROARING_ART_NULL_REF); + art_typecode_t typecode = art_ref_typecode(ref); + return (art_node_t *)((char *)art->nodes[typecode] + + art_ref_index(ref) * ART_NODE_SIZES[typecode]); +} + +static inline art_node_t *art_get_node(const art_t *art, uint64_t index, + art_typecode_t typecode) { + return art_deref(art, art_to_ref(index, typecode)); +} + +static inline uint64_t art_get_index(const art_t *art, const art_node_t *node, + art_typecode_t typecode) { + art_node_t *nodes = art->nodes[typecode]; + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_leaf_t *)node - (art_leaf_t *)nodes; + case CROARING_ART_NODE4_TYPE: + return (art_node4_t *)node - (art_node4_t *)nodes; + case CROARING_ART_NODE16_TYPE: + return (art_node16_t *)node - (art_node16_t *)nodes; + case CROARING_ART_NODE48_TYPE: + return (art_node48_t *)node - (art_node48_t *)nodes; + case CROARING_ART_NODE256_TYPE: + return (art_node256_t *)node - (art_node256_t *)nodes; + default: + assert(false); + return 0; + } +} + +/** + * Creates a reference from a pointer. 
+ */ +static inline art_ref_t art_get_ref(const art_t *art, const art_node_t *node, + art_typecode_t typecode) { + return art_to_ref(art_get_index(art, node, typecode), typecode); } -static inline uint8_t art_get_type(const art_inner_node_t *node) { - return node->typecode; +static inline bool art_is_leaf(art_ref_t ref) { + return art_ref_typecode(ref) == CROARING_ART_LEAF_TYPE; } static inline void art_init_inner_node(art_inner_node_t *node, - art_typecode_t typecode, const art_key_chunk_t prefix[], uint8_t prefix_size) { - node->typecode = typecode; node->prefix_size = prefix_size; memcpy(node->prefix, prefix, prefix_size * sizeof(art_key_chunk_t)); } -static void art_free_node(art_node_t *node); +static void art_node_free(art_t *art, art_node_t *node, + art_typecode_t typecode); + +static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode); // ===================== Start of node-specific functions ====================== -static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], +static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], + art_val_t val) { + uint64_t index = art_allocate_index(art, CROARING_ART_LEAF_TYPE); + art_leaf_t *leaf = + ((art_leaf_t *)art->nodes[CROARING_ART_LEAF_TYPE]) + index; + memcpy(leaf->key, key, ART_KEY_BYTES); + leaf->val = val; + return art_to_ref(index, CROARING_ART_LEAF_TYPE); +} + +static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], +static art_node16_t *art_node16_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], +static art_node48_t *art_node48_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], +static art_node256_t *art_node256_create(art_t *art, + const art_key_chunk_t 
prefix[], uint8_t prefix_size); -static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, - uint8_t key); -static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, - uint8_t key); -static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, - uint8_t key); -static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, - uint8_t key); +static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, + art_ref_t child, uint8_t key); +static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, + art_ref_t child, uint8_t key); +static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, + art_ref_t child, uint8_t key); +static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, + art_ref_t child, uint8_t key); -static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], +static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size) { - art_node4_t *node = (art_node4_t *)roaring_malloc(sizeof(art_node4_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE4_TYPE, prefix, - prefix_size); + uint64_t index = art_allocate_index(art, CROARING_ART_NODE4_TYPE); + art_node4_t *node = + ((art_node4_t *)art->nodes[CROARING_ART_NODE4_TYPE]) + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; return node; } -static void art_free_node4(art_node4_t *node) { - for (size_t i = 0; i < node->count; ++i) { - art_free_node(node->children[i]); - } - roaring_free(node); -} - -static inline art_node_t *art_node4_find_child(const art_node4_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node4_find_child(const art_node4_t *node, + art_key_chunk_t key) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key) { return node->children[i]; } } - return NULL; + return CROARING_ART_NULL_REF; } -static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t 
art_node4_insert(art_t *art, art_node4_t *node, + art_ref_t child, uint8_t key) { if (node->count < 4) { size_t idx = 0; for (; idx < node->count; ++idx) { @@ -9299,26 +9441,26 @@ static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, memmove(node->keys + idx + 1, node->keys + idx, after * sizeof(art_key_chunk_t)); memmove(node->children + idx + 1, node->children + idx, - after * sizeof(art_node_t *)); + after * sizeof(art_ref_t)); node->children[idx] = child; node->keys[idx] = key; node->count++; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); } art_node16_t *new_node = - art_node16_create(node->base.prefix, node->base.prefix_size); + art_node16_create(art, node->base.prefix, node->base.prefix_size); // Instead of calling insert, this could be specialized to 2x memcpy and // setting the count. for (size_t i = 0; i < 4; ++i) { - art_node16_insert(new_node, node->children[i], node->keys[i]); + art_node16_insert(art, new_node, node->children[i], node->keys[i]); } - roaring_free(node); - return art_node16_insert(new_node, child, key); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + return art_node16_insert(art, new_node, child, key); } -static inline art_node_t *art_node4_erase(art_node4_t *node, - art_key_chunk_t key_chunk) { +static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, + art_key_chunk_t key_chunk) { int idx = -1; for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { @@ -9326,17 +9468,18 @@ static inline art_node_t *art_node4_erase(art_node4_t *node, } } if (idx == -1) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); } if (node->count == 2) { // Only one child remains after erasing, so compress the path by // removing this node. 
uint8_t other_idx = idx ^ 1; - art_node_t *remaining_child = node->children[other_idx]; + art_ref_t remaining_child = node->children[other_idx]; art_key_chunk_t remaining_child_key = node->keys[other_idx]; if (!art_is_leaf(remaining_child)) { // Correct the prefix of the child node. - art_inner_node_t *inner_node = (art_inner_node_t *)remaining_child; + art_inner_node_t *inner_node = + (art_inner_node_t *)art_deref(art, remaining_child); memmove(inner_node->prefix + node->base.prefix_size + 1, inner_node->prefix, inner_node->prefix_size); memcpy(inner_node->prefix, node->base.prefix, @@ -9344,7 +9487,7 @@ static inline art_node_t *art_node4_erase(art_node4_t *node, inner_node->prefix[node->base.prefix_size] = remaining_child_key; inner_node->prefix_size += node->base.prefix_size + 1; } - roaring_free(node); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); return remaining_child; } // Shift other keys to maintain sorted order. @@ -9352,14 +9495,14 @@ static inline art_node_t *art_node4_erase(art_node4_t *node, memmove(node->keys + idx, node->keys + idx + 1, after_next * sizeof(art_key_chunk_t)); memmove(node->children + idx, node->children + idx + 1, - after_next * sizeof(art_node_t *)); + after_next * sizeof(art_ref_t)); node->count--; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); } static inline void art_node4_replace(art_node4_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { node->children[i] = new_child; @@ -9373,7 +9516,7 @@ static inline art_indexed_child_t art_node4_next_child(const art_node4_t *node, art_indexed_child_t indexed_child; index++; if (index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9390,7 +9533,7 @@ static inline art_indexed_child_t 
art_node4_prev_child(const art_node4_t *node, index--; art_indexed_child_t indexed_child; if (index < 0) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9403,7 +9546,7 @@ static inline art_indexed_child_t art_node4_child_at(const art_node4_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9423,14 +9566,15 @@ static inline art_indexed_child_t art_node4_lower_bound( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_internal_validate_at(const art_node_t *node, +static bool art_internal_validate_at(const art_t *art, art_ref_t ref, art_internal_validate_t validator); -static bool art_node4_internal_validate(const art_node4_t *node, +static bool art_node4_internal_validate(const art_t *art, + const art_node4_t *node, art_internal_validate_t validator) { if (node->count == 0) { return art_validate_fail(&validator, "Node4 has no children"); @@ -9457,41 +9601,36 @@ static bool art_node4_internal_validate(const art_node4_t *node, } } validator.current_key[validator.depth - 1] = node->keys[i]; - if (!art_internal_validate_at(node->children[i], validator)) { + if (!art_internal_validate_at(art, node->children[i], validator)) { return false; } } return true; } -static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], +static art_node16_t *art_node16_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size) { - art_node16_t *node = (art_node16_t *)roaring_malloc(sizeof(art_node16_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE16_TYPE, prefix, - prefix_size); + uint64_t index = art_allocate_index(art, CROARING_ART_NODE16_TYPE); + art_node16_t *node = + ((art_node16_t 
*)art->nodes[CROARING_ART_NODE16_TYPE]) + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; return node; } -static void art_free_node16(art_node16_t *node) { - for (size_t i = 0; i < node->count; ++i) { - art_free_node(node->children[i]); - } - roaring_free(node); -} - -static inline art_node_t *art_node16_find_child(const art_node16_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node16_find_child(const art_node16_t *node, + art_key_chunk_t key) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key) { return node->children[i]; } } - return NULL; + return CROARING_ART_NULL_REF; } -static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, + art_ref_t child, uint8_t key) { if (node->count < 16) { size_t idx = 0; for (; idx < node->count; ++idx) { @@ -9504,24 +9643,24 @@ static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, memmove(node->keys + idx + 1, node->keys + idx, after * sizeof(art_key_chunk_t)); memmove(node->children + idx + 1, node->children + idx, - after * sizeof(art_node_t *)); + after * sizeof(art_ref_t)); node->children[idx] = child; node->keys[idx] = key; node->count++; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); } art_node48_t *new_node = - art_node48_create(node->base.prefix, node->base.prefix_size); + art_node48_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 16; ++i) { - art_node48_insert(new_node, node->children[i], node->keys[i]); + art_node48_insert(art, new_node, node->children[i], node->keys[i]); } - roaring_free(node); - return art_node48_insert(new_node, child, key); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); + return art_node48_insert(art, new_node, child, key); } -static inline art_node_t *art_node16_erase(art_node16_t *node, - uint8_t 
key_chunk) { +static inline art_ref_t art_node16_erase(art_t *art, art_node16_t *node, + uint8_t key_chunk) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { // Shift other keys to maintain sorted order. @@ -9529,28 +9668,28 @@ static inline art_node_t *art_node16_erase(art_node16_t *node, memmove(node->keys + i, node->keys + i + 1, after_next * sizeof(key_chunk)); memmove(node->children + i, node->children + i + 1, - after_next * sizeof(art_node_t *)); + after_next * sizeof(art_ref_t)); node->count--; break; } } if (node->count > 4) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); } art_node4_t *new_node = - art_node4_create(node->base.prefix, node->base.prefix_size); + art_node4_create(art, node->base.prefix, node->base.prefix_size); // Instead of calling insert, this could be specialized to 2x memcpy and // setting the count. for (size_t i = 0; i < 4; ++i) { - art_node4_insert(new_node, node->children[i], node->keys[i]); + art_node4_insert(art, new_node, node->children[i], node->keys[i]); } - roaring_free(node); - return (art_node_t *)new_node; + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); + return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE4_TYPE); } static inline void art_node16_replace(art_node16_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { for (uint8_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { node->children[i] = new_child; @@ -9564,7 +9703,7 @@ static inline art_indexed_child_t art_node16_next_child( art_indexed_child_t indexed_child; index++; if (index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9581,7 +9720,7 @@ static inline art_indexed_child_t art_node16_prev_child( index--; art_indexed_child_t indexed_child; if (index < 0) { - indexed_child.child = NULL; + 
indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9594,7 +9733,7 @@ static inline art_indexed_child_t art_node16_child_at(const art_node16_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9614,11 +9753,12 @@ static inline art_indexed_child_t art_node16_lower_bound( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_node16_internal_validate(const art_node16_t *node, +static bool art_node16_internal_validate(const art_t *art, + const art_node16_t *node, art_internal_validate_t validator) { if (node->count <= 4) { return art_validate_fail(&validator, "Node16 has too few children"); @@ -9641,18 +9781,20 @@ static bool art_node16_internal_validate(const art_node16_t *node, } } validator.current_key[validator.depth - 1] = node->keys[i]; - if (!art_internal_validate_at(node->children[i], validator)) { + if (!art_internal_validate_at(art, node->children[i], validator)) { return false; } } return true; } -static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], +static art_node48_t *art_node48_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size) { - art_node48_t *node = (art_node48_t *)roaring_malloc(sizeof(art_node48_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE48_TYPE, prefix, - prefix_size); + uint64_t index = art_allocate_index(art, CROARING_ART_NODE48_TYPE); + art_node48_t *node = + ((art_node48_t *)art->nodes[CROARING_ART_NODE48_TYPE]) + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; node->available_children = CROARING_NODE48_AVAILABLE_CHILDREN_MASK; for (size_t i = 0; i < 256; ++i) { @@ -9661,29 +9803,17 @@ static art_node48_t *art_node48_create(const 
art_key_chunk_t prefix[], return node; } -static void art_free_node48(art_node48_t *node) { - uint64_t used_children = - (node->available_children) ^ CROARING_NODE48_AVAILABLE_CHILDREN_MASK; - while (used_children != 0) { - // We checked above that used_children is not zero - uint8_t child_idx = roaring_trailing_zeroes(used_children); - art_free_node(node->children[child_idx]); - used_children &= ~(UINT64_C(1) << child_idx); - } - roaring_free(node); -} - -static inline art_node_t *art_node48_find_child(const art_node48_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node48_find_child(const art_node48_t *node, + art_key_chunk_t key) { uint8_t val_idx = node->keys[key]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { return node->children[val_idx]; } - return NULL; + return CROARING_ART_NULL_REF; } -static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, + art_ref_t child, uint8_t key) { if (node->count < 48) { // node->available_children is only zero when the node is full (count == // 48), we just checked count < 48 @@ -9692,48 +9822,48 @@ static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, node->children[val_idx] = child; node->count++; node->available_children &= ~(UINT64_C(1) << val_idx); - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); } art_node256_t *new_node = - art_node256_create(node->base.prefix, node->base.prefix_size); + art_node256_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { uint8_t val_idx = node->keys[i]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { - art_node256_insert(new_node, node->children[val_idx], i); + art_node256_insert(art, new_node, node->children[val_idx], i); } } - roaring_free(node); - return art_node256_insert(new_node, child, key); + art_node_free(art, (art_node_t *)node, 
CROARING_ART_NODE48_TYPE); + return art_node256_insert(art, new_node, child, key); } -static inline art_node_t *art_node48_erase(art_node48_t *node, - uint8_t key_chunk) { +static inline art_ref_t art_node48_erase(art_t *art, art_node48_t *node, + uint8_t key_chunk) { uint8_t val_idx = node->keys[key_chunk]; if (val_idx == CROARING_ART_NODE48_EMPTY_VAL) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); } node->keys[key_chunk] = CROARING_ART_NODE48_EMPTY_VAL; node->available_children |= UINT64_C(1) << val_idx; node->count--; if (node->count > 16) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); } art_node16_t *new_node = - art_node16_create(node->base.prefix, node->base.prefix_size); + art_node16_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { val_idx = node->keys[i]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { - art_node16_insert(new_node, node->children[val_idx], i); + art_node16_insert(art, new_node, node->children[val_idx], i); } } - roaring_free(node); - return (art_node_t *)new_node; + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE16_TYPE); } static inline void art_node48_replace(art_node48_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { uint8_t val_idx = node->keys[key_chunk]; assert(val_idx != CROARING_ART_NODE48_EMPTY_VAL); node->children[val_idx] = new_child; @@ -9751,7 +9881,7 @@ static inline art_indexed_child_t art_node48_next_child( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -9770,7 +9900,7 @@ static inline art_indexed_child_t art_node48_prev_child( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -9778,7 
+9908,7 @@ static inline art_indexed_child_t art_node48_child_at(const art_node48_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= 256) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9798,11 +9928,12 @@ static inline art_indexed_child_t art_node48_lower_bound( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_node48_internal_validate(const art_node48_t *node, +static bool art_node48_internal_validate(const art_t *art, + const art_node48_t *node, art_internal_validate_t validator) { if (node->count <= 16) { return art_validate_fail(&validator, "Node48 has too few children"); @@ -9819,8 +9950,8 @@ static bool art_node48_internal_validate(const art_node48_t *node, &validator, "Node48 keys point to the same child index"); } - art_node_t *child = node->children[child_idx]; - if (child == NULL) { + art_ref_t child = node->children[child_idx]; + if (child == CROARING_ART_NULL_REF) { return art_validate_fail(&validator, "Node48 has a NULL child"); } used_children |= UINT64_C(1) << child_idx; @@ -9852,7 +9983,7 @@ static bool art_node48_internal_validate(const art_node48_t *node, for (int i = 0; i < 256; ++i) { if (node->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { validator.current_key[validator.depth - 1] = i; - if (!art_internal_validate_at(node->children[node->keys[i]], + if (!art_internal_validate_at(art, node->children[node->keys[i]], validator)) { return false; } @@ -9861,62 +9992,54 @@ static bool art_node48_internal_validate(const art_node48_t *node, return true; } -static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], +static art_node256_t *art_node256_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size) { + uint64_t index = art_allocate_index(art, CROARING_ART_NODE256_TYPE); art_node256_t *node = - 
(art_node256_t *)roaring_malloc(sizeof(art_node256_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE256_TYPE, prefix, - prefix_size); + ((art_node256_t *)art->nodes[CROARING_ART_NODE256_TYPE]) + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; for (size_t i = 0; i < 256; ++i) { - node->children[i] = NULL; + node->children[i] = CROARING_ART_NULL_REF; } return node; } -static void art_free_node256(art_node256_t *node) { - for (size_t i = 0; i < 256; ++i) { - if (node->children[i] != NULL) { - art_free_node(node->children[i]); - } - } - roaring_free(node); -} - -static inline art_node_t *art_node256_find_child(const art_node256_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node256_find_child(const art_node256_t *node, + art_key_chunk_t key) { return node->children[key]; } -static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, + art_ref_t child, uint8_t key) { node->children[key] = child; node->count++; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); } -static inline art_node_t *art_node256_erase(art_node256_t *node, - uint8_t key_chunk) { - node->children[key_chunk] = NULL; +static inline art_ref_t art_node256_erase(art_t *art, art_node256_t *node, + uint8_t key_chunk) { + node->children[key_chunk] = CROARING_ART_NULL_REF; node->count--; if (node->count > 48) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); } art_node48_t *new_node = - art_node48_create(node->base.prefix, node->base.prefix_size); + art_node48_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { - if (node->children[i] != NULL) { - art_node48_insert(new_node, node->children[i], i); + if (node->children[i] != CROARING_ART_NULL_REF) { + art_node48_insert(art, new_node, node->children[i], i); } 
} - roaring_free(node); - return (art_node_t *)new_node; + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); + return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE48_TYPE); } static inline void art_node256_replace(art_node256_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { node->children[key_chunk] = new_child; } @@ -9925,14 +10048,14 @@ static inline art_indexed_child_t art_node256_next_child( art_indexed_child_t indexed_child; index++; for (size_t i = index; i < 256; ++i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -9944,14 +10067,14 @@ static inline art_indexed_child_t art_node256_prev_child( index--; art_indexed_child_t indexed_child; for (int i = index; i >= 0; --i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -9959,7 +10082,7 @@ static inline art_indexed_child_t art_node256_child_at( const art_node256_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= 256) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9972,18 +10095,19 @@ static inline art_indexed_child_t art_node256_lower_bound( art_node256_t *node, art_key_chunk_t key_chunk) { art_indexed_child_t indexed_child; for (size_t i = key_chunk; i < 256; ++i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { indexed_child.index = i; indexed_child.child = 
node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_node256_internal_validate(const art_node256_t *node, +static bool art_node256_internal_validate(const art_t *art, + const art_node256_t *node, art_internal_validate_t validator) { if (node->count <= 48) { return art_validate_fail(&validator, "Node256 has too few children"); @@ -9994,7 +10118,7 @@ static bool art_node256_internal_validate(const art_node256_t *node, validator.depth++; int actual_count = 0; for (int i = 0; i < 256; ++i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { actual_count++; for (int j = i + 1; j < 256; ++j) { @@ -10005,7 +10129,7 @@ static bool art_node256_internal_validate(const art_node256_t *node, } validator.current_key[validator.depth - 1] = i; - if (!art_internal_validate_at(node->children[i], validator)) { + if (!art_internal_validate_at(art, node->children[i], validator)) { return false; } } @@ -10019,9 +10143,10 @@ static bool art_node256_internal_validate(const art_node256_t *node, // Finds the child with the given key chunk in the inner node, returns NULL if // no such child is found. -static art_node_t *art_find_child(const art_inner_node_t *node, - art_key_chunk_t key_chunk) { - switch (art_get_type(node)) { +static art_ref_t art_find_child(const art_inner_node_t *node, + art_typecode_t typecode, + art_key_chunk_t key_chunk) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: return art_node4_find_child((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: @@ -10032,14 +10157,14 @@ static art_node_t *art_find_child(const art_inner_node_t *node, return art_node256_find_child((art_node256_t *)node, key_chunk); default: assert(false); - return NULL; + return CROARING_ART_NULL_REF; } } // Replaces the child with the given key chunk in the inner node. 
-static void art_replace(art_inner_node_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { - switch (art_get_type(node)) { +static void art_replace(art_inner_node_t *node, art_typecode_t typecode, + art_key_chunk_t key_chunk, art_ref_t new_child) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: art_node4_replace((art_node4_t *)node, key_chunk, new_child); break; @@ -10059,78 +10184,112 @@ static void art_replace(art_inner_node_t *node, art_key_chunk_t key_chunk, // Erases the child with the given key chunk from the inner node, returns the // updated node (the same as the initial node if it was not shrunk). -static art_node_t *art_node_erase(art_inner_node_t *node, - art_key_chunk_t key_chunk) { - switch (art_get_type(node)) { +static art_ref_t art_node_erase(art_t *art, art_inner_node_t *node, + art_typecode_t typecode, + art_key_chunk_t key_chunk) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: - return art_node4_erase((art_node4_t *)node, key_chunk); + return art_node4_erase(art, (art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: - return art_node16_erase((art_node16_t *)node, key_chunk); + return art_node16_erase(art, (art_node16_t *)node, key_chunk); case CROARING_ART_NODE48_TYPE: - return art_node48_erase((art_node48_t *)node, key_chunk); + return art_node48_erase(art, (art_node48_t *)node, key_chunk); case CROARING_ART_NODE256_TYPE: - return art_node256_erase((art_node256_t *)node, key_chunk); + return art_node256_erase(art, (art_node256_t *)node, key_chunk); default: assert(false); - return NULL; + return CROARING_ART_NULL_REF; } } // Inserts the leaf with the given key chunk in the inner node, returns a // pointer to the (possibly expanded) node. 
-static art_node_t *art_node_insert_leaf(art_inner_node_t *node, - art_key_chunk_t key_chunk, - art_leaf_t *leaf) { - art_node_t *child = (art_node_t *)(CROARING_SET_LEAF(leaf)); - switch (art_get_type(node)) { +static art_ref_t art_node_insert_leaf(art_t *art, art_inner_node_t *node, + art_typecode_t typecode, + art_key_chunk_t key_chunk, + art_ref_t leaf) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: - return art_node4_insert((art_node4_t *)node, child, key_chunk); + return art_node4_insert(art, (art_node4_t *)node, leaf, key_chunk); case CROARING_ART_NODE16_TYPE: - return art_node16_insert((art_node16_t *)node, child, key_chunk); + return art_node16_insert(art, (art_node16_t *)node, leaf, + key_chunk); case CROARING_ART_NODE48_TYPE: - return art_node48_insert((art_node48_t *)node, child, key_chunk); + return art_node48_insert(art, (art_node48_t *)node, leaf, + key_chunk); case CROARING_ART_NODE256_TYPE: - return art_node256_insert((art_node256_t *)node, child, key_chunk); + return art_node256_insert(art, (art_node256_t *)node, leaf, + key_chunk); default: assert(false); - return NULL; + return CROARING_ART_NULL_REF; } } -// Frees the node and its children. Leaves are freed by the user. -static void art_free_node(art_node_t *node) { - if (art_is_leaf(node)) { - // We leave it up to the user to free leaves. 
- return; +static uint64_t art_node_get_next_free(const art_t *art, art_ref_t ref) { + art_node_t *node = art_deref(art, ref); + art_typecode_t typecode = art_ref_typecode(ref); + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return ((art_leaf_t *)node)->next_free; + case CROARING_ART_NODE4_TYPE: + return ((art_node4_t *)node)->next_free; + case CROARING_ART_NODE16_TYPE: + return ((art_node16_t *)node)->next_free; + case CROARING_ART_NODE48_TYPE: + return ((art_node48_t *)node)->next_free; + case CROARING_ART_NODE256_TYPE: + return ((art_node256_t *)node)->next_free; + default: + assert(false); + return 0; } - switch (art_get_type((art_inner_node_t *)node)) { +} + +static void art_node_set_next_free(art_node_t *node, art_typecode_t typecode, + uint64_t next_free) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + ((art_leaf_t *)node)->next_free = next_free; + break; case CROARING_ART_NODE4_TYPE: - art_free_node4((art_node4_t *)node); + ((art_node4_t *)node)->next_free = next_free; break; case CROARING_ART_NODE16_TYPE: - art_free_node16((art_node16_t *)node); + ((art_node16_t *)node)->next_free = next_free; break; case CROARING_ART_NODE48_TYPE: - art_free_node48((art_node48_t *)node); + ((art_node48_t *)node)->next_free = next_free; break; case CROARING_ART_NODE256_TYPE: - art_free_node256((art_node256_t *)node); + ((art_node256_t *)node)->next_free = next_free; break; default: assert(false); } } +// Marks the node as unoccopied and frees its index. +static void art_node_free(art_t *art, art_node_t *node, + art_typecode_t typecode) { + uint64_t index = art_get_index(art, node, typecode); + uint64_t next_free = art->first_free[typecode]; + art_node_set_next_free(node, typecode, next_free); + art->first_free[typecode] = index; +} + // Returns the next child in key order, or NULL if called on a leaf. // Provided index may be in the range [-1, 255]. 
static art_indexed_child_t art_node_next_child(const art_node_t *node, + art_typecode_t typecode, int index) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_next_child((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10148,13 +10307,15 @@ static art_indexed_child_t art_node_next_child(const art_node_t *node, // Returns the previous child in key order, or NULL if called on a leaf. // Provided index may be in the range [0, 256]. static art_indexed_child_t art_node_prev_child(const art_node_t *node, + art_typecode_t typecode, int index) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_prev_child((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10169,16 +10330,19 @@ static art_indexed_child_t art_node_prev_child(const art_node_t *node, } } -// Returns the child found at the provided index, or NULL if called on a leaf. -// Provided index is only valid if returned by art_node_(next|prev)_child. +// Returns the child found at the provided index, or NULL if called on a +// leaf. Provided index is only valid if returned by +// art_node_(next|prev)_child. 
static art_indexed_child_t art_node_child_at(const art_node_t *node, + art_typecode_t typecode, int index) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_child_at((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10193,16 +10357,18 @@ static art_indexed_child_t art_node_child_at(const art_node_t *node, } } -// Returns the child with the smallest key equal to or greater than the given -// key chunk, NULL if called on a leaf or no such child was found. +// Returns the child with the smallest key equal to or greater than the +// given key chunk, NULL if called on a leaf or no such child was found. static art_indexed_child_t art_node_lower_bound(const art_node_t *node, + art_typecode_t typecode, art_key_chunk_t key_chunk) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_lower_bound((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: @@ -10217,7 +10383,7 @@ static art_indexed_child_t art_node_lower_bound(const art_node_t *node, } } -// ====================== End of node-specific functions ======================= +// ====================== End of node-specific functions ====================== // Compares the given ranges of two keys, returns their relative order: // * Key range 1 < key range 2: a negative value @@ -10255,45 +10421,112 @@ static uint8_t art_common_prefix(const art_key_chunk_t 
key1[], return offset; } -// Returns a pointer to the rootmost node where the value was inserted, may not -// be equal to `node`. -static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], - uint8_t depth, art_leaf_t *new_leaf) { - if (art_is_leaf(node)) { - art_leaf_t *leaf = CROARING_CAST_LEAF(node); +/** + * Extends the array of nodes of the given typecode. Invalidates pointers into + * the array obtained by `art_deref`. + */ +static void art_extend(art_t *art, art_typecode_t typecode) { + uint64_t size = art->first_free[typecode]; + uint64_t capacity = art->capacities[typecode]; + if (size < capacity) { + return; + } + uint64_t new_capacity; + if (capacity == 0) { + new_capacity = 2; + } else if (capacity < 1024) { + new_capacity = 2 * capacity; + } else { + new_capacity = 5 * capacity / 4; + } + art->capacities[typecode] = new_capacity; + art->nodes[typecode] = roaring_realloc( + art->nodes[typecode], new_capacity * ART_NODE_SIZES[typecode]); + uint64_t increase = new_capacity - capacity; + memset(art_get_node(art, capacity, typecode), 0, + increase * ART_NODE_SIZES[typecode]); + for (uint64_t i = capacity; i < new_capacity; ++i) { + art_node_set_next_free(art_get_node(art, i, typecode), typecode, i + 1); + } +} + +/** + * Returns the next free index for the given typecode, may be equal to the + * capacity of the array. + */ +static uint64_t art_next_free(const art_t *art, art_typecode_t typecode) { + uint64_t index = art->first_free[typecode]; + return art_node_get_next_free(art, art_to_ref(index, typecode)); +} + +/** + * Marks an index for the given typecode as used, expanding the relevant node + * array if necessary. 
+ */ +static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode) { + uint64_t first_free = art->first_free[typecode]; + if (first_free == art->capacities[typecode]) { + art_extend(art, typecode); + art->first_free[typecode]++; + return first_free; + } + art->first_free[typecode] = art_next_free(art, typecode); + return first_free; +} + +// Returns a pointer to the rootmost node where the value was inserted, may +// not be equal to `node`. +static art_ref_t art_insert_at(art_t *art, art_ref_t ref, + const art_key_chunk_t key[], uint8_t depth, + art_ref_t new_leaf) { + if (art_is_leaf(ref)) { + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix( leaf->key, depth, ART_KEY_BYTES, key, depth, ART_KEY_BYTES); - // Previously this was a leaf, create an inner node instead and add both - // the existing and new leaf to it. + // Previously this was a leaf, create an inner node instead and add + // both the existing and new leaf to it. art_node_t *new_node = - (art_node_t *)art_node4_create(key + depth, common_prefix); + (art_node_t *)art_node4_create(art, key + depth, common_prefix); - new_node = art_node_insert_leaf((art_inner_node_t *)new_node, - leaf->key[depth + common_prefix], leaf); - new_node = art_node_insert_leaf((art_inner_node_t *)new_node, - key[depth + common_prefix], new_leaf); + art_ref_t new_ref = art_node_insert_leaf( + art, (art_inner_node_t *)new_node, CROARING_ART_NODE4_TYPE, + leaf->key[depth + common_prefix], ref); + new_ref = art_node_insert_leaf(art, (art_inner_node_t *)new_node, + CROARING_ART_NODE4_TYPE, + key[depth + common_prefix], new_leaf); // The new inner node is now the rootmost node. 
- return new_node; + return new_ref; } - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); // Not a leaf: inner node uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); if (common_prefix != inner_node->prefix_size) { - // Partial prefix match. Create a new internal node to hold the common + // Partial prefix match. Create a new internal node to hold the common // prefix. - art_node4_t *node4 = - art_node4_create(inner_node->prefix, common_prefix); + // We create a copy of the node's prefix as the creation of a new + // node may invalidate the prefix pointer. + art_key_chunk_t *prefix_copy = (art_key_chunk_t *)roaring_malloc( + common_prefix * sizeof(art_key_chunk_t)); + memcpy(prefix_copy, inner_node->prefix, + common_prefix * sizeof(art_key_chunk_t)); + art_node4_t *node4 = art_node4_create(art, prefix_copy, common_prefix); + roaring_free(prefix_copy); + + // Deref as a new node was created. + inner_node = (art_inner_node_t *)art_deref(art, ref); // Make the existing internal node a child of the new internal node. - node4 = (art_node4_t *)art_node4_insert( - node4, node, inner_node->prefix[common_prefix]); + art_node4_insert(art, node4, ref, inner_node->prefix[common_prefix]); - // Correct the prefix of the moved internal node, trimming off the chunk - // inserted into the new internal node. + // Deref again as a new node was created. + inner_node = (art_inner_node_t *)art_deref(art, ref); + + // Correct the prefix of the moved internal node, trimming off the + // chunk inserted into the new internal node. inner_node->prefix_size = inner_node->prefix_size - common_prefix - 1; if (inner_node->prefix_size > 0) { // Move the remaining prefix to the correct position. @@ -10302,55 +10535,67 @@ static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], } // Insert the value in the new internal node. 
- return art_node_insert_leaf(&node4->base, key[common_prefix + depth], - new_leaf); + return art_node_insert_leaf(art, (art_inner_node_t *)node4, + CROARING_ART_NODE4_TYPE, + key[common_prefix + depth], new_leaf); } // Prefix matches entirely or node has no prefix. Look for an existing // child. art_key_chunk_t key_chunk = key[depth + common_prefix]; - art_node_t *child = art_find_child(inner_node, key_chunk); - if (child != NULL) { - art_node_t *new_child = - art_insert_at(child, key, depth + common_prefix + 1, new_leaf); + art_ref_t child = + art_find_child(inner_node, art_ref_typecode(ref), key_chunk); + if (child != CROARING_ART_NULL_REF) { + art_ref_t new_child = + art_insert_at(art, child, key, depth + common_prefix + 1, new_leaf); if (new_child != child) { + // Deref again as a new node may have been created. + inner_node = (art_inner_node_t *)art_deref(art, ref); // Node type changed. - art_replace(inner_node, key_chunk, new_child); + art_replace(inner_node, art_ref_typecode(ref), key_chunk, + new_child); } - return node; + return ref; } - return art_node_insert_leaf(inner_node, key_chunk, new_leaf); + return art_node_insert_leaf(art, inner_node, art_ref_typecode(ref), + key_chunk, new_leaf); } // Erase helper struct. typedef struct art_erase_result_s { - // The rootmost node where the value was erased, may not be equal to `node`. - // If no value was removed, this is null. - art_node_t *rootmost_node; + // The rootmost node where the value was erased, may not be equal to + // the original node. If no value was removed, this is + // CROARING_ART_NULL_REF. + art_ref_t rootmost_node; + + // True if a value was erased. + bool erased; - // Value removed, null if not removed. - art_val_t *value_erased; + // Value removed, if any. + art_val_t value_erased; } art_erase_result_t; // Searches for the given key starting at `node`, erases it if found. 
-static art_erase_result_t art_erase_at(art_node_t *node, +static art_erase_result_t art_erase_at(art_t *art, art_ref_t ref, const art_key_chunk_t *key, uint8_t depth) { art_erase_result_t result; - result.rootmost_node = NULL; - result.value_erased = NULL; + result.rootmost_node = CROARING_ART_NULL_REF; + result.erased = false; - if (art_is_leaf(node)) { - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + if (art_is_leaf(ref)) { + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); if (common_prefix != ART_KEY_BYTES) { // Leaf key mismatch. return result; } - result.value_erased = (art_val_t *)leaf; + result.erased = true; + result.value_erased = leaf->val; + art_node_free(art, (art_node_t *)leaf, CROARING_ART_LEAF_TYPE); return result; } - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); @@ -10359,101 +10604,76 @@ static art_erase_result_t art_erase_at(art_node_t *node, return result; } art_key_chunk_t key_chunk = key[depth + common_prefix]; - art_node_t *child = art_find_child(inner_node, key_chunk); - if (child == NULL) { + art_ref_t child = + art_find_child(inner_node, art_ref_typecode(ref), key_chunk); + if (child == CROARING_ART_NULL_REF) { // No child with key chunk. return result; } - // Try to erase the key further down. Skip the key chunk associated with the - // child in the node. + // Try to erase the key further down. Skip the key chunk associated with + // the child in the node. 
art_erase_result_t child_result = - art_erase_at(child, key, depth + common_prefix + 1); - if (child_result.value_erased == NULL) { + art_erase_at(art, child, key, depth + common_prefix + 1); + if (!child_result.erased) { return result; } + result.erased = true; result.value_erased = child_result.value_erased; - result.rootmost_node = node; - if (child_result.rootmost_node == NULL) { + result.rootmost_node = ref; + + // Deref again as nodes may have changed location. + inner_node = (art_inner_node_t *)art_deref(art, ref); + if (child_result.rootmost_node == CROARING_ART_NULL_REF) { // Child node was fully erased, erase it from this node's children. - result.rootmost_node = art_node_erase(inner_node, key_chunk); + result.rootmost_node = + art_node_erase(art, inner_node, art_ref_typecode(ref), key_chunk); } else if (child_result.rootmost_node != child) { // Child node was not fully erased, update the pointer to it in this // node. - art_replace(inner_node, key_chunk, child_result.rootmost_node); + art_replace(inner_node, art_ref_typecode(ref), key_chunk, + child_result.rootmost_node); } return result; } -// Searches for the given key starting at `node`, returns NULL if the key was -// not found. -static art_val_t *art_find_at(const art_node_t *node, +// Searches for the given key starting at `node`, returns NULL if the key +// was not found. 
+static art_val_t *art_find_at(const art_t *art, art_ref_t ref, const art_key_chunk_t *key, uint8_t depth) { - while (!art_is_leaf(node)) { - art_inner_node_t *inner_node = (art_inner_node_t *)node; + while (!art_is_leaf(ref)) { + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); if (common_prefix != inner_node->prefix_size) { return NULL; } - art_node_t *child = - art_find_child(inner_node, key[depth + inner_node->prefix_size]); - if (child == NULL) { + art_ref_t child = art_find_child(inner_node, art_ref_typecode(ref), + key[depth + inner_node->prefix_size]); + if (child == CROARING_ART_NULL_REF) { return NULL; } - node = child; + ref = child; // Include both the prefix and the child key chunk in the depth. depth += inner_node->prefix_size + 1; } - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); if (depth >= ART_KEY_BYTES) { - return (art_val_t *)leaf; + return &leaf->val; } uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); if (common_prefix == ART_KEY_BYTES) { - return (art_val_t *)leaf; + return &leaf->val; } return NULL; } -// Returns the size in bytes of the subtrie. 
-size_t art_size_in_bytes_at(const art_node_t *node) { - if (art_is_leaf(node)) { - return 0; - } - size_t size = 0; - switch (art_get_type((art_inner_node_t *)node)) { - case CROARING_ART_NODE4_TYPE: { - size += sizeof(art_node4_t); - } break; - case CROARING_ART_NODE16_TYPE: { - size += sizeof(art_node16_t); - } break; - case CROARING_ART_NODE48_TYPE: { - size += sizeof(art_node48_t); - } break; - case CROARING_ART_NODE256_TYPE: { - size += sizeof(art_node256_t); - } break; - default: - assert(false); - break; - } - art_indexed_child_t indexed_child = art_node_next_child(node, -1); - while (indexed_child.child != NULL) { - size += art_size_in_bytes_at(indexed_child.child); - indexed_child = art_node_next_child(node, indexed_child.index); - } - return size; -} - -static void art_node_print_type(const art_node_t *node) { - if (art_is_leaf(node)) { - printf("Leaf"); - return; - } - switch (art_get_type((art_inner_node_t *)node)) { +static void art_node_print_type(art_ref_t ref) { + switch (art_ref_typecode(ref)) { + case CROARING_ART_LEAF_TYPE: + printf("Leaf"); + return; case CROARING_ART_NODE4_TYPE: printf("Node4"); return; @@ -10472,10 +10692,10 @@ static void art_node_print_type(const art_node_t *node) { } } -void art_node_printf(const art_node_t *node, uint8_t depth) { - if (art_is_leaf(node)) { +void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { + if (art_is_leaf(ref)) { printf("{ type: Leaf, key: "); - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); for (size_t i = 0; i < ART_KEY_BYTES; ++i) { printf("%02x", leaf->key[i]); } @@ -10487,10 +10707,10 @@ void art_node_printf(const art_node_t *node, uint8_t depth) { printf("%*s", depth, ""); printf("type: "); - art_node_print_type(node); + art_node_print_type(ref); printf("\n"); - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); printf("%*s", depth, ""); 
printf("prefix_size: %d\n", inner_node->prefix_size); @@ -10501,41 +10721,42 @@ void art_node_printf(const art_node_t *node, uint8_t depth) { } printf("\n"); - switch (art_get_type(inner_node)) { + switch (art_ref_typecode(ref)) { case CROARING_ART_NODE4_TYPE: { - art_node4_t *node4 = (art_node4_t *)node; + art_node4_t *node4 = (art_node4_t *)inner_node; for (uint8_t i = 0; i < node4->count; ++i) { printf("%*s", depth, ""); printf("key: %02x ", node4->keys[i]); - art_node_printf(node4->children[i], depth); + art_node_printf(art, node4->children[i], depth); } } break; case CROARING_ART_NODE16_TYPE: { - art_node16_t *node16 = (art_node16_t *)node; + art_node16_t *node16 = (art_node16_t *)inner_node; for (uint8_t i = 0; i < node16->count; ++i) { printf("%*s", depth, ""); printf("key: %02x ", node16->keys[i]); - art_node_printf(node16->children[i], depth); + art_node_printf(art, node16->children[i], depth); } } break; case CROARING_ART_NODE48_TYPE: { - art_node48_t *node48 = (art_node48_t *)node; + art_node48_t *node48 = (art_node48_t *)inner_node; for (int i = 0; i < 256; ++i) { if (node48->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { printf("%*s", depth, ""); printf("key: %02x ", i); printf("child: %02x ", node48->keys[i]); - art_node_printf(node48->children[node48->keys[i]], depth); + art_node_printf(art, node48->children[node48->keys[i]], + depth); } } } break; case CROARING_ART_NODE256_TYPE: { - art_node256_t *node256 = (art_node256_t *)node; + art_node256_t *node256 = (art_node256_t *)inner_node; for (int i = 0; i < 256; ++i) { - if (node256->children[i] != NULL) { + if (node256->children[i] != CROARING_ART_NULL_REF) { printf("%*s", depth, ""); printf("key: %02x ", i); - art_node_printf(node256->children[i], depth); + art_node_printf(art, node256->children[i], depth); } } } break; @@ -10548,118 +10769,310 @@ void art_node_printf(const art_node_t *node, uint8_t depth) { printf("}\n"); } -void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val) { - 
art_leaf_t *leaf = (art_leaf_t *)val; - art_leaf_populate(leaf, key); - if (art->root == NULL) { - art->root = (art_node_t *)CROARING_SET_LEAF(leaf); - return; - } - art->root = art_insert_at(art->root, key, 0, leaf); -} - -art_val_t *art_erase(art_t *art, const art_key_chunk_t *key) { - if (art->root == NULL) { - return NULL; - } - art_erase_result_t result = art_erase_at(art->root, key, 0); - if (result.value_erased == NULL) { - return NULL; - } - art->root = result.rootmost_node; - return result.value_erased; -} - -art_val_t *art_find(const art_t *art, const art_key_chunk_t *key) { - if (art->root == NULL) { - return NULL; - } - return art_find_at(art->root, key, 0); +/** + * Moves the node at `ref` to the earliest free index before it (if any), + * returns the new ref. Assumes `art->first_free[typecode]` points to the + * smallest free index. + */ +static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { + uint64_t idx = art_ref_index(ref); + art_typecode_t typecode = art_ref_typecode(ref); + uint64_t first_free = art->first_free[typecode]; + assert(idx != first_free); + if (idx < first_free) { + return ref; + } + uint64_t from = idx; + uint64_t to = first_free; + uint64_t next_free = art_node_get_next_free(art, art_to_ref(to, typecode)); + memcpy(art_get_node(art, to, typecode), art_get_node(art, from, typecode), + ART_NODE_SIZES[typecode]); + + // With an integer representing the next free index, and an `x` representing + // an occupied index, assume the following scenario at the start of this + // function: + // nodes = [1,2,5,x,x] + // first_free = 0 + // + // We just moved a node from index 3 to 0: + // nodes = [x,2,5,?,x] + // + // We need to modify the free list so that the free indices are ascending. + // This can be done by traversing the list until we find a node with a + // `next_free` greater than the index we copied the node from, and inserting + // the new index in between. 
This leads to the following: + // nodes = [x,2,3,5,x] + // first_free = 1 + uint64_t initial_next_free = next_free; + uint64_t current = next_free; + while (next_free < from) { + current = next_free; + next_free = + art_node_get_next_free(art, art_to_ref(next_free, typecode)); + } + art_node_set_next_free(art_deref(art, ref), typecode, next_free); + if (current < from) { + art_node_set_next_free(art_get_node(art, current, typecode), typecode, + from); + } + art->first_free[typecode] = + from < initial_next_free ? from : initial_next_free; + return art_to_ref(to, typecode); } -bool art_is_empty(const art_t *art) { return art->root == NULL; } - -void art_free(art_t *art) { - if (art->root == NULL) { - return; +/** + * Sorts the free lists pointed to by art->first_free in ascending index order. + */ +static void art_sort_free_lists(art_t *art) { + for (art_typecode_t type = CROARING_ART_LEAF_TYPE; + type <= CROARING_ART_NODE256_TYPE; ++type) { + bool *free_indices = + (bool *)roaring_calloc(art->capacities[type], sizeof(bool)); + + for (uint64_t i = art->first_free[type]; i < art->capacities[type]; + i = art_node_get_next_free(art, art_to_ref(i, type))) { + free_indices[i] = true; + } + + uint64_t first_free = art->capacities[type]; + for (uint64_t i = art->capacities[type]; i > 0; --i) { + uint64_t index = i - 1; + if (free_indices[index]) { + art_node_set_next_free(art_get_node(art, index, type), type, + first_free); + first_free = index; + } + } + art->first_free[type] = first_free; + roaring_free(free_indices); } - art_free_node(art->root); } -size_t art_size_in_bytes(const art_t *art) { - size_t size = sizeof(art_t); - if (art->root != NULL) { - size += art_size_in_bytes_at(art->root); +/** + * Shrinks all node arrays to `first_free`. Assumes all indices after + * `first_free` are unused. 
+ */ +static size_t art_shrink_node_arrays(art_t *art) { + size_t freed = 0; + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->first_free[t] < art->capacities[t]) { + uint64_t new_capacity = art->first_free[t]; + art->nodes[t] = roaring_realloc(art->nodes[t], + new_capacity * ART_NODE_SIZES[t]); + freed += (art->capacities[t] - new_capacity) * ART_NODE_SIZES[t]; + art->capacities[t] = new_capacity; + } } - return size; + return freed; } -void art_printf(const art_t *art) { - if (art->root == NULL) { +/** + * Traverses the ART, moving nodes to earlier free indices and modifying their + * references along the way. + */ +static void art_shrink_at(art_t *art, art_ref_t ref) { + if (art_is_leaf(ref)) { return; } - art_node_printf(art->root, 0); -} - -// Returns the current node that the iterator is positioned at. -static inline art_node_t *art_iterator_node(art_iterator_t *iterator) { - return iterator->frames[iterator->frame].node; -} - -// Sets the iterator key and value to the leaf's key and value. Always returns -// true for convenience. 
-static inline bool art_iterator_valid_loc(art_iterator_t *iterator, - art_leaf_t *leaf) { - iterator->frames[iterator->frame].node = CROARING_SET_LEAF(leaf); - iterator->frames[iterator->frame].index_in_node = 0; + switch (art_ref_typecode(ref)) { + case CROARING_ART_NODE4_TYPE: { + art_node4_t *node4 = (art_node4_t *)art_deref(art, ref); + for (uint8_t i = 0; i < node4->count; ++i) { + node4->children[i] = + art_move_node_to_shrink(art, node4->children[i]); + art_shrink_at(art, node4->children[i]); + } + } break; + case CROARING_ART_NODE16_TYPE: { + art_node16_t *node16 = (art_node16_t *)art_deref(art, ref); + for (uint8_t i = 0; i < node16->count; ++i) { + node16->children[i] = + art_move_node_to_shrink(art, node16->children[i]); + art_shrink_at(art, node16->children[i]); + } + } break; + case CROARING_ART_NODE48_TYPE: { + art_node48_t *node48 = (art_node48_t *)art_deref(art, ref); + for (int i = 0; i < 256; ++i) { + if (node48->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { + uint8_t idx = node48->keys[i]; + node48->children[idx] = + art_move_node_to_shrink(art, node48->children[idx]); + art_shrink_at(art, node48->children[idx]); + } + } + } break; + case CROARING_ART_NODE256_TYPE: { + art_node256_t *node256 = (art_node256_t *)art_deref(art, ref); + for (int i = 0; i < 256; ++i) { + if (node256->children[i] != CROARING_ART_NULL_REF) { + node256->children[i] = + art_move_node_to_shrink(art, node256->children[i]); + art_shrink_at(art, node256->children[i]); + } + } + } break; + default: + assert(false); + break; + } +} + +void art_init_cleared(art_t *art) { + art->root = CROARING_ART_NULL_REF; + memset(art->first_free, 0, sizeof(art->first_free)); + memset(art->capacities, 0, sizeof(art->capacities)); + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + art->nodes[t] = NULL; + } +} + +size_t art_shrink_to_fit(art_t *art) { + if (art_is_shrunken(art)) { + return 0; + } + if (art->root != CROARING_ART_NULL_REF) { + 
art_sort_free_lists(art); + art->root = art_move_node_to_shrink(art, art->root); + art_shrink_at(art, art->root); + } + return art_shrink_node_arrays(art); +} + +bool art_is_shrunken(const art_t *art) { + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->first_free[t] != art->capacities[t]) { + return false; + } + } + return true; +} + +art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val) { + art_ref_t leaf = art_leaf_create(art, key, val); + if (art->root == CROARING_ART_NULL_REF) { + art->root = leaf; + return &((art_leaf_t *)art_deref(art, leaf))->val; + } + art->root = art_insert_at(art, art->root, key, 0, leaf); + return &((art_leaf_t *)art_deref(art, leaf))->val; +} + +bool art_erase(art_t *art, const art_key_chunk_t *key, art_val_t *erased_val) { + art_val_t erased_val_local; + if (erased_val == NULL) { + erased_val = &erased_val_local; + } + if (art->root == CROARING_ART_NULL_REF) { + return false; + } + art_erase_result_t result = art_erase_at(art, art->root, key, 0); + if (!result.erased) { + return false; + } + art->root = result.rootmost_node; + *erased_val = result.value_erased; + return true; +} + +art_val_t *art_find(const art_t *art, const art_key_chunk_t *key) { + if (art->root == CROARING_ART_NULL_REF) { + return NULL; + } + return art_find_at(art, art->root, key, 0); +} + +bool art_is_empty(const art_t *art) { + return art->root == CROARING_ART_NULL_REF; +} + +void art_free(art_t *art) { + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + roaring_free(art->nodes[t]); + } +} + +void art_printf(const art_t *art) { + if (art->root == CROARING_ART_NULL_REF) { + return; + } + art_node_printf(art, art->root, 0); +} + +// Returns a reference to the current node that the iterator is positioned +// at. 
+static inline art_ref_t art_iterator_ref(art_iterator_t *iterator) { + return iterator->frames[iterator->frame].ref; +} + +// Returns the current node that the iterator is positioned at. +static inline art_node_t *art_iterator_node(art_iterator_t *iterator) { + return art_deref(iterator->art, art_iterator_ref(iterator)); +} + +// Sets the iterator key and value to the leaf's key and value. Always +// returns true for convenience. +static inline bool art_iterator_valid_loc(art_iterator_t *iterator, + art_ref_t leaf_ref) { + iterator->frames[iterator->frame].ref = leaf_ref; + iterator->frames[iterator->frame].index_in_node = 0; + art_leaf_t *leaf = (art_leaf_t *)art_deref(iterator->art, leaf_ref); memcpy(iterator->key, leaf->key, ART_KEY_BYTES); - iterator->value = (art_val_t *)leaf; + iterator->value = &leaf->val; return true; } -// Invalidates the iterator key and value. Always returns false for convenience. +// Invalidates the iterator key and value. Always returns false for +// convenience. static inline bool art_iterator_invalid_loc(art_iterator_t *iterator) { memset(iterator->key, 0, ART_KEY_BYTES); iterator->value = NULL; return false; } -// Moves the iterator one level down in the tree, given a node at the current -// level and the index of the child that we're going down to. +// Moves the iterator one level down in the tree, given a node at the +// current level and the index of the child that we're going down to. // // Note: does not set the index at the new level. 
-static void art_iterator_down(art_iterator_t *iterator, - const art_inner_node_t *node, +static void art_iterator_down(art_iterator_t *iterator, art_ref_t ref, uint8_t index_in_node) { - iterator->frames[iterator->frame].node = (art_node_t *)node; + iterator->frames[iterator->frame].ref = ref; iterator->frames[iterator->frame].index_in_node = index_in_node; iterator->frame++; - art_indexed_child_t indexed_child = - art_node_child_at((art_node_t *)node, index_in_node); - assert(indexed_child.child != NULL); - iterator->frames[iterator->frame].node = indexed_child.child; + art_inner_node_t *node = (art_inner_node_t *)art_deref(iterator->art, ref); + art_indexed_child_t indexed_child = art_node_child_at( + (art_node_t *)node, art_ref_typecode(ref), index_in_node); + assert(indexed_child.child != CROARING_ART_NULL_REF); + iterator->frames[iterator->frame].ref = indexed_child.child; iterator->depth += node->prefix_size + 1; } -// Moves the iterator to the next/previous child of the current node. Returns -// the child moved to, or NULL if there is no neighboring child. -static art_node_t *art_iterator_neighbor_child( - art_iterator_t *iterator, const art_inner_node_t *inner_node, - bool forward) { +// Moves the iterator to the next/previous child of the current node. +// Returns the child moved to, or NULL if there is no neighboring child. 
+static art_ref_t art_iterator_neighbor_child(art_iterator_t *iterator, + bool forward) { art_iterator_frame_t frame = iterator->frames[iterator->frame]; + art_node_t *node = art_deref(iterator->art, frame.ref); art_indexed_child_t indexed_child; if (forward) { - indexed_child = art_node_next_child(frame.node, frame.index_in_node); + indexed_child = art_node_next_child(node, art_ref_typecode(frame.ref), + frame.index_in_node); } else { - indexed_child = art_node_prev_child(frame.node, frame.index_in_node); + indexed_child = art_node_prev_child(node, art_ref_typecode(frame.ref), + frame.index_in_node); } - if (indexed_child.child != NULL) { - art_iterator_down(iterator, inner_node, indexed_child.index); + if (indexed_child.child != CROARING_ART_NULL_REF) { + art_iterator_down(iterator, frame.ref, indexed_child.index); } return indexed_child.child; } -// Moves the iterator one level up in the tree, returns false if not possible. +// Moves the iterator one level up in the tree, returns false if not +// possible. static bool art_iterator_up(art_iterator_t *iterator) { if (iterator->frame == 0) { return false; @@ -10671,8 +11084,8 @@ static bool art_iterator_up(art_iterator_t *iterator) { return true; } -// Moves the iterator one level, followed by a move to the next / previous leaf. -// Sets the status of the iterator. +// Moves the iterator one level, followed by a move to the next / previous +// leaf. Sets the status of the iterator. static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) { if (!art_iterator_up(iterator)) { // We're at the root. @@ -10683,27 +11096,29 @@ static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) { // Initializes the iterator at the first / last leaf of the given node. // Returns true for convenience. 
-static bool art_node_init_iterator(const art_node_t *node, - art_iterator_t *iterator, bool first) { - while (!art_is_leaf(node)) { +static bool art_node_init_iterator(art_ref_t ref, art_iterator_t *iterator, + bool first) { + while (!art_is_leaf(ref)) { + art_node_t *node = art_deref(iterator->art, ref); art_indexed_child_t indexed_child; if (first) { - indexed_child = art_node_next_child(node, -1); + indexed_child = + art_node_next_child(node, art_ref_typecode(ref), -1); } else { - indexed_child = art_node_prev_child(node, 256); + indexed_child = + art_node_prev_child(node, art_ref_typecode(ref), 256); } - art_iterator_down(iterator, (art_inner_node_t *)node, - indexed_child.index); - node = indexed_child.child; + art_iterator_down(iterator, ref, indexed_child.index); + ref = indexed_child.child; } // We're at a leaf. - iterator->frames[iterator->frame].node = (art_node_t *)node; + iterator->frames[iterator->frame].ref = ref; iterator->frames[iterator->frame].index_in_node = 0; // Should not matter. - return art_iterator_valid_loc(iterator, CROARING_CAST_LEAF(node)); + return art_iterator_valid_loc(iterator, ref); } bool art_iterator_move(art_iterator_t *iterator, bool forward) { - if (art_is_leaf(art_iterator_node(iterator))) { + if (art_is_leaf(art_iterator_ref(iterator))) { bool went_up = art_iterator_up(iterator); if (!went_up) { // This leaf is the root, we're done. @@ -10711,67 +11126,69 @@ bool art_iterator_move(art_iterator_t *iterator, bool forward) { } } // Advance within inner node. - art_node_t *neighbor_child = art_iterator_neighbor_child( - iterator, (art_inner_node_t *)art_iterator_node(iterator), forward); - if (neighbor_child != NULL) { - // There is another child at this level, go down to the first or last - // leaf. + art_ref_t neighbor_child = art_iterator_neighbor_child(iterator, forward); + if (neighbor_child != CROARING_ART_NULL_REF) { + // There is another child at this level, go down to the first or + // last leaf. 
return art_node_init_iterator(neighbor_child, iterator, forward); } // No more children at this level, go up. return art_iterator_up_and_move(iterator, forward); } -// Assumes the iterator is positioned at a node with an equal prefix path up to -// the depth of the iterator. -static bool art_node_iterator_lower_bound(const art_node_t *node, +// Assumes the iterator is positioned at a node with an equal prefix path up +// to the depth of the iterator. +static bool art_node_iterator_lower_bound(art_ref_t ref, art_iterator_t *iterator, const art_key_chunk_t key[]) { - while (!art_is_leaf(node)) { - art_inner_node_t *inner_node = (art_inner_node_t *)node; + while (!art_is_leaf(ref)) { + art_inner_node_t *inner_node = + (art_inner_node_t *)art_deref(iterator->art, ref); int prefix_comparison = art_compare_prefix(inner_node->prefix, 0, key, iterator->depth, inner_node->prefix_size); if (prefix_comparison < 0) { // Prefix so far has been equal, but we've found a smaller key. - // Since we take the lower bound within each node, we can return the - // next leaf. + // Since we take the lower bound within each node, we can return + // the next leaf. return art_iterator_up_and_move(iterator, true); } else if (prefix_comparison > 0) { - // No key equal to the key we're looking for, return the first leaf. - return art_node_init_iterator(node, iterator, true); + // No key equal to the key we're looking for, return the first + // leaf. + return art_node_init_iterator(ref, iterator, true); } // Prefix is equal, move to lower bound child. art_key_chunk_t key_chunk = key[iterator->depth + inner_node->prefix_size]; - art_indexed_child_t indexed_child = - art_node_lower_bound(node, key_chunk); - if (indexed_child.child == NULL) { + art_indexed_child_t indexed_child = art_node_lower_bound( + (art_node_t *)inner_node, art_ref_typecode(ref), key_chunk); + if (indexed_child.child == CROARING_ART_NULL_REF) { // Only smaller keys among children. 
return art_iterator_up_and_move(iterator, true); } if (indexed_child.key_chunk > key_chunk) { // Only larger children, return the first larger child. - art_iterator_down(iterator, inner_node, indexed_child.index); + art_iterator_down(iterator, ref, indexed_child.index); return art_node_init_iterator(indexed_child.child, iterator, true); } // We found a child with an equal prefix. - art_iterator_down(iterator, inner_node, indexed_child.index); - node = indexed_child.child; + art_iterator_down(iterator, ref, indexed_child.index); + ref = indexed_child.child; } - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + art_leaf_t *leaf = (art_leaf_t *)art_deref(iterator->art, ref); if (art_compare_keys(leaf->key, key) >= 0) { // Leaf has an equal or larger key. - return art_iterator_valid_loc(iterator, leaf); + return art_iterator_valid_loc(iterator, ref); } - // Leaf has an equal prefix, but the full key is smaller. Move to the next - // leaf. + // Leaf has an equal prefix, but the full key is smaller. Move to the + // next leaf. return art_iterator_up_and_move(iterator, true); } -art_iterator_t art_init_iterator(const art_t *art, bool first) { +art_iterator_t art_init_iterator(art_t *art, bool first) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - if (art->root == NULL) { + iterator.art = art; + if (art->root == CROARING_ART_NULL_REF) { return iterator; } art_node_init_iterator(art->root, &iterator, first); @@ -10789,12 +11206,12 @@ bool art_iterator_prev(art_iterator_t *iterator) { bool art_iterator_lower_bound(art_iterator_t *iterator, const art_key_chunk_t *key) { if (iterator->value == NULL) { - // We're beyond the end / start of the ART so the iterator does not have - // a valid key. Start from the root. + // We're beyond the end / start of the ART so the iterator does not + // have a valid key. Start from the root. 
iterator->frame = 0; iterator->depth = 0; - art_node_t *root = art_iterator_node(iterator); - if (root == NULL) { + art_ref_t root = art_iterator_ref(iterator); + if (root == CROARING_ART_NULL_REF) { return false; } return art_node_iterator_lower_bound(root, iterator, key); @@ -10809,7 +11226,7 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, // Only smaller keys found. return art_iterator_invalid_loc(iterator); } else { - return art_node_init_iterator(art_iterator_node(iterator), + return art_node_init_iterator(art_iterator_ref(iterator), iterator, true); } } @@ -10822,24 +11239,26 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, iterator->depth + inner_node->prefix_size); } if (compare_result > 0) { - return art_node_init_iterator(art_iterator_node(iterator), iterator, + return art_node_init_iterator(art_iterator_ref(iterator), iterator, true); } - return art_node_iterator_lower_bound(art_iterator_node(iterator), iterator, + return art_node_iterator_lower_bound(art_iterator_ref(iterator), iterator, key); } -art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) { +art_iterator_t art_lower_bound(art_t *art, const art_key_chunk_t *key) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - if (art->root != NULL) { + iterator.art = art; + if (art->root != CROARING_ART_NULL_REF) { art_node_iterator_lower_bound(art->root, &iterator, key); } return iterator; } -art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { +art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - if (art->root != NULL) { + iterator.art = art; + if (art->root != CROARING_ART_NULL_REF) { if (art_node_iterator_lower_bound(art->root, &iterator, key) && art_compare_keys(iterator.key, key) == 0) { art_iterator_next(&iterator); @@ -10848,90 +11267,100 @@ art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { return iterator; } -void 
art_iterator_insert(art_t *art, art_iterator_t *iterator, - const art_key_chunk_t *key, art_val_t *val) { +void art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, + art_val_t val) { // TODO: This can likely be faster. - art_insert(art, key, val); - assert(art->root != NULL); + art_insert(iterator->art, key, val); + assert(iterator->art->root != CROARING_ART_NULL_REF); iterator->frame = 0; iterator->depth = 0; - art_node_iterator_lower_bound(art->root, iterator, key); + art_node_iterator_lower_bound(iterator->art->root, iterator, key); } -// TODO: consider keeping `art_t *art` in the iterator. -art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator) { +bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val) { + art_val_t erased_val_local; + if (erased_val == NULL) { + erased_val = &erased_val_local; + } if (iterator->value == NULL) { - return NULL; + return false; } art_key_chunk_t initial_key[ART_KEY_BYTES]; memcpy(initial_key, iterator->key, ART_KEY_BYTES); - art_val_t *value_erased = iterator->value; + *erased_val = *iterator->value; + // Erase the leaf. + art_node_free(iterator->art, art_iterator_node(iterator), + art_ref_typecode(art_iterator_ref(iterator))); bool went_up = art_iterator_up(iterator); if (!went_up) { // We're erasing the root. - art->root = NULL; + iterator->art->root = CROARING_ART_NULL_REF; art_iterator_invalid_loc(iterator); - return value_erased; + return true; } - // Erase the leaf. + // Erase the leaf in its parent. 
+ art_ref_t parent_ref = art_iterator_ref(iterator); art_inner_node_t *parent_node = (art_inner_node_t *)art_iterator_node(iterator); art_key_chunk_t key_chunk_in_parent = iterator->key[iterator->depth + parent_node->prefix_size]; - art_node_t *new_parent_node = - art_node_erase(parent_node, key_chunk_in_parent); + art_ref_t new_parent_ref = + art_node_erase(iterator->art, parent_node, art_ref_typecode(parent_ref), + key_chunk_in_parent); - if (new_parent_node != ((art_node_t *)parent_node)) { + if (new_parent_ref != parent_ref) { // Replace the pointer to the inner node we erased from in its // parent (it may be a leaf now). - iterator->frames[iterator->frame].node = new_parent_node; + iterator->frames[iterator->frame].ref = new_parent_ref; went_up = art_iterator_up(iterator); if (went_up) { + art_ref_t grandparent_ref = art_iterator_ref(iterator); art_inner_node_t *grandparent_node = (art_inner_node_t *)art_iterator_node(iterator); art_key_chunk_t key_chunk_in_grandparent = iterator->key[iterator->depth + grandparent_node->prefix_size]; - art_replace(grandparent_node, key_chunk_in_grandparent, - new_parent_node); + art_replace(grandparent_node, art_ref_typecode(grandparent_ref), + key_chunk_in_grandparent, new_parent_ref); } else { // We were already at the rootmost node. - art->root = new_parent_node; + iterator->art->root = new_parent_ref; } } iterator->frame = 0; iterator->depth = 0; - // Do a lower bound search for the initial key, which will find the first - // greater key if it exists. This can likely be mildly faster if we instead - // start from the current position. - art_node_iterator_lower_bound(art->root, iterator, initial_key); - return value_erased; + // Do a lower bound search for the initial key, which will find the + // first greater key if it exists. This can likely be mildly faster if + // we instead start from the current position. 
+ art_node_iterator_lower_bound(iterator->art->root, iterator, initial_key); + return true; } -static bool art_internal_validate_at(const art_node_t *node, +static bool art_internal_validate_at(const art_t *art, art_ref_t ref, art_internal_validate_t validator) { - if (node == NULL) { + if (ref == CROARING_ART_NULL_REF) { return art_validate_fail(&validator, "node is null"); } - if (art_is_leaf(node)) { - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + if (art_is_leaf(ref)) { + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); if (art_compare_prefix(leaf->key, 0, validator.current_key, 0, validator.depth) != 0) { - return art_validate_fail( - &validator, - "leaf key does not match its position's prefix in the tree"); + return art_validate_fail(&validator, + "leaf key does not match its " + "position's prefix in the tree"); } if (validator.validate_cb != NULL && - !validator.validate_cb(leaf, validator.reason)) { + !validator.validate_cb(leaf->val, validator.reason, + validator.context)) { if (*validator.reason == NULL) { *validator.reason = "leaf validation failed"; } return false; } } else { - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); if (validator.depth + inner_node->prefix_size + 1 > ART_KEY_BYTES) { return art_validate_fail(&validator, @@ -10941,28 +11370,28 @@ static bool art_internal_validate_at(const art_node_t *node, inner_node->prefix_size); validator.depth += inner_node->prefix_size; - switch (inner_node->typecode) { + switch (art_ref_typecode(ref)) { case CROARING_ART_NODE4_TYPE: - if (!art_node4_internal_validate((art_node4_t *)inner_node, + if (!art_node4_internal_validate(art, (art_node4_t *)inner_node, validator)) { return false; } break; case CROARING_ART_NODE16_TYPE: - if (!art_node16_internal_validate((art_node16_t *)inner_node, - validator)) { + if (!art_node16_internal_validate( + art, (art_node16_t *)inner_node, validator)) { return false; } break; case 
CROARING_ART_NODE48_TYPE: - if (!art_node48_internal_validate((art_node48_t *)inner_node, - validator)) { + if (!art_node48_internal_validate( + art, (art_node48_t *)inner_node, validator)) { return false; } break; case CROARING_ART_NODE256_TYPE: - if (!art_node256_internal_validate((art_node256_t *)inner_node, - validator)) { + if (!art_node256_internal_validate( + art, (art_node256_t *)inner_node, validator)) { return false; } break; @@ -10974,551 +11403,190 @@ static bool art_internal_validate_at(const art_node_t *node, } bool art_internal_validate(const art_t *art, const char **reason, - art_validate_cb_t validate_cb) { + art_validate_cb_t validate_cb, void *context) { const char *reason_local; if (reason == NULL) { // Always allow assigning through *reason reason = &reason_local; } *reason = NULL; - if (art->root == NULL) { + if (art->root == CROARING_ART_NULL_REF) { return true; } art_internal_validate_t validator = { .reason = reason, .validate_cb = validate_cb, + .context = context, .depth = 0, - .current_key = {0}, + .current_key = CROARING_ZERO_INITIALIZER, }; - return art_internal_validate_at(art->root, validator); -} - -#ifdef __cplusplus -} // extern "C" -} // namespace roaring -} // namespace internal -#endif -/* end file src/art/art.c */ -/* begin file src/bitset.c */ -#include -#include -#include -#include -#include - - -#ifdef __cplusplus -extern "C" { -namespace roaring { -namespace internal { -#endif - -extern inline void bitset_print(const bitset_t *b); -extern inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator, - void *ptr); -extern inline size_t bitset_next_set_bits(const bitset_t *bitset, - size_t *buffer, size_t capacity, - size_t *startfrom); -extern inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag); -extern inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i); -extern inline void bitset_set(bitset_t *bitset, size_t i); -extern inline bool bitset_get(const bitset_t *bitset, size_t i); 
-extern inline size_t bitset_size_in_words(const bitset_t *bitset); -extern inline size_t bitset_size_in_bits(const bitset_t *bitset); -extern inline size_t bitset_size_in_bytes(const bitset_t *bitset); - -/* Create a new bitset. Return NULL in case of failure. */ -bitset_t *bitset_create(void) { - bitset_t *bitset = NULL; - /* Allocate the bitset itself. */ - if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { - return NULL; + for (art_typecode_t type = CROARING_ART_LEAF_TYPE; + type <= CROARING_ART_NODE256_TYPE; ++type) { + uint64_t capacity = art->capacities[type]; + for (uint64_t i = 0; i < capacity; ++i) { + uint64_t first_free = art->first_free[type]; + if (first_free > capacity) { + return art_validate_fail(&validator, "first_free > capacity"); + } + } } - bitset->array = NULL; - bitset->arraysize = 0; - bitset->capacity = 0; - return bitset; + return art_internal_validate_at(art, art->root, validator); } -/* Create a new bitset able to contain size bits. Return NULL in case of - * failure. */ -bitset_t *bitset_create_with_capacity(size_t size) { - bitset_t *bitset = NULL; - /* Allocate the bitset itself. 
*/ - if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { - return NULL; +CROARING_STATIC_ASSERT(alignof(art_leaf_t) == alignof(art_node4_t), + "Serialization assumes node type alignment is equal"); +CROARING_STATIC_ASSERT(alignof(art_leaf_t) == alignof(art_node16_t), + "Serialization assumes node type alignment is equal"); +CROARING_STATIC_ASSERT(alignof(art_leaf_t) == alignof(art_node48_t), + "Serialization assumes node type alignment is equal"); +CROARING_STATIC_ASSERT(alignof(art_leaf_t) == alignof(art_node256_t), + "Serialization assumes node type alignment is equal"); + +size_t art_size_in_bytes(const art_t *art) { + if (!art_is_shrunken(art)) { + return 0; } - bitset->arraysize = - (size + sizeof(uint64_t) * 8 - 1) / (sizeof(uint64_t) * 8); - bitset->capacity = bitset->arraysize; - if ((bitset->array = (uint64_t *)roaring_calloc( - bitset->arraysize, sizeof(uint64_t))) == NULL) { - roaring_free(bitset); - return NULL; + // Root. + size_t size = sizeof(art->root); + // Node counts. + size += sizeof(art->capacities); + // Alignment for leaves. The rest of the nodes are aligned the same way. + size += + ((size + alignof(art_leaf_t) - 1) & ~(alignof(art_leaf_t) - 1)) - size; + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + size += art->capacities[t] * ART_NODE_SIZES[t]; } - return bitset; + return size; } -/* Create a copy */ -bitset_t *bitset_copy(const bitset_t *bitset) { - bitset_t *copy = NULL; - /* Allocate the bitset itself. 
*/ - if ((copy = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { - return NULL; +size_t art_serialize(const art_t *art, char *buf) { + if (buf == NULL) { + return 0; } - memcpy(copy, bitset, sizeof(bitset_t)); - copy->capacity = copy->arraysize; - if ((copy->array = (uint64_t *)roaring_malloc(sizeof(uint64_t) * - bitset->arraysize)) == NULL) { - roaring_free(copy); - return NULL; + if (!art_is_shrunken(art)) { + return 0; } - memcpy(copy->array, bitset->array, sizeof(uint64_t) * bitset->arraysize); - return copy; -} + const char *initial_buf = buf; -void bitset_clear(bitset_t *bitset) { - memset(bitset->array, 0, sizeof(uint64_t) * bitset->arraysize); -} + // Root. + memcpy(buf, &art->root, sizeof(art->root)); + buf += sizeof(art->root); -void bitset_fill(bitset_t *bitset) { - memset(bitset->array, 0xff, sizeof(uint64_t) * bitset->arraysize); -} + // Node counts. + memcpy(buf, art->capacities, sizeof(art->capacities)); + buf += sizeof(art->capacities); -void bitset_shift_left(bitset_t *bitset, size_t s) { - size_t extra_words = s / 64; - int inword_shift = s % 64; - size_t as = bitset->arraysize; - if (inword_shift == 0) { - bitset_resize(bitset, as + extra_words, false); - // could be done with a memmove - for (size_t i = as + extra_words; i > extra_words; i--) { - bitset->array[i - 1] = bitset->array[i - 1 - extra_words]; - } - } else { - bitset_resize(bitset, as + extra_words + 1, true); - bitset->array[as + extra_words] = - bitset->array[as - 1] >> (64 - inword_shift); - for (size_t i = as + extra_words; i >= extra_words + 2; i--) { - bitset->array[i - 1] = - (bitset->array[i - 1 - extra_words] << inword_shift) | - (bitset->array[i - 2 - extra_words] >> (64 - inword_shift)); + // Alignment for leaves. The rest of the nodes are aligned the same way. 
+ size_t align_bytes = + CROARING_ART_ALIGN_SIZE_RELATIVE(buf, initial_buf, alignof(art_leaf_t)); + memset(buf, 0, align_bytes); + buf += align_bytes; + + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->capacities[t] > 0) { + size_t size = art->capacities[t] * ART_NODE_SIZES[t]; + memcpy(buf, art->nodes[t], size); + buf += size; } - bitset->array[extra_words] = bitset->array[0] << inword_shift; } - for (size_t i = 0; i < extra_words; i++) { - bitset->array[i] = 0; - } -} -void bitset_shift_right(bitset_t *bitset, size_t s) { - size_t extra_words = s / 64; - int inword_shift = s % 64; - size_t as = bitset->arraysize; - if (inword_shift == 0) { - // could be done with a memmove - for (size_t i = 0; i < as - extra_words; i++) { - bitset->array[i] = bitset->array[i + extra_words]; - } - bitset_resize(bitset, as - extra_words, false); + return buf - initial_buf; +} - } else { - for (size_t i = 0; i + extra_words + 1 < as; i++) { - bitset->array[i] = - (bitset->array[i + extra_words] >> inword_shift) | - (bitset->array[i + extra_words + 1] << (64 - inword_shift)); - } - bitset->array[as - extra_words - 1] = - (bitset->array[as - 1] >> inword_shift); - bitset_resize(bitset, as - extra_words, false); +size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art) { + if (buf == NULL || art == NULL) { + return 0; } -} + const char *initial_buf = buf; + art_init_cleared(art); -/* Free memory. */ -void bitset_free(bitset_t *bitset) { - if (bitset == NULL) { - return; + if (maxbytes < sizeof(art->root)) { + return 0; } - roaring_free(bitset->array); - roaring_free(bitset); -} + memcpy(&art->root, buf, sizeof(art->root)); + buf += sizeof(art->root); + maxbytes -= sizeof(art->root); -/* Resize the bitset so that it can support newarraysize * 64 bits. Return true - * in case of success, false for failure. 
*/ -bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes) { - if (newarraysize > SIZE_MAX / 64) { - return false; + if (maxbytes < sizeof(art->capacities)) { + return 0; } - size_t smallest = - newarraysize < bitset->arraysize ? newarraysize : bitset->arraysize; - if (bitset->capacity < newarraysize) { - uint64_t *newarray; - size_t newcapacity = bitset->capacity; - if (newcapacity == 0) { - newcapacity = 1; - } - while (newcapacity < newarraysize) { - newcapacity *= 2; - } - if ((newarray = (uint64_t *)roaring_realloc( - bitset->array, sizeof(uint64_t) * newcapacity)) == NULL) { - return false; - } - bitset->capacity = newcapacity; - bitset->array = newarray; + CROARING_STATIC_ASSERT(sizeof(art->first_free) == sizeof(art->capacities), + "first_free is read from capacities"); + memcpy(art->first_free, buf, sizeof(art->capacities)); + memcpy(art->capacities, buf, sizeof(art->capacities)); + buf += sizeof(art->capacities); + maxbytes -= sizeof(art->capacities); + + // Alignment for leaves. The rest of the nodes are aligned the same way. + const char *before_align = buf; + buf = CROARING_ART_ALIGN_BUF(buf, alignof(art_leaf_t)); + if (maxbytes < (size_t)(buf - before_align)) { + return 0; } - if (padwithzeroes && (newarraysize > smallest)) - memset(bitset->array + smallest, 0, - sizeof(uint64_t) * (newarraysize - smallest)); - bitset->arraysize = newarraysize; - return true; // success! 
-} - -size_t bitset_count(const bitset_t *bitset) { - size_t card = 0; - size_t k = 0; - for (; k + 7 < bitset->arraysize; k += 8) { - card += roaring_hamming(bitset->array[k]); - card += roaring_hamming(bitset->array[k + 1]); - card += roaring_hamming(bitset->array[k + 2]); - card += roaring_hamming(bitset->array[k + 3]); - card += roaring_hamming(bitset->array[k + 4]); - card += roaring_hamming(bitset->array[k + 5]); - card += roaring_hamming(bitset->array[k + 6]); - card += roaring_hamming(bitset->array[k + 7]); - } - for (; k + 3 < bitset->arraysize; k += 4) { - card += roaring_hamming(bitset->array[k]); - card += roaring_hamming(bitset->array[k + 1]); - card += roaring_hamming(bitset->array[k + 2]); - card += roaring_hamming(bitset->array[k + 3]); - } - for (; k < bitset->arraysize; k++) { - card += roaring_hamming(bitset->array[k]); - } - return card; -} - -bool bitset_inplace_union(bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; - for (size_t k = 0; k < minlength; ++k) { - b1->array[k] |= b2->array[k]; - } - if (b2->arraysize > b1->arraysize) { - size_t oldsize = b1->arraysize; - if (!bitset_resize(b1, b2->arraysize, false)) return false; - memcpy(b1->array + oldsize, b2->array + oldsize, - (b2->arraysize - oldsize) * sizeof(uint64_t)); - } - return true; -} - -size_t bitset_minimum(const bitset_t *bitset) { - for (size_t k = 0; k < bitset->arraysize; k++) { - uint64_t w = bitset->array[k]; - if (w != 0) { - return roaring_trailing_zeroes(w) + k * 64; - } - } - return 0; -} - -bool bitset_grow(bitset_t *bitset, size_t newarraysize) { - if (newarraysize < bitset->arraysize) { - return false; - } - if (newarraysize > SIZE_MAX / 64) { - return false; - } - if (bitset->capacity < newarraysize) { - uint64_t *newarray; - size_t newcapacity = (UINT64_C(0xFFFFFFFFFFFFFFFF) >> - roaring_leading_zeroes(newarraysize)) + - 1; - while (newcapacity < newarraysize) { - newcapacity *= 2; - } - if ((newarray = (uint64_t *)roaring_realloc( - bitset->array, sizeof(uint64_t) * newcapacity)) == NULL) { - return false; - } - bitset->capacity = newcapacity; - bitset->array = newarray; - } - memset(bitset->array + bitset->arraysize, 0, - sizeof(uint64_t) * (newarraysize - bitset->arraysize)); - bitset->arraysize = newarraysize; - return true; // success! -} + maxbytes -= buf - before_align; -size_t bitset_maximum(const bitset_t *bitset) { - for (size_t k = bitset->arraysize; k > 0; k--) { - uint64_t w = bitset->array[k - 1]; - if (w != 0) { - return 63 - roaring_leading_zeroes(w) + (k - 1) * 64; + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->capacities[t] > 0) { + size_t size = art->capacities[t] * ART_NODE_SIZES[t]; + if (maxbytes < size) { + return 0; + } + art->nodes[t] = (char *)buf; + buf += size; + maxbytes -= size; } } - return 0; -} - -/* Returns true if bitsets share no common elements, false otherwise. 
- * - * Performs early-out if common element found. */ -bool bitsets_disjoint(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - - for (size_t k = 0; k < minlength; k++) { - if ((b1->array[k] & b2->array[k]) != 0) return false; - } - return true; -} - -/* Returns true if bitsets contain at least 1 common element, false if they are - * disjoint. - * - * Performs early-out if common element found. */ -bool bitsets_intersect(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - - for (size_t k = 0; k < minlength; k++) { - if ((b1->array[k] & b2->array[k]) != 0) return true; - } - return false; + return buf - initial_buf; } -/* Returns true if b has any bits set in or after b->array[starting_loc]. */ -static bool any_bits_set(const bitset_t *b, size_t starting_loc) { - if (starting_loc >= b->arraysize) { - return false; - } - for (size_t k = starting_loc; k < b->arraysize; k++) { - if (b->array[k] != 0) return true; - } - return false; -} +#ifdef __cplusplus +} // extern "C" +} // namespace roaring +} // namespace internal +#endif +/* end file src/art/art.c */ +/* begin file src/bitset_util.c */ +#include +#include +#include +#include +#include -/* Returns true if b1 has all of b2's bits set. - * - * Performs early out if a bit is found in b2 that is not found in b1. 
*/ -bool bitset_contains_all(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t min_size = b1->arraysize; - if (b1->arraysize > b2->arraysize) { - min_size = b2->arraysize; - } - for (size_t k = 0; k < min_size; k++) { - if ((b1->array[k] & b2->array[k]) != b2->array[k]) { - return false; - } - } - if (b2->arraysize > b1->arraysize) { - /* Need to check if b2 has any bits set beyond b1's array */ - return !any_bits_set(b2, b1->arraysize); - } - return true; -} -size_t bitset_union_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t answer = 0; - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - size_t k = 0; - for (; k + 3 < minlength; k += 4) { - answer += roaring_hamming(b1->array[k] | b2->array[k]); - answer += roaring_hamming(b1->array[k + 1] | b2->array[k + 1]); - answer += roaring_hamming(b1->array[k + 2] | b2->array[k + 2]); - answer += roaring_hamming(b1->array[k + 3] | b2->array[k + 3]); - } - for (; k < minlength; ++k) { - answer += roaring_hamming(b1->array[k] | b2->array[k]); - } - if (b2->arraysize > b1->arraysize) { - // k is equal to b1->arraysize - for (; k + 3 < b2->arraysize; k += 4) { - answer += roaring_hamming(b2->array[k]); - answer += roaring_hamming(b2->array[k + 1]); - answer += roaring_hamming(b2->array[k + 2]); - answer += roaring_hamming(b2->array[k + 3]); - } - for (; k < b2->arraysize; ++k) { - answer += roaring_hamming(b2->array[k]); - } - } else { - // k is equal to b2->arraysize - for (; k + 3 < b1->arraysize; k += 4) { - answer += roaring_hamming(b1->array[k]); - answer += roaring_hamming(b1->array[k + 1]); - answer += roaring_hamming(b1->array[k + 2]); - answer += roaring_hamming(b1->array[k + 3]); - } - for (; k < b1->arraysize; ++k) { - answer += roaring_hamming(b1->array[k]); - } - } - return answer; -} +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error 
"CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." +#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wuninitialized" +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif +#ifdef __cplusplus +using namespace ::roaring::internal; +extern "C" { +namespace roaring { +namespace api { +#endif -void bitset_inplace_intersection(bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - size_t k = 0; - for (; k < minlength; ++k) { - b1->array[k] &= b2->array[k]; - } - for (; k < b1->arraysize; ++k) { - b1->array[k] = 0; // memset could, maybe, be a tiny bit faster - } -} - -size_t bitset_intersection_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t answer = 0; - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - for (size_t k = 0; k < minlength; ++k) { - answer += roaring_hamming(b1->array[k] & b2->array[k]); - } - return answer; -} - -void bitset_inplace_difference(bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - size_t k = 0; - for (; k < minlength; ++k) { - b1->array[k] &= ~(b2->array[k]); - } -} - -size_t bitset_difference_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; - size_t k = 0; - size_t answer = 0; - for (; k < minlength; ++k) { - answer += roaring_hamming(b1->array[k] & ~(b2->array[k])); - } - for (; k < b1->arraysize; ++k) { - answer += roaring_hamming(b1->array[k]); - } - return answer; -} - -bool bitset_inplace_symmetric_difference( - bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - size_t k = 0; - for (; k < minlength; ++k) { - b1->array[k] ^= b2->array[k]; - } - if (b2->arraysize > b1->arraysize) { - size_t oldsize = b1->arraysize; - if (!bitset_resize(b1, b2->arraysize, false)) return false; - memcpy(b1->array + oldsize, b2->array + oldsize, - (b2->arraysize - oldsize) * sizeof(uint64_t)); - } - return true; -} - -size_t bitset_symmetric_difference_count( - const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; - size_t k = 0; - size_t answer = 0; - for (; k < minlength; ++k) { - answer += roaring_hamming(b1->array[k] ^ b2->array[k]); - } - if (b2->arraysize > b1->arraysize) { - for (; k < b2->arraysize; ++k) { - answer += roaring_hamming(b2->array[k]); - } - } else { - for (; k < b1->arraysize; ++k) { - answer += roaring_hamming(b1->array[k]); - } - } - return answer; -} - -bool bitset_trim(bitset_t *bitset) { - size_t newsize = bitset->arraysize; - while (newsize > 0) { - if (bitset->array[newsize - 1] == 0) - newsize -= 1; - else - break; - } - if (bitset->capacity == newsize) return true; // nothing to do - uint64_t *newarray; - if ((newarray = (uint64_t *)roaring_realloc( - bitset->array, sizeof(uint64_t) * newsize)) == NULL) { - return false; - } - bitset->array = newarray; - bitset->capacity = newsize; - bitset->arraysize = newsize; - return true; -} - -#ifdef __cplusplus -} -} -} // extern "C" { namespace roaring { namespace internal { -#endif -/* end file src/bitset.c */ -/* begin file src/bitset_util.c */ -#include -#include -#include -#include -#include - - -#if CROARING_IS_X64 -#ifndef CROARING_COMPILER_SUPPORTS_AVX512 -#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." 
-#endif // CROARING_COMPILER_SUPPORTS_AVX512 -#endif -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wuninitialized" -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif -#ifdef __cplusplus -using namespace ::roaring::internal; -extern "C" { -namespace roaring { -namespace api { -#endif - -#if CROARING_IS_X64 -static uint8_t lengthTable[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; -#endif +#if CROARING_IS_X64 +static uint8_t lengthTable[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 
5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; +#endif #if CROARING_IS_X64 ALIGNED(32) @@ -12045,602 +12113,1078 @@ static uint16_t vecDecodeTable_uint16[256][8] = { {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ }; -#endif +#endif + +#if CROARING_IS_X64 +#if CROARING_COMPILER_SUPPORTS_AVX512 +CROARING_TARGET_AVX512 +const uint8_t vbmi2_table[64] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; +size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, + uint32_t *vout, size_t outcapacity, + uint32_t base) { + uint32_t *out = (uint32_t *)vout; + uint32_t *initout = out; + uint32_t *safeout = out + outcapacity; + __m512i base_v = _mm512_set1_epi32(base); + __m512i index_table = _mm512_loadu_si512(vbmi2_table); + size_t i = 0; + + for (; (i < length) && ((out + 64) < safeout); i += 1) { + uint64_t v = words[i]; + __m512i vec = _mm512_maskz_compress_epi8(v, index_table); + + uint8_t advance = (uint8_t)roaring_hamming(v); + + __m512i vbase = + _mm512_add_epi32(base_v, _mm512_set1_epi32((int)(i * 64))); + __m512i r1 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 0)); + __m512i r2 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 1)); + __m512i r3 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 2)); + __m512i r4 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 3)); + + r1 = _mm512_add_epi32(r1, vbase); + r2 = _mm512_add_epi32(r2, vbase); + r3 = _mm512_add_epi32(r3, vbase); + r4 = _mm512_add_epi32(r4, vbase); + _mm512_storeu_si512((__m512i *)out, r1); + _mm512_storeu_si512((__m512i *)(out + 16), r2); + _mm512_storeu_si512((__m512i *)(out + 32), r3); + _mm512_storeu_si512((__m512i *)(out + 48), r4); + + out += advance; + } + + base += i * 64; + + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = 
words[i]; + while ((w != 0) && (out < safeout)) { + int r = + roaring_trailing_zeroes(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + out++; + w &= (w - 1); + } + base += 64; + } + + return out - initout; +} + +// Reference: +// https://lemire.me/blog/2022/05/10/faster-bitset-decoding-using-intel-avx-512/ +size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array, + size_t length, uint16_t *vout, + size_t capacity, uint16_t base) { + uint16_t *out = (uint16_t *)vout; + uint16_t *initout = out; + uint16_t *safeout = vout + capacity; + + __m512i base_v = _mm512_set1_epi16(base); + __m512i index_table = _mm512_loadu_si512(vbmi2_table); + size_t i = 0; + + for (; (i < length) && ((out + 64) < safeout); i++) { + uint64_t v = array[i]; + __m512i vec = _mm512_maskz_compress_epi8(v, index_table); + + uint8_t advance = (uint8_t)roaring_hamming(v); + + __m512i vbase = + _mm512_add_epi16(base_v, _mm512_set1_epi16((short)(i * 64))); + __m512i r1 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec, 0)); + __m512i r2 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec, 1)); + + r1 = _mm512_add_epi16(r1, vbase); + r2 = _mm512_add_epi16(r2, vbase); + + _mm512_storeu_si512((__m512i *)out, r1); + _mm512_storeu_si512((__m512i *)(out + 32), r2); + out += advance; + } + + base += i * 64; + + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = array[i]; + while ((w != 0) && (out < safeout)) { + int r = + roaring_trailing_zeroes(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out, &val, sizeof(uint16_t)); + out++; + w &= (w - 1); + } + base += 64; + } + + return out - initout; +} +CROARING_UNTARGET_AVX512 +#endif + +CROARING_TARGET_AVX2 +size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, + uint32_t *out, size_t outcapacity, + uint32_t base) { + uint32_t *initout = out; + __m256i baseVec = _mm256_set1_epi32(base - 1); + 
__m256i incVec = _mm256_set1_epi32(64); + __m256i add8 = _mm256_set1_epi32(8); + uint32_t *safeout = out + outcapacity; + size_t i = 0; + for (; (i < length) && (out + 64 <= safeout); ++i) { + uint64_t w = words[i]; + if (w == 0) { + baseVec = _mm256_add_epi32(baseVec, incVec); + } else { + for (int k = 0; k < 4; ++k) { + uint8_t byteA = (uint8_t)w; + uint8_t byteB = (uint8_t)(w >> 8); + w >>= 16; + __m256i vecA = + _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]); + __m256i vecB = + _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]); + uint8_t advanceA = lengthTable[byteA]; + uint8_t advanceB = lengthTable[byteB]; + vecA = _mm256_add_epi32(baseVec, vecA); + baseVec = _mm256_add_epi32(baseVec, add8); + vecB = _mm256_add_epi32(baseVec, vecB); + baseVec = _mm256_add_epi32(baseVec, add8); + _mm256_storeu_si256((__m256i *)out, vecA); + out += advanceA; + _mm256_storeu_si256((__m256i *)out, vecB); + out += advanceB; + } + } + } + base += i * 64; + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = words[i]; + while ((w != 0) && (out < safeout)) { + int r = + roaring_trailing_zeroes(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + out++; + w &= (w - 1); + } + base += 64; + } + return out - initout; +} +CROARING_UNTARGET_AVX2 +#endif // CROARING_IS_X64 + +size_t bitset_extract_setbits(const uint64_t *words, size_t length, + uint32_t *out, uint32_t base) { + int outpos = 0; + for (size_t i = 0; i < length; ++i) { + uint64_t w = words[i]; + while (w != 0) { + int r = + roaring_trailing_zeroes(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out + outpos, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + outpos++; + w &= (w - 1); + } + base += 64; + } + return outpos; +} + +size_t bitset_extract_intersection_setbits_uint16( + const uint64_t *__restrict__ words1, const uint64_t *__restrict__ words2, + size_t 
length, uint16_t *out, uint16_t base) { + int outpos = 0; + for (size_t i = 0; i < length; ++i) { + uint64_t w = words1[i] & words2[i]; + while (w != 0) { + int r = roaring_trailing_zeroes(w); + out[outpos++] = (uint16_t)(r + base); + w &= (w - 1); + } + base += 64; + } + return outpos; +} + +#if CROARING_IS_X64 +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out" as 16-bit integers, values start at "base" (can + *be set to zero). + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. + * + * This function uses SSE decoding. + */ +CROARING_TARGET_AVX2 +size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, + uint16_t *out, size_t outcapacity, + uint16_t base) { + uint16_t *initout = out; + __m128i baseVec = _mm_set1_epi16(base - 1); + __m128i incVec = _mm_set1_epi16(64); + __m128i add8 = _mm_set1_epi16(8); + uint16_t *safeout = out + outcapacity; + const int numberofbytes = 2; // process two bytes at a time + size_t i = 0; + for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { + uint64_t w = words[i]; + if (w == 0) { + baseVec = _mm_add_epi16(baseVec, incVec); + } else { + for (int k = 0; k < 4; ++k) { + uint8_t byteA = (uint8_t)w; + uint8_t byteB = (uint8_t)(w >> 8); + w >>= 16; + __m128i vecA = _mm_loadu_si128( + (const __m128i *)vecDecodeTable_uint16[byteA]); + __m128i vecB = _mm_loadu_si128( + (const __m128i *)vecDecodeTable_uint16[byteB]); + uint8_t advanceA = lengthTable[byteA]; + uint8_t advanceB = lengthTable[byteB]; + vecA = _mm_add_epi16(baseVec, vecA); + baseVec = _mm_add_epi16(baseVec, add8); + vecB = _mm_add_epi16(baseVec, vecB); + baseVec = _mm_add_epi16(baseVec, add8); + _mm_storeu_si128((__m128i *)out, vecA); + out += advanceA; + _mm_storeu_si128((__m128i *)out, vecB); + out += advanceB; + } + } + } + base += (uint16_t)(i * 64); + for (; (i < length) && (out < 
safeout); ++i) { + uint64_t w = words[i]; + while ((w != 0) && (out < safeout)) { + int r = roaring_trailing_zeroes(w); + *out = (uint16_t)(r + base); + out++; + w &= (w - 1); + } + base += 64; + } + return out - initout; +} +CROARING_UNTARGET_AVX2 +#endif + +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out", values start at "base" (can be set to zero). + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. + */ +size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, + uint16_t *out, uint16_t base) { + int outpos = 0; + for (size_t i = 0; i < length; ++i) { + uint64_t w = words[i]; + while (w != 0) { + int r = roaring_trailing_zeroes(w); + out[outpos++] = (uint16_t)(r + base); + w &= (w - 1); + } + base += 64; + } + return outpos; +} + +#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64) -#if CROARING_IS_X64 -#if CROARING_COMPILER_SUPPORTS_AVX512 -CROARING_TARGET_AVX512 -const uint8_t vbmi2_table[64] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; -size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, - uint32_t *vout, size_t outcapacity, - uint32_t base) { - uint32_t *out = (uint32_t *)vout; - uint32_t *initout = out; - uint32_t *safeout = out + outcapacity; - __m512i base_v = _mm512_set1_epi32(base); - __m512i index_table = _mm512_loadu_si512(vbmi2_table); - size_t i = 0; +static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, + uint64_t card, + const uint16_t *list, + uint64_t length) { + uint64_t offset, load, pos; + uint64_t shift = 6; + const uint16_t *end = list + length; + if (!length) return card; + // TODO: could unroll 
for performance, see bitset_set_list + // bts is not available as an intrinsic in GCC + __asm volatile( + "1:\n" + "movzwq (%[list]), %[pos]\n" + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)\n" + "sbb $-1, %[card]\n" + "add $2, %[list]\n" + "cmp %[list], %[end]\n" + "jnz 1b" + : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), + [pos] "=&r"(pos), [offset] "=&r"(offset) + : [end] "r"(end), [words] "r"(words), [shift] "r"(shift)); + return card; +} - for (; (i < length) && ((out + 64) < safeout); i += 1) { - uint64_t v = words[i]; - __m512i vec = _mm512_maskz_compress_epi8(v, index_table); +static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, + uint64_t length) { + uint64_t pos; + const uint16_t *end = list + length; - uint8_t advance = (uint8_t)roaring_hamming(v); + uint64_t shift = 6; + uint64_t offset; + uint64_t load; + for (; list + 3 < end; list += 4) { + pos = list[0]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[1]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[2]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[3]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts 
%[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + } - __m512i vbase = - _mm512_add_epi32(base_v, _mm512_set1_epi32((int)(i * 64))); - __m512i r1 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 0)); - __m512i r2 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 1)); - __m512i r3 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 2)); - __m512i r4 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 3)); + while (list != end) { + pos = list[0]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + list++; + } +} - r1 = _mm512_add_epi32(r1, vbase); - r2 = _mm512_add_epi32(r2, vbase); - r3 = _mm512_add_epi32(r3, vbase); - r4 = _mm512_add_epi32(r4, vbase); - _mm512_storeu_si512((__m512i *)out, r1); - _mm512_storeu_si512((__m512i *)(out + 16), r2); - _mm512_storeu_si512((__m512i *)(out + 32), r3); - _mm512_storeu_si512((__m512i *)(out + 48), r4); +static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, + const uint16_t *list, + uint64_t length) { + uint64_t offset, load, pos; + uint64_t shift = 6; + const uint16_t *end = list + length; + if (!length) return card; + // btr is not available as an intrinsic in GCC + __asm volatile( + "1:\n" + "movzwq (%[list]), %[pos]\n" + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "btr %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)\n" + "sbb $0, %[card]\n" + "add $2, %[list]\n" + "cmp %[list], %[end]\n" + "jnz 1b" + : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), + [pos] "=&r"(pos), [offset] "=&r"(offset) + : [end] "r"(end), [words] "r"(words), [shift] "r"(shift) + : + /* clobbers */ "memory"); + return card; 
+} - out += advance; +static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, + const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load & ~(UINT64_C(1) << index); + card -= (load ^ newload) >> index; + words[offset] = newload; + list++; } + return card; +} - base += i * 64; +static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, + uint64_t card, + const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + card += (load ^ newload) >> index; + words[offset] = newload; + list++; + } + return card; +} - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = words[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = - w & (~w + 1); // on x64, should compile to BLSI (careful: the - // Intel compiler seems to fail) - int r = - roaring_trailing_zeroes(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - out++; - w ^= t; - } - base += 64; +static inline void _scalar_bitset_set_list(uint64_t *words, + const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + words[offset] = newload; + list++; + } +} + +uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, + uint64_t length) { + if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { + return 
_asm_bitset_clear_list(words, card, list, length); + } else { + return _scalar_bitset_clear_list(words, card, list, length); + } +} + +uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { + return _asm_bitset_set_list_withcard(words, card, list, length); + } else { + return _scalar_bitset_set_list_withcard(words, card, list, length); + } +} + +void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { + if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { + _asm_bitset_set_list(words, list, length); + } else { + _scalar_bitset_set_list(words, list, length); + } +} +#else +uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load & ~(UINT64_C(1) << index); + card -= (load ^ newload) >> index; + words[offset] = newload; + list++; + } + return card; +} + +uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + card += (load ^ newload) >> index; + words[offset] = newload; + list++; } + return card; +} - return out - initout; +void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + words[offset] = newload; + list++; + } } -// 
Reference: -// https://lemire.me/blog/2022/05/10/faster-bitset-decoding-using-intel-avx-512/ -size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array, - size_t length, uint16_t *vout, - size_t capacity, uint16_t base) { - uint16_t *out = (uint16_t *)vout; - uint16_t *initout = out; - uint16_t *safeout = vout + capacity; +#endif - __m512i base_v = _mm512_set1_epi16(base); - __m512i index_table = _mm512_loadu_si512(vbmi2_table); - size_t i = 0; +/* flip specified bits */ +/* TODO: consider whether worthwhile to make an asm version */ - for (; (i < length) && ((out + 64) < safeout); i++) { - uint64_t v = array[i]; - __m512i vec = _mm512_maskz_compress_epi8(v, index_table); +uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load ^ (UINT64_C(1) << index); + // todo: is a branch here all that bad? 
+ card += + (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 + words[offset] = newload; + list++; + } + return card; +} - uint8_t advance = (uint8_t)roaring_hamming(v); +void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load ^ (UINT64_C(1) << index); + words[offset] = newload; + list++; + } +} - __m512i vbase = - _mm512_add_epi16(base_v, _mm512_set1_epi16((short)(i * 64))); - __m512i r1 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec, 0)); - __m512i r2 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec, 1)); +#ifdef __cplusplus +} +} +} // extern "C" { namespace roaring { namespace api { +#endif +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif +/* end file src/bitset_util.c */ +/* begin file src/bitset.c */ +#include +#include +#include +#include +#include - r1 = _mm512_add_epi16(r1, vbase); - r2 = _mm512_add_epi16(r2, vbase); - _mm512_storeu_si512((__m512i *)out, r1); - _mm512_storeu_si512((__m512i *)(out + 32), r2); - out += advance; - } +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace internal { +#endif - base += i * 64; +extern inline void bitset_print(const bitset_t *b); +extern inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator, + void *ptr); +extern inline size_t bitset_next_set_bits(const bitset_t *bitset, + size_t *buffer, size_t capacity, + size_t *startfrom); +extern inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag); +extern inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i); +extern inline void bitset_set(bitset_t *bitset, size_t i); +extern inline bool bitset_get(const bitset_t *bitset, size_t i); +extern inline size_t bitset_size_in_words(const bitset_t *bitset); +extern inline size_t 
bitset_size_in_bits(const bitset_t *bitset); +extern inline size_t bitset_size_in_bytes(const bitset_t *bitset); - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = array[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = - w & (~w + 1); // on x64, should compile to BLSI (careful: the - // Intel compiler seems to fail) - int r = - roaring_trailing_zeroes(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out, &val, sizeof(uint16_t)); - out++; - w ^= t; - } - base += 64; +/* Create a new bitset. Return NULL in case of failure. */ +bitset_t *bitset_create(void) { + bitset_t *bitset = NULL; + /* Allocate the bitset itself. */ + if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { + return NULL; } + bitset->array = NULL; + bitset->arraysize = 0; + bitset->capacity = 0; + return bitset; +} - return out - initout; +/* Create a new bitset able to contain size bits. Return NULL in case of + * failure. */ +bitset_t *bitset_create_with_capacity(size_t size) { + bitset_t *bitset = NULL; + /* Allocate the bitset itself. 
*/ + if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { + return NULL; + } + bitset->arraysize = + (size + sizeof(uint64_t) * 8 - 1) / (sizeof(uint64_t) * 8); + bitset->capacity = bitset->arraysize; + if ((bitset->array = (uint64_t *)roaring_calloc( + bitset->arraysize, sizeof(uint64_t))) == NULL) { + roaring_free(bitset); + return NULL; + } + return bitset; } -CROARING_UNTARGET_AVX512 -#endif -CROARING_TARGET_AVX2 -size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, - uint32_t *out, size_t outcapacity, - uint32_t base) { - uint32_t *initout = out; - __m256i baseVec = _mm256_set1_epi32(base - 1); - __m256i incVec = _mm256_set1_epi32(64); - __m256i add8 = _mm256_set1_epi32(8); - uint32_t *safeout = out + outcapacity; - size_t i = 0; - for (; (i < length) && (out + 64 <= safeout); ++i) { - uint64_t w = words[i]; - if (w == 0) { - baseVec = _mm256_add_epi32(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m256i vecA = - _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]); - __m256i vecB = - _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm256_add_epi32(baseVec, vecA); - baseVec = _mm256_add_epi32(baseVec, add8); - vecB = _mm256_add_epi32(baseVec, vecB); - baseVec = _mm256_add_epi32(baseVec, add8); - _mm256_storeu_si256((__m256i *)out, vecA); - out += advanceA; - _mm256_storeu_si256((__m256i *)out, vecB); - out += advanceB; - } - } +/* Create a copy */ +bitset_t *bitset_copy(const bitset_t *bitset) { + bitset_t *copy = NULL; + /* Allocate the bitset itself. 
*/ + if ((copy = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { + return NULL; } - base += i * 64; - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = words[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = - w & (~w + 1); // on x64, should compile to BLSI (careful: the - // Intel compiler seems to fail) - int r = - roaring_trailing_zeroes(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - out++; - w ^= t; - } - base += 64; + memcpy(copy, bitset, sizeof(bitset_t)); + copy->capacity = copy->arraysize; + if ((copy->array = (uint64_t *)roaring_malloc(sizeof(uint64_t) * + bitset->arraysize)) == NULL) { + roaring_free(copy); + return NULL; } - return out - initout; + memcpy(copy->array, bitset->array, sizeof(uint64_t) * bitset->arraysize); + return copy; } -CROARING_UNTARGET_AVX2 -#endif // CROARING_IS_X64 -size_t bitset_extract_setbits(const uint64_t *words, size_t length, - uint32_t *out, uint32_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = words[i]; - while (w != 0) { - uint64_t t = - w & (~w + 1); // on x64, should compile to BLSI (careful: the - // Intel compiler seems to fail) - int r = - roaring_trailing_zeroes(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - w ^= t; +void bitset_clear(bitset_t *bitset) { + memset(bitset->array, 0, sizeof(uint64_t) * bitset->arraysize); +} + +void bitset_fill(bitset_t *bitset) { + memset(bitset->array, 0xff, sizeof(uint64_t) * bitset->arraysize); +} + +void bitset_shift_left(bitset_t *bitset, size_t s) { + size_t extra_words = s / 64; + int inword_shift = s % 64; + size_t as = bitset->arraysize; + if (inword_shift == 0) { + bitset_resize(bitset, as + extra_words, false); + // could be done with a memmove + for (size_t i = as + extra_words; i > 
extra_words; i--) { + bitset->array[i - 1] = bitset->array[i - 1 - extra_words]; + } + } else { + bitset_resize(bitset, as + extra_words + 1, true); + bitset->array[as + extra_words] = + bitset->array[as - 1] >> (64 - inword_shift); + for (size_t i = as + extra_words; i >= extra_words + 2; i--) { + bitset->array[i - 1] = + (bitset->array[i - 1 - extra_words] << inword_shift) | + (bitset->array[i - 2 - extra_words] >> (64 - inword_shift)); } - base += 64; + bitset->array[extra_words] = bitset->array[0] << inword_shift; } - return outpos; -} - -size_t bitset_extract_intersection_setbits_uint16( - const uint64_t *__restrict__ words1, const uint64_t *__restrict__ words2, - size_t length, uint16_t *out, uint16_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = words1[i] & words2[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = roaring_trailing_zeroes(w); - out[outpos++] = (uint16_t)(r + base); - w ^= t; - } - base += 64; + for (size_t i = 0; i < extra_words; i++) { + bitset->array[i] = 0; } - return outpos; } -#if CROARING_IS_X64 -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out" as 16-bit integers, values start at "base" (can - *be set to zero). - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - * - * This function uses SSE decoding. 
- */ -CROARING_TARGET_AVX2 -size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, - uint16_t *out, size_t outcapacity, - uint16_t base) { - uint16_t *initout = out; - __m128i baseVec = _mm_set1_epi16(base - 1); - __m128i incVec = _mm_set1_epi16(64); - __m128i add8 = _mm_set1_epi16(8); - uint16_t *safeout = out + outcapacity; - const int numberofbytes = 2; // process two bytes at a time - size_t i = 0; - for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { - uint64_t w = words[i]; - if (w == 0) { - baseVec = _mm_add_epi16(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m128i vecA = _mm_loadu_si128( - (const __m128i *)vecDecodeTable_uint16[byteA]); - __m128i vecB = _mm_loadu_si128( - (const __m128i *)vecDecodeTable_uint16[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm_add_epi16(baseVec, vecA); - baseVec = _mm_add_epi16(baseVec, add8); - vecB = _mm_add_epi16(baseVec, vecB); - baseVec = _mm_add_epi16(baseVec, add8); - _mm_storeu_si128((__m128i *)out, vecA); - out += advanceA; - _mm_storeu_si128((__m128i *)out, vecB); - out += advanceB; - } +void bitset_shift_right(bitset_t *bitset, size_t s) { + size_t extra_words = s / 64; + int inword_shift = s % 64; + size_t as = bitset->arraysize; + if (inword_shift == 0) { + // could be done with a memmove + for (size_t i = 0; i < as - extra_words; i++) { + bitset->array[i] = bitset->array[i + extra_words]; } - } - base += (uint16_t)(i * 64); - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = words[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = w & (~w + 1); - int r = roaring_trailing_zeroes(w); - *out = (uint16_t)(r + base); - out++; - w ^= t; + bitset_resize(bitset, as - extra_words, false); + + } else { + for (size_t i = 0; i + extra_words + 1 < as; i++) { + bitset->array[i] = + (bitset->array[i + extra_words] >> 
inword_shift) | + (bitset->array[i + extra_words + 1] << (64 - inword_shift)); } - base += 64; + bitset->array[as - extra_words - 1] = + (bitset->array[as - 1] >> inword_shift); + bitset_resize(bitset, as - extra_words, false); } - return out - initout; } -CROARING_UNTARGET_AVX2 -#endif -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base" (can be set to zero). - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, - uint16_t *out, uint16_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = words[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = roaring_trailing_zeroes(w); - out[outpos++] = (uint16_t)(r + base); - w ^= t; - } - base += 64; +/* Free memory. */ +void bitset_free(bitset_t *bitset) { + if (bitset == NULL) { + return; } - return outpos; + roaring_free(bitset->array); + roaring_free(bitset); } -#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64) - -static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, - uint64_t card, - const uint16_t *list, - uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // TODO: could unroll for performance, see bitset_set_list - // bts is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)\n" - "sbb $-1, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [words] "r"(words), [shift] 
"r"(shift)); - return card; +/* Resize the bitset so that it can support newarraysize * 64 bits. Return true + * in case of success, false for failure. */ +bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes) { + if (newarraysize > SIZE_MAX / 64) { + return false; + } + size_t smallest = + newarraysize < bitset->arraysize ? newarraysize : bitset->arraysize; + if (bitset->capacity < newarraysize) { + uint64_t *newarray; + size_t newcapacity = bitset->capacity; + if (newcapacity == 0) { + newcapacity = 1; + } + while (newcapacity < newarraysize) { + newcapacity *= 2; + } + if ((newarray = (uint64_t *)roaring_realloc( + bitset->array, sizeof(uint64_t) * newcapacity)) == NULL) { + return false; + } + bitset->capacity = newcapacity; + bitset->array = newarray; + } + if (padwithzeroes && (newarraysize > smallest)) + memset(bitset->array + smallest, 0, + sizeof(uint64_t) * (newarraysize - smallest)); + bitset->arraysize = newarraysize; + return true; // success! } -static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, - uint64_t length) { - uint64_t pos; - const uint16_t *end = list + length; - - uint64_t shift = 6; - uint64_t offset; - uint64_t load; - for (; list + 3 < end; list += 4) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[1]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[2]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], 
(%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[3]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); +size_t bitset_count(const bitset_t *bitset) { + size_t card = 0; + size_t k = 0; + for (; k + 7 < bitset->arraysize; k += 8) { + card += roaring_hamming(bitset->array[k]); + card += roaring_hamming(bitset->array[k + 1]); + card += roaring_hamming(bitset->array[k + 2]); + card += roaring_hamming(bitset->array[k + 3]); + card += roaring_hamming(bitset->array[k + 4]); + card += roaring_hamming(bitset->array[k + 5]); + card += roaring_hamming(bitset->array[k + 6]); + card += roaring_hamming(bitset->array[k + 7]); + } + for (; k + 3 < bitset->arraysize; k += 4) { + card += roaring_hamming(bitset->array[k]); + card += roaring_hamming(bitset->array[k + 1]); + card += roaring_hamming(bitset->array[k + 2]); + card += roaring_hamming(bitset->array[k + 3]); + } + for (; k < bitset->arraysize; k++) { + card += roaring_hamming(bitset->array[k]); + } + return card; +} + +bool bitset_inplace_union(bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; + for (size_t k = 0; k < minlength; ++k) { + b1->array[k] |= b2->array[k]; + } + if (b2->arraysize > b1->arraysize) { + size_t oldsize = b1->arraysize; + if (!bitset_resize(b1, b2->arraysize, false)) return false; + memcpy(b1->array + oldsize, b2->array + oldsize, + (b2->arraysize - oldsize) * sizeof(uint64_t)); } + return true; +} - while (list != end) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - list++; +bool bitset_empty(const bitset_t *bitset) { + for (size_t k = 0; k < bitset->arraysize; k++) { + if (bitset->array[k] != 0) { + return false; + } } + return true; } -static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, - const uint16_t *list, - uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // btr is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "btr %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)\n" - "sbb $0, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [words] "r"(words), [shift] "r"(shift) - : - /* clobbers */ "memory"); - return card; +size_t bitset_minimum(const bitset_t *bitset) { + for (size_t k = 0; k < bitset->arraysize; k++) { + uint64_t w = bitset->array[k]; + if (w != 0) { + return roaring_trailing_zeroes(w) + k * 64; + } + } + return SIZE_MAX; } -static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, - const uint16_t *list, - uint64_t length) { 
- uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load & ~(UINT64_C(1) << index); - card -= (load ^ newload) >> index; - words[offset] = newload; - list++; +bool bitset_grow(bitset_t *bitset, size_t newarraysize) { + if (newarraysize < bitset->arraysize) { + return false; } - return card; + if (newarraysize > SIZE_MAX / 64) { + return false; + } + if (bitset->capacity < newarraysize) { + uint64_t *newarray; + size_t newcapacity = (UINT64_C(0xFFFFFFFFFFFFFFFF) >> + roaring_leading_zeroes(newarraysize)) + + 1; + while (newcapacity < newarraysize) { + newcapacity *= 2; + } + if ((newarray = (uint64_t *)roaring_realloc( + bitset->array, sizeof(uint64_t) * newcapacity)) == NULL) { + return false; + } + bitset->capacity = newcapacity; + bitset->array = newarray; + } + memset(bitset->array + bitset->arraysize, 0, + sizeof(uint64_t) * (newarraysize - bitset->arraysize)); + bitset->arraysize = newarraysize; + return true; // success! 
} -static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, - uint64_t card, - const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - card += (load ^ newload) >> index; - words[offset] = newload; - list++; +size_t bitset_maximum(const bitset_t *bitset) { + for (size_t k = bitset->arraysize; k > 0; k--) { + uint64_t w = bitset->array[k - 1]; + if (w != 0) { + return 63 - roaring_leading_zeroes(w) + (k - 1) * 64; + } } - return card; + return 0; } -static inline void _scalar_bitset_set_list(uint64_t *words, - const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - words[offset] = newload; - list++; +/* Returns true if bitsets share no common elements, false otherwise. + * + * Performs early-out if common element found. */ +bool bitsets_disjoint(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + + for (size_t k = 0; k < minlength; k++) { + if ((b1->array[k] & b2->array[k]) != 0) return false; } + return true; } -uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, - uint64_t length) { - if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { - return _asm_bitset_clear_list(words, card, list, length); - } else { - return _scalar_bitset_clear_list(words, card, list, length); +/* Returns true if bitsets contain at least 1 common element, false if they are + * disjoint. + * + * Performs early-out if common element found. 
*/ +bool bitsets_intersect(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + + for (size_t k = 0; k < minlength; k++) { + if ((b1->array[k] & b2->array[k]) != 0) return true; } + return false; } -uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { - return _asm_bitset_set_list_withcard(words, card, list, length); - } else { - return _scalar_bitset_set_list_withcard(words, card, list, length); +/* Returns true if b has any bits set in or after b->array[starting_loc]. */ +static bool any_bits_set(const bitset_t *b, size_t starting_loc) { + if (starting_loc >= b->arraysize) { + return false; + } + for (size_t k = starting_loc; k < b->arraysize; k++) { + if (b->array[k] != 0) return true; } + return false; } -void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { - if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { - _asm_bitset_set_list(words, list, length); +/* Returns true if b1 has all of b2's bits set. + * + * Performs early out if a bit is found in b2 that is not found in b1. */ +bool bitset_contains_all(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t min_size = b1->arraysize; + if (b1->arraysize > b2->arraysize) { + min_size = b2->arraysize; + } + for (size_t k = 0; k < min_size; k++) { + if ((b1->array[k] & b2->array[k]) != b2->array[k]) { + return false; + } + } + if (b2->arraysize > b1->arraysize) { + /* Need to check if b2 has any bits set beyond b1's array */ + return !any_bits_set(b2, b1->arraysize); + } + return true; +} + +size_t bitset_union_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t answer = 0; + size_t minlength = + b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; + size_t k = 0; + for (; k + 3 < minlength; k += 4) { + answer += roaring_hamming(b1->array[k] | b2->array[k]); + answer += roaring_hamming(b1->array[k + 1] | b2->array[k + 1]); + answer += roaring_hamming(b1->array[k + 2] | b2->array[k + 2]); + answer += roaring_hamming(b1->array[k + 3] | b2->array[k + 3]); + } + for (; k < minlength; ++k) { + answer += roaring_hamming(b1->array[k] | b2->array[k]); + } + if (b2->arraysize > b1->arraysize) { + // k is equal to b1->arraysize + for (; k + 3 < b2->arraysize; k += 4) { + answer += roaring_hamming(b2->array[k]); + answer += roaring_hamming(b2->array[k + 1]); + answer += roaring_hamming(b2->array[k + 2]); + answer += roaring_hamming(b2->array[k + 3]); + } + for (; k < b2->arraysize; ++k) { + answer += roaring_hamming(b2->array[k]); + } } else { - _scalar_bitset_set_list(words, list, length); + // k is equal to b2->arraysize + for (; k + 3 < b1->arraysize; k += 4) { + answer += roaring_hamming(b1->array[k]); + answer += roaring_hamming(b1->array[k + 1]); + answer += roaring_hamming(b1->array[k + 2]); + answer += roaring_hamming(b1->array[k + 3]); + } + for (; k < b1->arraysize; ++k) { + answer += roaring_hamming(b1->array[k]); + } + } + return answer; +} + +void bitset_inplace_intersection(bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; + size_t k = 0; + for (; k < minlength; ++k) { + b1->array[k] &= b2->array[k]; + } + for (; k < b1->arraysize; ++k) { + b1->array[k] = 0; // memset could, maybe, be a tiny bit faster } } -#else -uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load & ~(UINT64_C(1) << index); - card -= (load ^ newload) >> index; - words[offset] = newload; - list++; + +size_t bitset_intersection_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t answer = 0; + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + for (size_t k = 0; k < minlength; ++k) { + answer += roaring_hamming(b1->array[k] & b2->array[k]); } - return card; + return answer; } -uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - card += (load ^ newload) >> index; - words[offset] = newload; - list++; +void bitset_inplace_difference(bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; + size_t k = 0; + for (; k < minlength; ++k) { + b1->array[k] &= ~(b2->array[k]); } - return card; } -void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - words[offset] = newload; - list++; +size_t bitset_difference_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + size_t k = 0; + size_t answer = 0; + for (; k < minlength; ++k) { + answer += roaring_hamming(b1->array[k] & ~(b2->array[k])); + } + for (; k < b1->arraysize; ++k) { + answer += roaring_hamming(b1->array[k]); } + return answer; } -#endif - -/* flip specified bits */ -/* TODO: consider whether worthwhile to make an asm version */ +bool bitset_inplace_symmetric_difference( + bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + size_t k = 0; + for (; k < minlength; ++k) { + b1->array[k] ^= b2->array[k]; + } + if (b2->arraysize > b1->arraysize) { + size_t oldsize = b1->arraysize; + if (!bitset_resize(b1, b2->arraysize, false)) return false; + memcpy(b1->array + oldsize, b2->array + oldsize, + (b2->arraysize - oldsize) * sizeof(uint64_t)); + } + return true; +} -uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load ^ (UINT64_C(1) << index); - // todo: is a branch here all that bad? 
- card += - (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 - words[offset] = newload; - list++; +size_t bitset_symmetric_difference_count( + const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + size_t k = 0; + size_t answer = 0; + for (; k < minlength; ++k) { + answer += roaring_hamming(b1->array[k] ^ b2->array[k]); } - return card; + if (b2->arraysize > b1->arraysize) { + for (; k < b2->arraysize; ++k) { + answer += roaring_hamming(b2->array[k]); + } + } else { + for (; k < b1->arraysize; ++k) { + answer += roaring_hamming(b1->array[k]); + } + } + return answer; } -void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load ^ (UINT64_C(1) << index); - words[offset] = newload; - list++; +bool bitset_trim(bitset_t *bitset) { + size_t newsize = bitset->arraysize; + while (newsize > 0) { + if (bitset->array[newsize - 1] == 0) + newsize -= 1; + else + break; + } + if (bitset->capacity == newsize) return true; // nothing to do + uint64_t *newarray; + if ((newarray = (uint64_t *)roaring_realloc( + bitset->array, sizeof(uint64_t) * newsize)) == NULL) { + return false; } + bitset->array = newarray; + bitset->capacity = newsize; + bitset->arraysize = newsize; + return true; } #ifdef __cplusplus } } -} // extern "C" { namespace roaring { namespace api { +} // extern "C" { namespace roaring { namespace internal { #endif -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif/* end file src/bitset_util.c */ +/* end file src/bitset.c */ /* begin file src/containers/array.c */ /* * array.c @@ -13215,9 +13759,6 @@ bool array_container_iterate64(const array_container_t *cont, uint32_t 
base, * bitset.c * */ -#ifndef _POSIX_C_SOURCE -#define _POSIX_C_SOURCE 200809L -#endif #include #include #include @@ -14111,7 +14652,7 @@ int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ } \ int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ const bitset_container_t *src_2) { \ - printf("A1\n"); const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_1 = src_1->words; \ const uint64_t * __restrict__ words_2 = src_2->words; \ int32_t sum = 0; \ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ @@ -16522,215 +17063,67 @@ bool bitset_bitset_container_intersection_inplace( if (*dst != NULL) { CAST_array(*dst)->cardinality = newCardinality; bitset_extract_intersection_setbits_uint16( - src_1->words, src_2->words, BITSET_CONTAINER_SIZE_IN_WORDS, - CAST_array(*dst)->array, 0); - } - return false; // not a bitset -} - -#ifdef __cplusplus -} -} -} // extern "C" { namespace roaring { namespace internal { -#endif -/* end file src/containers/mixed_intersection.c */ -/* begin file src/containers/mixed_negation.c */ -/* - * mixed_negation.c - * - */ - -#include -#include - - -#ifdef __cplusplus -extern "C" { -namespace roaring { -namespace internal { -#endif - -// TODO: make simplified and optimized negation code across -// the full range. - -/* Negation across the entire range of the container. - * Compute the negation of src and write the result - * to *dst. The complement of a - * sufficiently sparse set will always be dense and a hence a bitmap -' * We assume that dst is pre-allocated and a valid bitset container - * There can be no in-place version. 
- */ -void array_container_negation(const array_container_t *src, - bitset_container_t *dst) { - uint64_t card = UINT64_C(1 << 16); - bitset_container_set_all(dst); - - if (src->cardinality == 0) { - return; - } - - dst->cardinality = (int32_t)bitset_clear_list(dst->words, card, src->array, - (uint64_t)src->cardinality); -} - -/* Negation across the entire range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation(const bitset_container_t *src, - container_t **dst) { - return bitset_container_negation_range(src, 0, (1 << 16), dst); -} - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -bool bitset_container_negation_inplace(bitset_container_t *src, - container_t **dst) { - return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst); -} - -/* Negation across the entire range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -int run_container_negation(const run_container_t *src, container_t **dst) { - return run_container_negation_range(src, 0, (1 << 16), dst); -} - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. 
- */ -int run_container_negation_inplace(run_container_t *src, container_t **dst) { - return run_container_negation_range_inplace(src, 0, (1 << 16), dst); + src_1->words, src_2->words, BITSET_CONTAINER_SIZE_IN_WORDS, + CAST_array(*dst)->array, 0); + } + return false; // not a bitset } -/* Negation across a range of the container. - * Compute the negation of src and write the result - * to *dst. Returns true if the result is a bitset container - * and false for an array container. *dst is not preallocated. +#ifdef __cplusplus +} +} +} // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/mixed_intersection.c */ +/* begin file src/containers/mixed_negation.c */ +/* + * mixed_negation.c + * */ -bool array_container_negation_range(const array_container_t *src, - const int range_start, const int range_end, - container_t **dst) { - /* close port of the Java implementation */ - if (range_start >= range_end) { - *dst = array_container_clone(src); - return false; - } - int32_t start_index = - binarySearch(src->array, src->cardinality, (uint16_t)range_start); - if (start_index < 0) start_index = -start_index - 1; +#include +#include - int32_t last_index = - binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); - if (last_index < 0) last_index = -last_index - 2; - const int32_t current_values_in_range = last_index - start_index + 1; - const int32_t span_to_be_flipped = range_end - range_start; - const int32_t new_values_in_range = - span_to_be_flipped - current_values_in_range; - const int32_t cardinality_change = - new_values_in_range - current_values_in_range; - const int32_t new_cardinality = src->cardinality + cardinality_change; +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace internal { +#endif - if (new_cardinality > DEFAULT_MAX_SIZE) { - bitset_container_t *temp = bitset_container_from_array(src); - bitset_flip_range(temp->words, (uint32_t)range_start, - (uint32_t)range_end); - temp->cardinality 
= new_cardinality; - *dst = temp; - return true; - } +// TODO: make simplified and optimized negation code across +// the full range. - array_container_t *arr = - array_container_create_given_capacity(new_cardinality); - *dst = (container_t *)arr; - if (new_cardinality == 0) { - arr->cardinality = new_cardinality; - return false; // we are done. - } - // copy stuff before the active area - memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); +/* Negation across the entire range of the container. + * Compute the negation of src and write the result + * to *dst. The complement of a + * sufficiently sparse set will always be dense and a hence a bitmap +' * We assume that dst is pre-allocated and a valid bitset container + * There can be no in-place version. + */ +void array_container_negation(const array_container_t *src, + bitset_container_t *dst) { + uint64_t card = UINT64_C(1 << 16); + bitset_container_set_all(dst); - // work on the range - int32_t out_pos = start_index, in_pos = start_index; - int32_t val_in_range = range_start; - for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { - if ((uint16_t)val_in_range != src->array[in_pos]) { - arr->array[out_pos++] = (uint16_t)val_in_range; - } else { - ++in_pos; - } + if (src->cardinality == 0) { + return; } - for (; val_in_range < range_end; ++val_in_range) - arr->array[out_pos++] = (uint16_t)val_in_range; - - // content after the active range - memcpy(arr->array + out_pos, src->array + (last_index + 1), - (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); - arr->cardinality = new_cardinality; - return false; -} -/* Even when the result would fit, it is unclear how to make an - * inplace version without inefficient copying. 
- */ - -bool array_container_negation_range_inplace(array_container_t *src, - const int range_start, - const int range_end, - container_t **dst) { - bool ans = array_container_negation_range(src, range_start, range_end, dst); - // TODO : try a real inplace version - array_container_free(src); - return ans; + dst->cardinality = (int32_t)bitset_clear_list(dst->words, card, src->array, + (uint64_t)src->cardinality); } -/* Negation across a range of the container +/* Negation across the entire range of the container * Compute the negation of src and write the result * to *dst. A true return value indicates a bitset result, * otherwise the result is an array container. * We assume that dst is not pre-allocated. In * case of failure, *dst will be NULL. */ -bool bitset_container_negation_range(const bitset_container_t *src, - const int range_start, const int range_end, - container_t **dst) { - // TODO maybe consider density-based estimate - // and sometimes build result directly as array, with - // conversion back to bitset if wrong. Or determine - // actual result cardinality, then go directly for the known final cont. - - // keep computation using bitsets as long as possible. - bitset_container_t *t = bitset_container_clone(src); - bitset_flip_range(t->words, (uint32_t)range_start, (uint32_t)range_end); - t->cardinality = bitset_container_compute_cardinality(t); - - if (t->cardinality > DEFAULT_MAX_SIZE) { - *dst = t; - return true; - } else { - *dst = array_container_from_bitset(t); - bitset_container_free(t); - return false; - } +bool bitset_container_negation(const bitset_container_t *src, + container_t **dst) { + return bitset_container_negation_range(src, 0, (1 << 16), dst); } /* inplace version */ @@ -16742,655 +17135,433 @@ bool bitset_container_negation_range(const bitset_container_t *src, * to free the container. * In all cases, the result is in *dst. 
*/ -bool bitset_container_negation_range_inplace(bitset_container_t *src, - const int range_start, - const int range_end, - container_t **dst) { - bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end); - src->cardinality = bitset_container_compute_cardinality(src); - if (src->cardinality > DEFAULT_MAX_SIZE) { - *dst = src; - return true; - } - *dst = array_container_from_bitset(src); - bitset_container_free(src); - return false; -} - -/* Negation across a range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -int run_container_negation_range(const run_container_t *src, - const int range_start, const int range_end, - container_t **dst) { - uint8_t return_typecode; - - // follows the Java implementation - if (range_end <= range_start) { - *dst = run_container_clone(src); - return RUN_CONTAINER_TYPE; - } - - run_container_t *ans = run_container_create_given_capacity( - src->n_runs + 1); // src->n_runs + 1); - int k = 0; - for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { - ans->runs[k] = src->runs[k]; - ans->n_runs++; - } - - run_container_smart_append_exclusive( - ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); - - for (; k < src->n_runs; ++k) { - run_container_smart_append_exclusive(ans, src->runs[k].value, - src->runs[k].length); - } - - *dst = convert_run_to_efficient_container(ans, &return_typecode); - if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); - - return return_typecode; +bool bitset_container_negation_inplace(bitset_container_t *src, + container_t **dst) { + return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst); } -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified 
and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_range_inplace(run_container_t *src, - const int range_start, - const int range_end, - container_t **dst) { - uint8_t return_typecode; - - if (range_end <= range_start) { - *dst = src; - return RUN_CONTAINER_TYPE; - } - - // TODO: efficient special case when range is 0 to 65535 inclusive - - if (src->capacity == src->n_runs) { - // no excess room. More checking to see if result can fit - bool last_val_before_range = false; - bool first_val_in_range = false; - bool last_val_in_range = false; - bool first_val_past_range = false; - - if (range_start > 0) - last_val_before_range = - run_container_contains(src, (uint16_t)(range_start - 1)); - first_val_in_range = run_container_contains(src, (uint16_t)range_start); - - if (last_val_before_range == first_val_in_range) { - last_val_in_range = - run_container_contains(src, (uint16_t)(range_end - 1)); - if (range_end != 0x10000) - first_val_past_range = - run_container_contains(src, (uint16_t)range_end); - - if (last_val_in_range == - first_val_past_range) { // no space for inplace - int ans = run_container_negation_range(src, range_start, - range_end, dst); - run_container_free(src); - return ans; - } - } - } - // all other cases: result will fit - - run_container_t *ans = src; - int my_nbr_runs = src->n_runs; - - ans->n_runs = 0; - int k = 0; - for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) { - // ans->runs[k] = src->runs[k]; (would be self-copy) - ans->n_runs++; - } - - // as with Java implementation, use locals to give self a buffer of depth 1 - rle16_t buffered = CROARING_MAKE_RLE16(0, 0); - rle16_t next = buffered; - if (k < my_nbr_runs) buffered = src->runs[k]; - - run_container_smart_append_exclusive( - ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); +/* Negation across the entire range of container + * Compute the negation of src and write the result + * to *dst. 
Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +int run_container_negation(const run_container_t *src, container_t **dst) { + return run_container_negation_range(src, 0, (1 << 16), dst); +} - for (; k < my_nbr_runs; ++k) { - if (k + 1 < my_nbr_runs) next = src->runs[k + 1]; +/* + * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. + */ +int run_container_negation_inplace(run_container_t *src, container_t **dst) { + return run_container_negation_range_inplace(src, 0, (1 << 16), dst); +} - run_container_smart_append_exclusive(ans, buffered.value, - buffered.length); - buffered = next; +/* Negation across a range of the container. + * Compute the negation of src and write the result + * to *dst. Returns true if the result is a bitset container + * and false for an array container. *dst is not preallocated. 
+ */ +bool array_container_negation_range(const array_container_t *src, + const int range_start, const int range_end, + container_t **dst) { + /* close port of the Java implementation */ + if (range_start >= range_end) { + *dst = array_container_clone(src); + return false; } - *dst = convert_run_to_efficient_container(ans, &return_typecode); - if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); + int32_t start_index = + binarySearch(src->array, src->cardinality, (uint16_t)range_start); + if (start_index < 0) start_index = -start_index - 1; - return return_typecode; -} + int32_t last_index = + binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); + if (last_index < 0) last_index = -last_index - 2; -#ifdef __cplusplus -} -} -} // extern "C" { namespace roaring { namespace internal { -#endif -/* end file src/containers/mixed_negation.c */ -/* begin file src/containers/mixed_subset.c */ + const int32_t current_values_in_range = last_index - start_index + 1; + const int32_t span_to_be_flipped = range_end - range_start; + const int32_t new_values_in_range = + span_to_be_flipped - current_values_in_range; + const int32_t cardinality_change = + new_values_in_range - current_values_in_range; + const int32_t new_cardinality = src->cardinality + cardinality_change; -#ifdef __cplusplus -extern "C" { -namespace roaring { -namespace internal { -#endif + if (new_cardinality > DEFAULT_MAX_SIZE) { + bitset_container_t *temp = bitset_container_from_array(src); + bitset_flip_range(temp->words, (uint32_t)range_start, + (uint32_t)range_end); + temp->cardinality = new_cardinality; + *dst = temp; + return true; + } -bool array_container_is_subset_bitset(const array_container_t* container1, - const bitset_container_t* container2) { - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality < container1->cardinality) { - return false; - } + array_container_t *arr = + array_container_create_given_capacity(new_cardinality); + 
*dst = (container_t *)arr; + if (new_cardinality == 0) { + arr->cardinality = new_cardinality; + return false; // we are done. } - for (int i = 0; i < container1->cardinality; ++i) { - if (!bitset_container_contains(container2, container1->array[i])) { - return false; + // copy stuff before the active area + memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); + + // work on the range + int32_t out_pos = start_index, in_pos = start_index; + int32_t val_in_range = range_start; + for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { + if ((uint16_t)val_in_range != src->array[in_pos]) { + arr->array[out_pos++] = (uint16_t)val_in_range; + } else { + ++in_pos; } } - return true; + for (; val_in_range < range_end; ++val_in_range) + arr->array[out_pos++] = (uint16_t)val_in_range; + + // content after the active range + memcpy(arr->array + out_pos, src->array + (last_index + 1), + (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); + arr->cardinality = new_cardinality; + return false; } -bool run_container_is_subset_array(const run_container_t* container1, - const array_container_t* container2) { - if (run_container_cardinality(container1) > container2->cardinality) - return false; - int32_t start_pos = -1, stop_pos = -1; - for (int i = 0; i < container1->n_runs; ++i) { - int32_t start = container1->runs[i].value; - int32_t stop = start + container1->runs[i].length; - start_pos = advanceUntil(container2->array, stop_pos, - container2->cardinality, start); - stop_pos = advanceUntil(container2->array, stop_pos, - container2->cardinality, stop); - if (stop_pos == container2->cardinality) { - return false; - } else if (stop_pos - start_pos != stop - start || - container2->array[start_pos] != start || - container2->array[stop_pos] != stop) { - return false; - } - } - return true; +/* Even when the result would fit, it is unclear how to make an + * inplace version without inefficient copying. 
+ */ + +bool array_container_negation_range_inplace(array_container_t *src, + const int range_start, + const int range_end, + container_t **dst) { + bool ans = array_container_negation_range(src, range_start, range_end, dst); + // TODO : try a real inplace version + array_container_free(src); + return ans; } -bool array_container_is_subset_run(const array_container_t* container1, - const run_container_t* container2) { - if (container1->cardinality > run_container_cardinality(container2)) - return false; - int i_array = 0, i_run = 0; - while (i_array < container1->cardinality && i_run < container2->n_runs) { - uint32_t start = container2->runs[i_run].value; - uint32_t stop = start + container2->runs[i_run].length; - if (container1->array[i_array] < start) { - return false; - } else if (container1->array[i_array] > stop) { - i_run++; - } else { // the value of the array is in the run - i_array++; - } - } - if (i_array == container1->cardinality) { +/* Negation across a range of the container + * Compute the negation of src and write the result + * to *dst. A true return value indicates a bitset result, + * otherwise the result is an array container. + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool bitset_container_negation_range(const bitset_container_t *src, + const int range_start, const int range_end, + container_t **dst) { + // TODO maybe consider density-based estimate + // and sometimes build result directly as array, with + // conversion back to bitset if wrong. Or determine + // actual result cardinality, then go directly for the known final cont. + + // keep computation using bitsets as long as possible. 
+ bitset_container_t *t = bitset_container_clone(src); + bitset_flip_range(t->words, (uint32_t)range_start, (uint32_t)range_end); + t->cardinality = bitset_container_compute_cardinality(t); + + if (t->cardinality > DEFAULT_MAX_SIZE) { + *dst = t; return true; } else { + *dst = array_container_from_bitset(t); + bitset_container_free(t); return false; } } -bool run_container_is_subset_bitset(const run_container_t* container1, - const bitset_container_t* container2) { - // todo: this code could be much faster - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality < run_container_cardinality(container1)) { - return false; - } - } else { - int32_t card = bitset_container_compute_cardinality( - container2); // modify container2? - if (card < run_container_cardinality(container1)) { - return false; - } - } - for (int i = 0; i < container1->n_runs; ++i) { - uint32_t run_start = container1->runs[i].value; - uint32_t le = container1->runs[i].length; - for (uint32_t j = run_start; j <= run_start + le; ++j) { - if (!bitset_container_contains(container2, j)) { - return false; - } - } - } - return true; -} - -bool bitset_container_is_subset_run(const bitset_container_t* container1, - const run_container_t* container2) { - // todo: this code could be much faster - if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container1->cardinality > run_container_cardinality(container2)) { - return false; - } - } - int32_t i_bitset = 0, i_run = 0; - while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS && - i_run < container2->n_runs) { - uint64_t w = container1->words[i_bitset]; - while (w != 0 && i_run < container2->n_runs) { - uint32_t start = container2->runs[i_run].value; - uint32_t stop = start + container2->runs[i_run].length; - uint64_t t = w & (~w + 1); - uint16_t r = i_bitset * 64 + roaring_trailing_zeroes(w); - if (r < start) { - return false; - } else if (r > stop) { - i_run++; - continue; - } else { - w ^= t; - } - } - if (w == 
0) { - i_bitset++; - } else { - return false; - } - } - if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) { - // terminated iterating on the run containers, check that rest of bitset - // is empty - for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) { - if (container1->words[i_bitset] != 0) { - return false; - } - } +/* inplace version */ +/* + * Same as bitset_container_negation except that if the output is to + * be a + * bitset_container_t, then src is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. + */ +bool bitset_container_negation_range_inplace(bitset_container_t *src, + const int range_start, + const int range_end, + container_t **dst) { + bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end); + src->cardinality = bitset_container_compute_cardinality(src); + if (src->cardinality > DEFAULT_MAX_SIZE) { + *dst = src; + return true; } - return true; + *dst = array_container_from_bitset(src); + bitset_container_free(src); + return false; } -#ifdef __cplusplus -} -} -} // extern "C" { namespace roaring { namespace internal { -#endif -/* end file src/containers/mixed_subset.c */ -/* begin file src/containers/mixed_union.c */ -/* - * mixed_union.c - * +/* Negation across a range of container + * Compute the negation of src and write the result + * to *dst. Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. 
*/ +int run_container_negation_range(const run_container_t *src, + const int range_start, const int range_end, + container_t **dst) { + uint8_t return_typecode; -#include -#include + // follows the Java implementation + if (range_end <= range_start) { + *dst = run_container_clone(src); + return RUN_CONTAINER_TYPE; + } + run_container_t *ans = run_container_create_given_capacity( + src->n_runs + 1); // src->n_runs + 1); + int k = 0; + for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { + ans->runs[k] = src->runs[k]; + ans->n_runs++; + } -#ifdef __cplusplus -extern "C" { -namespace roaring { -namespace internal { -#endif + run_container_smart_append_exclusive( + ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); -/* Compute the union of src_1 and src_2 and write the result to - * dst. */ -void array_bitset_container_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - dst->cardinality = (int32_t)bitset_set_list_withcard( - dst->words, dst->cardinality, src_1->array, src_1->cardinality); -} + for (; k < src->n_runs; ++k) { + run_container_smart_append_exclusive(ans, src->runs[k].value, + src->runs[k].length); + } -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
*/ -void array_bitset_container_lazy_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - bitset_set_list(dst->words, src_1->array, src_1->cardinality); - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; -} + *dst = convert_run_to_efficient_container(ans, &return_typecode); + if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); -void run_bitset_container_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - assert(!run_container_is_full(src_1)); // catch this case upstream - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_set_lenrange(dst->words, rle.value, rle.length); - } - dst->cardinality = bitset_container_compute_cardinality(dst); + return return_typecode; } -void run_bitset_container_lazy_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - assert(!run_container_is_full(src_1)); // catch this case upstream - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_set_lenrange(dst->words, rle.value, rle.length); - } - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; -} +/* + * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. + */ +int run_container_negation_range_inplace(run_container_t *src, + const int range_start, + const int range_end, + container_t **dst) { + uint8_t return_typecode; -// why do we leave the result as a run container?? 
-void array_run_container_union(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - if (run_container_is_full(src_2)) { - run_container_copy(src_2, dst); - return; - } - // TODO: see whether the "2*" is spurious - run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false); - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t previousrle; - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - previousrle = run_container_append_first(dst, src_2->runs[rlepos]); - rlepos++; - } else { - previousrle = - run_container_append_value_first(dst, src_1->array[arraypos]); - arraypos++; - } - while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - run_container_append(dst, src_2->runs[rlepos], &previousrle); - rlepos++; - } else { - run_container_append_value(dst, src_1->array[arraypos], - &previousrle); - arraypos++; - } + if (range_end <= range_start) { + *dst = src; + return RUN_CONTAINER_TYPE; } - if (arraypos < src_1->cardinality) { - while (arraypos < src_1->cardinality) { - run_container_append_value(dst, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } else { - while (rlepos < src_2->n_runs) { - run_container_append(dst, src_2->runs[rlepos], &previousrle); - rlepos++; + + // TODO: efficient special case when range is 0 to 65535 inclusive + + if (src->capacity == src->n_runs) { + // no excess room. 
More checking to see if result can fit + bool last_val_before_range = false; + bool first_val_in_range = false; + bool last_val_in_range = false; + bool first_val_past_range = false; + + if (range_start > 0) + last_val_before_range = + run_container_contains(src, (uint16_t)(range_start - 1)); + first_val_in_range = run_container_contains(src, (uint16_t)range_start); + + if (last_val_before_range == first_val_in_range) { + last_val_in_range = + run_container_contains(src, (uint16_t)(range_end - 1)); + if (range_end != 0x10000) + first_val_past_range = + run_container_contains(src, (uint16_t)range_end); + + if (last_val_in_range == + first_val_past_range) { // no space for inplace + int ans = run_container_negation_range(src, range_start, + range_end, dst); + run_container_free(src); + return ans; + } } } -} + // all other cases: result will fit -void array_run_container_inplace_union(const array_container_t *src_1, - run_container_t *src_2) { - if (run_container_is_full(src_2)) { - return; + run_container_t *ans = src; + int my_nbr_runs = src->n_runs; + + ans->n_runs = 0; + int k = 0; + for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) { + // ans->runs[k] = src->runs[k]; (would be self-copy) + ans->n_runs++; } - const int32_t maxoutput = src_1->cardinality + src_2->n_runs; - const int32_t neededcapacity = maxoutput + src_2->n_runs; - if (src_2->capacity < neededcapacity) - run_container_grow(src_2, neededcapacity, true); - memmove(src_2->runs + maxoutput, src_2->runs, - src_2->n_runs * sizeof(rle16_t)); - rle16_t *inputsrc2 = src_2->runs + maxoutput; - int32_t rlepos = 0; - int32_t arraypos = 0; - int src2nruns = src_2->n_runs; - src_2->n_runs = 0; - rle16_t previousrle; + // as with Java implementation, use locals to give self a buffer of depth 1 + rle16_t buffered = CROARING_MAKE_RLE16(0, 0); + rle16_t next = buffered; + if (k < my_nbr_runs) buffered = src->runs[k]; - if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { - previousrle = 
run_container_append_first(src_2, inputsrc2[rlepos]); - rlepos++; - } else { - previousrle = - run_container_append_value_first(src_2, src_1->array[arraypos]); - arraypos++; - } + run_container_smart_append_exclusive( + ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); - while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { - if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { - run_container_append(src_2, inputsrc2[rlepos], &previousrle); - rlepos++; - } else { - run_container_append_value(src_2, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } - if (arraypos < src_1->cardinality) { - while (arraypos < src_1->cardinality) { - run_container_append_value(src_2, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } else { - while (rlepos < src2nruns) { - run_container_append(src_2, inputsrc2[rlepos], &previousrle); - rlepos++; - } + for (; k < my_nbr_runs; ++k) { + if (k + 1 < my_nbr_runs) next = src->runs[k + 1]; + + run_container_smart_append_exclusive(ans, buffered.value, + buffered.length); + buffered = next; } + + *dst = convert_run_to_efficient_container(ans, &return_typecode); + if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); + + return return_typecode; } -bool array_array_container_union(const array_container_t *src_1, - const array_container_t *src_2, - container_t **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - if (totalCardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) { - array_container_union(src_1, src_2, CAST_array(*dst)); - } else { - return true; // otherwise failure won't be caught +#ifdef __cplusplus +} +} +} // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/mixed_negation.c */ +/* begin file src/containers/mixed_subset.c */ + +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace internal { +#endif + +bool 
array_container_is_subset_bitset(const array_container_t* container1, + const bitset_container_t* container2) { + if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { + if (container2->cardinality < container1->cardinality) { + return false; } - return false; // not a bitset } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = CAST_bitset(*dst); - bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); - ourbitset->cardinality = (int32_t)bitset_set_list_withcard( - ourbitset->words, src_1->cardinality, src_2->array, - src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! - *dst = array_container_from_bitset(ourbitset); - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset + for (int i = 0; i < container1->cardinality; ++i) { + if (!bitset_container_contains(container2, container1->array[i])) { + return false; } } - return returnval; + return true; } -bool array_array_container_inplace_union(array_container_t *src_1, - const array_container_t *src_2, - container_t **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - *dst = NULL; - if (totalCardinality <= DEFAULT_MAX_SIZE) { - if (src_1->capacity < totalCardinality) { - *dst = array_container_create_given_capacity( - 2 * totalCardinality); // be purposefully generous - if (*dst != NULL) { - array_container_union(src_1, src_2, CAST_array(*dst)); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } else { - memmove(src_1->array + src_2->cardinality, src_1->array, - src_1->cardinality * sizeof(uint16_t)); - // In theory, we could use fast_union_uint16, but it is unsafe. It - // fails with Intel compilers in particular. 
- // https://github.com/RoaringBitmap/CRoaring/pull/452 - // See report https://github.com/RoaringBitmap/CRoaring/issues/476 - src_1->cardinality = (int32_t)union_uint16( - src_1->array + src_2->cardinality, src_1->cardinality, - src_2->array, src_2->cardinality, src_1->array); - return false; // not a bitset - } - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = CAST_bitset(*dst); - bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); - ourbitset->cardinality = (int32_t)bitset_set_list_withcard( - ourbitset->words, src_1->cardinality, src_2->array, - src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! - if (src_1->capacity < ourbitset->cardinality) { - array_container_grow(src_1, ourbitset->cardinality, false); - } - - bitset_extract_setbits_uint16(ourbitset->words, - BITSET_CONTAINER_SIZE_IN_WORDS, - src_1->array, 0); - src_1->cardinality = ourbitset->cardinality; - *dst = src_1; - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset +bool run_container_is_subset_array(const run_container_t* container1, + const array_container_t* container2) { + if (run_container_cardinality(container1) > container2->cardinality) + return false; + int32_t start_pos = -1, stop_pos = -1; + for (int i = 0; i < container1->n_runs; ++i) { + int32_t start = container1->runs[i].value; + int32_t stop = start + container1->runs[i].length; + start_pos = advanceUntil(container2->array, stop_pos, + container2->cardinality, start); + stop_pos = advanceUntil(container2->array, stop_pos, + container2->cardinality, stop); + if (stop_pos == container2->cardinality) { + return false; + } else if (stop_pos - start_pos != stop - start || + container2->array[start_pos] != start || + container2->array[stop_pos] != stop) { + return false; } } - return returnval; + return true; } -bool array_array_container_lazy_union(const 
array_container_t *src_1, - const array_container_t *src_2, - container_t **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - // - // We assume that operations involving bitset containers will be faster than - // operations involving solely array containers, except maybe when array - // containers are small. Indeed, for example, it is cheap to compute the - // union between an array and a bitset container, generally more so than - // between a large array and another array. So it is advantageous to favour - // bitset containers during the computation. Of course, if we convert array - // containers eagerly to bitset containers, we may later need to revert the - // bitset containers to array containerr to satisfy the Roaring format - // requirements, but such one-time conversions at the end may not be overly - // expensive. We arrived to this design based on extensive benchmarking. - // - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) { - array_container_union(src_1, src_2, CAST_array(*dst)); - } else { - return true; // otherwise failure won't be caught +bool array_container_is_subset_run(const array_container_t* container1, + const run_container_t* container2) { + if (container1->cardinality > run_container_cardinality(container2)) + return false; + int i_array = 0, i_run = 0; + while (i_array < container1->cardinality && i_run < container2->n_runs) { + uint32_t start = container2->runs[i_run].value; + uint32_t stop = start + container2->runs[i_run].length; + if (container1->array[i_array] < start) { + return false; + } else if (container1->array[i_array] > stop) { + i_run++; + } else { // the value of the array is in the run + i_array++; } - return false; // not a bitset } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = CAST_bitset(*dst); - 
bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); - bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; + if (i_array == container1->cardinality) { + return true; + } else { + return false; } - return returnval; } -bool array_array_container_lazy_inplace_union(array_container_t *src_1, - const array_container_t *src_2, - container_t **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - *dst = NULL; - // - // We assume that operations involving bitset containers will be faster than - // operations involving solely array containers, except maybe when array - // containers are small. Indeed, for example, it is cheap to compute the - // union between an array and a bitset container, generally more so than - // between a large array and another array. So it is advantageous to favour - // bitset containers during the computation. Of course, if we convert array - // containers eagerly to bitset containers, we may later need to revert the - // bitset containers to array containerr to satisfy the Roaring format - // requirements, but such one-time conversions at the end may not be overly - // expensive. We arrived to this design based on extensive benchmarking. - // - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - if (src_1->capacity < totalCardinality) { - *dst = array_container_create_given_capacity( - 2 * totalCardinality); // be purposefully generous - if (*dst != NULL) { - array_container_union(src_1, src_2, CAST_array(*dst)); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } else { - memmove(src_1->array + src_2->cardinality, src_1->array, - src_1->cardinality * sizeof(uint16_t)); - /* - Next line is safe: - - We just need to focus on the reading and writing performed on - array1. 
In `union_vector16`, both vectorized and scalar code still - obey the basic rule: read from two inputs, do the union, and then - write the output. - - Let's say the length(cardinality) of input2 is L2: - ``` - |<- L2 ->| - array1: [output--- |input 1---|---] - array2: [input 2---] - ``` - Let's define 3 __m128i pointers, `pos1` starts from `input1`, - `pos2` starts from `input2`, these 2 point at the next byte to - read, `out` starts from `output`, pointing at the next byte to - overwrite. - ``` - array1: [output--- |input 1---|---] - ^ ^ - out pos1 - array2: [input 2---] - ^ - pos2 - ``` - The union output always contains less or equal number of elements - than all inputs added, so we have: - ``` - out <= pos1 + pos2 - ``` - therefore: - ``` - out <= pos1 + L2 - ``` - which means you will not overwrite data beyond pos1, so the data - haven't read is safe, and we don't care the data already read. - */ - src_1->cardinality = (int32_t)fast_union_uint16( - src_1->array + src_2->cardinality, src_1->cardinality, - src_2->array, src_2->cardinality, src_1->array); - return false; // not a bitset +bool run_container_is_subset_bitset(const run_container_t* container1, + const bitset_container_t* container2) { + // todo: this code could be much faster + if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { + if (container2->cardinality < run_container_cardinality(container1)) { + return false; + } + } else { + int32_t card = bitset_container_compute_cardinality( + container2); // modify container2? 
+ if (card < run_container_cardinality(container1)) { + return false; + } + } + for (int i = 0; i < container1->n_runs; ++i) { + uint32_t run_start = container1->runs[i].value; + uint32_t le = container1->runs[i].length; + for (uint32_t j = run_start; j <= run_start + le; ++j) { + if (!bitset_container_contains(container2, j)) { + return false; + } } } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = CAST_bitset(*dst); - bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); - bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; + return true; +} + +bool bitset_container_is_subset_run(const bitset_container_t* container1, + const run_container_t* container2) { + // todo: this code could be much faster + if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) { + if (container1->cardinality > run_container_cardinality(container2)) { + return false; + } } - return returnval; + int32_t i_bitset = 0, i_run = 0; + while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS && + i_run < container2->n_runs) { + uint64_t w = container1->words[i_bitset]; + while (w != 0 && i_run < container2->n_runs) { + uint32_t start = container2->runs[i_run].value; + uint32_t stop = start + container2->runs[i_run].length; + uint64_t t = w & (~w + 1); + uint16_t r = i_bitset * 64 + roaring_trailing_zeroes(w); + if (r < start) { + return false; + } else if (r > stop) { + i_run++; + continue; + } else { + w ^= t; + } + } + if (w == 0) { + i_bitset++; + } else { + return false; + } + } + if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) { + // terminated iterating on the run containers, check that rest of bitset + // is empty + for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) { + if (container1->words[i_bitset] != 0) { + return false; + } + } + } + return true; } #ifdef __cplusplus @@ -17398,10 +17569,11 @@ bool 
array_array_container_lazy_inplace_union(array_container_t *src_1, } } // extern "C" { namespace roaring { namespace internal { #endif -/* end file src/containers/mixed_union.c */ -/* begin file src/containers/mixed_xor.c */ +/* end file src/containers/mixed_subset.c */ +/* begin file src/containers/mixed_union.c */ /* - * mixed_xor.c + * mixed_union.c + * */ #include @@ -17414,3193 +17586,3333 @@ namespace roaring { namespace internal { #endif -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). - * Result is true iff dst is a bitset */ -bool array_bitset_container_xor(const array_container_t *src_1, - const bitset_container_t *src_2, - container_t **dst) { - bitset_container_t *result = bitset_container_create(); - bitset_container_copy(src_2, result); - result->cardinality = (int32_t)bitset_flip_list_withcard( - result->words, result->cardinality, src_1->array, src_1->cardinality); - - // do required type conversions. - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset - } - *dst = result; - return true; // bitset +/* Compute the union of src_1 and src_2 and write the result to + * dst. */ +void array_bitset_container_union(const array_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + if (src_2 != dst) bitset_container_copy(src_2, dst); + dst->cardinality = (int32_t)bitset_set_list_withcard( + dst->words, dst->cardinality, src_1->array, src_1->cardinality); } -/* Compute the xor of src_1 and src_2 and write the result to +/* Compute the union of src_1 and src_2 and write the result to * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
- */ - -void array_bitset_container_lazy_xor(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ +void array_bitset_container_lazy_union(const array_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { if (src_2 != dst) bitset_container_copy(src_2, dst); - bitset_flip_list(dst->words, src_1->array, src_1->cardinality); + bitset_set_list(dst->words, src_1->array, src_1->cardinality); dst->cardinality = BITSET_UNKNOWN_CARDINALITY; } -/* Compute the xor of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_xor(const run_container_t *src_1, - const bitset_container_t *src_2, - container_t **dst) { - bitset_container_t *result = bitset_container_create(); - - bitset_container_copy(src_2, result); +void run_bitset_container_union(const run_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + assert(!run_container_is_full(src_1)); // catch this case upstream + if (src_2 != dst) bitset_container_copy(src_2, dst); for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { rle16_t rle = src_1->runs[rlepos]; - bitset_flip_range(result->words, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - result->cardinality = bitset_container_compute_cardinality(result); - - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset + bitset_set_lenrange(dst->words, rle.value, rle.length); } - *dst = result; - return true; // bitset + dst->cardinality = bitset_container_compute_cardinality(dst); } -/* lazy xor. 
Dst is initialized and may be equal to src_2. - * Result is left as a bitset container, even if actual - * cardinality would dictate an array container. - */ - -void run_bitset_container_lazy_xor(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { +void run_bitset_container_lazy_union(const run_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + assert(!run_container_is_full(src_1)); // catch this case upstream if (src_2 != dst) bitset_container_copy(src_2, dst); for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { rle16_t rle = src_1->runs[rlepos]; - bitset_flip_range(dst->words, rle.value, - rle.value + rle.length + UINT32_C(1)); + bitset_set_lenrange(dst->words, rle.value, rle.length); } dst->cardinality = BITSET_UNKNOWN_CARDINALITY; } -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int array_run_container_xor(const array_container_t *src_1, - const run_container_t *src_2, container_t **dst) { - // semi following Java XOR implementation as of May 2016 - // the C OR implementation works quite differently and can return a run - // container - // TODO could optimize for full run containers. - - // use of lazy following Java impl. - const int arbitrary_threshold = 32; - if (src_1->cardinality < arbitrary_threshold) { - run_container_t *ans = run_container_create(); - array_run_container_lazy_xor(src_1, src_2, ans); // keeps runs. - uint8_t typecode_after; - *dst = - convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; +// why do we leave the result as a run container?? 
+void array_run_container_union(const array_container_t *src_1, + const run_container_t *src_2, + run_container_t *dst) { + if (run_container_is_full(src_2)) { + run_container_copy(src_2, dst); + return; } - - int card = run_container_cardinality(src_2); - if (card <= DEFAULT_MAX_SIZE) { - // Java implementation works with the array, xoring the run elements via - // iterator - array_container_t *temp = array_container_from_run(src_2); - bool ret_is_bitset = array_array_container_xor(temp, src_1, dst); - array_container_free(temp); - return ret_is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE; - - } else { // guess that it will end up as a bitset - bitset_container_t *result = bitset_container_from_run(src_2); - bool is_bitset = bitset_array_container_ixor(result, src_1, dst); - // any necessary type conversion has been done by the ixor - int retval = (is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE); - return retval; + // TODO: see whether the "2*" is spurious + run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false); + int32_t rlepos = 0; + int32_t arraypos = 0; + rle16_t previousrle; + if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { + previousrle = run_container_append_first(dst, src_2->runs[rlepos]); + rlepos++; + } else { + previousrle = + run_container_append_value_first(dst, src_1->array[arraypos]); + arraypos++; + } + while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { + if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { + run_container_append(dst, src_2->runs[rlepos], &previousrle); + rlepos++; + } else { + run_container_append_value(dst, src_1->array[arraypos], + &previousrle); + arraypos++; + } + } + if (arraypos < src_1->cardinality) { + while (arraypos < src_1->cardinality) { + run_container_append_value(dst, src_1->array[arraypos], + &previousrle); + arraypos++; + } + } else { + while (rlepos < src_2->n_runs) { + run_container_append(dst, src_2->runs[rlepos], &previousrle); + 
rlepos++; + } } } -/* Dst is a valid run container. (Can it be src_2? Let's say not.) - * Leaves result as run container, even if other options are - * smaller. - */ - -void array_run_container_lazy_xor(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - run_container_grow(dst, src_1->cardinality + src_2->n_runs, false); +void array_run_container_inplace_union(const array_container_t *src_1, + run_container_t *src_2) { + if (run_container_is_full(src_2)) { + return; + } + const int32_t maxoutput = src_1->cardinality + src_2->n_runs; + const int32_t neededcapacity = maxoutput + src_2->n_runs; + if (src_2->capacity < neededcapacity) + run_container_grow(src_2, neededcapacity, true); + memmove(src_2->runs + maxoutput, src_2->runs, + src_2->n_runs * sizeof(rle16_t)); + rle16_t *inputsrc2 = src_2->runs + maxoutput; int32_t rlepos = 0; int32_t arraypos = 0; - dst->n_runs = 0; + int src2nruns = src_2->n_runs; + src_2->n_runs = 0; + + rle16_t previousrle; + + if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { + previousrle = run_container_append_first(src_2, inputsrc2[rlepos]); + rlepos++; + } else { + previousrle = + run_container_append_value_first(src_2, src_1->array[arraypos]); + arraypos++; + } - while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, - src_2->runs[rlepos].length); + while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { + if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { + run_container_append(src_2, inputsrc2[rlepos], &previousrle); rlepos++; } else { - run_container_smart_append_exclusive(dst, src_1->array[arraypos], - 0); + run_container_append_value(src_2, src_1->array[arraypos], + &previousrle); arraypos++; } } - while (arraypos < src_1->cardinality) { - run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0); - arraypos++; - 
} - while (rlepos < src_2->n_runs) { - run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, - src_2->runs[rlepos].length); - rlepos++; + if (arraypos < src_1->cardinality) { + while (arraypos < src_1->cardinality) { + run_container_append_value(src_2, src_1->array[arraypos], + &previousrle); + arraypos++; + } + } else { + while (rlepos < src2nruns) { + run_container_append(src_2, inputsrc2[rlepos], &previousrle); + rlepos++; + } } } -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int run_run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, container_t **dst) { - run_container_t *ans = run_container_create(); - run_container_xor(src_1, src_2, ans); - uint8_t typecode_after; - *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; -} - -/* - * Java implementation (as of May 2016) for array_run, run_run - * and bitset_run don't do anything different for inplace. 
- * Could adopt the mixed_union.c approach instead (ie, using - * smart_append_exclusive) - * - */ - -bool array_array_container_xor(const array_container_t *src_1, - const array_container_t *src_2, - container_t **dst) { - int totalCardinality = - src_1->cardinality + src_2->cardinality; // upper bound +bool array_array_container_union(const array_container_t *src_1, + const array_container_t *src_2, + container_t **dst) { + int totalCardinality = src_1->cardinality + src_2->cardinality; if (totalCardinality <= DEFAULT_MAX_SIZE) { *dst = array_container_create_given_capacity(totalCardinality); - array_container_xor(src_1, src_2, CAST_array(*dst)); + if (*dst != NULL) { + array_container_union(src_1, src_2, CAST_array(*dst)); + } else { + return true; // otherwise failure won't be caught + } return false; // not a bitset } - *dst = bitset_container_from_array(src_1); + *dst = bitset_container_create(); bool returnval = true; // expect a bitset - bitset_container_t *ourbitset = CAST_bitset(*dst); - ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard( - ourbitset->words, src_1->cardinality, src_2->array, src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! - *dst = array_container_from_bitset(ourbitset); - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset + if (*dst != NULL) { + bitset_container_t *ourbitset = CAST_bitset(*dst); + bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); + ourbitset->cardinality = (int32_t)bitset_set_list_withcard( + ourbitset->words, src_1->cardinality, src_2->array, + src_2->cardinality); + if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { + // need to convert! 
+ *dst = array_container_from_bitset(ourbitset); + bitset_container_free(ourbitset); + returnval = false; // not going to be a bitset + } } + return returnval; +} + +bool array_array_container_inplace_union(array_container_t *src_1, + const array_container_t *src_2, + container_t **dst) { + int totalCardinality = src_1->cardinality + src_2->cardinality; + *dst = NULL; + if (totalCardinality <= DEFAULT_MAX_SIZE) { + if (src_1->capacity < totalCardinality) { + *dst = array_container_create_given_capacity( + 2 * totalCardinality); // be purposefully generous + if (*dst != NULL) { + array_container_union(src_1, src_2, CAST_array(*dst)); + } else { + return true; // otherwise failure won't be caught + } + return false; // not a bitset + } else { + memmove(src_1->array + src_2->cardinality, src_1->array, + src_1->cardinality * sizeof(uint16_t)); + // In theory, we could use fast_union_uint16, but it is unsafe. It + // fails with Intel compilers in particular. + // https://github.com/RoaringBitmap/CRoaring/pull/452 + // See report https://github.com/RoaringBitmap/CRoaring/issues/476 + src_1->cardinality = (int32_t)union_uint16( + src_1->array + src_2->cardinality, src_1->cardinality, + src_2->array, src_2->cardinality, src_1->array); + return false; // not a bitset + } + } + *dst = bitset_container_create(); + bool returnval = true; // expect a bitset + if (*dst != NULL) { + bitset_container_t *ourbitset = CAST_bitset(*dst); + bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); + ourbitset->cardinality = (int32_t)bitset_set_list_withcard( + ourbitset->words, src_1->cardinality, src_2->array, + src_2->cardinality); + if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { + // need to convert! 
+ if (src_1->capacity < ourbitset->cardinality) { + array_container_grow(src_1, ourbitset->cardinality, false); + } + bitset_extract_setbits_uint16(ourbitset->words, + BITSET_CONTAINER_SIZE_IN_WORDS, + src_1->array, 0); + src_1->cardinality = ourbitset->cardinality; + *dst = src_1; + bitset_container_free(ourbitset); + returnval = false; // not going to be a bitset + } + } return returnval; } -bool array_array_container_lazy_xor(const array_container_t *src_1, - const array_container_t *src_2, - container_t **dst) { +bool array_array_container_lazy_union(const array_container_t *src_1, + const array_container_t *src_2, + container_t **dst) { int totalCardinality = src_1->cardinality + src_2->cardinality; // // We assume that operations involving bitset containers will be faster than // operations involving solely array containers, except maybe when array // containers are small. Indeed, for example, it is cheap to compute the - // exclusive union between an array and a bitset container, generally more - // so than between a large array and another array. So it is advantageous to - // favour bitset containers during the computation. Of course, if we convert - // array containers eagerly to bitset containers, we may later need to - // revert the bitset containers to array containerr to satisfy the Roaring - // format requirements, but such one-time conversions at the end may not be - // overly expensive. We arrived to this design based on extensive - // benchmarking on unions. For XOR/exclusive union, we simply followed the - // heuristic used by the unions (see mixed_union.c). Further tuning is - // possible. + // union between an array and a bitset container, generally more so than + // between a large array and another array. So it is advantageous to favour + // bitset containers during the computation. 
Of course, if we convert array + // containers eagerly to bitset containers, we may later need to revert the + // bitset containers to array containerr to satisfy the Roaring format + // requirements, but such one-time conversions at the end may not be overly + // expensive. We arrived to this design based on extensive benchmarking. // if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) array_container_xor(src_1, src_2, CAST_array(*dst)); + if (*dst != NULL) { + array_container_union(src_1, src_2, CAST_array(*dst)); + } else { + return true; // otherwise failure won't be caught + } return false; // not a bitset } - *dst = bitset_container_from_array(src_1); - bool returnval = true; // expect a bitset (maybe, for XOR??) + *dst = bitset_container_create(); + bool returnval = true; // expect a bitset + if (*dst != NULL) { + bitset_container_t *ourbitset = CAST_bitset(*dst); + bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); + bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality); + ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; + } + return returnval; +} + +bool array_array_container_lazy_inplace_union(array_container_t *src_1, + const array_container_t *src_2, + container_t **dst) { + int totalCardinality = src_1->cardinality + src_2->cardinality; + *dst = NULL; + // + // We assume that operations involving bitset containers will be faster than + // operations involving solely array containers, except maybe when array + // containers are small. Indeed, for example, it is cheap to compute the + // union between an array and a bitset container, generally more so than + // between a large array and another array. So it is advantageous to favour + // bitset containers during the computation. 
Of course, if we convert array + // containers eagerly to bitset containers, we may later need to revert the + // bitset containers to array containerr to satisfy the Roaring format + // requirements, but such one-time conversions at the end may not be overly + // expensive. We arrived to this design based on extensive benchmarking. + // + if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { + if (src_1->capacity < totalCardinality) { + *dst = array_container_create_given_capacity( + 2 * totalCardinality); // be purposefully generous + if (*dst != NULL) { + array_container_union(src_1, src_2, CAST_array(*dst)); + } else { + return true; // otherwise failure won't be caught + } + return false; // not a bitset + } else { + memmove(src_1->array + src_2->cardinality, src_1->array, + src_1->cardinality * sizeof(uint16_t)); + /* + Next line is safe: + + We just need to focus on the reading and writing performed on + array1. In `union_vector16`, both vectorized and scalar code still + obey the basic rule: read from two inputs, do the union, and then + write the output. + + Let's say the length(cardinality) of input2 is L2: + ``` + |<- L2 ->| + array1: [output--- |input 1---|---] + array2: [input 2---] + ``` + Let's define 3 __m128i pointers, `pos1` starts from `input1`, + `pos2` starts from `input2`, these 2 point at the next byte to + read, `out` starts from `output`, pointing at the next byte to + overwrite. + ``` + array1: [output--- |input 1---|---] + ^ ^ + out pos1 + array2: [input 2---] + ^ + pos2 + ``` + The union output always contains less or equal number of elements + than all inputs added, so we have: + ``` + out <= pos1 + pos2 + ``` + therefore: + ``` + out <= pos1 + L2 + ``` + which means you will not overwrite data beyond pos1, so the data + haven't read is safe, and we don't care the data already read. 
+ */ + src_1->cardinality = (int32_t)fast_union_uint16( + src_1->array + src_2->cardinality, src_1->cardinality, + src_2->array, src_2->cardinality, src_1->array); + return false; // not a bitset + } + } + *dst = bitset_container_create(); + bool returnval = true; // expect a bitset if (*dst != NULL) { bitset_container_t *ourbitset = CAST_bitset(*dst); - bitset_flip_list(ourbitset->words, src_2->array, src_2->cardinality); + bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); + bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality); ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; } return returnval; } -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_xor(const bitset_container_t *src_1, - const bitset_container_t *src_2, - container_t **dst) { - bitset_container_t *ans = bitset_container_create(); - int card = bitset_container_xor(src_1, src_2, ans); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(ans); - bitset_container_free(ans); - return false; // not bitset - } else { - *dst = ans; - return true; - } -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. 
- * Returns true iff dst is a bitset */ - -bool bitset_array_container_ixor(bitset_container_t *src_1, - const array_container_t *src_2, - container_t **dst) { - *dst = src_1; - src_1->cardinality = (uint32_t)bitset_flip_list_withcard( - src_1->words, src_1->cardinality, src_2->array, src_2->cardinality); - - if (src_1->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else - return true; -} - -/* a bunch of in-place, some of which may not *really* be inplace. - * TODO: write actual inplace routine if efficiency warrants it - * Anything inplace with a bitset is a good candidate - */ - -bool bitset_bitset_container_ixor(bitset_container_t *src_1, - const bitset_container_t *src_2, - container_t **dst) { - int card = bitset_container_xor(src_1, src_2, src_1); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else { - *dst = src_1; - return true; - } -} - -bool array_bitset_container_ixor(array_container_t *src_1, - const bitset_container_t *src_2, - container_t **dst) { - bool ans = array_bitset_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; -} - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. 
- */ - -bool run_bitset_container_ixor(run_container_t *src_1, - const bitset_container_t *src_2, - container_t **dst) { - bool ans = run_bitset_container_xor(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} - -bool bitset_run_container_ixor(bitset_container_t *src_1, - const run_container_t *src_2, - container_t **dst) { - bool ans = run_bitset_container_xor(src_2, src_1, dst); - bitset_container_free(src_1); - return ans; -} - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int array_run_container_ixor(array_container_t *src_1, - const run_container_t *src_2, container_t **dst) { - int ans = array_run_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; -} - -int run_array_container_ixor(run_container_t *src_1, - const array_container_t *src_2, - container_t **dst) { - int ans = array_run_container_xor(src_2, src_1, dst); - run_container_free(src_1); - return ans; -} - -bool array_array_container_ixor(array_container_t *src_1, - const array_container_t *src_2, - container_t **dst) { - bool ans = array_array_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; -} - -int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, - container_t **dst) { - int ans = run_run_container_xor(src_1, src_2, dst); - run_container_free(src_1); - return ans; -} - #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace internal { #endif -/* end file src/containers/mixed_xor.c */ -/* begin file src/containers/run.c */ -#include -#include +/* end file src/containers/mixed_union.c */ +/* begin file src/containers/mixed_xor.c */ +/* + * mixed_xor.c + */ + +#include +#include -#if CROARING_IS_X64 -#ifndef CROARING_COMPILER_SUPPORTS_AVX512 -#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." 
-#endif // CROARING_COMPILER_SUPPORTS_AVX512 -#endif -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wuninitialized" -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif #ifdef __cplusplus extern "C" { namespace roaring { namespace internal { #endif -extern inline uint16_t run_container_minimum(const run_container_t *run); -extern inline uint16_t run_container_maximum(const run_container_t *run); -extern inline int32_t interleavedBinarySearch(const rle16_t *array, - int32_t lenarray, uint16_t ikey); -extern inline bool run_container_contains(const run_container_t *run, - uint16_t pos); -extern inline int run_container_index_equalorlarger(const run_container_t *arr, - uint16_t x); -extern inline bool run_container_is_full(const run_container_t *run); -extern inline bool run_container_nonzero_cardinality(const run_container_t *rc); -extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs); -extern inline run_container_t *run_container_create_range(uint32_t start, - uint32_t stop); -extern inline int run_container_cardinality(const run_container_t *run); +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). 
+ * Result is true iff dst is a bitset */ +bool array_bitset_container_xor(const array_container_t *src_1, + const bitset_container_t *src_2, + container_t **dst) { + bitset_container_t *result = bitset_container_create(); + bitset_container_copy(src_2, result); + result->cardinality = (int32_t)bitset_flip_list_withcard( + result->words, result->cardinality, src_1->array, src_1->cardinality); -bool run_container_add(run_container_t *run, uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) return false; // already there - index = -index - 2; // points to preceding value, possibly -1 - if (index >= 0) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset <= le) return false; // already there - if (offset == le + 1) { - // we may need to fuse - if (index + 1 < run->n_runs) { - if (run->runs[index + 1].value == pos + 1) { - // indeed fusion is needed - run->runs[index].length = run->runs[index + 1].value + - run->runs[index + 1].length - - run->runs[index].value; - recoverRoomAtIndex(run, (uint16_t)(index + 1)); - return true; - } - } - run->runs[index].length++; - return true; - } - if (index + 1 < run->n_runs) { - // we may need to fuse - if (run->runs[index + 1].value == pos + 1) { - // indeed fusion is needed - run->runs[index + 1].value = pos; - run->runs[index + 1].length = run->runs[index + 1].length + 1; - return true; - } - } - } - if (index == -1) { - // we may need to extend the first run - if (0 < run->n_runs) { - if (run->runs[0].value == pos + 1) { - run->runs[0].length++; - run->runs[0].value--; - return true; - } - } + // do required type conversions. 
+ if (result->cardinality <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(result); + bitset_container_free(result); + return false; // not bitset } - makeRoomAtIndex(run, (uint16_t)(index + 1)); - run->runs[index + 1].value = pos; - run->runs[index + 1].length = 0; - return true; + *dst = result; + return true; // bitset +} + +/* Compute the xor of src_1 and src_2 and write the result to + * dst. It is allowed for src_2 to be dst. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). + */ + +void array_bitset_container_lazy_xor(const array_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + if (src_2 != dst) bitset_container_copy(src_2, dst); + bitset_flip_list(dst->words, src_1->array, src_1->cardinality); + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; } -/* Create a new run container. Return NULL in case of failure. */ -run_container_t *run_container_create_given_capacity(int32_t size) { - run_container_t *run; - /* Allocate the run container itself. */ - if ((run = (run_container_t *)roaring_malloc(sizeof(run_container_t))) == - NULL) { - return NULL; +/* Compute the xor of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. 
+ */ + +bool run_bitset_container_xor(const run_container_t *src_1, + const bitset_container_t *src_2, + container_t **dst) { + bitset_container_t *result = bitset_container_create(); + + bitset_container_copy(src_2, result); + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + bitset_flip_range(result->words, rle.value, + rle.value + rle.length + UINT32_C(1)); } - if (size <= 0) { // we don't want to rely on malloc(0) - run->runs = NULL; - } else if ((run->runs = (rle16_t *)roaring_malloc(sizeof(rle16_t) * - size)) == NULL) { - roaring_free(run); - return NULL; + result->cardinality = bitset_container_compute_cardinality(result); + + if (result->cardinality <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(result); + bitset_container_free(result); + return false; // not bitset } - run->capacity = size; - run->n_runs = 0; - return run; + *dst = result; + return true; // bitset } -int run_container_shrink_to_fit(run_container_t *src) { - if (src->n_runs == src->capacity) return 0; // nothing to do - int savings = src->capacity - src->n_runs; - src->capacity = src->n_runs; - rle16_t *oldruns = src->runs; - src->runs = - (rle16_t *)roaring_realloc(oldruns, src->capacity * sizeof(rle16_t)); - if (src->runs == NULL) roaring_free(oldruns); // should never happen? - return savings; -} -/* Create a new run container. Return NULL in case of failure. */ -run_container_t *run_container_create(void) { - return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE); -} +/* lazy xor. Dst is initialized and may be equal to src_2. + * Result is left as a bitset container, even if actual + * cardinality would dictate an array container. 
+ */ -ALLOW_UNALIGNED -run_container_t *run_container_clone(const run_container_t *src) { - run_container_t *run = run_container_create_given_capacity(src->capacity); - if (run == NULL) return NULL; - run->capacity = src->capacity; - run->n_runs = src->n_runs; - memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t)); - return run; +void run_bitset_container_lazy_xor(const run_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + if (src_2 != dst) bitset_container_copy(src_2, dst); + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + bitset_flip_range(dst->words, rle.value, + rle.value + rle.length + UINT32_C(1)); + } + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; } -void run_container_offset(const run_container_t *c, container_t **loc, - container_t **hic, uint16_t offset) { - run_container_t *lo = NULL, *hi = NULL; - - bool split; - int lo_cap, hi_cap; - int top, pivot; +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ - top = (1 << 16) - offset; - pivot = run_container_index_equalorlarger(c, top); +int array_run_container_xor(const array_container_t *src_1, + const run_container_t *src_2, container_t **dst) { + // semi following Java XOR implementation as of May 2016 + // the C OR implementation works quite differently and can return a run + // container + // TODO could optimize for full run containers. - if (pivot == -1) { - split = false; - lo_cap = c->n_runs; - hi_cap = 0; - } else { - split = c->runs[pivot].value < top; - lo_cap = pivot + (split ? 1 : 0); - hi_cap = c->n_runs - pivot; + // use of lazy following Java impl. + const int arbitrary_threshold = 32; + if (src_1->cardinality < arbitrary_threshold) { + run_container_t *ans = run_container_create(); + array_run_container_lazy_xor(src_1, src_2, ans); // keeps runs. 
+ uint8_t typecode_after; + *dst = + convert_run_to_efficient_container_and_free(ans, &typecode_after); + return typecode_after; } - if (loc && lo_cap) { - lo = run_container_create_given_capacity(lo_cap); - memcpy(lo->runs, c->runs, lo_cap * sizeof(rle16_t)); - lo->n_runs = lo_cap; - for (int i = 0; i < lo_cap; ++i) { - lo->runs[i].value += offset; - } - *loc = (container_t *)lo; + int card = run_container_cardinality(src_2); + if (card <= DEFAULT_MAX_SIZE) { + // Java implementation works with the array, xoring the run elements via + // iterator + array_container_t *temp = array_container_from_run(src_2); + bool ret_is_bitset = array_array_container_xor(temp, src_1, dst); + array_container_free(temp); + return ret_is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE; + + } else { // guess that it will end up as a bitset + bitset_container_t *result = bitset_container_from_run(src_2); + bool is_bitset = bitset_array_container_ixor(result, src_1, dst); + // any necessary type conversion has been done by the ixor + int retval = (is_bitset ? BITSET_CONTAINER_TYPE : ARRAY_CONTAINER_TYPE); + return retval; } +} - if (hic && hi_cap) { - hi = run_container_create_given_capacity(hi_cap); - memcpy(hi->runs, c->runs + pivot, hi_cap * sizeof(rle16_t)); - hi->n_runs = hi_cap; - for (int i = 0; i < hi_cap; ++i) { - hi->runs[i].value += offset; +/* Dst is a valid run container. (Can it be src_2? Let's say not.) + * Leaves result as run container, even if other options are + * smaller. 
+ */ + +void array_run_container_lazy_xor(const array_container_t *src_1, + const run_container_t *src_2, + run_container_t *dst) { + run_container_grow(dst, src_1->cardinality + src_2->n_runs, false); + int32_t rlepos = 0; + int32_t arraypos = 0; + dst->n_runs = 0; + + while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { + if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { + run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, + src_2->runs[rlepos].length); + rlepos++; + } else { + run_container_smart_append_exclusive(dst, src_1->array[arraypos], + 0); + arraypos++; } - *hic = (container_t *)hi; } + while (arraypos < src_1->cardinality) { + run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0); + arraypos++; + } + while (rlepos < src_2->n_runs) { + run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, + src_2->runs[rlepos].length); + rlepos++; + } +} - // Fix the split. - if (split) { - if (lo != NULL) { - // Add the missing run to 'lo', exhausting length. - lo->runs[lo->n_runs - 1].length = - (1 << 16) - lo->runs[lo->n_runs - 1].value - 1; - } +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ - if (hi != NULL) { - // Fix the first run in 'hi'. - hi->runs[0].length -= UINT16_MAX - hi->runs[0].value + 1; - hi->runs[0].value = 0; - } +int run_run_container_xor(const run_container_t *src_1, + const run_container_t *src_2, container_t **dst) { + run_container_t *ans = run_container_create(); + run_container_xor(src_1, src_2, ans); + uint8_t typecode_after; + *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); + return typecode_after; +} + +/* + * Java implementation (as of May 2016) for array_run, run_run + * and bitset_run don't do anything different for inplace. 
+ * Could adopt the mixed_union.c approach instead (ie, using + * smart_append_exclusive) + * + */ + +bool array_array_container_xor(const array_container_t *src_1, + const array_container_t *src_2, + container_t **dst) { + int totalCardinality = + src_1->cardinality + src_2->cardinality; // upper bound + if (totalCardinality <= DEFAULT_MAX_SIZE) { + *dst = array_container_create_given_capacity(totalCardinality); + array_container_xor(src_1, src_2, CAST_array(*dst)); + return false; // not a bitset + } + *dst = bitset_container_from_array(src_1); + bool returnval = true; // expect a bitset + bitset_container_t *ourbitset = CAST_bitset(*dst); + ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard( + ourbitset->words, src_1->cardinality, src_2->array, src_2->cardinality); + if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { + // need to convert! + *dst = array_container_from_bitset(ourbitset); + bitset_container_free(ourbitset); + returnval = false; // not going to be a bitset } + + return returnval; } -/* Free memory. */ -void run_container_free(run_container_t *run) { - if (run == NULL) return; - roaring_free(run->runs); - roaring_free(run); +bool array_array_container_lazy_xor(const array_container_t *src_1, + const array_container_t *src_2, + container_t **dst) { + int totalCardinality = src_1->cardinality + src_2->cardinality; + // + // We assume that operations involving bitset containers will be faster than + // operations involving solely array containers, except maybe when array + // containers are small. Indeed, for example, it is cheap to compute the + // exclusive union between an array and a bitset container, generally more + // so than between a large array and another array. So it is advantageous to + // favour bitset containers during the computation. 
Of course, if we convert + // array containers eagerly to bitset containers, we may later need to + // revert the bitset containers to array containerr to satisfy the Roaring + // format requirements, but such one-time conversions at the end may not be + // overly expensive. We arrived to this design based on extensive + // benchmarking on unions. For XOR/exclusive union, we simply followed the + // heuristic used by the unions (see mixed_union.c). Further tuning is + // possible. + // + if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { + *dst = array_container_create_given_capacity(totalCardinality); + if (*dst != NULL) array_container_xor(src_1, src_2, CAST_array(*dst)); + return false; // not a bitset + } + *dst = bitset_container_from_array(src_1); + bool returnval = true; // expect a bitset (maybe, for XOR??) + if (*dst != NULL) { + bitset_container_t *ourbitset = CAST_bitset(*dst); + bitset_flip_list(ourbitset->words, src_2->array, src_2->cardinality); + ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; + } + return returnval; } -void run_container_grow(run_container_t *run, int32_t min, bool copy) { - int32_t newCapacity = (run->capacity == 0) ? RUN_DEFAULT_INIT_SIZE - : run->capacity < 64 ? run->capacity * 2 - : run->capacity < 1024 ? run->capacity * 3 / 2 - : run->capacity * 5 / 4; - if (newCapacity < min) newCapacity = min; - run->capacity = newCapacity; - assert(run->capacity >= min); - if (copy) { - rle16_t *oldruns = run->runs; - run->runs = (rle16_t *)roaring_realloc(oldruns, - run->capacity * sizeof(rle16_t)); - if (run->runs == NULL) roaring_free(oldruns); +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). 
Return value is + * "dst is a bitset" + */ + +bool bitset_bitset_container_xor(const bitset_container_t *src_1, + const bitset_container_t *src_2, + container_t **dst) { + bitset_container_t *ans = bitset_container_create(); + int card = bitset_container_xor(src_1, src_2, ans); + if (card <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(ans); + bitset_container_free(ans); + return false; // not bitset } else { - roaring_free(run->runs); - run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t)); + *dst = ans; + return true; } - // We may have run->runs == NULL. } -/* copy one container into another */ -void run_container_copy(const run_container_t *src, run_container_t *dst) { - const int32_t n_runs = src->n_runs; - if (src->n_runs > dst->capacity) { - run_container_grow(dst, n_runs, false); - } - dst->n_runs = n_runs; - memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs); -} +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ -void run_container_union(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // TODO: this could be a lot more efficient +bool bitset_array_container_ixor(bitset_container_t *src_1, + const array_container_t *src_2, + container_t **dst) { + *dst = src_1; + src_1->cardinality = (uint32_t)bitset_flip_list_withcard( + src_1->words, src_1->cardinality, src_2->array, src_2->cardinality); - // we start out with inexpensive checks - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - run_container_copy(src_1, dst); - return; - } - if (if2) { - run_container_copy(src_2, dst); - return; - } - } - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - dst->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; + if (src_1->cardinality <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(src_1); + bitset_container_free(src_1); + return false; // not bitset + } else + return true; +} - rle16_t previousrle; - if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { - previousrle = run_container_append_first(dst, src_1->runs[rlepos]); - rlepos++; +/* a bunch of in-place, some of which may not *really* be inplace. 
+ * TODO: write actual inplace routine if efficiency warrants it + * Anything inplace with a bitset is a good candidate + */ + +bool bitset_bitset_container_ixor(bitset_container_t *src_1, + const bitset_container_t *src_2, + container_t **dst) { + int card = bitset_container_xor(src_1, src_2, src_1); + if (card <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(src_1); + bitset_container_free(src_1); + return false; // not bitset } else { - previousrle = run_container_append_first(dst, src_2->runs[xrlepos]); - xrlepos++; + *dst = src_1; + return true; } +} - while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) { - rle16_t newrl; - if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { - newrl = src_1->runs[rlepos]; - rlepos++; - } else { - newrl = src_2->runs[xrlepos]; - xrlepos++; - } - run_container_append(dst, newrl, &previousrle); - } - while (xrlepos < src_2->n_runs) { - run_container_append(dst, src_2->runs[xrlepos], &previousrle); - xrlepos++; - } - while (rlepos < src_1->n_runs) { - run_container_append(dst, src_1->runs[rlepos], &previousrle); - rlepos++; - } +bool array_bitset_container_ixor(array_container_t *src_1, + const bitset_container_t *src_2, + container_t **dst) { + bool ans = array_bitset_container_xor(src_1, src_2, dst); + array_container_free(src_1); + return ans; } -/* Compute the union of `src_1' and `src_2' and write the result to `src_1' +/* Compute the xor of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. 
*/ -void run_container_union_inplace(run_container_t *src_1, - const run_container_t *src_2) { - // TODO: this could be a lot more efficient - - // we start out with inexpensive checks - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return; - } - if (if2) { - run_container_copy(src_2, src_1); - return; - } - } - // we move the data to the end of the current array - const int32_t maxoutput = src_1->n_runs + src_2->n_runs; - const int32_t neededcapacity = maxoutput + src_1->n_runs; - if (src_1->capacity < neededcapacity) - run_container_grow(src_1, neededcapacity, true); - memmove(src_1->runs + maxoutput, src_1->runs, - src_1->n_runs * sizeof(rle16_t)); - rle16_t *inputsrc1 = src_1->runs + maxoutput; - const int32_t input1nruns = src_1->n_runs; - src_1->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - rle16_t previousrle; - if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { - previousrle = run_container_append_first(src_1, inputsrc1[rlepos]); - rlepos++; - } else { - previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]); - xrlepos++; - } - while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) { - rle16_t newrl; - if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { - newrl = inputsrc1[rlepos]; - rlepos++; - } else { - newrl = src_2->runs[xrlepos]; - xrlepos++; - } - run_container_append(src_1, newrl, &previousrle); - } - while (xrlepos < src_2->n_runs) { - run_container_append(src_1, src_2->runs[xrlepos], &previousrle); - xrlepos++; - } - while (rlepos < input1nruns) { - run_container_append(src_1, inputsrc1[rlepos], &previousrle); - rlepos++; - } +bool run_bitset_container_ixor(run_container_t *src_1, + const bitset_container_t *src_2, + container_t **dst) { + bool ans = run_bitset_container_xor(src_1, src_2, dst); + run_container_free(src_1); + return ans; } -/* Compute the symmetric difference of `src_1' and `src_2' and write the 
result - * to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -void run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // don't bother to convert xor with full range into negation - // since negation is implemented similarly - - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); +bool bitset_run_container_ixor(bitset_container_t *src_1, + const run_container_t *src_2, + container_t **dst) { + bool ans = run_bitset_container_xor(src_2, src_1, dst); + bitset_container_free(src_1); + return ans; +} - int32_t pos1 = 0; - int32_t pos2 = 0; - dst->n_runs = 0; +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ - while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { - if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { - run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, - src_1->runs[pos1].length); - pos1++; - } else { - run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, - src_2->runs[pos2].length); - pos2++; - } - } - while (pos1 < src_1->n_runs) { - run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, - src_1->runs[pos1].length); - pos1++; - } +int array_run_container_ixor(array_container_t *src_1, + const run_container_t *src_2, container_t **dst) { + int ans = array_run_container_xor(src_1, src_2, dst); + array_container_free(src_1); + return ans; +} - while (pos2 < src_2->n_runs) { - run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, - src_2->runs[pos2].length); - pos2++; - } +int run_array_container_ixor(run_container_t *src_1, + const array_container_t *src_2, + container_t **dst) { + int ans = array_run_container_xor(src_2, src_1, dst); + run_container_free(src_1); + return ans; } -/* Compute the intersection of src_1 and src_2 and write the result 
to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -void run_container_intersection(const run_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - run_container_copy(src_2, dst); - return; - } - if (if2) { - run_container_copy(src_1, dst); - return; - } - } - // TODO: this could be a lot more efficient, could use SIMD optimizations - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - dst->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - const int32_t lateststart = start > xstart ? 
start : xstart; - int32_t earliestend; - if (end == xend) { // improbable - earliestend = end; - rlepos++; - xrlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else if (end < xend) { - earliestend = end; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } +bool array_array_container_ixor(array_container_t *src_1, + const array_container_t *src_2, + container_t **dst) { + bool ans = array_array_container_xor(src_1, src_2, dst); + array_container_free(src_1); + return ans; +} - } else { // end > xend - earliestend = xend; - xrlepos++; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } - dst->runs[dst->n_runs].value = (uint16_t)lateststart; - dst->runs[dst->n_runs].length = - (uint16_t)(earliestend - lateststart - 1); - dst->n_runs++; - } - } +int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, + container_t **dst) { + int ans = run_run_container_xor(src_1, src_2, dst); + run_container_free(src_1); + return ans; } -/* Compute the size of the intersection of src_1 and src_2 . 
*/ -int run_container_intersection_cardinality(const run_container_t *src_1, - const run_container_t *src_2) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return run_container_cardinality(src_2); - } - if (if2) { - return run_container_cardinality(src_1); - } - } - int answer = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - const int32_t lateststart = start > xstart ? 
start : xstart; - int32_t earliestend; - if (end == xend) { // improbable - earliestend = end; - rlepos++; - xrlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else if (end < xend) { - earliestend = end; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } +#ifdef __cplusplus +} +} +} // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/mixed_xor.c */ +/* begin file src/containers/run.c */ +#include +#include - } else { // end > xend - earliestend = xend; - xrlepos++; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; + +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." 
+#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wuninitialized" +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace internal { +#endif + +extern inline uint16_t run_container_minimum(const run_container_t *run); +extern inline uint16_t run_container_maximum(const run_container_t *run); +extern inline int32_t interleavedBinarySearch(const rle16_t *array, + int32_t lenarray, uint16_t ikey); +extern inline bool run_container_contains(const run_container_t *run, + uint16_t pos); +extern inline int run_container_index_equalorlarger(const run_container_t *arr, + uint16_t x); +extern inline bool run_container_is_full(const run_container_t *run); +extern inline bool run_container_nonzero_cardinality(const run_container_t *rc); +extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs); +extern inline run_container_t *run_container_create_range(uint32_t start, + uint32_t stop); +extern inline int run_container_cardinality(const run_container_t *run); + +bool run_container_add(run_container_t *run, uint16_t pos) { + int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); + if (index >= 0) return false; // already there + index = -index - 2; // points to preceding value, possibly -1 + if (index >= 0) { // possible match + int32_t offset = pos - run->runs[index].value; + int32_t le = run->runs[index].length; + if (offset <= le) return false; // already there + if (offset == le + 1) { + // we may need to fuse + if (index + 1 < run->n_runs) { + if (run->runs[index + 1].value == pos + 1) { + // indeed fusion is needed + run->runs[index].length = run->runs[index + 1].value + + run->runs[index + 1].length - + run->runs[index].value; + recoverRoomAtIndex(run, (uint16_t)(index + 1)); + return true; } } - answer += earliestend - lateststart; - } - } - return 
answer; -} - -bool run_container_intersect(const run_container_t *src_1, - const run_container_t *src_2) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return !run_container_empty(src_2); + run->runs[index].length++; + return true; } - if (if2) { - return !run_container_empty(src_1); + if (index + 1 < run->n_runs) { + // we may need to fuse + if (run->runs[index + 1].value == pos + 1) { + // indeed fusion is needed + run->runs[index + 1].value = pos; + run->runs[index + 1].length = run->runs[index + 1].length + 1; + return true; + } } } - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; + if (index == -1) { + // we may need to extend the first run + if (0 < run->n_runs) { + if (run->runs[0].value == pos + 1) { + run->runs[0].length++; + run->runs[0].value--; + return true; } - } else { // they overlap - return true; } } - return false; + makeRoomAtIndex(run, (uint16_t)(index + 1)); + run->runs[index + 1].value = pos; + run->runs[index + 1].length = 0; + return true; } -/* Compute the difference of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -void run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // following Java implementation as of June 2016 +/* Create a new run container. 
Return NULL in case of failure. */ +run_container_t *run_container_create_given_capacity(int32_t size) { + run_container_t *run; + /* Allocate the run container itself. */ + if ((run = (run_container_t *)roaring_malloc(sizeof(run_container_t))) == + NULL) { + return NULL; + } + if (size <= 0) { // we don't want to rely on malloc(0) + run->runs = NULL; + } else if ((run->runs = (rle16_t *)roaring_malloc(sizeof(rle16_t) * + size)) == NULL) { + roaring_free(run); + return NULL; + } + run->capacity = size; + run->n_runs = 0; + return run; +} - if (dst->capacity < src_1->n_runs + src_2->n_runs) - run_container_grow(dst, src_1->n_runs + src_2->n_runs, false); +int run_container_shrink_to_fit(run_container_t *src) { + if (src->n_runs == src->capacity) return 0; // nothing to do + int savings = src->capacity - src->n_runs; + src->capacity = src->n_runs; + rle16_t *oldruns = src->runs; + src->runs = + (rle16_t *)roaring_realloc(oldruns, src->capacity * sizeof(rle16_t)); + if (src->runs == NULL) roaring_free(oldruns); // should never happen? + return savings; +} +/* Create a new run container. Return NULL in case of failure. 
*/ +run_container_t *run_container_create(void) { + return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE); +} - dst->n_runs = 0; +ALLOW_UNALIGNED +run_container_t *run_container_clone(const run_container_t *src) { + run_container_t *run = run_container_create_given_capacity(src->capacity); + if (run == NULL) return NULL; + run->capacity = src->capacity; + run->n_runs = src->n_runs; + memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t)); + return run; +} - int rlepos1 = 0; - int rlepos2 = 0; - int32_t start = src_1->runs[rlepos1].value; - int32_t end = start + src_1->runs[rlepos1].length + 1; - int32_t start2 = src_2->runs[rlepos2].value; - int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; +void run_container_offset(const run_container_t *c, container_t **loc, + container_t **hic, uint16_t offset) { + run_container_t *lo = NULL, *hi = NULL; - while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { - if (end <= start2) { - // output the first run - dst->runs[dst->n_runs++] = - CROARING_MAKE_RLE16(start, end - start - 1); - rlepos1++; - if (rlepos1 < src_1->n_runs) { - start = src_1->runs[rlepos1].value; - end = start + src_1->runs[rlepos1].length + 1; - } - } else if (end2 <= start) { - // exit the second run - rlepos2++; - if (rlepos2 < src_2->n_runs) { - start2 = src_2->runs[rlepos2].value; - end2 = start2 + src_2->runs[rlepos2].length + 1; - } - } else { - if (start < start2) { - dst->runs[dst->n_runs++] = - CROARING_MAKE_RLE16(start, start2 - start - 1); - } - if (end2 < end) { - start = end2; - } else { - rlepos1++; - if (rlepos1 < src_1->n_runs) { - start = src_1->runs[rlepos1].value; - end = start + src_1->runs[rlepos1].length + 1; - } - } + bool split; + unsigned int lo_cap, hi_cap; + int top, pivot; + + top = (1 << 16) - offset; + pivot = run_container_index_equalorlarger(c, top); + // pivot is the index of the first run that is >= top or -1 if no such run + + if (pivot >= 0) { + split = c->runs[pivot].value < top; + lo_cap 
= pivot + (split ? 1 : 0); + hi_cap = c->n_runs - pivot; + } else { + // here pivot < 0 + split = false; + lo_cap = c->n_runs; + hi_cap = 0; + } + + if (loc && lo_cap) { + lo = run_container_create_given_capacity(lo_cap); + memcpy(lo->runs, c->runs, lo_cap * sizeof(rle16_t)); + lo->n_runs = lo_cap; + for (unsigned int i = 0; i < lo_cap; ++i) { + lo->runs[i].value += offset; } + *loc = (container_t *)lo; } - if (rlepos1 < src_1->n_runs) { - dst->runs[dst->n_runs++] = CROARING_MAKE_RLE16(start, end - start - 1); - rlepos1++; - if (rlepos1 < src_1->n_runs) { - memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1, - sizeof(rle16_t) * (src_1->n_runs - rlepos1)); - dst->n_runs += src_1->n_runs - rlepos1; + + if (hic && hi_cap) { + hi = run_container_create_given_capacity(hi_cap); + memcpy(hi->runs, c->runs + pivot, hi_cap * sizeof(rle16_t)); + hi->n_runs = hi_cap; + for (unsigned int i = 0; i < hi_cap; ++i) { + hi->runs[i].value += offset; } + *hic = (container_t *)hi; } -} -/* - * Print this container using printf (useful for debugging). - */ -void run_container_printf(const run_container_t *cont) { - for (int i = 0; i < cont->n_runs; ++i) { - uint16_t run_start = cont->runs[i].value; - uint16_t le = cont->runs[i].length; - printf("[%d,%d]", run_start, run_start + le); + // Fix the split. + if (split) { + if (lo != NULL) { + // Add the missing run to 'lo', exhausting length. + lo->runs[lo->n_runs - 1].length = + (1 << 16) - lo->runs[lo->n_runs - 1].value - 1; + } + + if (hi != NULL) { + // Fix the first run in 'hi'. + hi->runs[0].length -= UINT16_MAX - hi->runs[0].value + 1; + hi->runs[0].value = 0; + } } } -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. 
- */ -void run_container_printf_as_uint32_array(const run_container_t *cont, - uint32_t base) { - if (cont->n_runs == 0) return; - { - uint32_t run_start = base + cont->runs[0].value; - uint16_t le = cont->runs[0].length; - printf("%u", run_start); - for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j); - } - for (int32_t i = 1; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j); - } +/* Free memory. */ +void run_container_free(run_container_t *run) { + if (run == NULL) return; + roaring_free(run->runs); + roaring_free(run); } -/* - * Validate the container. Returns true if valid. - */ -bool run_container_validate(const run_container_t *run, const char **reason) { - if (run->n_runs < 0) { - *reason = "negative run count"; - return false; - } - if (run->capacity < 0) { - *reason = "negative run capacity"; - return false; - } - if (run->capacity < run->n_runs) { - *reason = "capacity less than run count"; - return false; +void run_container_grow(run_container_t *run, int32_t min, bool copy) { + int32_t newCapacity = (run->capacity == 0) ? RUN_DEFAULT_INIT_SIZE + : run->capacity < 64 ? run->capacity * 2 + : run->capacity < 1024 ? run->capacity * 3 / 2 + : run->capacity * 5 / 4; + if (newCapacity < min) newCapacity = min; + run->capacity = newCapacity; + assert(run->capacity >= min); + if (copy) { + rle16_t *oldruns = run->runs; + run->runs = (rle16_t *)roaring_realloc(oldruns, + run->capacity * sizeof(rle16_t)); + if (run->runs == NULL) roaring_free(oldruns); + } else { + roaring_free(run->runs); + run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t)); } + // We may have run->runs == NULL. 
+} - if (run->n_runs == 0) { - *reason = "zero run count"; - return false; - } - if (run->runs == NULL) { - *reason = "NULL runs"; - return false; +/* copy one container into another */ +void run_container_copy(const run_container_t *src, run_container_t *dst) { + const int32_t n_runs = src->n_runs; + if (src->n_runs > dst->capacity) { + run_container_grow(dst, n_runs, false); } + dst->n_runs = n_runs; + memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs); +} - // Use uint32_t to avoid overflow issues on ranges that contain UINT16_MAX. - uint32_t last_end = 0; - for (int i = 0; i < run->n_runs; ++i) { - uint32_t start = run->runs[i].value; - uint32_t end = start + run->runs[i].length + 1; - if (end <= start) { - *reason = "run start + length overflow"; - return false; - } - if (end > (1 << 16)) { - *reason = "run start + length too large"; - return false; - } - if (start < last_end) { - *reason = "run start less than last end"; - return false; +/* Compute the union of `src_1' and `src_2' and write the result to `dst' + * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ +void run_container_union(const run_container_t *src_1, + const run_container_t *src_2, run_container_t *dst) { + // TODO: this could be a lot more efficient + + // we start out with inexpensive checks + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + run_container_copy(src_1, dst); + return; } - if (start == last_end && last_end != 0) { - *reason = "run start equal to last end, should have combined"; - return false; + if (if2) { + run_container_copy(src_2, dst); + return; } - last_end = end; } - return true; -} - -int32_t run_container_write(const run_container_t *container, char *buf) { - uint16_t cast_16 = container->n_runs; - memcpy(buf, &cast_16, sizeof(uint16_t)); - memcpy(buf + sizeof(uint16_t), container->runs, - container->n_runs * sizeof(rle16_t)); - return run_container_size_in_bytes(container); -} + const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; + if (dst->capacity < neededcapacity) + run_container_grow(dst, neededcapacity, false); + dst->n_runs = 0; + int32_t rlepos = 0; + int32_t xrlepos = 0; -int32_t run_container_read(int32_t cardinality, run_container_t *container, - const char *buf) { - (void)cardinality; - uint16_t cast_16; - memcpy(&cast_16, buf, sizeof(uint16_t)); - container->n_runs = cast_16; - if (container->n_runs > container->capacity) - run_container_grow(container, container->n_runs, false); - if (container->n_runs > 0) { - memcpy(container->runs, buf + sizeof(uint16_t), - container->n_runs * sizeof(rle16_t)); + rle16_t previousrle; + if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { + previousrle = run_container_append_first(dst, src_1->runs[rlepos]); + rlepos++; + } else { + previousrle = run_container_append_first(dst, src_2->runs[xrlepos]); + xrlepos++; } - return run_container_size_in_bytes(container); -} - -bool run_container_iterate(const run_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr) { - 
for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - for (int j = 0; j <= le; ++j) - if (!iterator(run_start + j, ptr)) return false; + while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) { + rle16_t newrl; + if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { + newrl = src_1->runs[rlepos]; + rlepos++; + } else { + newrl = src_2->runs[xrlepos]; + xrlepos++; + } + run_container_append(dst, newrl, &previousrle); + } + while (xrlepos < src_2->n_runs) { + run_container_append(dst, src_2->runs[xrlepos], &previousrle); + xrlepos++; + } + while (rlepos < src_1->n_runs) { + run_container_append(dst, src_1->runs[rlepos], &previousrle); + rlepos++; } - return true; } -bool run_container_iterate64(const run_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr) { - for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; +/* Compute the union of `src_1' and `src_2' and write the result to `src_1' + */ +void run_container_union_inplace(run_container_t *src_1, + const run_container_t *src_2) { + // TODO: this could be a lot more efficient - for (int j = 0; j <= le; ++j) - if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) - return false; + // we start out with inexpensive checks + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + return; + } + if (if2) { + run_container_copy(src_2, src_1); + return; + } } - return true; -} + // we move the data to the end of the current array + const int32_t maxoutput = src_1->n_runs + src_2->n_runs; + const int32_t neededcapacity = maxoutput + src_1->n_runs; + if (src_1->capacity < neededcapacity) + run_container_grow(src_1, neededcapacity, true); + memmove(src_1->runs + maxoutput, src_1->runs, + src_1->n_runs * sizeof(rle16_t)); + rle16_t 
*inputsrc1 = src_1->runs + maxoutput; + const int32_t input1nruns = src_1->n_runs; + src_1->n_runs = 0; + int32_t rlepos = 0; + int32_t xrlepos = 0; -bool run_container_is_subset(const run_container_t *container1, - const run_container_t *container2) { - int i1 = 0, i2 = 0; - while (i1 < container1->n_runs && i2 < container2->n_runs) { - int start1 = container1->runs[i1].value; - int stop1 = start1 + container1->runs[i1].length; - int start2 = container2->runs[i2].value; - int stop2 = start2 + container2->runs[i2].length; - if (start1 < start2) { - return false; - } else { // start1 >= start2 - if (stop1 < stop2) { - i1++; - } else if (stop1 == stop2) { - i1++; - i2++; - } else { // stop1 > stop2 - i2++; - } + rle16_t previousrle; + if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { + previousrle = run_container_append_first(src_1, inputsrc1[rlepos]); + rlepos++; + } else { + previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]); + xrlepos++; + } + while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) { + rle16_t newrl; + if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { + newrl = inputsrc1[rlepos]; + rlepos++; + } else { + newrl = src_2->runs[xrlepos]; + xrlepos++; } + run_container_append(src_1, newrl, &previousrle); } - if (i1 == container1->n_runs) { - return true; - } else { - return false; + while (xrlepos < src_2->n_runs) { + run_container_append(src_1, src_2->runs[xrlepos], &previousrle); + xrlepos++; + } + while (rlepos < input1nruns) { + run_container_append(src_1, inputsrc1[rlepos], &previousrle); + rlepos++; } } -// TODO: write smart_append_exclusive version to match the overloaded 1 param -// Java version (or is it even used?) +/* Compute the symmetric difference of `src_1' and `src_2' and write the result + * to `dst' + * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ +void run_container_xor(const run_container_t *src_1, + const run_container_t *src_2, run_container_t *dst) { + // don't bother to convert xor with full range into negation + // since negation is implemented similarly -// follows the Java implementation closely -// length is the rle-value. Ie, run [10,12) uses a length value 1. -void run_container_smart_append_exclusive(run_container_t *src, - const uint16_t start, - const uint16_t length) { - int old_end; - rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL; - rle16_t *appended_last_run = src->runs + src->n_runs; + const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; + if (dst->capacity < neededcapacity) + run_container_grow(dst, neededcapacity, false); - if (!src->n_runs || - (start > (old_end = last_run->value + last_run->length + 1))) { - *appended_last_run = CROARING_MAKE_RLE16(start, length); - src->n_runs++; - return; + int32_t pos1 = 0; + int32_t pos2 = 0; + dst->n_runs = 0; + + while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { + if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { + run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, + src_1->runs[pos1].length); + pos1++; + } else { + run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, + src_2->runs[pos2].length); + pos2++; + } } - if (old_end == start) { - // we merge - last_run->length += (length + 1); - return; + while (pos1 < src_1->n_runs) { + run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, + src_1->runs[pos1].length); + pos1++; } - int new_end = start + length + 1; - if (start == last_run->value) { - // wipe out previous - if (new_end < old_end) { - *last_run = CROARING_MAKE_RLE16(new_end, old_end - new_end - 1); - return; - } else if (new_end > old_end) { - *last_run = CROARING_MAKE_RLE16(old_end, new_end - old_end - 1); - return; - } else { - src->n_runs--; - return; + while (pos2 < src_2->n_runs) { + run_container_smart_append_exclusive(dst, 
src_2->runs[pos2].value, + src_2->runs[pos2].length); + pos2++; + } +} + +/* Compute the intersection of src_1 and src_2 and write the result to + * dst. It is assumed that dst is distinct from both src_1 and src_2. */ +void run_container_intersection(const run_container_t *src_1, + const run_container_t *src_2, + run_container_t *dst) { + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + run_container_copy(src_2, dst); + return; + } + if (if2) { + run_container_copy(src_1, dst); + return; + } + } + // TODO: this could be a lot more efficient, could use SIMD optimizations + const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; + if (dst->capacity < neededcapacity) + run_container_grow(dst, neededcapacity, false); + dst->n_runs = 0; + int32_t rlepos = 0; + int32_t xrlepos = 0; + int32_t start = src_1->runs[rlepos].value; + int32_t end = start + src_1->runs[rlepos].length + 1; + int32_t xstart = src_2->runs[xrlepos].value; + int32_t xend = xstart + src_2->runs[xrlepos].length + 1; + while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { + if (end <= xstart) { + ++rlepos; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + } else if (xend <= start) { + ++xrlepos; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else { // they overlap + const int32_t lateststart = start > xstart ? 
start : xstart; + int32_t earliestend; + if (end == xend) { // improbable + earliestend = end; + rlepos++; + xrlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else if (end < xend) { + earliestend = end; + rlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + + } else { // end > xend + earliestend = xend; + xrlepos++; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } + dst->runs[dst->n_runs].value = (uint16_t)lateststart; + dst->runs[dst->n_runs].length = + (uint16_t)(earliestend - lateststart - 1); + dst->n_runs++; } } - last_run->length = start - last_run->value - 1; - if (new_end < old_end) { - *appended_last_run = - CROARING_MAKE_RLE16(new_end, old_end - new_end - 1); - src->n_runs++; - } else if (new_end > old_end) { - *appended_last_run = - CROARING_MAKE_RLE16(old_end, new_end - old_end - 1); - src->n_runs++; - } -} - -bool run_container_select(const run_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - for (int i = 0; i < container->n_runs; i++) { - uint16_t length = container->runs[i].length; - if (rank <= *start_rank + length) { - uint16_t value = container->runs[i].value; - *element = value + rank - (*start_rank); - return true; - } else - *start_rank += length + 1; - } - return false; } -int run_container_rank(const run_container_t *container, uint16_t x) { - int sum = 0; - uint32_t x32 = x; - for (int i = 0; i < container->n_runs; i++) { - uint32_t startpoint = container->runs[i].value; - uint32_t length = container->runs[i].length; - uint32_t endpoint = length + startpoint; - if (x <= endpoint) { - if (x < startpoint) break; - return sum + (x32 - 
startpoint) + 1; - } else { - sum += length + 1; +/* Compute the size of the intersection of src_1 and src_2 . */ +int run_container_intersection_cardinality(const run_container_t *src_1, + const run_container_t *src_2) { + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + return run_container_cardinality(src_2); + } + if (if2) { + return run_container_cardinality(src_1); } } - return sum; -} -uint32_t run_container_rank_many(const run_container_t *container, - uint64_t start_rank, const uint32_t *begin, - const uint32_t *end, uint64_t *ans) { - const uint16_t high = (uint16_t)((*begin) >> 16); - const uint32_t *iter = begin; - int sum = 0; - int i = 0; - for (; iter != end; iter++) { - uint32_t x = *iter; - uint16_t xhigh = (uint16_t)(x >> 16); - if (xhigh != high) return iter - begin; // stop at next container + int answer = 0; + int32_t rlepos = 0; + int32_t xrlepos = 0; + int32_t start = src_1->runs[rlepos].value; + int32_t end = start + src_1->runs[rlepos].length + 1; + int32_t xstart = src_2->runs[xrlepos].value; + int32_t xend = xstart + src_2->runs[xrlepos].length + 1; + while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { + if (end <= xstart) { + ++rlepos; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + } else if (xend <= start) { + ++xrlepos; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else { // they overlap + const int32_t lateststart = start > xstart ? 
start : xstart; + int32_t earliestend; + if (end == xend) { // improbable + earliestend = end; + rlepos++; + xrlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else if (end < xend) { + earliestend = end; + rlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } - uint32_t x32 = x & 0xFFFF; - while (i < container->n_runs) { - uint32_t startpoint = container->runs[i].value; - uint32_t length = container->runs[i].length; - uint32_t endpoint = length + startpoint; - if (x32 <= endpoint) { - if (x32 < startpoint) { - *(ans++) = start_rank + sum; - } else { - *(ans++) = start_rank + sum + (x32 - startpoint) + 1; + } else { // end > xend + earliestend = xend; + xrlepos++; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; } - break; - } else { - sum += length + 1; - i++; } + answer += earliestend - lateststart; } - if (i >= container->n_runs) *(ans++) = start_rank + sum; } - - return iter - begin; + return answer; } -int run_container_get_index(const run_container_t *container, uint16_t x) { - if (run_container_contains(container, x)) { - int sum = 0; - uint32_t x32 = x; - for (int i = 0; i < container->n_runs; i++) { - uint32_t startpoint = container->runs[i].value; - uint32_t length = container->runs[i].length; - uint32_t endpoint = length + startpoint; - if (x <= endpoint) { - if (x < startpoint) break; - return sum + (x32 - startpoint); - } else { - sum += length + 1; - } +bool run_container_intersect(const run_container_t *src_1, + const run_container_t *src_2) { + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + return 
!run_container_empty(src_2); } - return sum - 1; - } else { - return -1; - } -} - -#if defined(CROARING_IS_X64) && CROARING_COMPILER_SUPPORTS_AVX512 - -CROARING_TARGET_AVX512 -ALLOW_UNALIGNED -/* Get the cardinality of `run'. Requires an actual computation. */ -static inline int _avx512_run_container_cardinality( - const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - - /* by initializing with n_runs, we omit counting the +1 for each pair. */ - int sum = n_runs; - int32_t k = 0; - const int32_t step = sizeof(__m512i) / sizeof(rle16_t); - if (n_runs > step) { - __m512i total = _mm512_setzero_si512(); - for (; k + step <= n_runs; k += step) { - __m512i ymm1 = _mm512_loadu_si512((const __m512i *)(runs + k)); - __m512i justlengths = _mm512_srli_epi32(ymm1, 16); - total = _mm512_add_epi32(total, justlengths); + if (if2) { + return !run_container_empty(src_1); } - - __m256i lo = _mm512_extracti32x8_epi32(total, 0); - __m256i hi = _mm512_extracti32x8_epi32(total, 1); - - // a store might be faster than extract? 
- uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; - _mm256_storeu_si256((__m256i *)buffer, lo); - sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + - (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); - - _mm256_storeu_si256((__m256i *)buffer, hi); - sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + - (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); } - for (; k < n_runs; ++k) { - sum += runs[k].length; + int32_t rlepos = 0; + int32_t xrlepos = 0; + int32_t start = src_1->runs[rlepos].value; + int32_t end = start + src_1->runs[rlepos].length + 1; + int32_t xstart = src_2->runs[xrlepos].value; + int32_t xend = xstart + src_2->runs[xrlepos].length + 1; + while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { + if (end <= xstart) { + ++rlepos; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + } else if (xend <= start) { + ++xrlepos; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else { // they overlap + return true; + } } - - return sum; + return false; } -CROARING_UNTARGET_AVX512 - -CROARING_TARGET_AVX2 -ALLOW_UNALIGNED -/* Get the cardinality of `run'. Requires an actual computation. */ -static inline int _avx2_run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; +/* Compute the difference of src_1 and src_2 and write the result to + * dst. It is assumed that dst is distinct from both src_1 and src_2. */ +void run_container_andnot(const run_container_t *src_1, + const run_container_t *src_2, run_container_t *dst) { + // following Java implementation as of June 2016 - /* by initializing with n_runs, we omit counting the +1 for each pair. 
*/ - int sum = n_runs; - int32_t k = 0; - const int32_t step = sizeof(__m256i) / sizeof(rle16_t); - if (n_runs > step) { - __m256i total = _mm256_setzero_si256(); - for (; k + step <= n_runs; k += step) { - __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k)); - __m256i justlengths = _mm256_srli_epi32(ymm1, 16); - total = _mm256_add_epi32(total, justlengths); - } - // a store might be faster than extract? - uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; - _mm256_storeu_si256((__m256i *)buffer, total); - sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + - (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); - } - for (; k < n_runs; ++k) { - sum += runs[k].length; - } + if (dst->capacity < src_1->n_runs + src_2->n_runs) + run_container_grow(dst, src_1->n_runs + src_2->n_runs, false); - return sum; -} + dst->n_runs = 0; -ALLOW_UNALIGNED -int _avx2_run_container_to_uint32_array(void *vout, const run_container_t *cont, - uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; + int rlepos1 = 0; + int rlepos2 = 0; + int32_t start = src_1->runs[rlepos1].value; + int32_t end = start + src_1->runs[rlepos1].length + 1; + int32_t start2 = src_2->runs[rlepos2].value; + int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; - for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - if (le < 8) { - for (int j = 0; j <= le; ++j) { - uint32_t val = run_start + j; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; + while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { + if (end <= start2) { + // output the first run + dst->runs[dst->n_runs++] = + CROARING_MAKE_RLE16(start, end - start - 1); + rlepos1++; + if (rlepos1 < src_1->n_runs) { + start = src_1->runs[rlepos1].value; + end = start + src_1->runs[rlepos1].length + 1; + } + } else if (end2 <= start) { + // exit the second run + rlepos2++; + if (rlepos2 < 
src_2->n_runs) { + start2 = src_2->runs[rlepos2].value; + end2 = start2 + src_2->runs[rlepos2].length + 1; } } else { - int j = 0; - __m256i run_start_v = _mm256_set1_epi32(run_start); - // [8,8,8,8....] - __m256i inc = _mm256_set1_epi32(8); - // used for generate sequence: - // [0, 1, 2, 3...], [8, 9, 10,...] - __m256i delta = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); - for (j = 0; j + 8 <= le; j += 8) { - __m256i val_v = _mm256_add_epi32(run_start_v, delta); - _mm256_storeu_si256((__m256i *)(out + outpos), val_v); - delta = _mm256_add_epi32(inc, delta); - outpos += 8; + if (start < start2) { + dst->runs[dst->n_runs++] = + CROARING_MAKE_RLE16(start, start2 - start - 1); } - for (; j <= le; ++j) { - uint32_t val = run_start + j; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; + if (end2 < end) { + start = end2; + } else { + rlepos1++; + if (rlepos1 < src_1->n_runs) { + start = src_1->runs[rlepos1].value; + end = start + src_1->runs[rlepos1].length + 1; + } } } } - return outpos; -} - -CROARING_UNTARGET_AVX2 - -/* Get the cardinality of `run'. Requires an actual computation. */ -static inline int _scalar_run_container_cardinality( - const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - - /* by initializing with n_runs, we omit counting the +1 for each pair. 
*/ - int sum = n_runs; - for (int k = 0; k < n_runs; ++k) { - sum += runs[k].length; + if (rlepos1 < src_1->n_runs) { + dst->runs[dst->n_runs++] = CROARING_MAKE_RLE16(start, end - start - 1); + rlepos1++; + if (rlepos1 < src_1->n_runs) { + memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1, + sizeof(rle16_t) * (src_1->n_runs - rlepos1)); + dst->n_runs += src_1->n_runs - rlepos1; + } } - - return sum; } -int run_container_cardinality(const run_container_t *run) { -#if CROARING_COMPILER_SUPPORTS_AVX512 - if (croaring_hardware_support() & ROARING_SUPPORTS_AVX512) { - return _avx512_run_container_cardinality(run); - } else -#endif - if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { - return _avx2_run_container_cardinality(run); - } else { - return _scalar_run_container_cardinality(run); +/* + * Print this container using printf (useful for debugging). + */ +void run_container_printf(const run_container_t *cont) { + for (int i = 0; i < cont->n_runs; ++i) { + uint16_t run_start = cont->runs[i].value; + uint16_t le = cont->runs[i].length; + printf("[%d,%d]", run_start, run_start + le); } } -int _scalar_run_container_to_uint32_array(void *vout, - const run_container_t *cont, +/* + * Print this container using printf as a comma-separated list of 32-bit + * integers starting at base. 
+ */ +void run_container_printf_as_uint32_array(const run_container_t *cont, uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; - for (int i = 0; i < cont->n_runs; ++i) { + if (cont->n_runs == 0) return; + { + uint32_t run_start = base + cont->runs[0].value; + uint16_t le = cont->runs[0].length; + printf("%u", run_start); + for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j); + } + for (int32_t i = 1; i < cont->n_runs; ++i) { uint32_t run_start = base + cont->runs[i].value; uint16_t le = cont->runs[i].length; - for (int j = 0; j <= le; ++j) { - uint32_t val = run_start + j; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - } + for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j); } - return outpos; } -int run_container_to_uint32_array(void *vout, const run_container_t *cont, - uint32_t base) { - if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { - return _avx2_run_container_to_uint32_array(vout, cont, base); - } else { - return _scalar_run_container_to_uint32_array(vout, cont, base); +/* + * Validate the container. Returns true if valid. + */ +bool run_container_validate(const run_container_t *run, const char **reason) { + if (run->n_runs < 0) { + *reason = "negative run count"; + return false; + } + if (run->capacity < 0) { + *reason = "negative run capacity"; + return false; + } + if (run->capacity < run->n_runs) { + *reason = "capacity less than run count"; + return false; } -} -#else + if (run->n_runs == 0) { + *reason = "zero run count"; + return false; + } + if (run->runs == NULL) { + *reason = "NULL runs"; + return false; + } -/* Get the cardinality of `run'. Requires an actual computation. */ -ALLOW_UNALIGNED -int run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; + // Use uint32_t to avoid overflow issues on ranges that contain UINT16_MAX. 
+ uint32_t last_end = 0; + for (int i = 0; i < run->n_runs; ++i) { + uint32_t start = run->runs[i].value; + uint32_t end = start + run->runs[i].length + 1; + if (end <= start) { + *reason = "run start + length overflow"; + return false; + } + if (end > (1 << 16)) { + *reason = "run start + length too large"; + return false; + } + if (start < last_end) { + *reason = "run start less than last end"; + return false; + } + if (start == last_end && last_end != 0) { + *reason = "run start equal to last end, should have combined"; + return false; + } + last_end = end; + } + return true; +} - /* by initializing with n_runs, we omit counting the +1 for each pair. */ - int sum = n_runs; - for (int k = 0; k < n_runs; ++k) { - sum += runs[k].length; +int32_t run_container_write(const run_container_t *container, char *buf) { + uint16_t cast_16 = container->n_runs; + memcpy(buf, &cast_16, sizeof(uint16_t)); + memcpy(buf + sizeof(uint16_t), container->runs, + container->n_runs * sizeof(rle16_t)); + return run_container_size_in_bytes(container); +} + +int32_t run_container_read(int32_t cardinality, run_container_t *container, + const char *buf) { + (void)cardinality; + uint16_t cast_16; + memcpy(&cast_16, buf, sizeof(uint16_t)); + container->n_runs = cast_16; + if (container->n_runs > container->capacity) + run_container_grow(container, container->n_runs, false); + if (container->n_runs > 0) { + memcpy(container->runs, buf + sizeof(uint16_t), + container->n_runs * sizeof(rle16_t)); } - - return sum; + return run_container_size_in_bytes(container); } -ALLOW_UNALIGNED -int run_container_to_uint32_array(void *vout, const run_container_t *cont, - uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; +bool run_container_iterate(const run_container_t *cont, uint32_t base, + roaring_iterator iterator, void *ptr) { for (int i = 0; i < cont->n_runs; ++i) { uint32_t run_start = base + cont->runs[i].value; uint16_t le = cont->runs[i].length; - for (int j = 0; j <= le; ++j) { 
- uint32_t val = run_start + j; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - } + + for (int j = 0; j <= le; ++j) + if (!iterator(run_start + j, ptr)) return false; } - return outpos; + return true; } -#endif +bool run_container_iterate64(const run_container_t *cont, uint32_t base, + roaring_iterator64 iterator, uint64_t high_bits, + void *ptr) { + for (int i = 0; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; -#ifdef __cplusplus -} + for (int j = 0; j <= le; ++j) + if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) + return false; + } + return true; } -} // extern "C" { namespace roaring { namespace internal { -#endif -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif/* end file src/containers/run.c */ -/* begin file src/isadetection.c */ - -/* From -https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h -Highly modified. - -Copyright (c) 2016- Facebook, Inc (Adam Paszke) -Copyright (c) 2014- Facebook, Inc (Soumith Chintala) -Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) -Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) -Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) -Copyright (c) 2011-2013 NYU (Clement Farabet) -Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, -Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute -(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, -Samy Bengio, Johnny Mariethoz) - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories -America and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-*/ +bool run_container_is_subset(const run_container_t *container1, + const run_container_t *container2) { + int i1 = 0, i2 = 0; + while (i1 < container1->n_runs && i2 < container2->n_runs) { + int start1 = container1->runs[i1].value; + int stop1 = start1 + container1->runs[i1].length; + int start2 = container2->runs[i2].value; + int stop2 = start2 + container2->runs[i2].length; + if (start1 < start2) { + return false; + } else { // start1 >= start2 + if (stop1 < stop2) { + i1++; + } else if (stop1 == stop2) { + i1++; + i2++; + } else { // stop1 > stop2 + i2++; + } + } + } + if (i1 == container1->n_runs) { + return true; + } else { + return false; + } +} -#include -#include -#include +// TODO: write smart_append_exclusive version to match the overloaded 1 param +// Java version (or is it even used?) -// Binaries produced by Visual Studio 19.38 with solely AVX2 routines -// can compile to AVX-512 thus causing crashes on non-AVX-512 systems. -// This appears to affect VS 17.8 and 17.9. We disable AVX-512 and AVX2 -// on these systems. It seems that ClangCL is not affected. -// https://github.com/RoaringBitmap/CRoaring/pull/603 -#ifndef __clang__ -#if _MSC_VER == 1938 -#define ROARING_DISABLE_AVX 1 -#endif // _MSC_VER == 1938 -#endif // __clang__ +// follows the Java implementation closely +// length is the rle-value. Ie, run [10,12) uses a length value 1. +void run_container_smart_append_exclusive(run_container_t *src, + const uint16_t start, + const uint16_t length) { + int old_end; + rle16_t *last_run = src->n_runs ? 
src->runs + (src->n_runs - 1) : NULL; + rle16_t *appended_last_run = src->runs + src->n_runs; -// We need portability.h to be included first, see -// https://github.com/RoaringBitmap/CRoaring/issues/394 -#if CROARING_REGULAR_VISUAL_STUDIO -#include -#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) -#include -#endif // CROARING_REGULAR_VISUAL_STUDIO + if (!src->n_runs || + (start > (old_end = last_run->value + last_run->length + 1))) { + *appended_last_run = CROARING_MAKE_RLE16(start, length); + src->n_runs++; + return; + } + if (old_end == start) { + // we merge + last_run->length += (length + 1); + return; + } + int new_end = start + length + 1; -#if CROARING_IS_X64 -#ifndef CROARING_COMPILER_SUPPORTS_AVX512 -#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." -#endif // CROARING_COMPILER_SUPPORTS_AVX512 -#endif + if (start == last_run->value) { + // wipe out previous + if (new_end < old_end) { + *last_run = CROARING_MAKE_RLE16(new_end, old_end - new_end - 1); + return; + } else if (new_end > old_end) { + *last_run = CROARING_MAKE_RLE16(old_end, new_end - old_end - 1); + return; + } else { + src->n_runs--; + return; + } + } + last_run->length = start - last_run->value - 1; + if (new_end < old_end) { + *appended_last_run = + CROARING_MAKE_RLE16(new_end, old_end - new_end - 1); + src->n_runs++; + } else if (new_end > old_end) { + *appended_last_run = + CROARING_MAKE_RLE16(old_end, new_end - old_end - 1); + src->n_runs++; + } +} -#ifdef __cplusplus -extern "C" { -namespace roaring { -namespace internal { -#endif -enum croaring_instruction_set { - CROARING_DEFAULT = 0x0, - CROARING_NEON = 0x1, - CROARING_AVX2 = 0x4, - CROARING_SSE42 = 0x8, - CROARING_PCLMULQDQ = 0x10, - CROARING_BMI1 = 0x20, - CROARING_BMI2 = 0x40, - CROARING_ALTIVEC = 0x80, - CROARING_AVX512F = 0x100, - CROARING_AVX512DQ = 0x200, - CROARING_AVX512BW = 0x400, - CROARING_AVX512VBMI2 = 0x800, - CROARING_AVX512BITALG = 0x1000, - CROARING_AVX512VPOPCNTDQ = 0x2000, - 
CROARING_UNINITIALIZED = 0x8000 -}; +bool run_container_select(const run_container_t *container, + uint32_t *start_rank, uint32_t rank, + uint32_t *element) { + for (int i = 0; i < container->n_runs; i++) { + uint16_t length = container->runs[i].length; + if (rank <= *start_rank + length) { + uint16_t value = container->runs[i].value; + *element = value + rank - (*start_rank); + return true; + } else + *start_rank += length + 1; + } + return false; +} -#if CROARING_COMPILER_SUPPORTS_AVX512 -unsigned int CROARING_AVX512_REQUIRED = - (CROARING_AVX512F | CROARING_AVX512DQ | CROARING_AVX512BW | - CROARING_AVX512VBMI2 | CROARING_AVX512BITALG | CROARING_AVX512VPOPCNTDQ); -#endif +int run_container_rank(const run_container_t *container, uint16_t x) { + int sum = 0; + uint32_t x32 = x; + for (int i = 0; i < container->n_runs; i++) { + uint32_t startpoint = container->runs[i].value; + uint32_t length = container->runs[i].length; + uint32_t endpoint = length + startpoint; + if (x <= endpoint) { + if (x < startpoint) break; + return sum + (x32 - startpoint) + 1; + } else { + sum += length + 1; + } + } + return sum; +} +uint32_t run_container_rank_many(const run_container_t *container, + uint64_t start_rank, const uint32_t *begin, + const uint32_t *end, uint64_t *ans) { + const uint16_t high = (uint16_t)((*begin) >> 16); + const uint32_t *iter = begin; + int sum = 0; + int i = 0; + for (; iter != end; iter++) { + uint32_t x = *iter; + uint16_t xhigh = (uint16_t)(x >> 16); + if (xhigh != high) return iter - begin; // stop at next container -#if defined(__x86_64__) || defined(_M_AMD64) // x64 + uint32_t x32 = x & 0xFFFF; + while (i < container->n_runs) { + uint32_t startpoint = container->runs[i].value; + uint32_t length = container->runs[i].length; + uint32_t endpoint = length + startpoint; + if (x32 <= endpoint) { + if (x32 < startpoint) { + *(ans++) = start_rank + sum; + } else { + *(ans++) = start_rank + sum + (x32 - startpoint) + 1; + } + break; + } else { + sum += length + 
1; + i++; + } + } + if (i >= container->n_runs) *(ans++) = start_rank + sum; + } -static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, - uint32_t *edx) { -#if CROARING_REGULAR_VISUAL_STUDIO - int cpu_info[4]; - __cpuidex(cpu_info, *eax, *ecx); - *eax = cpu_info[0]; - *ebx = cpu_info[1]; - *ecx = cpu_info[2]; - *edx = cpu_info[3]; -#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) - uint32_t level = *eax; - __get_cpuid(level, eax, ebx, ecx, edx); -#else - uint32_t a = *eax, b, c = *ecx, d; - __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); - *eax = a; - *ebx = b; - *ecx = c; - *edx = d; -#endif + return iter - begin; } -static inline uint64_t xgetbv(void) { -#if defined(_MSC_VER) - return _xgetbv(0); -#else - uint32_t xcr0_lo, xcr0_hi; - __asm__("xgetbv\n\t" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0)); - return xcr0_lo | ((uint64_t)xcr0_hi << 32); -#endif +int run_container_get_index(const run_container_t *container, uint16_t x) { + if (run_container_contains(container, x)) { + int sum = 0; + uint32_t x32 = x; + for (int i = 0; i < container->n_runs; i++) { + uint32_t startpoint = container->runs[i].value; + uint32_t length = container->runs[i].length; + uint32_t endpoint = length + startpoint; + if (x <= endpoint) { + if (x < startpoint) break; + return sum + (x32 - startpoint); + } else { + sum += length + 1; + } + } + return sum - 1; + } else { + return -1; + } } -/** - * This is a relatively expensive function but it will get called at most - * *once* per compilation units. Normally, the CRoaring library is built - * as one compilation unit. 
- */ -static inline uint32_t dynamic_croaring_detect_supported_architectures(void) { - uint32_t eax, ebx, ecx, edx; - uint32_t host_isa = 0x0; - // Can be found on Intel ISA Reference for CPUID - static uint32_t cpuid_avx2_bit = - 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7 - static uint32_t cpuid_bmi1_bit = - 1 << 3; ///< @private bit 3 of EBX for EAX=0x7 - static uint32_t cpuid_bmi2_bit = - 1 << 8; ///< @private bit 8 of EBX for EAX=0x7 - static uint32_t cpuid_avx512f_bit = - 1 << 16; ///< @private bit 16 of EBX for EAX=0x7 - static uint32_t cpuid_avx512dq_bit = - 1 << 17; ///< @private bit 17 of EBX for EAX=0x7 - static uint32_t cpuid_avx512bw_bit = - 1 << 30; ///< @private bit 30 of EBX for EAX=0x7 - static uint32_t cpuid_avx512vbmi2_bit = - 1 << 6; ///< @private bit 6 of ECX for EAX=0x7 - static uint32_t cpuid_avx512bitalg_bit = - 1 << 12; ///< @private bit 12 of ECX for EAX=0x7 - static uint32_t cpuid_avx512vpopcntdq_bit = - 1 << 14; ///< @private bit 14 of ECX for EAX=0x7 - static uint64_t cpuid_avx256_saved = 1 << 2; ///< @private bit 2 = AVX - static uint64_t cpuid_avx512_saved = - 7 << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM - static uint32_t cpuid_sse42_bit = - 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 - static uint32_t cpuid_osxsave = - (1 << 26) | (1 << 27); ///< @private bits 26+27 of ECX for EAX=0x1 - static uint32_t cpuid_pclmulqdq_bit = - 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 +#if defined(CROARING_IS_X64) && CROARING_COMPILER_SUPPORTS_AVX512 - // EBX for EAX=0x1 - eax = 0x1; - ecx = 0x0; - cpuid(&eax, &ebx, &ecx, &edx); +CROARING_TARGET_AVX512 +ALLOW_UNALIGNED +/* Get the cardinality of `run'. Requires an actual computation. 
*/ +static inline int _avx512_run_container_cardinality( + const run_container_t *run) { + const int32_t n_runs = run->n_runs; + const rle16_t *runs = run->runs; - if (ecx & cpuid_sse42_bit) { - host_isa |= CROARING_SSE42; - } else { - return host_isa; // everything after is redundant - } + /* by initializing with n_runs, we omit counting the +1 for each pair. */ + int sum = n_runs; + int32_t k = 0; + const int32_t step = sizeof(__m512i) / sizeof(rle16_t); + if (n_runs > step) { + __m512i total = _mm512_setzero_si512(); + for (; k + step <= n_runs; k += step) { + __m512i ymm1 = _mm512_loadu_si512((const __m512i *)(runs + k)); + __m512i justlengths = _mm512_srli_epi32(ymm1, 16); + total = _mm512_add_epi32(total, justlengths); + } - if (ecx & cpuid_pclmulqdq_bit) { - host_isa |= CROARING_PCLMULQDQ; - } + __m256i lo = _mm512_extracti32x8_epi32(total, 0); + __m256i hi = _mm512_extracti32x8_epi32(total, 1); - if ((ecx & cpuid_osxsave) != cpuid_osxsave) { - return host_isa; + // a store might be faster than extract? + uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; + _mm256_storeu_si256((__m256i *)buffer, lo); + sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + + (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); + + _mm256_storeu_si256((__m256i *)buffer, hi); + sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + + (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); + } + for (; k < n_runs; ++k) { + sum += runs[k].length; } - // xgetbv for checking if the OS saves registers - uint64_t xcr0 = xgetbv(); + return sum; +} - if ((xcr0 & cpuid_avx256_saved) == 0) { - return host_isa; - } +CROARING_UNTARGET_AVX512 - // ECX for EAX=0x7 - eax = 0x7; - ecx = 0x0; - cpuid(&eax, &ebx, &ecx, &edx); - if (ebx & cpuid_avx2_bit) { - host_isa |= CROARING_AVX2; +CROARING_TARGET_AVX2 +ALLOW_UNALIGNED +/* Get the cardinality of `run'. Requires an actual computation. 
*/ +static inline int _avx2_run_container_cardinality(const run_container_t *run) { + const int32_t n_runs = run->n_runs; + const rle16_t *runs = run->runs; + + /* by initializing with n_runs, we omit counting the +1 for each pair. */ + int sum = n_runs; + int32_t k = 0; + const int32_t step = sizeof(__m256i) / sizeof(rle16_t); + if (n_runs > step) { + __m256i total = _mm256_setzero_si256(); + for (; k + step <= n_runs; k += step) { + __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k)); + __m256i justlengths = _mm256_srli_epi32(ymm1, 16); + total = _mm256_add_epi32(total, justlengths); + } + // a store might be faster than extract? + uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; + _mm256_storeu_si256((__m256i *)buffer, total); + sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + + (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); } - if (ebx & cpuid_bmi1_bit) { - host_isa |= CROARING_BMI1; + for (; k < n_runs; ++k) { + sum += runs[k].length; } - if (ebx & cpuid_bmi2_bit) { - host_isa |= CROARING_BMI2; - } + return sum; +} - if (!((xcr0 & cpuid_avx512_saved) == cpuid_avx512_saved)) { - return host_isa; - } +ALLOW_UNALIGNED +int _avx2_run_container_to_uint32_array(void *vout, const run_container_t *cont, + uint32_t base) { + int outpos = 0; + uint32_t *out = (uint32_t *)vout; - if (ebx & cpuid_avx512f_bit) { - host_isa |= CROARING_AVX512F; + for (int i = 0; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + if (le < 8) { + for (int j = 0; j <= le; ++j) { + uint32_t val = run_start + j; + memcpy(out + outpos, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + outpos++; + } + } else { + int j = 0; + __m256i run_start_v = _mm256_set1_epi32(run_start); + // [8,8,8,8....] + __m256i inc = _mm256_set1_epi32(8); + // used for generate sequence: + // [0, 1, 2, 3...], [8, 9, 10,...] 
+ __m256i delta = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + for (j = 0; j + 8 <= le; j += 8) { + __m256i val_v = _mm256_add_epi32(run_start_v, delta); + _mm256_storeu_si256((__m256i *)(out + outpos), val_v); + delta = _mm256_add_epi32(inc, delta); + outpos += 8; + } + for (; j <= le; ++j) { + uint32_t val = run_start + j; + memcpy(out + outpos, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + outpos++; + } + } } + return outpos; +} - if (ebx & cpuid_avx512bw_bit) { - host_isa |= CROARING_AVX512BW; - } +CROARING_UNTARGET_AVX2 - if (ebx & cpuid_avx512dq_bit) { - host_isa |= CROARING_AVX512DQ; +/* Get the cardinality of `run'. Requires an actual computation. */ +static inline int _scalar_run_container_cardinality( + const run_container_t *run) { + const int32_t n_runs = run->n_runs; + const rle16_t *runs = run->runs; + + /* by initializing with n_runs, we omit counting the +1 for each pair. */ + int sum = n_runs; + for (int k = 0; k < n_runs; ++k) { + sum += runs[k].length; } - if (ecx & cpuid_avx512vbmi2_bit) { - host_isa |= CROARING_AVX512VBMI2; - } + return sum; +} - if (ecx & cpuid_avx512bitalg_bit) { - host_isa |= CROARING_AVX512BITALG; +int run_container_cardinality(const run_container_t *run) { +#if CROARING_COMPILER_SUPPORTS_AVX512 + if (croaring_hardware_support() & ROARING_SUPPORTS_AVX512) { + return _avx512_run_container_cardinality(run); + } else +#endif + if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { + return _avx2_run_container_cardinality(run); + } else { + return _scalar_run_container_cardinality(run); } +} - if (ecx & cpuid_avx512vpopcntdq_bit) { - host_isa |= CROARING_AVX512VPOPCNTDQ; +int _scalar_run_container_to_uint32_array(void *vout, + const run_container_t *cont, + uint32_t base) { + int outpos = 0; + uint32_t *out = (uint32_t *)vout; + for (int i = 0; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + for (int j = 0; j <= le; ++j) { + uint32_t 
val = run_start + j; + memcpy(out + outpos, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + outpos++; + } } - - return host_isa; + return outpos; } -#endif // end SIMD extension detection code - -#if defined(__x86_64__) || defined(_M_AMD64) // x64 - -#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP -static inline uint32_t croaring_detect_supported_architectures(void) { - // thread-safe as per the C++11 standard. - static uint32_t buffer = dynamic_croaring_detect_supported_architectures(); - return buffer; -} -#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C -static uint32_t croaring_detect_supported_architectures(void) { - // we use an atomic for thread safety - static _Atomic uint32_t buffer = CROARING_UNINITIALIZED; - if (buffer == CROARING_UNINITIALIZED) { - // atomicity is sufficient - buffer = dynamic_croaring_detect_supported_architectures(); +int run_container_to_uint32_array(void *vout, const run_container_t *cont, + uint32_t base) { + if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { + return _avx2_run_container_to_uint32_array(vout, cont, base); + } else { + return _scalar_run_container_to_uint32_array(vout, cont, base); } - return buffer; } + #else -// If we do not have atomics, we do the best we can. -static inline uint32_t croaring_detect_supported_architectures(void) { - static uint32_t buffer = CROARING_UNINITIALIZED; - if (buffer == CROARING_UNINITIALIZED) { - buffer = dynamic_croaring_detect_supported_architectures(); - } - return buffer; -} -#endif // CROARING_C_ATOMIC -#ifdef ROARING_DISABLE_AVX +/* Get the cardinality of `run'. Requires an actual computation. */ +ALLOW_UNALIGNED +int run_container_cardinality(const run_container_t *run) { + const int32_t n_runs = run->n_runs; + const rle16_t *runs = run->runs; -int croaring_hardware_support(void) { return 0; } + /* by initializing with n_runs, we omit counting the +1 for each pair. 
*/ + int sum = n_runs; + for (int k = 0; k < n_runs; ++k) { + sum += runs[k].length; + } -#elif defined(__AVX512F__) && defined(__AVX512DQ__) && \ - defined(__AVX512BW__) && defined(__AVX512VBMI2__) && \ - defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__) -int croaring_hardware_support(void) { - return ROARING_SUPPORTS_AVX2 | ROARING_SUPPORTS_AVX512; + return sum; } -#elif defined(__AVX2__) -int croaring_hardware_support(void) { - static -#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C - _Atomic -#endif - int support = 0xFFFFFFF; - if (support == 0xFFFFFFF) { - bool avx512_support = false; -#if CROARING_COMPILER_SUPPORTS_AVX512 - avx512_support = - ((croaring_detect_supported_architectures() & - CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED); -#endif - support = ROARING_SUPPORTS_AVX2 | - (avx512_support ? ROARING_SUPPORTS_AVX512 : 0); +ALLOW_UNALIGNED +int run_container_to_uint32_array(void *vout, const run_container_t *cont, + uint32_t base) { + int outpos = 0; + uint32_t *out = (uint32_t *)vout; + for (int i = 0; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + for (int j = 0; j <= le; ++j) { + uint32_t val = run_start + j; + memcpy(out + outpos, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + outpos++; + } } - return support; + return outpos; } -#else -int croaring_hardware_support(void) { - static -#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C - _Atomic -#endif - int support = 0xFFFFFFF; - if (support == 0xFFFFFFF) { - bool has_avx2 = (croaring_detect_supported_architectures() & - CROARING_AVX2) == CROARING_AVX2; - bool has_avx512 = false; -#if CROARING_COMPILER_SUPPORTS_AVX512 - has_avx512 = (croaring_detect_supported_architectures() & - CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED; -#endif // CROARING_COMPILER_SUPPORTS_AVX512 - support = (has_avx2 ? ROARING_SUPPORTS_AVX2 : 0) | - (has_avx512 ? 
ROARING_SUPPORTS_AVX512 : 0); - } - return support; -} #endif -#endif // defined(__x86_64__) || defined(_M_AMD64) // x64 #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace internal { #endif -/* end file src/isadetection.c */ -/* begin file src/memory.c */ -#include - - -// without the following, we get lots of warnings about posix_memalign -#ifndef __cplusplus -extern int posix_memalign(void** __memptr, size_t __alignment, size_t __size); -#endif //__cplusplus // C++ does not have a well defined signature - -// portable version of posix_memalign -static void* roaring_bitmap_aligned_malloc(size_t alignment, size_t size) { - void* p; -#ifdef _MSC_VER - p = _aligned_malloc(size, alignment); -#elif defined(__MINGW32__) || defined(__MINGW64__) - p = __mingw_aligned_malloc(size, alignment); -#else - // somehow, if this is used before including "x86intrin.h", it creates an - // implicit defined warning. - if (posix_memalign(&p, alignment, size) != 0) return NULL; -#endif - return p; -} +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif/* end file src/containers/run.c */ +/* begin file src/isadetection.c */ -static void roaring_bitmap_aligned_free(void* memblock) { -#ifdef _MSC_VER - _aligned_free(memblock); -#elif defined(__MINGW32__) || defined(__MINGW64__) - __mingw_aligned_free(memblock); -#else - free(memblock); -#endif -} +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. 
-static roaring_memory_t global_memory_hook = { - .malloc = malloc, - .realloc = realloc, - .calloc = calloc, - .free = free, - .aligned_malloc = roaring_bitmap_aligned_malloc, - .aligned_free = roaring_bitmap_aligned_free, -}; +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) -void roaring_init_memory_hook(roaring_memory_t memory_hook) { - global_memory_hook = memory_hook; -} +All rights reserved. -void* roaring_malloc(size_t n) { return global_memory_hook.malloc(n); } +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -void* roaring_realloc(void* p, size_t new_sz) { - return global_memory_hook.realloc(p, new_sz); -} +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. -void* roaring_calloc(size_t n_elements, size_t element_size) { - return global_memory_hook.calloc(n_elements, element_size); -} +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. -void roaring_free(void* p) { global_memory_hook.free(p); } +3. 
Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. -void* roaring_aligned_malloc(size_t alignment, size_t size) { - return global_memory_hook.aligned_malloc(alignment, size); -} +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ -void roaring_aligned_free(void* p) { global_memory_hook.aligned_free(p); } -/* end file src/memory.c */ -/* begin file src/roaring.c */ -#include -#include -#include +#include #include -#include -#include +#include + +// Binaries produced by Visual Studio 19.38 with solely AVX2 routines +// can compile to AVX-512 thus causing crashes on non-AVX-512 systems. +// This appears to affect VS 17.8 and 17.9. We disable AVX-512 and AVX2 +// on these systems. It seems that ClangCL is not affected. 
+// https://github.com/RoaringBitmap/CRoaring/pull/603 +#ifndef __clang__ +#if _MSC_VER == 1938 +#define ROARING_DISABLE_AVX 1 +#endif // _MSC_VER == 1938 +#endif // __clang__ +// We need portability.h to be included first, see +// https://github.com/RoaringBitmap/CRoaring/issues/394 +#if CROARING_REGULAR_VISUAL_STUDIO +#include +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) +#include +#endif // CROARING_REGULAR_VISUAL_STUDIO -// Include after roaring.h +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." +#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif #ifdef __cplusplus -using namespace ::roaring::internal; - extern "C" { namespace roaring { -namespace api { +namespace internal { +#endif +enum croaring_instruction_set { + CROARING_DEFAULT = 0x0, + CROARING_NEON = 0x1, + CROARING_AVX2 = 0x4, + CROARING_SSE42 = 0x8, + CROARING_PCLMULQDQ = 0x10, + CROARING_BMI1 = 0x20, + CROARING_BMI2 = 0x40, + CROARING_ALTIVEC = 0x80, + CROARING_AVX512F = 0x100, + CROARING_AVX512DQ = 0x200, + CROARING_AVX512BW = 0x400, + CROARING_AVX512VBMI2 = 0x800, + CROARING_AVX512BITALG = 0x1000, + CROARING_AVX512VPOPCNTDQ = 0x2000, + CROARING_UNINITIALIZED = 0x8000 +}; + +#if CROARING_COMPILER_SUPPORTS_AVX512 +unsigned int CROARING_AVX512_REQUIRED = + (CROARING_AVX512F | CROARING_AVX512DQ | CROARING_AVX512BW | + CROARING_AVX512VBMI2 | CROARING_AVX512BITALG | CROARING_AVX512VPOPCNTDQ); #endif -#define CROARING_SERIALIZATION_ARRAY_UINT32 1 -#define CROARING_SERIALIZATION_CONTAINER 2 -extern inline int roaring_trailing_zeroes(unsigned long long input_num); -extern inline int roaring_leading_zeroes(unsigned long long input_num); -extern inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r); -extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r); -extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, - bool cow); -extern inline roaring_bitmap_t 
*roaring_bitmap_create(void); -extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, - uint64_t max); -extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, - uint64_t min, uint64_t max); +#if defined(__x86_64__) || defined(_M_AMD64) // x64 -static inline bool is_cow(const roaring_bitmap_t *r) { - return r->high_low_container.flags & ROARING_FLAG_COW; +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, + uint32_t *edx) { +#if CROARING_REGULAR_VISUAL_STUDIO + int cpu_info[4]; + __cpuidex(cpu_info, *eax, *ecx); + *eax = cpu_info[0]; + *ebx = cpu_info[1]; + *ecx = cpu_info[2]; + *edx = cpu_info[3]; +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) + uint32_t level = *eax; + __get_cpuid(level, eax, ebx, ecx, edx); +#else + uint32_t a = *eax, b, c = *ecx, d; + __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); + *eax = a; + *ebx = b; + *ecx = c; + *edx = d; +#endif } -static inline bool is_frozen(const roaring_bitmap_t *r) { - return r->high_low_container.flags & ROARING_FLAG_FROZEN; + +static inline uint64_t xgetbv(void) { +#if defined(_MSC_VER) + return _xgetbv(0); +#else + uint32_t xcr0_lo, xcr0_hi; + __asm__("xgetbv\n\t" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0)); + return xcr0_lo | ((uint64_t)xcr0_hi << 32); +#endif } -// this is like roaring_bitmap_add, but it populates pointer arguments in such a -// way -// that we can recover the container touched, which, in turn can be used to -// accelerate some functions (when you repeatedly need to add to the same -// container) -static inline container_t *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, - uint32_t val, - uint8_t *type, - int *index) { - roaring_array_t *ra = &r->high_low_container; +/** + * This is a relatively expensive function but it will get called at most + * *once* per compilation units. Normally, the CRoaring library is built + * as one compilation unit. 
+ */ +static inline uint32_t dynamic_croaring_detect_supported_architectures(void) { + uint32_t eax, ebx, ecx, edx; + uint32_t host_isa = 0x0; + // Can be found on Intel ISA Reference for CPUID + static uint32_t cpuid_avx2_bit = + 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7 + static uint32_t cpuid_bmi1_bit = + 1 << 3; ///< @private bit 3 of EBX for EAX=0x7 + static uint32_t cpuid_bmi2_bit = + 1 << 8; ///< @private bit 8 of EBX for EAX=0x7 + static uint32_t cpuid_avx512f_bit = + 1 << 16; ///< @private bit 16 of EBX for EAX=0x7 + static uint32_t cpuid_avx512dq_bit = + 1 << 17; ///< @private bit 17 of EBX for EAX=0x7 + static uint32_t cpuid_avx512bw_bit = + 1 << 30; ///< @private bit 30 of EBX for EAX=0x7 + static uint32_t cpuid_avx512vbmi2_bit = + 1 << 6; ///< @private bit 6 of ECX for EAX=0x7 + static uint32_t cpuid_avx512bitalg_bit = + 1 << 12; ///< @private bit 12 of ECX for EAX=0x7 + static uint32_t cpuid_avx512vpopcntdq_bit = + 1 << 14; ///< @private bit 14 of ECX for EAX=0x7 + static uint64_t cpuid_avx256_saved = 1 << 2; ///< @private bit 2 = AVX + static uint64_t cpuid_avx512_saved = + 7 << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM + static uint32_t cpuid_sse42_bit = + 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 + static uint32_t cpuid_osxsave = + (1 << 26) | (1 << 27); ///< @private bits 26+27 of ECX for EAX=0x1 + static uint32_t cpuid_pclmulqdq_bit = + 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 - uint16_t hb = val >> 16; - const int i = ra_get_index(ra, hb); - if (i >= 0) { - ra_unshare_container_at_index(ra, (uint16_t)i); - container_t *c = ra_get_container_at_index(ra, (uint16_t)i, type); - uint8_t new_type = *type; - container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type); - *index = i; - if (c2 != c) { - container_free(c, *type); - ra_set_container_at_index(ra, i, c2, new_type); - *type = new_type; - return c2; - } else { - return c; - } + // EBX for EAX=0x1 + eax = 0x1; + ecx = 0x0; + cpuid(&eax, &ebx, &ecx, &edx); 
+ + if (ecx & cpuid_sse42_bit) { + host_isa |= CROARING_SSE42; } else { - array_container_t *new_ac = array_container_create(); - container_t *c = - container_add(new_ac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, type); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type); - *index = -i - 1; - return c; + return host_isa; // everything after is redundant } -} -roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; + if (ecx & cpuid_pclmulqdq_bit) { + host_isa |= CROARING_PCLMULQDQ; } - bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); - if (!is_ok) { - roaring_free(ans); - return NULL; + + if ((ecx & cpuid_osxsave) != cpuid_osxsave) { + return host_isa; } - return ans; -} -bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) { - return ra_init_with_capacity(&r->high_low_container, cap); -} + // xgetbv for checking if the OS saves registers + uint64_t xcr0 = xgetbv(); -static inline void add_bulk_impl(roaring_bitmap_t *r, - roaring_bulk_context_t *context, - uint32_t val) { - uint16_t key = val >> 16; - if (context->container == NULL || context->key != key) { - uint8_t typecode; - int idx; - context->container = - containerptr_roaring_bitmap_add(r, val, &typecode, &idx); - context->typecode = typecode; - context->idx = idx; - context->key = key; - } else { - // no need to seek the container, it is at hand - // because we already have the container at hand, we can do the - // insertion directly, bypassing the roaring_bitmap_add call - uint8_t new_typecode; - container_t *container2 = container_add( - context->container, val & 0xFFFF, context->typecode, &new_typecode); - if (container2 != context->container) { - // rare instance when we need to change the container type - container_free(context->container, context->typecode); - 
ra_set_container_at_index(&r->high_low_container, context->idx, - container2, new_typecode); - context->typecode = new_typecode; - context->container = container2; - } + if ((xcr0 & cpuid_avx256_saved) == 0) { + return host_isa; } -} -void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - uint32_t val; - const uint32_t *start = vals; - const uint32_t *end = vals + n_args; - const uint32_t *current_val = start; + // ECX for EAX=0x7 + eax = 0x7; + ecx = 0x0; + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx & cpuid_avx2_bit) { + host_isa |= CROARING_AVX2; + } + if (ebx & cpuid_bmi1_bit) { + host_isa |= CROARING_BMI1; + } - if (n_args == 0) { - return; + if (ebx & cpuid_bmi2_bit) { + host_isa |= CROARING_BMI2; + } + + if (!((xcr0 & cpuid_avx512_saved) == cpuid_avx512_saved)) { + return host_isa; + } + + if (ebx & cpuid_avx512f_bit) { + host_isa |= CROARING_AVX512F; + } + + if (ebx & cpuid_avx512bw_bit) { + host_isa |= CROARING_AVX512BW; + } + + if (ebx & cpuid_avx512dq_bit) { + host_isa |= CROARING_AVX512DQ; + } + + if (ecx & cpuid_avx512vbmi2_bit) { + host_isa |= CROARING_AVX512VBMI2; } - uint8_t typecode; - int idx; - container_t *container; - val = *current_val; - container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx); - roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), - typecode}; + if (ecx & cpuid_avx512bitalg_bit) { + host_isa |= CROARING_AVX512BITALG; + } - for (; current_val != end; current_val++) { - memcpy(&val, current_val, sizeof(val)); - add_bulk_impl(r, &context, val); + if (ecx & cpuid_avx512vpopcntdq_bit) { + host_isa |= CROARING_AVX512VPOPCNTDQ; } -} -void roaring_bitmap_add_bulk(roaring_bitmap_t *r, - roaring_bulk_context_t *context, uint32_t val) { - add_bulk_impl(r, context, val); + return host_isa; } -bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, - roaring_bulk_context_t *context, - uint32_t val) { - uint16_t key = val >> 16; - if (context->container == NULL || 
context->key != key) { - int32_t start_idx = -1; - if (context->container != NULL && context->key < key) { - start_idx = context->idx; - } - int idx = ra_advance_until(&r->high_low_container, key, start_idx); - if (idx == ra_get_size(&r->high_low_container)) { - return false; - } - uint8_t typecode; - context->container = ra_get_container_at_index( - &r->high_low_container, (uint16_t)idx, &typecode); - context->typecode = typecode; - context->idx = idx; - context->key = - ra_get_key_at_index(&r->high_low_container, (uint16_t)idx); - // ra_advance_until finds the next key >= the target, we found a later - // container. - if (context->key != key) { - return false; - } - } - // context is now set up - return container_contains(context->container, val & 0xFFFF, - context->typecode); -} +#endif // end SIMD extension detection code -roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { - roaring_bitmap_t *answer = roaring_bitmap_create(); - roaring_bitmap_add_many(answer, n_args, vals); - return answer; -} +#if defined(__x86_64__) || defined(_M_AMD64) // x64 -roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { - // todo: could be greatly optimized but we do not expect this call to ever - // include long lists - roaring_bitmap_t *answer = roaring_bitmap_create(); - roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; - va_list ap; - va_start(ap, n_args); - for (size_t i = 0; i < n_args; i++) { - uint32_t val = va_arg(ap, uint32_t); - roaring_bitmap_add_bulk(answer, &context, val); +#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP +static inline uint32_t croaring_detect_supported_architectures(void) { + // thread-safe as per the C++11 standard. 
+ static uint32_t buffer = dynamic_croaring_detect_supported_architectures(); + return buffer; +} +#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C +static uint32_t croaring_detect_supported_architectures(void) { + // we use an atomic for thread safety + static _Atomic uint32_t buffer = CROARING_UNINITIALIZED; + if (buffer == CROARING_UNINITIALIZED) { + // atomicity is sufficient + buffer = dynamic_croaring_detect_supported_architectures(); } - va_end(ap); - return answer; + return buffer; +} +#else +// If we do not have atomics, we do the best we can. +static inline uint32_t croaring_detect_supported_architectures(void) { + static uint32_t buffer = CROARING_UNINITIALIZED; + if (buffer == CROARING_UNINITIALIZED) { + buffer = dynamic_croaring_detect_supported_architectures(); + } + return buffer; } +#endif // CROARING_C_ATOMIC -static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { - return (a < b) ? a : b; +#ifdef ROARING_DISABLE_AVX + +int croaring_hardware_support(void) { return 0; } + +#elif defined(__AVX512F__) && defined(__AVX512DQ__) && \ + defined(__AVX512BW__) && defined(__AVX512VBMI2__) && \ + defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__) +int croaring_hardware_support(void) { + return ROARING_SUPPORTS_AVX2 | ROARING_SUPPORTS_AVX512; } +#elif defined(__AVX2__) -roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, - uint32_t step) { - if (max >= UINT64_C(0x100000000)) { - max = UINT64_C(0x100000000); - } - if (step == 0) return NULL; - if (max <= min) return NULL; - roaring_bitmap_t *answer = roaring_bitmap_create(); - if (step >= (1 << 16)) { - for (uint32_t value = (uint32_t)min; value < max; value += step) { - roaring_bitmap_add(answer, value); - } - return answer; +int croaring_hardware_support(void) { + static +#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C + _Atomic +#endif + int support = 0xFFFFFFF; + if (support == 0xFFFFFFF) { + bool avx512_support = false; +#if CROARING_COMPILER_SUPPORTS_AVX512 + 
avx512_support = + ((croaring_detect_supported_architectures() & + CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED); +#endif + support = ROARING_SUPPORTS_AVX2 | + (avx512_support ? ROARING_SUPPORTS_AVX512 : 0); } - uint64_t min_tmp = min; - do { - uint32_t key = (uint32_t)min_tmp >> 16; - uint32_t container_min = min_tmp & 0xFFFF; - uint32_t container_max = - (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); - uint8_t type; - container_t *container = container_from_range( - &type, container_min, container_max, (uint16_t)step); - ra_append(&answer->high_low_container, (uint16_t)key, container, type); - uint32_t gap = container_max - container_min + step - 1; - min_tmp += gap - (gap % step); - } while (min_tmp < max); - // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step - return answer; + return support; } +#else -void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, - uint32_t max) { - if (min > max) { - return; +int croaring_hardware_support(void) { + static +#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C + _Atomic +#endif + int support = 0xFFFFFFF; + if (support == 0xFFFFFFF) { + bool has_avx2 = (croaring_detect_supported_architectures() & + CROARING_AVX2) == CROARING_AVX2; + bool has_avx512 = false; +#if CROARING_COMPILER_SUPPORTS_AVX512 + has_avx512 = (croaring_detect_supported_architectures() & + CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED; +#endif // CROARING_COMPILER_SUPPORTS_AVX512 + support = (has_avx2 ? ROARING_SUPPORTS_AVX2 : 0) | + (has_avx512 ? 
ROARING_SUPPORTS_AVX512 : 0); } + return support; +} +#endif - roaring_array_t *ra = &r->high_low_container; +#endif // defined(__x86_64__) || defined(_M_AMD64) // x64 +#ifdef __cplusplus +} +} +} // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/isadetection.c */ +/* begin file src/memory.c */ +#include - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; - int32_t num_required_containers = max_key - min_key + 1; - int32_t suffix_length = - count_greater(ra->keys, ra->size, (uint16_t)max_key); - int32_t prefix_length = - count_less(ra->keys, ra->size - suffix_length, (uint16_t)min_key); - int32_t common_length = ra->size - prefix_length - suffix_length; +// without the following, we get lots of warnings about posix_memalign +#ifndef __cplusplus +extern int posix_memalign(void** __memptr, size_t __alignment, size_t __size); +#endif //__cplusplus // C++ does not have a well defined signature - if (num_required_containers > common_length) { - ra_shift_tail(ra, suffix_length, - num_required_containers - common_length); - } +// portable version of posix_memalign +static void* roaring_bitmap_aligned_malloc(size_t alignment, size_t size) { + void* p; +#ifdef _MSC_VER + p = _aligned_malloc(size, alignment); +#elif defined(__MINGW32__) || defined(__MINGW64__) + p = __mingw_aligned_malloc(size, alignment); +#else + // somehow, if this is used before including "x86intrin.h", it creates an + // implicit defined warning. + if (posix_memalign(&p, alignment, size) != 0) return NULL; +#endif + return p; +} - int32_t src = prefix_length + common_length - 1; - int32_t dst = ra->size - suffix_length - 1; - for (uint32_t key = max_key; key != min_key - 1; - key--) { // beware of min_key==0 - uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; - uint32_t container_max = (max_key == key) ? 
(max & 0xffff) : 0xffff; - container_t *new_container; - uint8_t new_type; +static void roaring_bitmap_aligned_free(void* memblock) { +#ifdef _MSC_VER + _aligned_free(memblock); +#elif defined(__MINGW32__) || defined(__MINGW64__) + __mingw_aligned_free(memblock); +#else + free(memblock); +#endif +} - if (src >= 0 && ra->keys[src] == key) { - ra_unshare_container_at_index(ra, (uint16_t)src); - new_container = - container_add_range(ra->containers[src], ra->typecodes[src], - container_min, container_max, &new_type); - if (new_container != ra->containers[src]) { - container_free(ra->containers[src], ra->typecodes[src]); - } - src--; - } else { - new_container = container_from_range(&new_type, container_min, - container_max + 1, 1); - } - ra_replace_key_and_container_at_index(ra, dst, (uint16_t)key, - new_container, new_type); - dst--; - } +static roaring_memory_t global_memory_hook = { + .malloc = malloc, + .realloc = realloc, + .calloc = calloc, + .free = free, + .aligned_malloc = roaring_bitmap_aligned_malloc, + .aligned_free = roaring_bitmap_aligned_free, +}; + +void roaring_init_memory_hook(roaring_memory_t memory_hook) { + global_memory_hook = memory_hook; } -void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, - uint32_t max) { - if (min > max) { - return; - } +void* roaring_malloc(size_t n) { return global_memory_hook.malloc(n); } - roaring_array_t *ra = &r->high_low_container; +void* roaring_realloc(void* p, size_t new_sz) { + return global_memory_hook.realloc(p, new_sz); +} - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; +void* roaring_calloc(size_t n_elements, size_t element_size) { + return global_memory_hook.calloc(n_elements, element_size); +} - int32_t src = count_less(ra->keys, ra->size, (uint16_t)min_key); - int32_t dst = src; - while (src < ra->size && ra->keys[src] <= max_key) { - uint32_t container_min = - (min_key == ra->keys[src]) ? (min & 0xffff) : 0; - uint32_t container_max = - (max_key == ra->keys[src]) ? 
(max & 0xffff) : 0xffff; - ra_unshare_container_at_index(ra, (uint16_t)src); - container_t *new_container; - uint8_t new_type; - new_container = - container_remove_range(ra->containers[src], ra->typecodes[src], - container_min, container_max, &new_type); - if (new_container != ra->containers[src]) { - container_free(ra->containers[src], ra->typecodes[src]); - } - if (new_container) { - ra_replace_key_and_container_at_index(ra, dst, ra->keys[src], - new_container, new_type); - dst++; - } - src++; - } - if (src > dst) { - ra_shift_tail(ra, ra->size - src, dst - src); - } +void roaring_free(void* p) { global_memory_hook.free(p); } + +void* roaring_aligned_malloc(size_t alignment, size_t size) { + return global_memory_hook.aligned_malloc(alignment, size); } -void roaring_bitmap_printf(const roaring_bitmap_t *r) { - const roaring_array_t *ra = &r->high_low_container; +void roaring_aligned_free(void* p) { global_memory_hook.aligned_free(p); } +/* end file src/memory.c */ +/* begin file src/roaring_array.c */ +#include +#include +#include +#include +#include +#include - printf("{"); - for (int i = 0; i < ra->size; ++i) { - container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - if (i + 1 < ra->size) { - printf(","); - } - } - printf("}"); -} +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace internal { +#endif -void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) { - const roaring_array_t *ra = &r->high_low_container; +// Convention: [0,ra->size) all elements are initialized +// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing - printf("{"); - for (int i = 0; i < ra->size; ++i) { - printf("%d: %s (%d)", ra->keys[i], - get_full_container_name(ra->containers[i], ra->typecodes[i]), - container_get_cardinality(ra->containers[i], ra->typecodes[i])); - if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) { - printf("(shared count = %" PRIu32 " )", - croaring_refcount_get( - 
&(CAST_shared(ra->containers[i])->counter))); - } +extern inline int32_t ra_get_size(const roaring_array_t *ra); +extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); - if (i + 1 < ra->size) { - printf(", "); - } - } - printf("}"); -} +extern inline container_t *ra_get_container_at_index(const roaring_array_t *ra, + uint16_t i, + uint8_t *typecode); -/** - * (For advanced users.) - * Collect statistics about the bitmap - */ -void roaring_bitmap_statistics(const roaring_bitmap_t *r, - roaring_statistics_t *stat) { - const roaring_array_t *ra = &r->high_low_container; +extern inline void ra_unshare_container_at_index(roaring_array_t *ra, + uint16_t i); - memset(stat, 0, sizeof(*stat)); - stat->n_containers = ra->size; - stat->min_value = roaring_bitmap_minimum(r); - stat->max_value = roaring_bitmap_maximum(r); +extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, + int32_t i, + uint16_t key, + container_t *c, + uint8_t typecode); - for (int i = 0; i < ra->size; ++i) { - uint8_t truetype = - get_container_type(ra->containers[i], ra->typecodes[i]); - uint32_t card = - container_get_cardinality(ra->containers[i], ra->typecodes[i]); - uint32_t sbytes = - container_size_in_bytes(ra->containers[i], ra->typecodes[i]); - stat->cardinality += card; - switch (truetype) { - case BITSET_CONTAINER_TYPE: - stat->n_bitset_containers++; - stat->n_values_bitset_containers += card; - stat->n_bytes_bitset_containers += sbytes; - break; - case ARRAY_CONTAINER_TYPE: - stat->n_array_containers++; - stat->n_values_array_containers += card; - stat->n_bytes_array_containers += sbytes; - break; - case RUN_CONTAINER_TYPE: - stat->n_run_containers++; - stat->n_values_run_containers += card; - stat->n_bytes_run_containers += sbytes; - break; - default: - assert(false); - roaring_unreachable; - } - } -} +extern inline void ra_set_container_at_index(const roaring_array_t *ra, + int32_t i, container_t *c, + uint8_t typecode); -/* - * Checks that: - * - 
Array containers are sorted and contain no duplicates - * - Range containers are sorted and contain no overlapping ranges - * - Roaring containers are sorted by key and there are no duplicate keys - * - The correct container type is use for each container (e.g. bitmaps aren't - * used for small containers) - */ -bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, - const char **reason) { - const char *reason_local; - if (reason == NULL) { - // Always allow assigning through *reason - reason = &reason_local; - } - *reason = NULL; - const roaring_array_t *ra = &r->high_low_container; - if (ra->size < 0) { - *reason = "negative size"; - return false; - } - if (ra->allocation_size < 0) { - *reason = "negative allocation size"; - return false; - } - if (ra->size > ra->allocation_size) { - *reason = "more containers than allocated space"; - return false; - } - if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) { - *reason = "invalid flags"; - return false; - } - if (ra->size == 0) { +static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { + // + // Note: not implemented using C's realloc(), because the memory layout is + // Struct-of-Arrays vs. 
Array-of-Structs: + // https://github.com/RoaringBitmap/CRoaring/issues/256 + + if (new_capacity == 0) { + roaring_free(ra->containers); + ra->containers = NULL; + ra->keys = NULL; + ra->typecodes = NULL; + ra->allocation_size = 0; return true; } + const size_t memoryneeded = + new_capacity * + (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); + void *bigalloc = roaring_malloc(memoryneeded); + if (!bigalloc) return false; + void *oldbigalloc = ra->containers; + container_t **newcontainers = (container_t **)bigalloc; + uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); + uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); + assert((char *)(newtypecodes + new_capacity) == + (char *)bigalloc + memoryneeded); + if (ra->size > 0) { + memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size); + memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); + memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); + } + ra->containers = newcontainers; + ra->keys = newkeys; + ra->typecodes = newtypecodes; + ra->allocation_size = new_capacity; + roaring_free(oldbigalloc); + return true; +} - if (ra->keys == NULL) { - *reason = "keys is NULL"; - return false; +bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { + if (!new_ra) return false; + ra_init(new_ra); + + // Containers hold 64Ki elements, so 64Ki containers is enough to hold + // `0x10000 * 0x10000` (all 2^32) elements + if (cap > 0x10000) { + cap = 0x10000; } - if (ra->typecodes == NULL) { - *reason = "typecodes is NULL"; - return false; + + if (cap > 0) { + void *bigalloc = roaring_malloc( + cap * (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t))); + if (bigalloc == NULL) return false; + new_ra->containers = (container_t **)bigalloc; + new_ra->keys = (uint16_t *)(new_ra->containers + cap); + new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); + // Narrowing is safe because of above check + new_ra->allocation_size = (int32_t)cap; } - if 
(ra->containers == NULL) { - *reason = "containers is NULL"; - return false; + return true; +} + +int ra_shrink_to_fit(roaring_array_t *ra) { + int savings = (ra->allocation_size - ra->size) * + (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); + if (!realloc_array(ra, ra->size)) { + return 0; } + ra->allocation_size = ra->size; + return savings; +} - uint32_t prev_key = ra->keys[0]; - for (int32_t i = 1; i < ra->size; ++i) { - if (ra->keys[i] <= prev_key) { - *reason = "keys not strictly increasing"; +void ra_init(roaring_array_t *new_ra) { + if (!new_ra) { + return; + } + new_ra->keys = NULL; + new_ra->containers = NULL; + new_ra->typecodes = NULL; + + new_ra->allocation_size = 0; + new_ra->size = 0; + new_ra->flags = 0; +} + +bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, + bool copy_on_write) { + ra_clear_containers(dest); // we are going to overwrite them + if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size + dest->size = 0; // <--- This is important. + return true; // output was just cleared, so they match + } + if (dest->allocation_size < source->size) { + if (!realloc_array(dest, source->size)) { return false; } - prev_key = ra->keys[i]; } - - for (int32_t i = 0; i < ra->size; ++i) { - if (!container_internal_validate(ra->containers[i], ra->typecodes[i], - reason)) { - // reason should already be set - if (*reason == NULL) { - *reason = "container failed to validate but no reason given"; + dest->size = source->size; + memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); + // we go through the containers, turning them into shared containers... 
+ if (copy_on_write) { + for (int32_t i = 0; i < dest->size; ++i) { + source->containers[i] = get_copy_of_container( + source->containers[i], &source->typecodes[i], copy_on_write); + } + // we do a shallow copy to the other bitmap + memcpy(dest->containers, source->containers, + dest->size * sizeof(container_t *)); + memcpy(dest->typecodes, source->typecodes, + dest->size * sizeof(uint8_t)); + } else { + memcpy(dest->typecodes, source->typecodes, + dest->size * sizeof(uint8_t)); + for (int32_t i = 0; i < dest->size; i++) { + dest->containers[i] = + container_clone(source->containers[i], source->typecodes[i]); + if (dest->containers[i] == NULL) { + for (int32_t j = 0; j < i; j++) { + container_free(dest->containers[j], dest->typecodes[j]); + } + ra_clear_without_containers(dest); + return false; } - return false; } } - return true; } -roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - if (!ra_init_with_capacity( // allocation of list of containers can fail - &ans->high_low_container, r->high_low_container.size)) { - roaring_free(ans); - return NULL; +void ra_clear_containers(roaring_array_t *ra) { + for (int32_t i = 0; i < ra->size; ++i) { + container_free(ra->containers[i], ra->typecodes[i]); } - if (!ra_overwrite( // memory allocation of individual containers may fail - &r->high_low_container, &ans->high_low_container, is_cow(r))) { - roaring_bitmap_free(ans); // overwrite should leave in freeable state - return NULL; +} + +void ra_reset(roaring_array_t *ra) { + ra_clear_containers(ra); + ra->size = 0; + ra_shrink_to_fit(ra); +} + +void ra_clear_without_containers(roaring_array_t *ra) { + roaring_free( + ra->containers); // keys and typecodes are allocated with containers + ra->size = 0; + ra->allocation_size = 0; + ra->containers = NULL; + ra->keys = NULL; + ra->typecodes = NULL; +} + +void ra_clear(roaring_array_t *ra) { + 
ra_clear_containers(ra); + ra_clear_without_containers(ra); +} + +bool extend_array(roaring_array_t *ra, int32_t k) { + int32_t desired_size = ra->size + k; + const int32_t max_containers = 65536; + assert(desired_size <= max_containers); + if (desired_size > ra->allocation_size) { + int32_t new_capacity = + (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4; + if (new_capacity > max_containers) { + new_capacity = max_containers; + } + + return realloc_array(ra, new_capacity); } - roaring_bitmap_set_copy_on_write(ans, is_cow(r)); - return ans; + return true; } -bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, - const roaring_bitmap_t *src) { - roaring_bitmap_set_copy_on_write(dest, is_cow(src)); - return ra_overwrite(&src->high_low_container, &dest->high_low_container, - is_cow(src)); +void ra_append(roaring_array_t *ra, uint16_t key, container_t *c, + uint8_t typecode) { + extend_array(ra, 1); + const int32_t pos = ra->size; + + ra->keys[pos] = key; + ra->containers[pos] = c; + ra->typecodes[pos] = typecode; + ra->size++; } -void roaring_bitmap_free(const roaring_bitmap_t *r) { - if (r == NULL) { - return; +void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t index, bool copy_on_write) { + extend_array(ra, 1); + const int32_t pos = ra->size; + + // old contents is junk that does not need freeing + ra->keys[pos] = sa->keys[index]; + // the shared container will be in two bitmaps + if (copy_on_write) { + sa->containers[index] = get_copy_of_container( + sa->containers[index], &sa->typecodes[index], copy_on_write); + ra->containers[pos] = sa->containers[index]; + ra->typecodes[pos] = sa->typecodes[index]; + } else { + ra->containers[pos] = + container_clone(sa->containers[index], sa->typecodes[index]); + ra->typecodes[pos] = sa->typecodes[index]; } - if (!is_frozen(r)) { - ra_clear((roaring_array_t *)&r->high_low_container); + ra->size++; +} + +void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, + 
uint16_t stopping_key, bool copy_on_write) { + for (int32_t i = 0; i < sa->size; ++i) { + if (sa->keys[i] >= stopping_key) break; + ra_append_copy(ra, sa, (uint16_t)i, copy_on_write); } - roaring_free((roaring_bitmap_t *)r); } -void roaring_bitmap_clear(roaring_bitmap_t *r) { - ra_reset(&r->high_low_container); +void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, + int32_t start_index, int32_t end_index, + bool copy_on_write) { + extend_array(ra, end_index - start_index); + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + ra->keys[pos] = sa->keys[i]; + if (copy_on_write) { + sa->containers[i] = get_copy_of_container( + sa->containers[i], &sa->typecodes[i], copy_on_write); + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + } else { + ra->containers[pos] = + container_clone(sa->containers[i], sa->typecodes[i]); + ra->typecodes[pos] = sa->typecodes[i]; + } + ra->size++; + } } -void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { - roaring_array_t *ra = &r->high_low_container; +void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t before_start, bool copy_on_write) { + int start_location = ra_get_index(sa, before_start); + if (start_location >= 0) + ++start_location; + else + start_location = -start_location - 1; + ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); +} - const uint16_t hb = val >> 16; - const int i = ra_get_index(ra, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(ra, (uint16_t)i); - container_t *container = - ra_get_container_at_index(ra, (uint16_t)i, &typecode); - uint8_t newtypecode = typecode; - container_t *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); +void 
ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, + int32_t start_index, int32_t end_index) { + extend_array(ra, end_index - start_index); + + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + + ra->keys[pos] = sa->keys[i]; + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + ra->size++; + } +} + +void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, + int32_t start_index, int32_t end_index, + bool copy_on_write) { + extend_array(ra, end_index - start_index); + + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + ra->keys[pos] = sa->keys[i]; + if (copy_on_write) { + sa->containers[i] = get_copy_of_container( + sa->containers[i], &sa->typecodes[i], copy_on_write); + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + } else { + ra->containers[pos] = + container_clone(sa->containers[i], sa->typecodes[i]); + ra->typecodes[pos] = sa->typecodes[i]; } - } else { - array_container_t *newac = array_container_create(); - container_t *container = - container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); + ra->size++; } } -bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); - container_t *container = ra_get_container_at_index( - &r->high_low_container, (uint16_t)i, &typecode); +container_t *ra_get_container(roaring_array_t *ra, uint16_t x, + uint8_t *typecode) { + int i = binarySearch(ra->keys, (int32_t)ra->size, x); + if (i < 0) return NULL; + *typecode = ra->typecodes[i]; + return ra->containers[i]; +} + +extern inline 
container_t *ra_get_container_at_index(const roaring_array_t *ra, + uint16_t i, + uint8_t *typecode); - const int oldCardinality = - container_get_cardinality(container, typecode); +extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, + uint16_t i); - uint8_t newtypecode = typecode; - container_t *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - result = true; - } else { - const int newCardinality = - container_get_cardinality(container, newtypecode); +extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); - result = oldCardinality != newCardinality; - } - } else { - array_container_t *newac = array_container_create(); - container_t *container = - container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); - result = true; +extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, + int32_t pos); + +// everything skipped over is freed +int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { + while (pos < ra->size && ra->keys[pos] < x) { + container_free(ra->containers[pos], ra->typecodes[pos]); + ++pos; } + return pos; +} - return result; +void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, + container_t *c, uint8_t typecode) { + extend_array(ra, 1); + // May be an optimization opportunity with DIY memmove + memmove(&(ra->keys[i + 1]), &(ra->keys[i]), + sizeof(uint16_t) * (ra->size - i)); + memmove(&(ra->containers[i + 1]), &(ra->containers[i]), + sizeof(container_t *) * (ra->size - i)); + memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), + sizeof(uint8_t) * (ra->size - i)); + ra->keys[i] = key; + ra->containers[i] = 
c; + ra->typecodes[i] = typecode; + ra->size++; } -void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); - container_t *container = ra_get_container_at_index( - &r->high_low_container, (uint16_t)i, &typecode); - uint8_t newtypecode = typecode; - container_t *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - if (container_get_cardinality(container2, newtypecode) != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } - } +// note: Java routine set things to 0, enabling GC. +// Java called it "resize" but it was always used to downsize. +// Allowing upsize would break the conventions about +// valid containers below ra->size. 
+ +void ra_downsize(roaring_array_t *ra, int32_t new_length) { + assert(new_length <= ra->size); + ra->size = new_length; } -bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); - container_t *container = ra_get_container_at_index( - &r->high_low_container, (uint16_t)i, &typecode); +void ra_remove_at_index(roaring_array_t *ra, int32_t i) { + memmove(&(ra->containers[i]), &(ra->containers[i + 1]), + sizeof(container_t *) * (ra->size - i - 1)); + memmove(&(ra->keys[i]), &(ra->keys[i + 1]), + sizeof(uint16_t) * (ra->size - i - 1)); + memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), + sizeof(uint8_t) * (ra->size - i - 1)); + ra->size--; +} - const int oldCardinality = - container_get_cardinality(container, typecode); +void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { + container_free(ra->containers[i], ra->typecodes[i]); + ra_remove_at_index(ra, i); +} - uint8_t newtypecode = typecode; - container_t *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } +// used in inplace andNot only, to slide left the containers from +// the mutated RoaringBitmap that are after the largest container of +// the argument RoaringBitmap. In use it should be followed by a call to +// downsize. 
+// +void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, + uint32_t new_begin) { + assert(begin <= end); + assert(new_begin < begin); - const int newCardinality = - container_get_cardinality(container2, newtypecode); + const int range = end - begin; - if (newCardinality != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } + // We ensure to previously have freed overwritten containers + // that are not copied elsewhere - result = oldCardinality != newCardinality; + memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), + sizeof(container_t *) * range); + memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), + sizeof(uint16_t) * range); + memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), + sizeof(uint8_t) * range); +} + +void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { + if (distance > 0) { + extend_array(ra, distance); } - return result; + int32_t srcpos = ra->size - count; + int32_t dstpos = srcpos + distance; + memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), sizeof(uint16_t) * count); + memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), + sizeof(container_t *) * count); + memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), + sizeof(uint8_t) * count); + ra->size += distance; } -void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - if (n_args == 0 || r->high_low_container.size == 0) { - return; +void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { + size_t ctr = 0; + for (int32_t i = 0; i < ra->size; ++i) { + int num_added = container_to_uint32_array( + ans + ctr, ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + ctr += num_added; } - int32_t pos = - -1; // position of the container used in the previous iteration - for (size_t i = 0; i < n_args; i++) { - uint16_t key = (uint16_t)(vals[i] >> 16); 
- if (pos < 0 || key != r->high_low_container.keys[pos]) { - pos = ra_get_index(&r->high_low_container, key); +} + +bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, + size_t limit, uint32_t *ans) { + size_t ctr = 0; + size_t dtr = 0; + + size_t t_limit = 0; + + bool first = false; + size_t first_skip = 0; + + uint32_t *t_ans = NULL; + size_t cur_len = 0; + + for (int i = 0; i < ra->size; ++i) { + const container_t *c = + container_unwrap_shared(ra->containers[i], &ra->typecodes[i]); + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: + t_limit = (const_CAST_bitset(c))->cardinality; + break; + case ARRAY_CONTAINER_TYPE: + t_limit = (const_CAST_array(c))->cardinality; + break; + case RUN_CONTAINER_TYPE: + t_limit = run_container_cardinality(const_CAST_run(c)); + break; } - if (pos >= 0) { - uint8_t new_typecode; - container_t *new_container; - new_container = container_remove( - r->high_low_container.containers[pos], vals[i] & 0xffff, - r->high_low_container.typecodes[pos], &new_typecode); - if (new_container != r->high_low_container.containers[pos]) { - container_free(r->high_low_container.containers[pos], - r->high_low_container.typecodes[pos]); - ra_replace_key_and_container_at_index(&r->high_low_container, - pos, key, new_container, - new_typecode); + if (ctr + t_limit - 1 >= offset && ctr < offset + limit) { + if (!first) { + // first_skip = t_limit - (ctr + t_limit - offset); + first_skip = offset - ctr; + first = true; + t_ans = (uint32_t *)roaring_malloc(sizeof(*t_ans) * + (first_skip + limit)); + if (t_ans == NULL) { + return false; + } + memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)); + cur_len = first_skip + limit; } - if (!container_nonzero_cardinality(new_container, new_typecode)) { - container_free(new_container, new_typecode); - ra_remove_at_index(&r->high_low_container, pos); - pos = -1; + if (dtr + t_limit > cur_len) { + uint32_t *append_ans = (uint32_t *)roaring_malloc( + sizeof(*append_ans) * (cur_len + 
t_limit)); + if (append_ans == NULL) { + if (t_ans != NULL) roaring_free(t_ans); + return false; + } + memset(append_ans, 0, + sizeof(*append_ans) * (cur_len + t_limit)); + cur_len = cur_len + t_limit; + memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); + roaring_free(t_ans); + t_ans = append_ans; + } + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: + container_to_uint32_array(t_ans + dtr, const_CAST_bitset(c), + ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + case ARRAY_CONTAINER_TYPE: + container_to_uint32_array(t_ans + dtr, const_CAST_array(c), + ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + case RUN_CONTAINER_TYPE: + container_to_uint32_array(t_ans + dtr, const_CAST_run(c), + ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; } + dtr += t_limit; } + ctr += t_limit; + if (dtr - first_skip >= limit) break; + } + if (t_ans != NULL) { + memcpy(ans, t_ans + first_skip, limit * sizeof(uint32_t)); + free(t_ans); } + return true; } -// there should be some SIMD optimizations possible here -roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint32_t neededcap = length1 > length2 ? 
length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); +bool ra_has_run_container(const roaring_array_t *ra) { + for (int32_t k = 0; k < ra->size; ++k) { + if (get_container_type(ra->containers[k], ra->typecodes[k]) == + RUN_CONTAINER_TYPE) + return true; + } + return false; +} - int pos1 = 0, pos2 = 0; +uint32_t ra_portable_header_size(const roaring_array_t *ra) { + if (ra_has_run_container(ra)) { + if (ra->size < + NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets + return 4 + (ra->size + 7) / 8 + 4 * ra->size; + } + return 4 + (ra->size + 7) / 8 + + 8 * ra->size; // - 4 because we pack the size with the cookie + } else { + return 4 + 4 + 8 * ra->size; + } +} - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = - ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - const uint16_t s2 = - ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); +size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { + size_t count = ra_portable_header_size(ra); - if (s1 == s2) { - uint8_t type1, type2; - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c = container_and(c1, type1, c2, type2, &result_type); + for (int32_t k = 0; k < ra->size; ++k) { + count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); + } + return count; +} - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); // otherwise: memory leak! +// This function is endian-sensitive. 
+size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { + char *initbuf = buf; + uint32_t startOffset = 0; + bool hasrun = ra_has_run_container(ra); + if (hasrun) { + uint32_t cookie = SERIAL_COOKIE | ((uint32_t)(ra->size - 1) << 16); + memcpy(buf, &cookie, sizeof(cookie)); + buf += sizeof(cookie); + uint32_t s = (ra->size + 7) / 8; + uint8_t *bitmapOfRunContainers = (uint8_t *)roaring_calloc(s, 1); + assert(bitmapOfRunContainers != NULL); // todo: handle + for (int32_t i = 0; i < ra->size; ++i) { + if (get_container_type(ra->containers[i], ra->typecodes[i]) == + RUN_CONTAINER_TYPE) { + bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); } - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); } - } - return answer; -} + memcpy(buf, bitmapOfRunContainers, s); + buf += s; + roaring_free(bitmapOfRunContainers); + if (ra->size < NO_OFFSET_THRESHOLD) { + startOffset = 4 + 4 * ra->size + s; + } else { + startOffset = 4 + 8 * ra->size + s; + } + } else { // backwards compatibility + uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; -/** - * Compute the union of 'number' bitmaps. 
- */ -roaring_bitmap_t *roaring_bitmap_or_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); + memcpy(buf, &cookie, sizeof(cookie)); + buf += sizeof(cookie); + memcpy(buf, &ra->size, sizeof(ra->size)); + buf += sizeof(ra->size); + + startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; } - if (number == 1) { - return roaring_bitmap_copy(x[0]); + for (int32_t k = 0; k < ra->size; ++k) { + memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); + buf += sizeof(ra->keys[k]); + // get_cardinality returns a value in [1,1<<16], subtracting one + // we get [0,1<<16 - 1] which fits in 16 bits + uint16_t card = (uint16_t)(container_get_cardinality(ra->containers[k], + ra->typecodes[k]) - + 1); + memcpy(buf, &card, sizeof(card)); + buf += sizeof(card); } - roaring_bitmap_t *answer = - roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); - for (size_t i = 2; i < number; i++) { - roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); + if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { + // writing the containers offsets + for (int32_t k = 0; k < ra->size; k++) { + memcpy(buf, &startOffset, sizeof(startOffset)); + buf += sizeof(startOffset); + startOffset = + startOffset + + container_size_in_bytes(ra->containers[k], ra->typecodes[k]); + } } - roaring_bitmap_repair_after_lazy(answer); - return answer; + for (int32_t k = 0; k < ra->size; ++k) { + buf += container_write(ra->containers[k], ra->typecodes[k], buf); + } + return buf - initbuf; } -/** - * Compute the xor of 'number' bitmaps. - */ -roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); +// Quickly checks whether there is a serialized bitmap at the pointer, +// not exceeding size "maxbytes" in bytes. This function does not allocate +// memory dynamically. +// +// This function returns 0 if and only if no valid bitmap is found. 
+// Otherwise, it returns how many bytes are occupied. +// +size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { + size_t bytestotal = sizeof(int32_t); // for cookie + if (bytestotal > maxbytes) return 0; + uint32_t cookie; + memcpy(&cookie, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + if ((cookie & 0xFFFF) != SERIAL_COOKIE && + cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { + return 0; } - if (number == 1) { - return roaring_bitmap_copy(x[0]); + int32_t size; + + if ((cookie & 0xFFFF) == SERIAL_COOKIE) + size = (cookie >> 16) + 1; + else { + bytestotal += sizeof(int32_t); + if (bytestotal > maxbytes) return 0; + memcpy(&size, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); } - roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); - for (size_t i = 2; i < number; i++) { - roaring_bitmap_lazy_xor_inplace(answer, x[i]); + if (size > (1 << 16) || size < 0) { + return 0; } - roaring_bitmap_repair_after_lazy(answer); - return answer; -} - -// inplace and (modifies its first argument). -void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - if (x1 == x2) return; - int pos1 = 0, pos2 = 0, intersection_size = 0; - const int length1 = ra_get_size(&x1->high_low_container); - const int length2 = ra_get_size(&x2->high_low_container); - - // any skipped-over or newly emptied containers in x1 - // have to be freed. - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = - ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - const uint16_t s2 = - ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - - if (s1 == s2) { - uint8_t type1, type2, result_type; - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - - // We do the computation "in place" only when c1 is not a shared - // container. 
Rationale: using a shared container safely with in - // place computation would require making a copy and then doing the - // computation in place which is likely less efficient than avoiding - // in place entirely and always generating a new container. - container_t *c = - (type1 == SHARED_CONTAINER_TYPE) - ? container_and(c1, type1, c2, type2, &result_type) - : container_iand(c1, type1, c2, type2, &result_type); - - if (c != c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, type1); - } - if (container_nonzero_cardinality(c, result_type)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size, s1, c, - result_type); - intersection_size++; - } else { - container_free(c, result_type); + char *bitmapOfRunContainers = NULL; + bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; + if (hasrun) { + int32_t s = (size + 7) / 8; + bytestotal += s; + if (bytestotal > maxbytes) return 0; + bitmapOfRunContainers = (char *)buf; + buf += s; + } + bytestotal += size * 2 * sizeof(uint16_t); + if (bytestotal > maxbytes) return 0; + uint16_t *keyscards = (uint16_t *)buf; + buf += size * 2 * sizeof(uint16_t); + if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { + // skipping the offsets + bytestotal += size * 4; + if (bytestotal > maxbytes) return 0; + buf += size * 4; + } + // Reading the containers + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2 * k + 1, sizeof(tmp)); + uint32_t thiscard = tmp + 1; + bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); + bool isrun = false; + if (hasrun) { + if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { + isbitmap = false; + isrun = true; } - ++pos1; - ++pos2; - } else if (s1 < s2) { - pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + } + if (isbitmap) { + size_t containersize = + BITSET_CONTAINER_SIZE_IN_WORDS * 
sizeof(uint64_t); + bytestotal += containersize; + if (bytestotal > maxbytes) return 0; + buf += containersize; + } else if (isrun) { + bytestotal += sizeof(uint16_t); + if (bytestotal > maxbytes) return 0; + uint16_t n_runs; + memcpy(&n_runs, buf, sizeof(uint16_t)); + buf += sizeof(uint16_t); + size_t containersize = n_runs * sizeof(rle16_t); + bytestotal += containersize; + if (bytestotal > maxbytes) return 0; + buf += containersize; + } else { + size_t containersize = thiscard * sizeof(uint16_t); + bytestotal += containersize; + if (bytestotal > maxbytes) return 0; + buf += containersize; } } - - // if we ended early because x2 ran out, then all remaining in x1 should be - // freed - while (pos1 < length1) { - container_free(x1->high_low_container.containers[pos1], - x1->high_low_container.typecodes[pos1]); - ++pos1; - } - - // all containers after this have either been copied or freed - ra_downsize(&x1->high_low_container, intersection_size); + return bytestotal; } -roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); +// This function populates answer from the content of buf (reading up to +// maxbytes bytes). The function returns false if a properly serialized bitmap +// cannot be found. If it returns true, readbytes is populated by how many bytes +// were read, we have that *readbytes <= maxbytes. +// +// This function is endian-sensitive. +bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, + const size_t maxbytes, size_t *readbytes) { + *readbytes = sizeof(int32_t); // for cookie + if (*readbytes > maxbytes) { + // Ran out of bytes while reading first 4 bytes. 
+ return false; } - if (0 == length2) { - return roaring_bitmap_copy(x1); + uint32_t cookie; + memcpy(&cookie, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + if ((cookie & 0xFFFF) != SERIAL_COOKIE && + cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { + // "I failed to find one of the right cookies. + return false; } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c = container_or(c1, type1, c2, type2, &result_type); - - // since we assume that the initial containers are non-empty, the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - // c1 = container_clone(c1, type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); - } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + int32_t size; - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); 
- // c2 = container_clone(c2, type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + if ((cookie & 0xFFFF) == SERIAL_COOKIE) + size = (cookie >> 16) + 1; + else { + *readbytes += sizeof(int32_t); + if (*readbytes > maxbytes) { + // Ran out of bytes while reading second part of the cookie. + return false; } + memcpy(&size, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); + if (size < 0) { + // You cannot have a negative number of containers, the data must be + // corrupted. + return false; } - return answer; -} - -// inplace or (modifies its first argument). -void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; + if (size > (1 << 16)) { + // You cannot have so many containers, the data must be corrupted. 
+ return false; } - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - if (!container_is_full(c1, type1)) { - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, (uint16_t)pos2, &type2); - container_t *c = - (type1 == SHARED_CONTAINER_TYPE) - ? container_or(c1, type1, c2, type2, &result_type) - : container_ior(c1, type1, c2, type2, &result_type); - - if (c != c1) { // in this instance a new container was created, - // and we need to free the old one - container_free(c1, type1); - } - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - - // container_t *c2_clone = container_clone(c2, type2); - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + const char *bitmapOfRunContainers = NULL; + bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; + if (hasrun) { + int32_t s = (size + 7) / 8; + *readbytes += s; + if (*readbytes > maxbytes) { // data is corrupted? 
+ // Ran out of bytes while reading run bitmap. + return false; } + bitmapOfRunContainers = buf; + buf += s; } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); + uint16_t *keyscards = (uint16_t *)buf; + + *readbytes += size * 2 * sizeof(uint16_t); + if (*readbytes > maxbytes) { + // Ran out of bytes while reading key-cardinality array. + return false; } -} + buf += size * 2 * sizeof(uint16_t); -roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); + bool is_ok = ra_init_with_capacity(answer, size); + if (!is_ok) { + // Failed to allocate memory for roaring array. Bailing out. + return false; } - if (0 == length2) { - return roaring_bitmap_copy(x1); + + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2 * k, sizeof(tmp)); + answer->keys[k] = tmp; } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c = container_xor(c1, type1, c2, type2, &result_type); + if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { + *readbytes += size * 4; + if (*readbytes > maxbytes) { // data is corrupted? + // Ran out of bytes while reading offsets. 
+ ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); + // skipping the offsets + buf += size * 4; + } + // Reading the containers + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2 * k + 1, sizeof(tmp)); + uint32_t thiscard = tmp + 1; + bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); + bool isrun = false; + if (hasrun) { + if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { + isbitmap = false; + isrun = true; } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); + } + if (isbitmap) { + // we check that the read is allowed + size_t containersize = + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + *readbytes += containersize; + if (*readbytes > maxbytes) { + // Running out of bytes while reading a bitset container. + ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + // it is now safe to read + bitset_container_t *c = bitset_container_create(); + if (c == NULL) { // memory allocation failure + // Failed to allocate memory for a bitset container. 
+ ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + answer->size++; + buf += bitset_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = BITSET_CONTAINER_TYPE; + } else if (isrun) { + // we check that the read is allowed + *readbytes += sizeof(uint16_t); + if (*readbytes > maxbytes) { + // Running out of bytes while reading a run container (header). + ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + uint16_t n_runs; + memcpy(&n_runs, buf, sizeof(uint16_t)); + size_t containersize = n_runs * sizeof(rle16_t); + *readbytes += containersize; + if (*readbytes > maxbytes) { // data is corrupted? + // Running out of bytes while reading a run container. + ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + // it is now safe to read - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); + run_container_t *c = run_container_create(); + if (c == NULL) { // memory allocation failure + // Failed to allocate memory for a run container. 
+ ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + answer->size++; + buf += run_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = RUN_CONTAINER_TYPE; + } else { + // we check that the read is allowed + size_t containersize = thiscard * sizeof(uint16_t); + *readbytes += containersize; + if (*readbytes > maxbytes) { // data is corrupted? + // Running out of bytes while reading an array container. + ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + // it is now safe to read + array_container_t *c = + array_container_create_given_capacity(thiscard); + if (c == NULL) { // memory allocation failure + // Failed to allocate memory for an array container. + ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + answer->size++; + buf += array_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = ARRAY_CONTAINER_TYPE; } } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; + return true; } -// inplace xor (modifies its first argument). 
- -void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - - // XOR can have new containers inserted from x2, but can also - // lose containers when x1 and x2 are nonempty and identical. - - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - - // We do the computation "in place" only when c1 is not a shared - // container. Rationale: using a shared container safely with in - // place computation would require making a copy and then doing the - // computation in place which is likely less efficient than avoiding - // in place entirely and always generating a new container. 
+#ifdef __cplusplus +} +} +} // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/roaring_array.c */ +/* begin file src/roaring_priority_queue.c */ - container_t *c; - if (type1 == SHARED_CONTAINER_TYPE) { - c = container_xor(c1, type1, c2, type2, &result_type); - shared_container_free(CAST_shared(c1)); // so release - } else { - c = container_ixor(c1, type1, c2, type2, &result_type); - } +#ifdef __cplusplus +using namespace ::roaring::internal; - if (container_nonzero_cardinality(c, result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - ++pos1; - } else { - container_free(c, result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } +extern "C" { +namespace roaring { +namespace api { +#endif - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); +struct roaring_pq_element_s { + uint64_t size; + bool is_temporary; + roaring_bitmap_t *bitmap; +}; - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); +typedef struct roaring_pq_element_s roaring_pq_element_t; - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } +struct roaring_pq_s { + roaring_pq_element_t *elements; + uint64_t size; +}; - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, 
is_cow(x2)); - } +typedef struct roaring_pq_s roaring_pq_t; + +static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { + return t1->size < t2->size; } -roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); - roaring_bitmap_set_copy_on_write(empty_bitmap, - is_cow(x1) || is_cow(x2)); - return empty_bitmap; - } - if (0 == length2) { - return roaring_bitmap_copy(x1); +static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { + uint64_t i = pq->size; + pq->elements[pq->size++] = *t; + while (i > 0) { + uint64_t p = (i - 1) >> 1; + roaring_pq_element_t ap = pq->elements[p]; + if (!compare(t, &ap)) break; + pq->elements[i] = ap; + i = p; } - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = 0; - uint16_t s2 = 0; - while (true) { - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + pq->elements[i] = *t; +} - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c = - container_andnot(c1, type1, c2, type2, &result_type); +static void pq_free(roaring_pq_t *pq) { roaring_free(pq); } - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); +static void percolate_down(roaring_pq_t *pq, uint32_t i) { + uint32_t size = (uint32_t)pq->size; + uint32_t hsize = size >> 1; + roaring_pq_element_t 
ai = pq->elements[i]; + while (i < hsize) { + uint32_t l = (i << 1) + 1; + uint32_t r = l + 1; + roaring_pq_element_t bestc = pq->elements[l]; + if (r < size) { + if (compare(pq->elements + r, &bestc)) { + l = r; + bestc = pq->elements[r]; } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - } else if (s1 < s2) { // s1 < s2 - const int next_pos1 = - ra_advance_until(&x1->high_low_container, s2, pos1); - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, next_pos1, - is_cow(x1)); - // TODO : perhaps some of the copy_on_write should be based on - // answer rather than x1 (more stringent?). Many similar cases - pos1 = next_pos1; - if (pos1 == length1) break; - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; } + if (!compare(&bestc, &ai)) { + break; + } + pq->elements[i] = bestc; + i = l; } - if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); + pq->elements[i] = ai; +} + +static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { + size_t alloc_size = + sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length; + roaring_pq_t *answer = (roaring_pq_t *)roaring_malloc(alloc_size); + answer->elements = (roaring_pq_element_t *)(answer + 1); + answer->size = length; + for (uint32_t i = 0; i < length; i++) { + answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; + answer->elements[i].is_temporary = false; + answer->elements[i].size = + roaring_bitmap_portable_size_in_bytes(arr[i]); + } + for (int32_t i = (length >> 1); i >= 0; i--) { + percolate_down(answer, i); } return answer; } -// inplace andnot (modifies its first argument). 
- -void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); +static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { + roaring_pq_element_t ans = *pq->elements; + if (pq->size > 1) { + pq->elements[0] = pq->elements[--pq->size]; + percolate_down(pq, 0); + } else + --pq->size; + // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; + return ans; +} +// this function consumes and frees the inputs +static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, + roaring_bitmap_t *x2) { uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - int intersection_size = 0; - - if (0 == length2) return; - + const int length1 = ra_get_size(&x1->high_low_container), + length2 = ra_get_size(&x2->high_low_container); if (0 == length1) { - roaring_bitmap_clear(x1); - return; + roaring_bitmap_free(x1); + return x2; } - + if (0 == length2) { + roaring_bitmap_free(x2); + return x1; + } + uint32_t neededcap = length1 > length2 ? length2 : length1; + roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); int pos1 = 0, pos2 = 0; uint8_t type1, type2; uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); while (true) { if (s1 == s2) { + // todo: unsharing can be inefficient as it may create a clone where + // none + // is needed, but it has the benefit of being easy to reason about. 
+ + ra_unshare_container_at_index(&x1->high_low_container, + (uint16_t)pos1); container_t *c1 = ra_get_container_at_index(&x1->high_low_container, (uint16_t)pos1, &type1); + assert(type1 != SHARED_CONTAINER_TYPE); + + ra_unshare_container_at_index(&x2->high_low_container, + (uint16_t)pos2); container_t *c2 = ra_get_container_at_index(&x2->high_low_container, (uint16_t)pos2, &type2); - - // We do the computation "in place" only when c1 is not a shared - // container. Rationale: using a shared container safely with in - // place computation would require making a copy and then doing the - // computation in place which is likely less efficient than avoiding - // in place entirely and always generating a new container. + assert(type2 != SHARED_CONTAINER_TYPE); container_t *c; - if (type1 == SHARED_CONTAINER_TYPE) { - c = container_andnot(c1, type1, c2, type2, &result_type); - shared_container_free(CAST_shared(c1)); // release - } else { - c = container_iandnot(c1, type1, c2, type2, &result_type); - } - if (container_nonzero_cardinality(c, result_type)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size++, s1, - c, result_type); + if ((type2 == BITSET_CONTAINER_TYPE) && + (type1 != BITSET_CONTAINER_TYPE)) { + c = container_lazy_ior(c2, type2, c1, type1, &result_type); + container_free(c1, type1); + if (c != c2) { + container_free(c2, type2); + } } else { - container_free(c, result_type); + c = container_lazy_ior(c1, type1, c2, type2, &result_type); + container_free(c2, type2); + if (c != c1) { + container_free(c1, type1); + } } - + // since we assume that the initial containers are non-empty, the + // result here + // can only be non-empty + ra_append(&answer->high_low_container, s1, c, result_type); ++pos1; ++pos2; if (pos1 == length1) break; @@ -20609,231 +20921,183 @@ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } else if (s1 < s2) { // s1 < s2 - if 
(pos1 != intersection_size) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, (uint16_t)pos1, &type1); - - ra_replace_key_and_container_at_index( - &x1->high_low_container, intersection_size, s1, c1, type1); - } - intersection_size++; + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + ra_append(&answer->high_low_container, s1, c1, type1); pos1++; if (pos1 == length1) break; s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; if (pos2 == length2) break; s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } } - - if (pos1 < length1) { - // all containers between intersection_size and - // pos1 are junk. However, they have either been moved - // (thus still referenced) or involved in an iandnot - // that will clean up all containers that could not be reused. - // Thus we should not free the junk containers between - // intersection_size and pos1. 
- if (pos1 > intersection_size) { - // left slide of remaining items - ra_copy_range(&x1->high_low_container, pos1, length1, - intersection_size); - } - // else current placement is fine - intersection_size += (length1 - pos1); + if (pos1 == length1) { + ra_append_move_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2); + } else if (pos2 == length2) { + ra_append_move_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1); } - ra_downsize(&x1->high_low_container, intersection_size); -} - -uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) { - const roaring_array_t *ra = &r->high_low_container; - - uint64_t card = 0; - for (int i = 0; i < ra->size; ++i) - card += container_get_cardinality(ra->containers[i], ra->typecodes[i]); - return card; -} - -uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, - uint64_t range_start, - uint64_t range_end) { - const roaring_array_t *ra = &r->high_low_container; + ra_clear_without_containers(&x1->high_low_container); + ra_clear_without_containers(&x2->high_low_container); + roaring_free(x1); + roaring_free(x2); + return answer; +} - if (range_end > UINT32_MAX) { - range_end = UINT32_MAX + UINT64_C(1); +/** + * Compute the union of 'number' bitmaps using a heap. This can + * sometimes be faster than roaring_bitmap_or_many which uses + * a naive algorithm. Caller is responsible for freeing the + * result. 
+ */ +roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, + const roaring_bitmap_t **x) { + if (number == 0) { + return roaring_bitmap_create(); } - if (range_start >= range_end) { - return 0; + if (number == 1) { + return roaring_bitmap_copy(x[0]); } - range_end--; // make range_end inclusive - // now we have: 0 <= range_start <= range_end <= UINT32_MAX - - uint16_t minhb = (uint16_t)(range_start >> 16); - uint16_t maxhb = (uint16_t)(range_end >> 16); + roaring_pq_t *pq = create_pq(x, number); + while (pq->size > 1) { + roaring_pq_element_t x1 = pq_poll(pq); + roaring_pq_element_t x2 = pq_poll(pq); - uint64_t card = 0; + if (x1.is_temporary && x2.is_temporary) { + roaring_bitmap_t *newb = + lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); + // should normally return a fresh new bitmap *except* that + // it can return x1.bitmap or x2.bitmap in degenerate cases + bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); + uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); + roaring_pq_element_t newelement = { + .size = bsize, .is_temporary = temporary, .bitmap = newb}; + pq_add(pq, &newelement); + } else if (x2.is_temporary) { + roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); + x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); + pq_add(pq, &x2); + } else if (x1.is_temporary) { + roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); + x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); - int i = ra_get_index(ra, minhb); - if (i >= 0) { - if (minhb == maxhb) { - card += container_rank(ra->containers[i], ra->typecodes[i], - range_end & 0xffff); + pq_add(pq, &x1); } else { - card += - container_get_cardinality(ra->containers[i], ra->typecodes[i]); - } - if ((range_start & 0xffff) != 0) { - card -= container_rank(ra->containers[i], ra->typecodes[i], - (range_start & 0xffff) - 1); - } - i++; - } else { - i = -i - 1; - } + roaring_bitmap_t *newb = + roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); + 
uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); + roaring_pq_element_t newelement = { + .size = bsize, .is_temporary = true, .bitmap = newb}; - for (; i < ra->size; i++) { - uint16_t key = ra->keys[i]; - if (key < maxhb) { - card += - container_get_cardinality(ra->containers[i], ra->typecodes[i]); - } else if (key == maxhb) { - card += container_rank(ra->containers[i], ra->typecodes[i], - range_end & 0xffff); - break; - } else { - break; + pq_add(pq, &newelement); } } - - return card; + roaring_pq_element_t X = pq_poll(pq); + roaring_bitmap_t *answer = X.bitmap; + roaring_bitmap_repair_after_lazy(answer); + pq_free(pq); + return answer; } -bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) { - return r->high_low_container.size == 0; +#ifdef __cplusplus } - -void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) { - ra_to_uint32_array(&r->high_low_container, ans); } +} // extern "C" { namespace roaring { namespace api { +#endif +/* end file src/roaring_priority_queue.c */ +/* begin file src/roaring.c */ +#include +#include +#include +#include +#include +#include +#include -bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset, - size_t limit, uint32_t *ans) { - return ra_range_uint32_array(&r->high_low_container, offset, limit, ans); -} -/** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. - */ -bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { - bool answer = false; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t type_original, type_after; - ra_unshare_container_at_index( - &r->high_low_container, - (uint16_t)i); // TODO: this introduces extra cloning! 
- container_t *c = ra_get_container_at_index(&r->high_low_container, - (uint16_t)i, &type_original); - container_t *c1 = convert_run_optimize(c, type_original, &type_after); - if (type_after == RUN_CONTAINER_TYPE) { - answer = true; - } - ra_set_container_at_index(&r->high_low_container, i, c1, type_after); - } - return answer; -} +// Include after roaring.h -size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { - size_t answer = 0; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t type_original; - container_t *c = ra_get_container_at_index(&r->high_low_container, - (uint16_t)i, &type_original); - answer += container_shrink_to_fit(c, type_original); - } - answer += ra_shrink_to_fit(&r->high_low_container); - return answer; -} +#ifdef __cplusplus +using namespace ::roaring::internal; -/** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ -bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { - bool answer = false; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t type_original, type_after; - container_t *c = ra_get_container_at_index(&r->high_low_container, - (uint16_t)i, &type_original); - if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) { - answer = true; - if (type_original == SHARED_CONTAINER_TYPE) { - run_container_t *truec = CAST_run(CAST_shared(c)->container); - int32_t card = run_container_cardinality(truec); - container_t *c1 = convert_to_bitset_or_array_container( - truec, card, &type_after); - shared_container_free(CAST_shared(c)); // frees run as needed - ra_set_container_at_index(&r->high_low_container, i, c1, - type_after); +extern "C" { +namespace roaring { +namespace api { +#endif - } else { - int32_t card = run_container_cardinality(CAST_run(c)); - container_t *c1 = convert_to_bitset_or_array_container( - CAST_run(c), card, &type_after); - run_container_free(CAST_run(c)); - ra_set_container_at_index(&r->high_low_container, 
i, c1, - type_after); - } - } - } - return answer; -} +#define CROARING_SERIALIZATION_ARRAY_UINT32 1 +#define CROARING_SERIALIZATION_CONTAINER 2 +extern inline int roaring_trailing_zeroes(unsigned long long input_num); +extern inline int roaring_leading_zeroes(unsigned long long input_num); +extern inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r); +extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r); +extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, + bool cow); +extern inline roaring_bitmap_t *roaring_bitmap_create(void); +extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, + uint64_t max); +extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, + uint64_t min, uint64_t max); -size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) { - size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); - uint64_t cardinality = roaring_bitmap_get_cardinality(r); - uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); - if (portablesize < sizeasarray) { - buf[0] = CROARING_SERIALIZATION_CONTAINER; - return roaring_bitmap_portable_serialize(r, buf + 1) + 1; - } else { - buf[0] = CROARING_SERIALIZATION_ARRAY_UINT32; - memcpy(buf + 1, &cardinality, sizeof(uint32_t)); - roaring_bitmap_to_uint32_array( - r, (uint32_t *)(buf + 1 + sizeof(uint32_t))); - return 1 + (size_t)sizeasarray; - } +static inline bool is_cow(const roaring_bitmap_t *r) { + return r->high_low_container.flags & ROARING_FLAG_COW; } - -size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) { - size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); - uint64_t sizeasarray = - roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) + sizeof(uint32_t); - return portablesize < sizeasarray ? 
portablesize + 1 - : (size_t)sizeasarray + 1; +static inline bool is_frozen(const roaring_bitmap_t *r) { + return r->high_low_container.flags & ROARING_FLAG_FROZEN; } -size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) { - return ra_portable_size_in_bytes(&r->high_low_container); +// this is like roaring_bitmap_add, but it populates pointer arguments in such a +// way +// that we can recover the container touched, which, in turn can be used to +// accelerate some functions (when you repeatedly need to add to the same +// container) +static inline container_t *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, + uint32_t val, + uint8_t *type, + int *index) { + roaring_array_t *ra = &r->high_low_container; + + uint16_t hb = val >> 16; + const int i = ra_get_index(ra, hb); + if (i >= 0) { + ra_unshare_container_at_index(ra, (uint16_t)i); + container_t *c = ra_get_container_at_index(ra, (uint16_t)i, type); + uint8_t new_type = *type; + container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type); + *index = i; + if (c2 != c) { + container_free(c, *type); + ra_set_container_at_index(ra, i, c2, new_type); + *type = new_type; + return c2; + } else { + return c; + } + } else { + array_container_t *new_ac = array_container_create(); + container_t *c = + container_add(new_ac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, type); + // we could just assume that it stays an array container + ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type); + *index = -i - 1; + return c; + } } -roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, - size_t maxbytes) { +roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { roaring_bitmap_t *ans = (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); - if (ans == NULL) { - return NULL; - } - size_t bytesread; - bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, - maxbytes, &bytesread); - if (!is_ok) { - roaring_free(ans); + if (!ans) { return NULL; } - 
roaring_bitmap_set_copy_on_write(ans, false); + bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); if (!is_ok) { roaring_free(ans); return NULL; @@ -20841,704 +21105,905 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, return ans; } -roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { - return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); -} - -size_t roaring_bitmap_portable_deserialize_size(const char *buf, - size_t maxbytes) { - return ra_portable_deserialize_size(buf, maxbytes); -} - -size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf) { - return ra_portable_serialize(&r->high_low_container, buf); +bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) { + return ra_init_with_capacity(&r->high_low_container, cap); } -roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { - const char *bufaschar = (const char *)buf; - if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { - /* This looks like a compressed set of uint32_t elements */ - uint32_t card; - - memcpy(&card, bufaschar + 1, sizeof(uint32_t)); - - const uint32_t *elems = - (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); - - roaring_bitmap_t *bitmap = roaring_bitmap_create(); - if (bitmap == NULL) { - return NULL; - } - roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; - for (uint32_t i = 0; i < card; i++) { - // elems may not be aligned, read with memcpy - uint32_t elem; - memcpy(&elem, elems + i, sizeof(elem)); - roaring_bitmap_add_bulk(bitmap, &context, elem); +static inline void add_bulk_impl(roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val) { + uint16_t key = val >> 16; + if (context->container == NULL || context->key != key) { + uint8_t typecode; + int idx; + context->container = + containerptr_roaring_bitmap_add(r, val, &typecode, &idx); + context->typecode = typecode; + context->idx = idx; + context->key = key; + } else { + // 
no need to seek the container, it is at hand + // because we already have the container at hand, we can do the + // insertion directly, bypassing the roaring_bitmap_add call + uint8_t new_typecode; + container_t *container2 = container_add( + context->container, val & 0xFFFF, context->typecode, &new_typecode); + if (container2 != context->container) { + // rare instance when we need to change the container type + container_free(context->container, context->typecode); + ra_set_container_at_index(&r->high_low_container, context->idx, + container2, new_typecode); + context->typecode = new_typecode; + context->container = container2; } - return bitmap; - - } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { - return roaring_bitmap_portable_deserialize(bufaschar + 1); - } else - return (NULL); + } } -roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, - size_t maxbytes) { - if (maxbytes < 1) { - return NULL; - } +void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals) { + uint32_t val; + const uint32_t *start = vals; + const uint32_t *end = vals + n_args; + const uint32_t *current_val = start; - const char *bufaschar = (const char *)buf; - if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { - if (maxbytes < 1 + sizeof(uint32_t)) { - return NULL; - } + if (n_args == 0) { + return; + } - /* This looks like a compressed set of uint32_t elements */ - uint32_t card; - memcpy(&card, bufaschar + 1, sizeof(uint32_t)); + uint8_t typecode; + int idx; + container_t *container; + val = *current_val; + container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx); + roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), + typecode}; - // Check the buffer is big enough to contain card uint32_t elements - if (maxbytes < 1 + sizeof(uint32_t) + card * sizeof(uint32_t)) { - return NULL; - } + for (; current_val != end; current_val++) { + memcpy(&val, current_val, sizeof(val)); + add_bulk_impl(r, 
&context, val); + } +} - const uint32_t *elems = - (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, + roaring_bulk_context_t *context, uint32_t val) { + add_bulk_impl(r, context, val); +} - roaring_bitmap_t *bitmap = roaring_bitmap_create(); - if (bitmap == NULL) { - return NULL; +bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val) { + uint16_t key = val >> 16; + if (context->container == NULL || context->key != key) { + int32_t start_idx = -1; + if (context->container != NULL && context->key < key) { + start_idx = context->idx; } - roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; - for (uint32_t i = 0; i < card; i++) { - // elems may not be aligned, read with memcpy - uint32_t elem; - memcpy(&elem, elems + i, sizeof(elem)); - roaring_bitmap_add_bulk(bitmap, &context, elem); + int idx = ra_advance_until(&r->high_low_container, key, start_idx); + if (idx == ra_get_size(&r->high_low_container)) { + return false; } - return bitmap; - - } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { - return roaring_bitmap_portable_deserialize_safe(bufaschar + 1, - maxbytes - 1); - } else - return (NULL); -} - -bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, - void *ptr) { - const roaring_array_t *ra = &r->high_low_container; - - for (int i = 0; i < ra->size; ++i) - if (!container_iterate(ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16, iterator, ptr)) { + uint8_t typecode; + context->container = ra_get_container_at_index( + &r->high_low_container, (uint16_t)idx, &typecode); + context->typecode = typecode; + context->idx = idx; + context->key = + ra_get_key_at_index(&r->high_low_container, (uint16_t)idx); + // ra_advance_until finds the next key >= the target, we found a later + // container. 
+ if (context->key != key) { return false; } - return true; + } + // context is now set up + return container_contains(context->container, val & 0xFFFF, + context->typecode); } -bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, - uint64_t high_bits, void *ptr) { - const roaring_array_t *ra = &r->high_low_container; +roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { + roaring_bitmap_t *answer = roaring_bitmap_create(); + roaring_bitmap_add_many(answer, n_args, vals); + return answer; +} - for (int i = 0; i < ra->size; ++i) - if (!container_iterate64(ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16, iterator, - high_bits, ptr)) { - return false; - } - return true; +roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { + // todo: could be greatly optimized but we do not expect this call to ever + // include long lists + roaring_bitmap_t *answer = roaring_bitmap_create(); + roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; + va_list ap; + va_start(ap, n_args); + for (size_t i = 0; i < n_args; i++) { + uint32_t val = va_arg(ap, uint32_t); + roaring_bitmap_add_bulk(answer, &context, val); + } + va_end(ap); + return answer; } -/**** - * begin roaring_uint32_iterator_t - *****/ +static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { + return (a < b) ? a : b; +} -/** - * Partially initializes the iterator. Leaves it in either state: - * 1. Invalid due to `has_value = false`, or - * 2. At a container, with the high bits set, `has_value = true`. 
- */ -CROARING_WARN_UNUSED static bool iter_new_container_partial_init( - roaring_uint32_iterator_t *newit) { - newit->current_value = 0; - if (newit->container_index >= newit->parent->high_low_container.size || - newit->container_index < 0) { - newit->current_value = UINT32_MAX; - return (newit->has_value = false); +roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, + uint32_t step) { + if (max >= UINT64_C(0x100000000)) { + max = UINT64_C(0x100000000); } - newit->has_value = true; - // we precompute container, typecode and highbits so that successive - // iterators do not have to grab them from odd memory locations - // and have to worry about the (easily predicted) container_unwrap_shared - // call. - newit->container = - newit->parent->high_low_container.containers[newit->container_index]; - newit->typecode = - newit->parent->high_low_container.typecodes[newit->container_index]; - newit->highbits = - ((uint32_t) - newit->parent->high_low_container.keys[newit->container_index]) - << 16; - newit->container = - container_unwrap_shared(newit->container, &(newit->typecode)); - return true; + if (step == 0) return NULL; + if (max <= min) return NULL; + roaring_bitmap_t *answer = roaring_bitmap_create(); + if (step >= (1 << 16)) { + for (uint32_t value = (uint32_t)min; value < max; value += step) { + roaring_bitmap_add(answer, value); + } + return answer; + } + uint64_t min_tmp = min; + do { + uint32_t key = (uint32_t)min_tmp >> 16; + uint32_t container_min = min_tmp & 0xFFFF; + uint32_t container_max = + (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); + uint8_t type; + container_t *container = container_from_range( + &type, container_min, container_max, (uint16_t)step); + ra_append(&answer->high_low_container, (uint16_t)key, container, type); + uint32_t gap = container_max - container_min + step - 1; + min_tmp += gap - (gap % step); + } while (min_tmp < max); + // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step + 
return answer; } -/** - * Positions the iterator at the first value of the current container that the - * iterator points at, if available. - */ -CROARING_WARN_UNUSED static bool loadfirstvalue( - roaring_uint32_iterator_t *newit) { - if (iter_new_container_partial_init(newit)) { - uint16_t value = 0; - newit->container_it = - container_init_iterator(newit->container, newit->typecode, &value); - newit->current_value = newit->highbits | value; +void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, + uint32_t max) { + if (min > max) { + return; } - return newit->has_value; -} -/** - * Positions the iterator at the last value of the current container that the - * iterator points at, if available. - */ -CROARING_WARN_UNUSED static bool loadlastvalue( - roaring_uint32_iterator_t *newit) { - if (iter_new_container_partial_init(newit)) { - uint16_t value = 0; - newit->container_it = container_init_iterator_last( - newit->container, newit->typecode, &value); - newit->current_value = newit->highbits | value; + roaring_array_t *ra = &r->high_low_container; + + uint32_t min_key = min >> 16; + uint32_t max_key = max >> 16; + + int32_t num_required_containers = max_key - min_key + 1; + int32_t suffix_length = + count_greater(ra->keys, ra->size, (uint16_t)max_key); + int32_t prefix_length = + count_less(ra->keys, ra->size - suffix_length, (uint16_t)min_key); + int32_t common_length = ra->size - prefix_length - suffix_length; + + if (num_required_containers > common_length) { + ra_shift_tail(ra, suffix_length, + num_required_containers - common_length); + } + + int32_t src = prefix_length + common_length - 1; + int32_t dst = ra->size - suffix_length - 1; + for (uint32_t key = max_key; key != min_key - 1; + key--) { // beware of min_key==0 + uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; + uint32_t container_max = (max_key == key) ? 
(max & 0xffff) : 0xffff; + container_t *new_container; + uint8_t new_type; + + if (src >= 0 && ra->keys[src] == key) { + ra_unshare_container_at_index(ra, (uint16_t)src); + new_container = + container_add_range(ra->containers[src], ra->typecodes[src], + container_min, container_max, &new_type); + if (new_container != ra->containers[src]) { + container_free(ra->containers[src], ra->typecodes[src]); + } + src--; + } else { + new_container = container_from_range(&new_type, container_min, + container_max + 1, 1); + } + ra_replace_key_and_container_at_index(ra, dst, (uint16_t)key, + new_container, new_type); + dst--; } - return newit->has_value; } -/** - * Positions the iterator at the smallest value that is larger than or equal to - * `val` within the current container that the iterator points at. Assumes such - * a value exists within the current container. - */ -CROARING_WARN_UNUSED static bool loadfirstvalue_largeorequal( - roaring_uint32_iterator_t *newit, uint32_t val) { - bool partial_init = iter_new_container_partial_init(newit); - assert(partial_init); - if (!partial_init) { - return false; +void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, + uint32_t max) { + if (min > max) { + return; } - uint16_t value = 0; - newit->container_it = - container_init_iterator(newit->container, newit->typecode, &value); - bool found = container_iterator_lower_bound( - newit->container, newit->typecode, &newit->container_it, &value, - val & 0xFFFF); - assert(found); - if (!found) { - return false; + + roaring_array_t *ra = &r->high_low_container; + + uint32_t min_key = min >> 16; + uint32_t max_key = max >> 16; + + int32_t src = count_less(ra->keys, ra->size, (uint16_t)min_key); + int32_t dst = src; + while (src < ra->size && ra->keys[src] <= max_key) { + uint32_t container_min = + (min_key == ra->keys[src]) ? (min & 0xffff) : 0; + uint32_t container_max = + (max_key == ra->keys[src]) ? 
(max & 0xffff) : 0xffff; + ra_unshare_container_at_index(ra, (uint16_t)src); + container_t *new_container; + uint8_t new_type; + new_container = + container_remove_range(ra->containers[src], ra->typecodes[src], + container_min, container_max, &new_type); + if (new_container != ra->containers[src]) { + container_free(ra->containers[src], ra->typecodes[src]); + } + if (new_container) { + ra_replace_key_and_container_at_index(ra, dst, ra->keys[src], + new_container, new_type); + dst++; + } + src++; + } + if (src > dst) { + ra_shift_tail(ra, ra->size - src, dst - src); } - newit->current_value = newit->highbits | value; - return true; } -void roaring_iterator_init(const roaring_bitmap_t *r, - roaring_uint32_iterator_t *newit) { - newit->parent = r; - newit->container_index = 0; - newit->has_value = loadfirstvalue(newit); -} +void roaring_bitmap_printf(const roaring_bitmap_t *r) { + const roaring_array_t *ra = &r->high_low_container; -void roaring_iterator_init_last(const roaring_bitmap_t *r, - roaring_uint32_iterator_t *newit) { - newit->parent = r; - newit->container_index = newit->parent->high_low_container.size - 1; - newit->has_value = loadlastvalue(newit); -} + printf("{"); + for (int i = 0; i < ra->size; ++i) { + container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); -roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)roaring_malloc( - sizeof(roaring_uint32_iterator_t)); - if (newit == NULL) return NULL; - roaring_iterator_init(r, newit); - return newit; + if (i + 1 < ra->size) { + printf(","); + } + } + printf("}"); } -roaring_uint32_iterator_t *roaring_uint32_iterator_copy( - const roaring_uint32_iterator_t *it) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)roaring_malloc( - sizeof(roaring_uint32_iterator_t)); - memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); - return newit; -} +void 
roaring_bitmap_printf_describe(const roaring_bitmap_t *r) { + const roaring_array_t *ra = &r->high_low_container; -bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it, - uint32_t val) { - uint16_t hb = val >> 16; - const int i = ra_get_index(&it->parent->high_low_container, hb); - if (i >= 0) { - uint32_t lowvalue = - container_maximum(it->parent->high_low_container.containers[i], - it->parent->high_low_container.typecodes[i]); - uint16_t lb = val & 0xFFFF; - if (lowvalue < lb) { - // will have to load first value of next container - it->container_index = i + 1; - } else { - // the value is necessarily within the range of the container - it->container_index = i; - it->has_value = loadfirstvalue_largeorequal(it, val); - return it->has_value; + printf("{"); + for (int i = 0; i < ra->size; ++i) { + printf("%d: %s (%d)", ra->keys[i], + get_full_container_name(ra->containers[i], ra->typecodes[i]), + container_get_cardinality(ra->containers[i], ra->typecodes[i])); + if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) { + printf("(shared count = %" PRIu32 " )", + croaring_refcount_get( + &(CAST_shared(ra->containers[i])->counter))); + } + + if (i + 1 < ra->size) { + printf(", "); } - } else { - // there is no matching, so we are going for the next container - it->container_index = -i - 1; } - it->has_value = loadfirstvalue(it); - return it->has_value; + printf("}"); } -bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it) { - if (it->container_index >= it->parent->high_low_container.size) { - return (it->has_value = false); - } - if (it->container_index < 0) { - it->container_index = 0; - return (it->has_value = loadfirstvalue(it)); - } - uint16_t low16 = (uint16_t)it->current_value; - if (container_iterator_next(it->container, it->typecode, &it->container_it, - &low16)) { - it->current_value = it->highbits | low16; - return (it->has_value = true); +/** + * (For advanced users.) 
+ * Collect statistics about the bitmap + */ +void roaring_bitmap_statistics(const roaring_bitmap_t *r, + roaring_statistics_t *stat) { + const roaring_array_t *ra = &r->high_low_container; + + memset(stat, 0, sizeof(*stat)); + stat->n_containers = ra->size; + stat->min_value = roaring_bitmap_minimum(r); + stat->max_value = roaring_bitmap_maximum(r); + + for (int i = 0; i < ra->size; ++i) { + uint8_t truetype = + get_container_type(ra->containers[i], ra->typecodes[i]); + uint32_t card = + container_get_cardinality(ra->containers[i], ra->typecodes[i]); + uint32_t sbytes = + container_size_in_bytes(ra->containers[i], ra->typecodes[i]); + stat->cardinality += card; + switch (truetype) { + case BITSET_CONTAINER_TYPE: + stat->n_bitset_containers++; + stat->n_values_bitset_containers += card; + stat->n_bytes_bitset_containers += sbytes; + break; + case ARRAY_CONTAINER_TYPE: + stat->n_array_containers++; + stat->n_values_array_containers += card; + stat->n_bytes_array_containers += sbytes; + break; + case RUN_CONTAINER_TYPE: + stat->n_run_containers++; + stat->n_values_run_containers += card; + stat->n_bytes_run_containers += sbytes; + break; + default: + assert(false); + roaring_unreachable; + } } - it->container_index++; - return (it->has_value = loadfirstvalue(it)); } -bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it) { - if (it->container_index < 0) { - return (it->has_value = false); +/* + * Checks that: + * - Array containers are sorted and contain no duplicates + * - Range containers are sorted and contain no overlapping ranges + * - Roaring containers are sorted by key and there are no duplicate keys + * - The correct container type is use for each container (e.g. 
bitmaps aren't + * used for small containers) + */ +bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, + const char **reason) { + const char *reason_local; + if (reason == NULL) { + // Always allow assigning through *reason + reason = &reason_local; } - if (it->container_index >= it->parent->high_low_container.size) { - it->container_index = it->parent->high_low_container.size - 1; - return (it->has_value = loadlastvalue(it)); + *reason = NULL; + const roaring_array_t *ra = &r->high_low_container; + if (ra->size < 0) { + *reason = "negative size"; + return false; } - uint16_t low16 = (uint16_t)it->current_value; - if (container_iterator_prev(it->container, it->typecode, &it->container_it, - &low16)) { - it->current_value = it->highbits | low16; - return (it->has_value = true); + if (ra->allocation_size < 0) { + *reason = "negative allocation size"; + return false; } - it->container_index--; - return (it->has_value = loadlastvalue(it)); -} - -uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it, - uint32_t *buf, uint32_t count) { - uint32_t ret = 0; - while (it->has_value && ret < count) { - uint32_t consumed; - uint16_t low16 = (uint16_t)it->current_value; - bool has_value = container_iterator_read_into_uint32( - it->container, it->typecode, &it->container_it, it->highbits, buf, - count - ret, &consumed, &low16); - ret += consumed; - buf += consumed; - if (has_value) { - it->has_value = true; - it->current_value = it->highbits | low16; - assert(ret == count); - return ret; - } - it->container_index++; - it->has_value = loadfirstvalue(it); + if (ra->size > ra->allocation_size) { + *reason = "more containers than allocated space"; + return false; + } + if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) { + *reason = "invalid flags"; + return false; + } + if (ra->size == 0) { + return true; } - return ret; -} - -void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it) { - roaring_free(it); -} - -/**** - * end of 
roaring_uint32_iterator_t - *****/ - -bool roaring_bitmap_equals(const roaring_bitmap_t *r1, - const roaring_bitmap_t *r2) { - const roaring_array_t *ra1 = &r1->high_low_container; - const roaring_array_t *ra2 = &r2->high_low_container; - if (ra1->size != ra2->size) { + if (ra->keys == NULL) { + *reason = "keys is NULL"; return false; } - for (int i = 0; i < ra1->size; ++i) { - if (ra1->keys[i] != ra2->keys[i]) { + if (ra->typecodes == NULL) { + *reason = "typecodes is NULL"; + return false; + } + if (ra->containers == NULL) { + *reason = "containers is NULL"; + return false; + } + + uint32_t prev_key = ra->keys[0]; + for (int32_t i = 1; i < ra->size; ++i) { + if (ra->keys[i] <= prev_key) { + *reason = "keys not strictly increasing"; return false; } + prev_key = ra->keys[i]; } - for (int i = 0; i < ra1->size; ++i) { - bool areequal = container_equals(ra1->containers[i], ra1->typecodes[i], - ra2->containers[i], ra2->typecodes[i]); - if (!areequal) { + + for (int32_t i = 0; i < ra->size; ++i) { + if (!container_internal_validate(ra->containers[i], ra->typecodes[i], + reason)) { + // reason should already be set + if (*reason == NULL) { + *reason = "container failed to validate but no reason given"; + } return false; } } + return true; } -bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, - const roaring_bitmap_t *r2) { - const roaring_array_t *ra1 = &r1->high_low_container; - const roaring_array_t *ra2 = &r2->high_low_container; - - const int length1 = ra1->size, length2 = ra2->size; - - int pos1 = 0, pos2 = 0; +roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { + roaring_bitmap_t *ans = + (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); + if (!ans) { + return NULL; + } + if (!ra_init_with_capacity( // allocation of list of containers can fail + &ans->high_low_container, r->high_low_container.size)) { + roaring_free(ans); + return NULL; + } + if (!ra_overwrite( // memory allocation of individual containers may fail + 
&r->high_low_container, &ans->high_low_container, is_cow(r))) { + roaring_bitmap_free(ans); // overwrite should leave in freeable state + return NULL; + } + roaring_bitmap_set_copy_on_write(ans, is_cow(r)); + return ans; +} - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(ra1, (uint16_t)pos1); - const uint16_t s2 = ra_get_key_at_index(ra2, (uint16_t)pos2); +bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, + const roaring_bitmap_t *src) { + roaring_bitmap_set_copy_on_write(dest, is_cow(src)); + return ra_overwrite(&src->high_low_container, &dest->high_low_container, + is_cow(src)); +} - if (s1 == s2) { - uint8_t type1, type2; - container_t *c1 = - ra_get_container_at_index(ra1, (uint16_t)pos1, &type1); - container_t *c2 = - ra_get_container_at_index(ra2, (uint16_t)pos2, &type2); - if (!container_is_subset(c1, type1, c2, type2)) return false; - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - return false; - } else { // s1 > s2 - pos2 = ra_advance_until(ra2, s1, pos2); - } +void roaring_bitmap_free(const roaring_bitmap_t *r) { + if (r == NULL) { + return; } - if (pos1 == length1) - return true; - else - return false; + if (!is_frozen(r)) { + ra_clear((roaring_array_t *)&r->high_low_container); + } + roaring_free((roaring_bitmap_t *)r); } -static void insert_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; - if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); - flipped_container = - container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); +void roaring_bitmap_clear(roaring_bitmap_t *r) { + ra_reset(&r->high_low_container); +} - if (container_get_cardinality(flipped_container, 
ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - container_free(flipped_container, ctype_out); +void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { + roaring_array_t *ra = &r->high_low_container; + + const uint16_t hb = val >> 16; + const int i = ra_get_index(ra, hb); + uint8_t typecode; + if (i >= 0) { + ra_unshare_container_at_index(ra, (uint16_t)i); + container_t *container = + ra_get_container_at_index(ra, (uint16_t)i, &typecode); + uint8_t newtypecode = typecode; + container_t *container2 = + container_add(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); } } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); + array_container_t *newac = array_container_create(); + container_t *container = + container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode); + // we could just assume that it stays an array container + ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, + container, typecode); } } -static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; +bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + const int i = ra_get_index(&r->high_low_container, hb); + uint8_t typecode; + bool result = false; if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); - flipped_container = container_inot_range( - container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - // if a new container was 
created, the old one was already freed - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); + ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); + container_t *container = ra_get_container_at_index( + &r->high_low_container, (uint16_t)i, &typecode); + + const int oldCardinality = + container_get_cardinality(container, typecode); + + uint8_t newtypecode = typecode; + container_t *container2 = + container_add(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + result = true; } else { - container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } + const int newCardinality = + container_get_cardinality(container, newtypecode); + result = oldCardinality != newCardinality; + } } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); + array_container_t *newac = array_container_create(); + container_t *container = + container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode); + // we could just assume that it stays an array container + ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, + container, typecode); + result = true; } + + return result; } -static void insert_fully_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, - uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; +void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + const int i = ra_get_index(&r->high_low_container, hb); + uint8_t typecode; if (i >= 0) { - container_t *container_to_flip = - 
ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); - flipped_container = - container_not(container_to_flip, ctype_in, &ctype_out); - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - container_free(flipped_container, ctype_out); + ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); + container_t *container = ra_get_container_at_index( + &r->high_low_container, (uint16_t)i, &typecode); + uint8_t newtypecode = typecode; + container_t *container2 = + container_remove(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } + if (container_nonzero_cardinality(container2, newtypecode)) { + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } else { + ra_remove_at_index_and_free(&r->high_low_container, i); } - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); } } -static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; +bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + const int i = ra_get_index(&r->high_low_container, hb); + uint8_t typecode; + bool result = false; if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); - flipped_container = - container_inot(container_to_flip, ctype_in, &ctype_out); + ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); + container_t *container = ra_get_container_at_index( + &r->high_low_container, (uint16_t)i, &typecode); - if 
(container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); + const int oldCardinality = + container_get_cardinality(container, typecode); + + uint8_t newtypecode = typecode; + container_t *container2 = + container_remove(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } + + const int newCardinality = + container_get_cardinality(container2, newtypecode); + + if (newCardinality != 0) { + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); } else { - container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); + ra_remove_at_index_and_free(&r->high_low_container, i); } - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); + result = oldCardinality != newCardinality; } + return result; } -roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, - uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end) { - return roaring_bitmap_copy(x1); +void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals) { + if (n_args == 0 || r->high_low_container.size == 0) { + return; } - if (range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); + int32_t pos = + -1; // position of the container used in the previous iteration + for (size_t i = 0; i < n_args; i++) { + uint16_t key = (uint16_t)(vals[i] >> 16); + if (pos < 0 || key != r->high_low_container.keys[pos]) { + pos = ra_get_index(&r->high_low_container, key); + } + if (pos >= 0) { + uint8_t new_typecode; + container_t *new_container; + new_container = container_remove( + r->high_low_container.containers[pos], vals[i] & 0xffff, + r->high_low_container.typecodes[pos], 
&new_typecode); + if (new_container != r->high_low_container.containers[pos]) { + container_free(r->high_low_container.containers[pos], + r->high_low_container.typecodes[pos]); + ra_replace_key_and_container_at_index(&r->high_low_container, + pos, key, new_container, + new_typecode); + } + if (!container_nonzero_cardinality(new_container, new_typecode)) { + container_free(new_container, new_typecode); + ra_remove_at_index(&r->high_low_container, pos); + pos = -1; + } + } } +} - roaring_bitmap_t *ans = roaring_bitmap_create(); - roaring_bitmap_set_copy_on_write(ans, is_cow(x1)); +// there should be some SIMD optimizations possible here +roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + uint32_t neededcap = length1 > length2 ? length2 : length1; + roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; + int pos1 = 0, pos2 = 0; - ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, - hb_start, is_cow(x1)); - if (hb_start == hb_end) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, - lb_start, 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. 
- } + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = + ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + const uint16_t s2 = + ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block + if (s1 == s2) { + uint8_t type1, type2; + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + container_t *c = container_and(c1, type1, c2, type2, &result_type); - for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { - insert_fully_flipped_container(&ans->high_low_container, - &x1->high_low_container, - (uint16_t)hb); + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, result_type); // otherwise: memory leak! + } + ++pos1; + ++pos2; + } else if (s1 < s2) { // s1 < s2 + pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); } + } + return answer; +} - // handle a partial final container - if (lb_end != 0xFFFF) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; - } +/** + * Compute the union of 'number' bitmaps. 
+ */ +roaring_bitmap_t *roaring_bitmap_or_many(size_t number, + const roaring_bitmap_t **x) { + if (number == 0) { + return roaring_bitmap_create(); } - ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, - hb_end, is_cow(x1)); - return ans; + if (number == 1) { + return roaring_bitmap_copy(x[0]); + } + roaring_bitmap_t *answer = + roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); + for (size_t i = 2; i < number; i++) { + roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); + } + roaring_bitmap_repair_after_lazy(answer); + return answer; +} + +/** + * Compute the xor of 'number' bitmaps. + */ +roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, + const roaring_bitmap_t **x) { + if (number == 0) { + return roaring_bitmap_create(); + } + if (number == 1) { + return roaring_bitmap_copy(x[0]); + } + roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); + for (size_t i = 2; i < number; i++) { + roaring_bitmap_lazy_xor_inplace(answer, x[i]); + } + roaring_bitmap_repair_after_lazy(answer); + return answer; } -void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end) { - return; // empty range - } - if (range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } +// inplace and (modifies its first argument). +void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + if (x1 == x2) return; + int pos1 = 0, pos2 = 0, intersection_size = 0; + const int length1 = ra_get_size(&x1->high_low_container); + const int length2 = ra_get_size(&x2->high_low_container); - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); + // any skipped-over or newly emptied containers in x1 + // have to be freed. 
+ while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = + ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + const uint16_t s2 = + ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - if (hb_start == hb_end) { - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. - } + if (s1 == s2) { + uint8_t type1, type2, result_type; + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); - if (lb_end != 0xFFFF) --hb_end; + // We do the computation "in place" only when c1 is not a shared + // container. Rationale: using a shared container safely with in + // place computation would require making a copy and then doing the + // computation in place which is likely less efficient than avoiding + // in place entirely and always generating a new container. + container_t *c = + (type1 == SHARED_CONTAINER_TYPE) + ? 
container_and(c1, type1, c2, type2, &result_type) + : container_iand(c1, type1, c2, type2, &result_type); - for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { - inplace_fully_flip_container(&x1->high_low_container, (uint16_t)hb); - } - // handle a partial final container - if (lb_end != 0xFFFF) { - inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; + if (c != c1) { // in this instance a new container was created, and + // we need to free the old one + container_free(c1, type1); + } + if (container_nonzero_cardinality(c, result_type)) { + ra_replace_key_and_container_at_index(&x1->high_low_container, + intersection_size, s1, c, + result_type); + intersection_size++; + } else { + container_free(c, result_type); + } + ++pos1; + ++pos2; + } else if (s1 < s2) { + pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); } } -} -static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c, - uint8_t t) { - int size = ra_get_size(ra); - if (size == 0 || ra_get_key_at_index(ra, (uint16_t)(size - 1)) != k) { - // No merge. - ra_append(ra, (uint16_t)k, c, t); - return; + // if we ended early because x2 ran out, then all remaining in x1 should be + // freed + while (pos1 < length1) { + container_free(x1->high_low_container.containers[pos1], + x1->high_low_container.typecodes[pos1]); + ++pos1; } - uint8_t last_t, new_t; - container_t *last_c, *new_c; - - // NOTE: we don't need to unwrap here, since we added last_c ourselves - // we have the certainty it's not a shared container. - // The same applies to c, as it's the result of calling container_offset. 
- last_c = ra_get_container_at_index(ra, (uint16_t)(size - 1), &last_t); - new_c = container_ior(last_c, last_t, c, t, &new_t); - - ra_set_container_at_index(ra, size - 1, new_c, new_t); - - // Comparison of pointers of different origin is UB (or so claim some - // compiler makers), so we compare their bit representation only. - if ((uintptr_t)last_c != (uintptr_t)new_c) { - container_free(last_c, last_t); - } - container_free(c, t); + // all containers after this have either been copied or freed + ra_downsize(&x1->high_low_container, intersection_size); } -// roaring_bitmap_add_offset adds the value 'offset' to each and every value in -// a bitmap, generating a new bitmap in the process. If offset + element is -// outside of the range [0,2^32), that the element will be dropped. -// We need "offset" to be 64 bits because we want to support values -// between -0xFFFFFFFF up to +0xFFFFFFFF. -roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, - int64_t offset) { - roaring_bitmap_t *answer; - roaring_array_t *ans_ra; - int64_t container_offset; - uint16_t in_offset; - - const roaring_array_t *bm_ra = &bm->high_low_container; - int length = bm_ra->size; - - if (offset == 0) { - return roaring_bitmap_copy(bm); +roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + return roaring_bitmap_copy(x2); } + if (0 == length2) { + return roaring_bitmap_copy(x1); + } + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = 
ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + container_t *c = container_or(c1, type1, c2, type2, &result_type); - container_offset = offset >> 16; - in_offset = (uint16_t)(offset - container_offset * (1 << 16)); + // since we assume that the initial containers are non-empty, the + // result here + // can only be non-empty + ra_append(&answer->high_low_container, s1, c, result_type); + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - answer = roaring_bitmap_create(); - bool cow = is_cow(bm); - roaring_bitmap_set_copy_on_write(answer, cow); + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + // c1 = container_clone(c1, type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); + } + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - ans_ra = &answer->high_low_container; + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + // c2 = container_clone(c2, type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } 
else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); + } + return answer; +} - if (in_offset == 0) { - ans_ra = &answer->high_low_container; +// inplace or (modifies its first argument). +void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; - for (int i = 0, j = 0; i < length; ++i) { - int64_t key = ra_get_key_at_index(bm_ra, (uint16_t)i); - key += container_offset; + if (0 == length2) return; - if (key < 0 || key >= (1 << 16)) { - continue; - } - ra_append_copy(ans_ra, bm_ra, (uint16_t)i, cow); - ans_ra->keys[j++] = (uint16_t)key; - } - return answer; + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; } + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + if (!container_is_full(c1, type1)) { + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, (uint16_t)pos2, &type2); + container_t *c = + (type1 == SHARED_CONTAINER_TYPE) + ? 
container_or(c1, type1, c2, type2, &result_type) + : container_ior(c1, type1, c2, type2, &result_type); - uint8_t t; - const container_t *c; - container_t *lo, *hi, **lo_ptr, **hi_ptr; - int64_t k; + if (c != c1) { // in this instance a new container was created, + // and we need to free the old one + container_free(c1, type1); + } + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); + } + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - for (int i = 0; i < length; ++i) { - lo = hi = NULL; - lo_ptr = hi_ptr = NULL; + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - k = ra_get_key_at_index(bm_ra, (uint16_t)i) + container_offset; - if (k >= 0 && k < (1 << 16)) { - lo_ptr = &lo; - } - if (k + 1 >= 0 && k + 1 < (1 << 16)) { - hi_ptr = &hi; - } - if (lo_ptr == NULL && hi_ptr == NULL) { - continue; - } - c = ra_get_container_at_index(bm_ra, (uint16_t)i, &t); - c = container_unwrap_shared(c, &t); + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } - container_add_offset(c, t, lo_ptr, hi_ptr, in_offset); - if (lo != NULL) { - offset_append_with_merge(ans_ra, (int)k, lo, t); - } - if (hi != NULL) { - ra_append(ans_ra, (uint16_t)(k + 1), hi, t); + // container_t *c2_clone = container_clone(c2, type2); + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } - // the `lo` and `hi` container type always keep same as container 
`c`. - // in the case of `container_add_offset` on bitset container, `lo` and - // `hi` may has small cardinality, they must be repaired to array - // container. } - - roaring_bitmap_repair_after_lazy(answer); // do required type conversions. - return answer; + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, is_cow(x2)); + } } -roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { +roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { uint8_t result_type = 0; const int length1 = x1->high_low_container.size, length2 = x2->high_low_container.size; @@ -21561,26 +22026,13 @@ roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, (uint16_t)pos1, &type1); container_t *c2 = ra_get_container_at_index(&x2->high_low_container, (uint16_t)pos2, &type2); - container_t *c; - if (bitsetconversion && - (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) && - (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)) { - container_t *newc1 = - container_mutable_unwrap_shared(c1, &type1); - newc1 = container_to_bitset(newc1, type1); - type1 = BITSET_CONTAINER_TYPE; - c = container_lazy_ior(newc1, type1, c2, type2, &result_type); - if (c != newc1) { // should not happen - container_free(newc1, type1); - } + container_t *c = container_xor(c1, type1, c2, type2, &result_type); + + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); } else { - c = container_lazy_or(c1, type1, c2, type2, &result_type); + container_free(c, result_type); } - // since we assume that the initial containers are non-empty, - // the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, result_type); ++pos1; ++pos2; if (pos1 == length1) break; @@ -21627,9 +22079,11 @@ roaring_bitmap_t *roaring_bitmap_lazy_or(const 
roaring_bitmap_t *x1, return answer; } -void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { +// inplace xor (modifies its first argument). + +void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + assert(x1 != x2); uint8_t result_type = 0; int length1 = x1->high_low_container.size; const int length2 = x2->high_low_container.size; @@ -21640,6 +22094,10 @@ void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, roaring_bitmap_overwrite(x1, x2); return; } + + // XOR can have new containers inserted from x2, but can also + // lose containers when x1 and x2 are nonempty and identical. + int pos1 = 0, pos2 = 0; uint8_t type1, type2; uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); @@ -21648,33 +22106,184 @@ void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, if (s1 == s2) { container_t *c1 = ra_get_container_at_index(&x1->high_low_container, (uint16_t)pos1, &type1); - if (!container_is_full(c1, type1)) { - if ((bitsetconversion == false) || - (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)) { - c1 = get_writable_copy_if_shared(c1, &type1); - } else { - // convert to bitset - container_t *old_c1 = c1; - uint8_t old_type1 = type1; - c1 = container_mutable_unwrap_shared(c1, &type1); - c1 = container_to_bitset(c1, type1); - container_free(old_c1, old_type1); - type1 = BITSET_CONTAINER_TYPE; - } + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + + // We do the computation "in place" only when c1 is not a shared + // container. Rationale: using a shared container safely with in + // place computation would require making a copy and then doing the + // computation in place which is likely less efficient than avoiding + // in place entirely and always generating a new container. 
+ + container_t *c; + if (type1 == SHARED_CONTAINER_TYPE) { + c = container_xor(c1, type1, c2, type2, &result_type); + shared_container_free(CAST_shared(c1)); // so release + } else { + c = container_ixor(c1, type1, c2, type2, &result_type); + } + + if (container_nonzero_cardinality(c, result_type)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); + ++pos1; + } else { + container_free(c, result_type); + ra_remove_at_index(&x1->high_low_container, pos1); + --length1; + } + + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, is_cow(x2)); + } +} + +roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); + roaring_bitmap_set_copy_on_write(empty_bitmap, + is_cow(x1) || is_cow(x2)); + return empty_bitmap; + } + if (0 == length2) { + return roaring_bitmap_copy(x1); + } + roaring_bitmap_t *answer = 
roaring_bitmap_create_with_capacity(length1); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = 0; + uint16_t s2 = 0; + while (true) { + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + container_t *c = + container_andnot(c1, type1, c2, type2, &result_type); + + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, result_type); + } + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + } else if (s1 < s2) { // s1 < s2 + const int next_pos1 = + ra_advance_until(&x1->high_low_container, s2, pos1); + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, next_pos1, + is_cow(x1)); + // TODO : perhaps some of the copy_on_write should be based on + // answer rather than x1 (more stringent?). Many similar cases + pos1 = next_pos1; + if (pos1 == length1) break; + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + if (pos2 == length2) break; + } + } + if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); + } + return answer; +} + +// inplace andnot (modifies its first argument). 
+ +void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + assert(x1 != x2); + + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; + int intersection_size = 0; + + if (0 == length2) return; + + if (0 == length1) { + roaring_bitmap_clear(x1); + return; + } + + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, (uint16_t)pos2, &type2); - container_t *c = - container_lazy_ior(c1, type1, c2, type2, &result_type); + // We do the computation "in place" only when c1 is not a shared + // container. Rationale: using a shared container safely with in + // place computation would require making a copy and then doing the + // computation in place which is likely less efficient than avoiding + // in place entirely and always generating a new container. 
- if (c != c1) { // in this instance a new container was created, - // and we need to free the old one - container_free(c1, type1); - } + container_t *c; + if (type1 == SHARED_CONTAINER_TYPE) { + c = container_andnot(c1, type1, c2, type2, &result_type); + shared_container_free(CAST_shared(c1)); // release + } else { + c = container_iandnot(c1, type1, c2, type2, &result_type); + } - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); + if (container_nonzero_cardinality(c, result_type)) { + ra_replace_key_and_container_at_index(&x1->high_low_container, + intersection_size++, s1, + c, result_type); + } else { + container_free(c, result_type); } + ++pos1; ++pos2; if (pos1 == length1) break; @@ -21683,4301 +22292,4833 @@ void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } else if (s1 < s2) { // s1 < s2 + if (pos1 != intersection_size) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, (uint16_t)pos1, &type1); + + ra_replace_key_and_container_at_index( + &x1->high_low_container, intersection_size, s1, c1, type1); + } + intersection_size++; pos1++; if (pos1 == length1) break; s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - // container_t *c2_clone = container_clone(c2, type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); if (pos2 == length2) break; s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, 
is_cow(x2)); + + if (pos1 < length1) { + // all containers between intersection_size and + // pos1 are junk. However, they have either been moved + // (thus still referenced) or involved in an iandnot + // that will clean up all containers that could not be reused. + // Thus we should not free the junk containers between + // intersection_size and pos1. + if (pos1 > intersection_size) { + // left slide of remaining items + ra_copy_range(&x1->high_low_container, pos1, length1, + intersection_size); + } + // else current placement is fine + intersection_size += (length1 - pos1); + } + ra_downsize(&x1->high_low_container, intersection_size); +} + +uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) { + const roaring_array_t *ra = &r->high_low_container; + + uint64_t card = 0; + for (int i = 0; i < ra->size; ++i) + card += container_get_cardinality(ra->containers[i], ra->typecodes[i]); + return card; +} + +uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, + uint64_t range_start, + uint64_t range_end) { + if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { + return 0; + } + return roaring_bitmap_range_cardinality_closed(r, (uint32_t)range_start, + (uint32_t)(range_end - 1)); +} + +uint64_t roaring_bitmap_range_cardinality_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end) { + const roaring_array_t *ra = &r->high_low_container; + + if (range_start > range_end) { + return 0; + } + + // now we have: 0 <= range_start <= range_end <= UINT32_MAX + + uint16_t minhb = (uint16_t)(range_start >> 16); + uint16_t maxhb = (uint16_t)(range_end >> 16); + + uint64_t card = 0; + + int i = ra_get_index(ra, minhb); + if (i >= 0) { + if (minhb == maxhb) { + card += container_rank(ra->containers[i], ra->typecodes[i], + range_end & 0xffff); + } else { + card += + container_get_cardinality(ra->containers[i], ra->typecodes[i]); + } + if ((range_start & 0xffff) != 0) { + card -= 
container_rank(ra->containers[i], ra->typecodes[i], + (range_start & 0xffff) - 1); + } + i++; + } else { + i = -i - 1; + } + + for (; i < ra->size; i++) { + uint16_t key = ra->keys[i]; + if (key < maxhb) { + card += + container_get_cardinality(ra->containers[i], ra->typecodes[i]); + } else if (key == maxhb) { + card += container_rank(ra->containers[i], ra->typecodes[i], + range_end & 0xffff); + break; + } else { + break; + } + } + + return card; +} + +bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) { + return r->high_low_container.size == 0; +} + +void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) { + ra_to_uint32_array(&r->high_low_container, ans); +} + +bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset, + size_t limit, uint32_t *ans) { + return ra_range_uint32_array(&r->high_low_container, offset, limit, ans); +} + +/** convert array and bitmap containers to run containers when it is more + * efficient; + * also convert from run containers when more space efficient. Returns + * true if the result has at least one run container. + */ +bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { + bool answer = false; + for (int i = 0; i < r->high_low_container.size; i++) { + uint8_t type_original, type_after; + ra_unshare_container_at_index( + &r->high_low_container, + (uint16_t)i); // TODO: this introduces extra cloning! 
+ container_t *c = ra_get_container_at_index(&r->high_low_container, + (uint16_t)i, &type_original); + container_t *c1 = convert_run_optimize(c, type_original, &type_after); + if (type_after == RUN_CONTAINER_TYPE) { + answer = true; + } + ra_set_container_at_index(&r->high_low_container, i, c1, type_after); + } + return answer; +} + +size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { + size_t answer = 0; + for (int i = 0; i < r->high_low_container.size; i++) { + uint8_t type_original; + container_t *c = ra_get_container_at_index(&r->high_low_container, + (uint16_t)i, &type_original); + answer += container_shrink_to_fit(c, type_original); + } + answer += ra_shrink_to_fit(&r->high_low_container); + return answer; +} + +/** + * Remove run-length encoding even when it is more space efficient + * return whether a change was applied + */ +bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { + bool answer = false; + for (int i = 0; i < r->high_low_container.size; i++) { + uint8_t type_original, type_after; + container_t *c = ra_get_container_at_index(&r->high_low_container, + (uint16_t)i, &type_original); + if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) { + answer = true; + if (type_original == SHARED_CONTAINER_TYPE) { + run_container_t *truec = CAST_run(CAST_shared(c)->container); + int32_t card = run_container_cardinality(truec); + container_t *c1 = convert_to_bitset_or_array_container( + truec, card, &type_after); + shared_container_free(CAST_shared(c)); // frees run as needed + ra_set_container_at_index(&r->high_low_container, i, c1, + type_after); + + } else { + int32_t card = run_container_cardinality(CAST_run(c)); + container_t *c1 = convert_to_bitset_or_array_container( + CAST_run(c), card, &type_after); + run_container_free(CAST_run(c)); + ra_set_container_at_index(&r->high_low_container, i, c1, + type_after); + } + } + } + return answer; +} + +size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) { + size_t 
portablesize = roaring_bitmap_portable_size_in_bytes(r); + uint64_t cardinality = roaring_bitmap_get_cardinality(r); + uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); + if (portablesize < sizeasarray) { + buf[0] = CROARING_SERIALIZATION_CONTAINER; + return roaring_bitmap_portable_serialize(r, buf + 1) + 1; + } else { + buf[0] = CROARING_SERIALIZATION_ARRAY_UINT32; + memcpy(buf + 1, &cardinality, sizeof(uint32_t)); + roaring_bitmap_to_uint32_array( + r, (uint32_t *)(buf + 1 + sizeof(uint32_t))); + return 1 + (size_t)sizeasarray; + } +} + +size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) { + size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); + uint64_t sizeasarray = + roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) + sizeof(uint32_t); + return portablesize < sizeasarray ? portablesize + 1 + : (size_t)sizeasarray + 1; +} + +size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) { + return ra_portable_size_in_bytes(&r->high_low_container); +} + +roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, + size_t maxbytes) { + roaring_bitmap_t *ans = + (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); + if (ans == NULL) { + return NULL; + } + size_t bytesread; + bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, + maxbytes, &bytesread); + if (!is_ok) { + roaring_free(ans); + return NULL; + } + roaring_bitmap_set_copy_on_write(ans, false); + if (!is_ok) { + roaring_free(ans); + return NULL; } + return ans; } -roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - 
roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c = - container_lazy_xor(c1, type1, c2, type2, &result_type); +roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { + return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); +} - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); - } +size_t roaring_bitmap_portable_deserialize_size(const char *buf, + size_t maxbytes) { + return ra_portable_deserialize_size(buf, maxbytes); +} - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); +size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf) { + return ra_portable_serialize(&r->high_low_container, buf); +} - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); - } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); +roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { + const char *bufaschar = (const char *)buf; + if (bufaschar[0] == 
CROARING_SERIALIZATION_ARRAY_UINT32) { + /* This looks like a compressed set of uint32_t elements */ + uint32_t card; - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + memcpy(&card, bufaschar + 1, sizeof(uint32_t)); + + const uint32_t *elems = + (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); + + roaring_bitmap_t *bitmap = roaring_bitmap_create(); + if (bitmap == NULL) { + return NULL; } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; + roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; + for (uint32_t i = 0; i < card; i++) { + // elems may not be aligned, read with memcpy + uint32_t elem; + memcpy(&elem, elems + i, sizeof(elem)); + roaring_bitmap_add_bulk(bitmap, &context, elem); + } + return bitmap; + + } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { + return roaring_bitmap_portable_deserialize(bufaschar + 1); + } else + return (NULL); } -void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; +roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, + size_t maxbytes) { + if (maxbytes < 1) { + return NULL; + } - if (0 == length2) return; + const char *bufaschar = (const char *)buf; + if (bufaschar[0] == 
CROARING_SERIALIZATION_ARRAY_UINT32) { + if (maxbytes < 1 + sizeof(uint32_t)) { + return NULL; + } - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); + /* This looks like a compressed set of uint32_t elements */ + uint32_t card; + memcpy(&card, bufaschar + 1, sizeof(uint32_t)); - // We do the computation "in place" only when c1 is not a shared - // container. Rationale: using a shared container safely with in - // place computation would require making a copy and then doing the - // computation in place which is likely less efficient than avoiding - // in place entirely and always generating a new container. 
+ // Check the buffer is big enough to contain card uint32_t elements + if (maxbytes < 1 + sizeof(uint32_t) + card * sizeof(uint32_t)) { + return NULL; + } - container_t *c; - if (type1 == SHARED_CONTAINER_TYPE) { - c = container_lazy_xor(c1, type1, c2, type2, &result_type); - shared_container_free(CAST_shared(c1)); // release - } else { - c = container_lazy_ixor(c1, type1, c2, type2, &result_type); - } + const uint32_t *elems = + (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); - if (container_nonzero_cardinality(c, result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - ++pos1; - } else { - container_free(c, result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + roaring_bitmap_t *bitmap = roaring_bitmap_create(); + if (bitmap == NULL) { + return NULL; + } + roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; + for (uint32_t i = 0; i < card; i++) { + // elems may not be aligned, read with memcpy + uint32_t elem; + memcpy(&elem, elems + i, sizeof(elem)); + roaring_bitmap_add_bulk(bitmap, &context, elem); + } + return bitmap; - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { + return roaring_bitmap_portable_deserialize_safe(bufaschar + 1, + maxbytes - 1); + } else + return (NULL); +} - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - // container_t *c2_clone = container_clone(c2, type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - 
ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); +bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, + void *ptr) { + const roaring_array_t *ra = &r->high_low_container; + + for (int i = 0; i < ra->size; ++i) + if (!container_iterate(ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16, iterator, ptr)) { + return false; } + return true; +} + +bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, + uint64_t high_bits, void *ptr) { + const roaring_array_t *ra = &r->high_low_container; + + for (int i = 0; i < ra->size; ++i) + if (!container_iterate64(ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16, iterator, + high_bits, ptr)) { + return false; + } + return true; +} + +/**** + * begin roaring_uint32_iterator_t + *****/ + +/** + * Partially initializes the iterator. Leaves it in either state: + * 1. Invalid due to `has_value = false`, or + * 2. At a container, with the high bits set, `has_value = true`. + */ +CROARING_WARN_UNUSED static bool iter_new_container_partial_init( + roaring_uint32_iterator_t *newit) { + newit->current_value = 0; + if (newit->container_index >= newit->parent->high_low_container.size || + newit->container_index < 0) { + newit->current_value = UINT32_MAX; + return (newit->has_value = false); } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } + newit->has_value = true; + // we precompute container, typecode and highbits so that successive + // iterators do not have to grab them from odd memory locations + // and have to worry about the (easily predicted) container_unwrap_shared + // call. 
+ newit->container = + newit->parent->high_low_container.containers[newit->container_index]; + newit->typecode = + newit->parent->high_low_container.typecodes[newit->container_index]; + newit->highbits = + ((uint32_t) + newit->parent->high_low_container.keys[newit->container_index]) + << 16; + newit->container = + container_unwrap_shared(newit->container, &(newit->typecode)); + return true; } -void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) { - roaring_array_t *ra = &r->high_low_container; +/** + * Positions the iterator at the first value of the current container that the + * iterator points at, if available. + */ +CROARING_WARN_UNUSED static bool loadfirstvalue( + roaring_uint32_iterator_t *newit) { + if (iter_new_container_partial_init(newit)) { + uint16_t value = 0; + newit->container_it = + container_init_iterator(newit->container, newit->typecode, &value); + newit->current_value = newit->highbits | value; + } + return newit->has_value; +} - for (int i = 0; i < ra->size; ++i) { - const uint8_t old_type = ra->typecodes[i]; - container_t *old_c = ra->containers[i]; - uint8_t new_type = old_type; - container_t *new_c = container_repair_after_lazy(old_c, &new_type); - ra->containers[i] = new_c; - ra->typecodes[i] = new_type; +/** + * Positions the iterator at the last value of the current container that the + * iterator points at, if available. + */ +CROARING_WARN_UNUSED static bool loadlastvalue( + roaring_uint32_iterator_t *newit) { + if (iter_new_container_partial_init(newit)) { + uint16_t value = 0; + newit->container_it = container_init_iterator_last( + newit->container, newit->typecode, &value); + newit->current_value = newit->highbits | value; } + return newit->has_value; } /** - * roaring_bitmap_rank returns the number of integers that are smaller or equal - * to x. + * Positions the iterator at the smallest value that is larger than or equal to + * `val` within the current container that the iterator points at. 
Assumes such + * a value exists within the current container. */ -uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { - uint64_t size = 0; - uint32_t xhigh = x >> 16; - for (int i = 0; i < bm->high_low_container.size; i++) { - uint32_t key = bm->high_low_container.keys[i]; - if (xhigh > key) { - size += - container_get_cardinality(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i]); - } else if (xhigh == key) { - return size + container_rank(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i], - x & 0xFFFF); - } else { - return size; - } +CROARING_WARN_UNUSED static bool loadfirstvalue_largeorequal( + roaring_uint32_iterator_t *newit, uint32_t val) { + bool partial_init = iter_new_container_partial_init(newit); + assert(partial_init); + if (!partial_init) { + return false; } - return size; + uint16_t value = 0; + newit->container_it = + container_init_iterator(newit->container, newit->typecode, &value); + bool found = container_iterator_lower_bound( + newit->container, newit->typecode, &newit->container_it, &value, + val & 0xFFFF); + assert(found); + if (!found) { + return false; + } + newit->current_value = newit->highbits | value; + return true; } -void roaring_bitmap_rank_many(const roaring_bitmap_t *bm, const uint32_t *begin, - const uint32_t *end, uint64_t *ans) { - uint64_t size = 0; - int i = 0; - const uint32_t *iter = begin; - while (i < bm->high_low_container.size && iter != end) { - uint32_t x = *iter; - uint32_t xhigh = x >> 16; - uint32_t key = bm->high_low_container.keys[i]; - if (xhigh > key) { - size += - container_get_cardinality(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i]); - i++; - } else if (xhigh == key) { - uint32_t consumed = container_rank_many( - bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i], size, iter, end, ans); - iter += consumed; - ans += consumed; +void roaring_iterator_init(const roaring_bitmap_t *r, + 
roaring_uint32_iterator_t *newit) { + newit->parent = r; + newit->container_index = 0; + newit->has_value = loadfirstvalue(newit); +} + +void roaring_iterator_init_last(const roaring_bitmap_t *r, + roaring_uint32_iterator_t *newit) { + newit->parent = r; + newit->container_index = newit->parent->high_low_container.size - 1; + newit->has_value = loadlastvalue(newit); +} + +roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r) { + roaring_uint32_iterator_t *newit = + (roaring_uint32_iterator_t *)roaring_malloc( + sizeof(roaring_uint32_iterator_t)); + if (newit == NULL) return NULL; + roaring_iterator_init(r, newit); + return newit; +} + +roaring_uint32_iterator_t *roaring_uint32_iterator_copy( + const roaring_uint32_iterator_t *it) { + roaring_uint32_iterator_t *newit = + (roaring_uint32_iterator_t *)roaring_malloc( + sizeof(roaring_uint32_iterator_t)); + memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); + return newit; +} + +bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it, + uint32_t val) { + uint16_t hb = val >> 16; + const int i = ra_get_index(&it->parent->high_low_container, hb); + if (i >= 0) { + uint32_t lowvalue = + container_maximum(it->parent->high_low_container.containers[i], + it->parent->high_low_container.typecodes[i]); + uint16_t lb = val & 0xFFFF; + if (lowvalue < lb) { + // will have to load first value of next container + it->container_index = i + 1; } else { - *(ans++) = size; - iter++; + // the value is necessarily within the range of the container + it->container_index = i; + it->has_value = loadfirstvalue_largeorequal(it, val); + return it->has_value; } + } else { + // there is no matching, so we are going for the next container + it->container_index = -i - 1; } + it->has_value = loadfirstvalue(it); + return it->has_value; } -/** - * roaring_bitmap_get_index returns the index of x, if not exsist return -1. 
- */ -int64_t roaring_bitmap_get_index(const roaring_bitmap_t *bm, uint32_t x) { - int64_t index = 0; - const uint16_t xhigh = x >> 16; - int32_t high_idx = ra_get_index(&bm->high_low_container, xhigh); - if (high_idx < 0) return -1; +bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it) { + if (it->container_index >= it->parent->high_low_container.size) { + return (it->has_value = false); + } + if (it->container_index < 0) { + it->container_index = 0; + return (it->has_value = loadfirstvalue(it)); + } + uint16_t low16 = (uint16_t)it->current_value; + if (container_iterator_next(it->container, it->typecode, &it->container_it, + &low16)) { + it->current_value = it->highbits | low16; + return (it->has_value = true); + } + it->container_index++; + return (it->has_value = loadfirstvalue(it)); +} - for (int i = 0; i < bm->high_low_container.size; i++) { - uint32_t key = bm->high_low_container.keys[i]; - if (xhigh > key) { - index += - container_get_cardinality(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i]); - } else if (xhigh == key) { - int32_t low_idx = container_get_index( - bm->high_low_container.containers[high_idx], - bm->high_low_container.typecodes[high_idx], x & 0xFFFF); - if (low_idx < 0) return -1; - return index + low_idx; - } else { - return -1; +bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it) { + if (it->container_index < 0) { + return (it->has_value = false); + } + if (it->container_index >= it->parent->high_low_container.size) { + it->container_index = it->parent->high_low_container.size - 1; + return (it->has_value = loadlastvalue(it)); + } + uint16_t low16 = (uint16_t)it->current_value; + if (container_iterator_prev(it->container, it->typecode, &it->container_it, + &low16)) { + it->current_value = it->highbits | low16; + return (it->has_value = true); + } + it->container_index--; + return (it->has_value = loadlastvalue(it)); +} + +uint32_t 
roaring_uint32_iterator_read(roaring_uint32_iterator_t *it, + uint32_t *buf, uint32_t count) { + uint32_t ret = 0; + while (it->has_value && ret < count) { + uint32_t consumed; + uint16_t low16 = (uint16_t)it->current_value; + bool has_value = container_iterator_read_into_uint32( + it->container, it->typecode, &it->container_it, it->highbits, buf, + count - ret, &consumed, &low16); + ret += consumed; + buf += consumed; + if (has_value) { + it->has_value = true; + it->current_value = it->highbits | low16; + assert(ret == count); + return ret; } + it->container_index++; + it->has_value = loadfirstvalue(it); } - return index; + return ret; } -/** - * roaring_bitmap_smallest returns the smallest value in the set. - * Returns UINT32_MAX if the set is empty. - */ -uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - container_t *c = bm->high_low_container.containers[0]; - uint8_t type = bm->high_low_container.typecodes[0]; - uint32_t key = bm->high_low_container.keys[0]; - uint32_t lowvalue = container_minimum(c, type); - return lowvalue | (key << 16); - } - return UINT32_MAX; +void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it) { + roaring_free(it); } -/** - * roaring_bitmap_smallest returns the greatest value in the set. - * Returns 0 if the set is empty. 
- */ -uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - container_t *container = - bm->high_low_container.containers[bm->high_low_container.size - 1]; - uint8_t typecode = - bm->high_low_container.typecodes[bm->high_low_container.size - 1]; - uint32_t key = - bm->high_low_container.keys[bm->high_low_container.size - 1]; - uint32_t lowvalue = container_maximum(container, typecode); - return lowvalue | (key << 16); - } - return 0; -} +/**** + * end of roaring_uint32_iterator_t + *****/ -bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, - uint32_t *element) { - container_t *container; - uint8_t typecode; - uint16_t key; - uint32_t start_rank = 0; - int i = 0; - bool valid = false; - while (!valid && i < bm->high_low_container.size) { - container = bm->high_low_container.containers[i]; - typecode = bm->high_low_container.typecodes[i]; - valid = - container_select(container, typecode, &start_rank, rank, element); - i++; - } +bool roaring_bitmap_equals(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2) { + const roaring_array_t *ra1 = &r1->high_low_container; + const roaring_array_t *ra2 = &r2->high_low_container; - if (valid) { - key = bm->high_low_container.keys[i - 1]; - *element |= (((uint32_t)key) << 16); // w/o cast, key promotes signed - return true; - } else + if (ra1->size != ra2->size) { return false; + } + for (int i = 0; i < ra1->size; ++i) { + if (ra1->keys[i] != ra2->keys[i]) { + return false; + } + } + for (int i = 0; i < ra1->size; ++i) { + bool areequal = container_equals(ra1->containers[i], ra1->typecodes[i], + ra2->containers[i], ra2->typecodes[i]); + if (!areequal) { + return false; + } + } + return true; } -bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; +bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, + 
const roaring_bitmap_t *r2) { + const roaring_array_t *ra1 = &r1->high_low_container; + const roaring_array_t *ra2 = &r2->high_low_container; + + const int length1 = ra1->size, length2 = ra2->size; + int pos1 = 0, pos2 = 0; while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = - ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - const uint16_t s2 = - ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + const uint16_t s1 = ra_get_key_at_index(ra1, (uint16_t)pos1); + const uint16_t s2 = ra_get_key_at_index(ra2, (uint16_t)pos2); if (s1 == s2) { uint8_t type1, type2; - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - if (container_intersect(c1, type1, c2, type2)) return true; + container_t *c1 = + ra_get_container_at_index(ra1, (uint16_t)pos1, &type1); + container_t *c2 = + ra_get_container_at_index(ra2, (uint16_t)pos2, &type2); + if (!container_is_subset(c1, type1, c2, type2)) return false; ++pos1; ++pos2; } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); + return false; } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + pos2 = ra_advance_until(ra2, s1, pos2); } } - return answer != 0; + if (pos1 == length1) + return true; + else + return false; } -bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x, - uint64_t y) { - if (x >= y) { - // Empty range. 
- return false; +static void insert_flipped_container(roaring_array_t *ans_arr, + const roaring_array_t *x1_arr, uint16_t hb, + uint16_t lb_start, uint16_t lb_end) { + const int i = ra_get_index(x1_arr, hb); + const int j = ra_get_index(ans_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); + flipped_container = + container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, + (uint32_t)(lb_end + 1), &ctype_out); + + if (container_get_cardinality(flipped_container, ctype_out)) + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); + else { + container_free(flipped_container, ctype_out); + } + } else { + flipped_container = container_range_of_ones( + (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); } - roaring_uint32_iterator_t it; - roaring_iterator_init(bm, &it); - if (!roaring_uint32_iterator_move_equalorlarger(&it, (uint32_t)x)) { - // No values above x. 
- return false; +} + +static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, + uint16_t lb_start, uint16_t lb_end) { + const int i = ra_get_index(x1_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); + flipped_container = container_inot_range( + container_to_flip, ctype_in, (uint32_t)lb_start, + (uint32_t)(lb_end + 1), &ctype_out); + // if a new container was created, the old one was already freed + if (container_get_cardinality(flipped_container, ctype_out)) { + ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); + } else { + container_free(flipped_container, ctype_out); + ra_remove_at_index(x1_arr, i); + } + + } else { + flipped_container = container_range_of_ones( + (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); + ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, + ctype_out); } - if (it.current_value >= y) { - // No values below y. 
- return false; +} + +static void insert_fully_flipped_container(roaring_array_t *ans_arr, + const roaring_array_t *x1_arr, + uint16_t hb) { + const int i = ra_get_index(x1_arr, hb); + const int j = ra_get_index(ans_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); + flipped_container = + container_not(container_to_flip, ctype_in, &ctype_out); + if (container_get_cardinality(flipped_container, ctype_out)) + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); + else { + container_free(flipped_container, ctype_out); + } + } else { + flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); } - return true; } -uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = - ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - const uint16_t s2 = - ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); +static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { + const int i = ra_get_index(x1_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); + flipped_container = + container_inot(container_to_flip, ctype_in, &ctype_out); + + if (container_get_cardinality(flipped_container, ctype_out)) { + ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); + } else { + container_free(flipped_container, ctype_out); + ra_remove_at_index(x1_arr, i); + } + + } else { + flipped_container = 
container_range_of_ones(0U, 0x10000U, &ctype_out); + ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, + ctype_out); + } +} + +roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, + uint64_t range_start, + uint64_t range_end) { + if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { + return roaring_bitmap_copy(x1); + } + return roaring_bitmap_flip_closed(x1, (uint32_t)range_start, + (uint32_t)(range_end - 1)); +} + +roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, + uint32_t range_start, + uint32_t range_end) { + if (range_start > range_end) { + return roaring_bitmap_copy(x1); + } + + roaring_bitmap_t *ans = roaring_bitmap_create(); + roaring_bitmap_set_copy_on_write(ans, is_cow(x1)); + + uint16_t hb_start = (uint16_t)(range_start >> 16); + const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; + uint16_t hb_end = (uint16_t)(range_end >> 16); + const uint16_t lb_end = (uint16_t)range_end; // & 0xFFFF; + + ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, + hb_start, is_cow(x1)); + if (hb_start == hb_end) { + insert_flipped_container(&ans->high_low_container, + &x1->high_low_container, hb_start, lb_start, + lb_end); + } else { + // start and end containers are distinct + if (lb_start > 0) { + // handle first (partial) container + insert_flipped_container(&ans->high_low_container, + &x1->high_low_container, hb_start, + lb_start, 0xFFFF); + ++hb_start; // for the full containers. Can't wrap. 
+ } + + if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block - if (s1 == s2) { - uint8_t type1, type2; - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - answer += container_and_cardinality(c1, type1, c2, type2); - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { + insert_fully_flipped_container(&ans->high_low_container, + &x1->high_low_container, + (uint16_t)hb); + } + + // handle a partial final container + if (lb_end != 0xFFFF) { + insert_flipped_container(&ans->high_low_container, + &x1->high_low_container, hb_end + 1, 0, + lb_end); + ++hb_end; } } - return answer; + ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, + hb_end, is_cow(x1)); + return ans; } -double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return (double)inter / (double)(c1 + c2 - inter); +void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, + uint64_t range_end) { + if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { + return; + } + roaring_bitmap_flip_inplace_closed(x1, (uint32_t)range_start, + (uint32_t)(range_end - 1)); } -uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - inter; -} +void 
roaring_bitmap_flip_inplace_closed(roaring_bitmap_t *x1, + uint32_t range_start, + uint32_t range_end) { + if (range_start > range_end) { + return; // empty range + } -uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 - inter; -} + uint16_t hb_start = (uint16_t)(range_start >> 16); + const uint16_t lb_start = (uint16_t)range_start; + uint16_t hb_end = (uint16_t)(range_end >> 16); + const uint16_t lb_end = (uint16_t)range_end; -uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - 2 * inter; -} + if (hb_start == hb_end) { + inplace_flip_container(&x1->high_low_container, hb_start, lb_start, + lb_end); + } else { + // start and end containers are distinct + if (lb_start > 0) { + // handle first (partial) container + inplace_flip_container(&x1->high_low_container, hb_start, lb_start, + 0xFFFF); + ++hb_start; // for the full containers. Can't wrap. + } -bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - /* - * the next function call involves a binary search and lots of branching. 
- */ - int32_t i = ra_get_index(&r->high_low_container, hb); - if (i < 0) return false; + if (lb_end != 0xFFFF) --hb_end; - uint8_t typecode; - // next call ought to be cheap - container_t *container = ra_get_container_at_index(&r->high_low_container, - (uint16_t)i, &typecode); - // rest might be a tad expensive, possibly involving another round of binary - // search - return container_contains(container, val & 0xFFFF, typecode); + for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { + inplace_fully_flip_container(&x1->high_low_container, (uint16_t)hb); + } + // handle a partial final container + if (lb_end != 0xFFFF) { + inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, + lb_end); + ++hb_end; + } + } } -/** - * Check whether a range of values from range_start (included) to range_end - * (excluded) is present - */ -bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, - uint64_t range_start, uint64_t range_end) { - if (range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - if (range_start >= range_end) - return true; // empty range are always contained! 
- if (range_end - range_start == 1) - return roaring_bitmap_contains(r, (uint32_t)range_start); - uint16_t hb_rs = (uint16_t)(range_start >> 16); - uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); - const int32_t span = hb_re - hb_rs; - const int32_t hlc_sz = ra_get_size(&r->high_low_container); - if (hlc_sz < span + 1) { - return false; - } - int32_t is = ra_get_index(&r->high_low_container, hb_rs); - int32_t ie = ra_get_index(&r->high_low_container, hb_re); - if ((ie < 0) || (is < 0) || ((ie - is) != span) || ie >= hlc_sz) { - return false; - } - const uint32_t lb_rs = range_start & 0xFFFF; - const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; - uint8_t type; - container_t *c = - ra_get_container_at_index(&r->high_low_container, (uint16_t)is, &type); - if (hb_rs == hb_re) { - return container_contains_range(c, lb_rs, lb_re, type); - } - if (!container_contains_range(c, lb_rs, 1 << 16, type)) { - return false; - } - c = ra_get_container_at_index(&r->high_low_container, (uint16_t)ie, &type); - if (!container_contains_range(c, 0, lb_re, type)) { - return false; +static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c, + uint8_t t) { + int size = ra_get_size(ra); + if (size == 0 || ra_get_key_at_index(ra, (uint16_t)(size - 1)) != k) { + // No merge. + ra_append(ra, (uint16_t)k, c, t); + return; } - for (int32_t i = is + 1; i < ie; ++i) { - c = ra_get_container_at_index(&r->high_low_container, (uint16_t)i, - &type); - if (!container_is_full(c, type)) { - return false; - } + + uint8_t last_t, new_t; + container_t *last_c, *new_c; + + // NOTE: we don't need to unwrap here, since we added last_c ourselves + // we have the certainty it's not a shared container. + // The same applies to c, as it's the result of calling container_offset. 
+ last_c = ra_get_container_at_index(ra, (uint16_t)(size - 1), &last_t); + new_c = container_ior(last_c, last_t, c, t, &new_t); + + ra_set_container_at_index(ra, size - 1, new_c, new_t); + + // Comparison of pointers of different origin is UB (or so claim some + // compiler makers), so we compare their bit representation only. + if ((uintptr_t)last_c != (uintptr_t)new_c) { + container_free(last_c, last_t); } - return true; + container_free(c, t); } -bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, - const roaring_bitmap_t *r2) { - return (roaring_bitmap_get_cardinality(r2) > - roaring_bitmap_get_cardinality(r1) && - roaring_bitmap_is_subset(r1, r2)); -} +// roaring_bitmap_add_offset adds the value 'offset' to each and every value in +// a bitmap, generating a new bitmap in the process. If offset + element is +// outside of the range [0,2^32), that the element will be dropped. +// We need "offset" to be 64 bits because we want to support values +// between -0xFFFFFFFF up to +0xFFFFFFFF. +roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, + int64_t offset) { + roaring_bitmap_t *answer; + roaring_array_t *ans_ra; + int64_t container_offset; + uint16_t in_offset; -/* - * FROZEN SERIALIZATION FORMAT DESCRIPTION - * - * -- (beginning must be aligned by 32 bytes) -- - * uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * - * num_bitset_containers] rle16_t[total number of rle elements in - * all run containers] uint16_t[total number of array elements in - * all array containers] uint16_t[num_containers] - * uint16_t[num_containers] uint8_t[num_containers]
- * uint32_t - * - *
is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits) - * and the number of containers (17 bits). - * - * stores number of elements for every container. - * Its meaning depends on container type. - * For array and bitset containers, this value is the container cardinality - * minus one. For run container, it is the number of rle_t elements (n_runs). - * - * ,, are flat arrays of elements of - * all containers of respective type. - * - * <*_data> and are kept close together because they are not accessed - * during deserilization. This may reduce IO in case of large mmaped bitmaps. - * All members have their native alignments during deserilization except - *
, which is not guaranteed to be aligned by 4 bytes. - */ + const roaring_array_t *bm_ra = &bm->high_low_container; + int length = bm_ra->size; -size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) { - const roaring_array_t *ra = &rb->high_low_container; - size_t num_bytes = 0; - for (int32_t i = 0; i < ra->size; i++) { - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(ra->containers[i]); - num_bytes += rc->n_runs * sizeof(rle16_t); - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = - const_CAST_array(ra->containers[i]); - num_bytes += ac->cardinality * sizeof(uint16_t); - break; - } - default: - roaring_unreachable; - } + if (offset == 0) { + return roaring_bitmap_copy(bm); } - num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes - num_bytes += 4; // header - return num_bytes; -} -inline static void *arena_alloc(char **arena, size_t num_bytes) { - char *res = *arena; - *arena += num_bytes; - return res; -} + container_offset = offset >> 16; + in_offset = (uint16_t)(offset - container_offset * (1 << 16)); + + answer = roaring_bitmap_create(); + bool cow = is_cow(bm); + roaring_bitmap_set_copy_on_write(answer, cow); + + ans_ra = &answer->high_low_container; -void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) { - /* - * Note: we do not require user to supply a specifically aligned buffer. - * Thus we have to use memcpy() everywhere. 
- */ + if (in_offset == 0) { + ans_ra = &answer->high_low_container; - const roaring_array_t *ra = &rb->high_low_container; + for (int i = 0, j = 0; i < length; ++i) { + int64_t key = ra_get_key_at_index(bm_ra, (uint16_t)i); + key += container_offset; - size_t bitset_zone_size = 0; - size_t run_zone_size = 0; - size_t array_zone_size = 0; - for (int32_t i = 0; i < ra->size; i++) { - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - bitset_zone_size += - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(ra->containers[i]); - run_zone_size += rc->n_runs * sizeof(rle16_t); - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = - const_CAST_array(ra->containers[i]); - array_zone_size += ac->cardinality * sizeof(uint16_t); - break; + if (key < 0 || key >= (1 << 16)) { + continue; } - default: - roaring_unreachable; + ra_append_copy(ans_ra, bm_ra, (uint16_t)i, cow); + ans_ra->keys[j++] = (uint16_t)key; } + return answer; } - uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size); - rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size); - uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size); - uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size); - uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size); - uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size); - uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4); + uint8_t t; + const container_t *c; + container_t *lo, *hi, **lo_ptr, **hi_ptr; + int64_t k; - for (int32_t i = 0; i < ra->size; i++) { - uint16_t count; - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - const bitset_container_t *bc = - const_CAST_bitset(ra->containers[i]); - memcpy(bitset_zone, bc->words, - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; - if (bc->cardinality != 
BITSET_UNKNOWN_CARDINALITY) { - count = (uint16_t)(bc->cardinality - 1); - } else { - count = - (uint16_t)(bitset_container_compute_cardinality(bc) - - 1); - } - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(ra->containers[i]); - size_t num_bytes = rc->n_runs * sizeof(rle16_t); - memcpy(run_zone, rc->runs, num_bytes); - run_zone += rc->n_runs; - count = (uint16_t)rc->n_runs; - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = - const_CAST_array(ra->containers[i]); - size_t num_bytes = ac->cardinality * sizeof(uint16_t); - memcpy(array_zone, ac->array, num_bytes); - array_zone += ac->cardinality; - count = (uint16_t)(ac->cardinality - 1); - break; - } - default: - roaring_unreachable; + for (int i = 0; i < length; ++i) { + lo = hi = NULL; + lo_ptr = hi_ptr = NULL; + + k = ra_get_key_at_index(bm_ra, (uint16_t)i) + container_offset; + if (k >= 0 && k < (1 << 16)) { + lo_ptr = &lo; } - memcpy(&count_zone[i], &count, 2); - } - memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t)); - memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t)); - uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; - memcpy(header_zone, &header, 4); -} + if (k + 1 >= 0 && k + 1 < (1 << 16)) { + hi_ptr = &hi; + } + if (lo_ptr == NULL && hi_ptr == NULL) { + continue; + } + c = ra_get_container_at_index(bm_ra, (uint16_t)i, &t); + c = container_unwrap_shared(c, &t); -const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, - size_t length) { - if ((uintptr_t)buf % 32 != 0) { - return NULL; + container_add_offset(c, t, lo_ptr, hi_ptr, in_offset); + if (lo != NULL) { + offset_append_with_merge(ans_ra, (int)k, lo, t); + } + if (hi != NULL) { + ra_append(ans_ra, (uint16_t)(k + 1), hi, t); + } + // the `lo` and `hi` container type always keep same as container `c`. 
+ // in the case of `container_add_offset` on bitset container, `lo` and + // `hi` may has small cardinality, they must be repaired to array + // container. } - // cookie and num_containers - if (length < 4) { - return NULL; + roaring_bitmap_repair_after_lazy(answer); // do required type conversions. + return answer; +} + +roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2, + const bool bitsetconversion) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + return roaring_bitmap_copy(x2); } - uint32_t header; - memcpy(&header, buf + length - 4, 4); // header may be misaligned - if ((header & 0x7FFF) != FROZEN_COOKIE) { - return NULL; + if (0 == length2) { + return roaring_bitmap_copy(x1); } - int32_t num_containers = (header >> 15); + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + container_t *c; + if (bitsetconversion && + (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) && + (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)) { + container_t *newc1 = + container_mutable_unwrap_shared(c1, &type1); + newc1 = container_to_bitset(newc1, type1); + type1 = BITSET_CONTAINER_TYPE; + c = container_lazy_ior(newc1, type1, c2, type2, &result_type); + if (c != newc1) { // should not happen + container_free(newc1, type1); + } + } else { + c = container_lazy_or(c1, type1, c2, type2, 
&result_type); + } + // since we assume that the initial containers are non-empty, + // the + // result here + // can only be non-empty + ra_append(&answer->high_low_container, s1, c, result_type); + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - // typecodes, counts and keys - if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { - return NULL; - } - uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); - uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); - uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); + } + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - // {bitset,array,run}_zone - int32_t num_bitset_containers = 0; - int32_t num_run_containers = 0; - int32_t num_array_containers = 0; - size_t bitset_zone_size = 0; - size_t run_zone_size = 0; - size_t array_zone_size = 0; - for (int32_t i = 0; i < num_containers; i++) { - switch (typecodes[i]) { - case BITSET_CONTAINER_TYPE: - num_bitset_containers++; - bitset_zone_size += - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - case RUN_CONTAINER_TYPE: - num_run_containers++; - run_zone_size += counts[i] * sizeof(rle16_t); - break; - case ARRAY_CONTAINER_TYPE: - num_array_containers++; - array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); - break; - default: - return NULL; + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + 
(uint16_t)pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } } - if (length != bitset_zone_size + run_zone_size + array_zone_size + - 5 * num_containers + 4) { - return NULL; + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); } - uint64_t *bitset_zone = (uint64_t *)(buf); - rle16_t *run_zone = (rle16_t *)(buf + bitset_zone_size); - uint16_t *array_zone = (uint16_t *)(buf + bitset_zone_size + run_zone_size); + return answer; +} - size_t alloc_size = 0; - alloc_size += sizeof(roaring_bitmap_t); - alloc_size += num_containers * sizeof(container_t *); - alloc_size += num_bitset_containers * sizeof(bitset_container_t); - alloc_size += num_run_containers * sizeof(run_container_t); - alloc_size += num_array_containers * sizeof(array_container_t); +void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2, + const bool bitsetconversion) { + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; - char *arena = (char *)roaring_malloc(alloc_size); - if (arena == NULL) { - return NULL; + if (0 == length2) return; + + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; } + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = 
ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + if (!container_is_full(c1, type1)) { + if ((bitsetconversion == false) || + (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)) { + c1 = get_writable_copy_if_shared(c1, &type1); + } else { + // convert to bitset + container_t *old_c1 = c1; + uint8_t old_type1 = type1; + c1 = container_mutable_unwrap_shared(c1, &type1); + c1 = container_to_bitset(c1, type1); + container_free(old_c1, old_type1); + type1 = BITSET_CONTAINER_TYPE; + } - roaring_bitmap_t *rb = - (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t)); - rb->high_low_container.flags = ROARING_FLAG_FROZEN; - rb->high_low_container.allocation_size = num_containers; - rb->high_low_container.size = num_containers; - rb->high_low_container.keys = (uint16_t *)keys; - rb->high_low_container.typecodes = (uint8_t *)typecodes; - rb->high_low_container.containers = (container_t **)arena_alloc( - &arena, sizeof(container_t *) * num_containers); - // Ensure offset of high_low_container.containers is known distance used in - // C++ wrapper. sizeof(roaring_bitmap_t) is used as it is the size of the - // only allocation that precedes high_low_container.containers. If this is - // changed (new allocation or changed order), this offset will also need to - // be changed in the C++ wrapper. 
- assert(rb == - (roaring_bitmap_t *)((char *)rb->high_low_container.containers - - sizeof(roaring_bitmap_t))); - for (int32_t i = 0; i < num_containers; i++) { - switch (typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - bitset_container_t *bitset = (bitset_container_t *)arena_alloc( - &arena, sizeof(bitset_container_t)); - bitset->words = bitset_zone; - bitset->cardinality = counts[i] + UINT32_C(1); - rb->high_low_container.containers[i] = bitset; - bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; - break; - } - case RUN_CONTAINER_TYPE: { - run_container_t *run = (run_container_t *)arena_alloc( - &arena, sizeof(run_container_t)); - run->capacity = counts[i]; - run->n_runs = counts[i]; - run->runs = run_zone; - rb->high_low_container.containers[i] = run; - run_zone += run->n_runs; - break; + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, (uint16_t)pos2, &type2); + container_t *c = + container_lazy_ior(c1, type1, c2, type2, &result_type); + + if (c != c1) { // in this instance a new container was created, + // and we need to free the old one + container_free(c1, type1); + } + + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); } - case ARRAY_CONTAINER_TYPE: { - array_container_t *array = (array_container_t *)arena_alloc( - &arena, sizeof(array_container_t)); - array->capacity = counts[i] + UINT32_C(1); - array->cardinality = counts[i] + UINT32_C(1); - array->array = array_zone; - rb->high_low_container.containers[i] = array; - array_zone += counts[i] + UINT32_C(1); - break; + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + + } else { // s1 > s2 + container_t *c2 = 
ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + // container_t *c2_clone = container_clone(c2, type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); } - default: - roaring_free(arena); - return NULL; + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } } - - return rb; + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, is_cow(x2)); + } } -ALLOW_UNALIGNED -roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) { - char *start_of_buf = (char *)buf; - uint32_t cookie; - int32_t num_containers; - uint16_t *descriptive_headers; - uint32_t *offset_headers = NULL; - const char *run_flag_bitset = NULL; - bool hasrun = false; - - // deserialize cookie - memcpy(&cookie, buf, sizeof(uint32_t)); - buf += sizeof(uint32_t); - if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) { - memcpy(&num_containers, buf, sizeof(int32_t)); - buf += sizeof(int32_t); - descriptive_headers = (uint16_t *)buf; - buf += num_containers * 2 * sizeof(uint16_t); - offset_headers = (uint32_t *)buf; - buf += num_containers * sizeof(uint32_t); - } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) { - num_containers = (cookie >> 16) + 1; - hasrun = true; - int32_t run_flag_bitset_size = (num_containers + 7) / 8; - run_flag_bitset = buf; - buf += run_flag_bitset_size; - descriptive_headers = (uint16_t *)buf; - buf += num_containers * 2 * sizeof(uint16_t); - if (num_containers >= NO_OFFSET_THRESHOLD) { - offset_headers = (uint32_t *)buf; - buf += num_containers * sizeof(uint32_t); - } - } else { - return NULL; +roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const 
int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + return roaring_bitmap_copy(x2); + } + if (0 == length2) { + return roaring_bitmap_copy(x1); } + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + container_t *c = + container_lazy_xor(c1, type1, c2, type2, &result_type); - // calculate total size for allocation - int32_t num_bitset_containers = 0; - int32_t num_run_containers = 0; - int32_t num_array_containers = 0; + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, result_type); + } - for (int32_t i = 0; i < num_containers; i++) { - uint16_t tmp; - memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp)); - uint32_t cardinality = tmp + 1; - bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); - bool isrun = false; - if (hasrun) { - if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { - isbitmap = false; - isrun = true; + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + 
ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); } - } + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - if (isbitmap) { - num_bitset_containers++; - } else if (isrun) { - num_run_containers++; - } else { - num_array_containers++; + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } } - - size_t alloc_size = 0; - alloc_size += sizeof(roaring_bitmap_t); - alloc_size += num_containers * sizeof(container_t *); - alloc_size += num_bitset_containers * sizeof(bitset_container_t); - alloc_size += num_run_containers * sizeof(run_container_t); - alloc_size += num_array_containers * sizeof(array_container_t); - alloc_size += num_containers * sizeof(uint16_t); // keys - alloc_size += num_containers * sizeof(uint8_t); // typecodes - - // allocate bitmap and construct containers - char *arena = (char *)roaring_malloc(alloc_size); - if (arena == NULL) { - return NULL; + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); } + return answer; +} - roaring_bitmap_t *rb = - (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t)); - rb->high_low_container.flags = ROARING_FLAG_FROZEN; - rb->high_low_container.allocation_size = num_containers; - rb->high_low_container.size = num_containers; - rb->high_low_container.containers = (container_t 
**)arena_alloc( - &arena, sizeof(container_t *) * num_containers); - - uint16_t *keys = - (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t)); - uint8_t *typecodes = - (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t)); +void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + assert(x1 != x2); + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; - rb->high_low_container.keys = keys; - rb->high_low_container.typecodes = typecodes; + if (0 == length2) return; - for (int32_t i = 0; i < num_containers; i++) { - uint16_t tmp; - memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp)); - int32_t cardinality = tmp + 1; - bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); - bool isrun = false; - if (hasrun) { - if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; + } + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); - keys[i] = descriptive_headers[2 * i]; + // We do the computation "in place" only when c1 is not a shared + // container. Rationale: using a shared container safely with in + // place computation would require making a copy and then doing the + // computation in place which is likely less efficient than avoiding + // in place entirely and always generating a new container. 
- if (isbitmap) { - typecodes[i] = BITSET_CONTAINER_TYPE; - bitset_container_t *c = (bitset_container_t *)arena_alloc( - &arena, sizeof(bitset_container_t)); - c->cardinality = cardinality; - if (offset_headers != NULL) { - c->words = (uint64_t *)(start_of_buf + offset_headers[i]); + container_t *c; + if (type1 == SHARED_CONTAINER_TYPE) { + c = container_lazy_xor(c1, type1, c2, type2, &result_type); + shared_container_free(CAST_shared(c1)); // release } else { - c->words = (uint64_t *)buf; - buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + c = container_lazy_ixor(c1, type1, c2, type2, &result_type); } - rb->high_low_container.containers[i] = c; - } else if (isrun) { - typecodes[i] = RUN_CONTAINER_TYPE; - run_container_t *c = - (run_container_t *)arena_alloc(&arena, sizeof(run_container_t)); - c->capacity = cardinality; - uint16_t n_runs; - if (offset_headers != NULL) { - memcpy(&n_runs, start_of_buf + offset_headers[i], - sizeof(uint16_t)); - c->n_runs = n_runs; - c->runs = (rle16_t *)(start_of_buf + offset_headers[i] + - sizeof(uint16_t)); + + if (container_nonzero_cardinality(c, result_type)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); + ++pos1; } else { - memcpy(&n_runs, buf, sizeof(uint16_t)); - c->n_runs = n_runs; - buf += sizeof(uint16_t); - c->runs = (rle16_t *)buf; - buf += c->n_runs * sizeof(rle16_t); + container_free(c, result_type); + ra_remove_at_index(&x1->high_low_container, pos1); + --length1; } - rb->high_low_container.containers[i] = c; - } else { - typecodes[i] = ARRAY_CONTAINER_TYPE; - array_container_t *c = (array_container_t *)arena_alloc( - &arena, sizeof(array_container_t)); - c->cardinality = cardinality; - c->capacity = cardinality; - if (offset_headers != NULL) { - c->array = (uint16_t *)(start_of_buf + offset_headers[i]); - } else { - c->array = (uint16_t *)buf; - buf += cardinality * sizeof(uint16_t); + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = 
ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + // container_t *c2_clone = container_clone(c2, type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); } - rb->high_low_container.containers[i] = c; + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } } + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, is_cow(x2)); + } +} - return rb; +void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) { + roaring_array_t *ra = &r->high_low_container; + + for (int i = 0; i < ra->size; ++i) { + const uint8_t old_type = ra->typecodes[i]; + container_t *old_c = ra->containers[i]; + uint8_t new_type = old_type; + container_t *new_c = container_repair_after_lazy(old_c, &new_type); + ra->containers[i] = new_c; + ra->typecodes[i] = new_type; + } } -bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { - uint32_t max_value = roaring_bitmap_maximum(r); - size_t new_array_size = (size_t)(((uint64_t)max_value + 63) / 64); - bool resize_ok = bitset_resize(bitset, new_array_size, true); - if (!resize_ok) { - return false; +/** + * roaring_bitmap_rank returns the number of integers that are smaller or equal + * to x. 
+ */ +uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { + uint64_t size = 0; + uint32_t xhigh = x >> 16; + for (int i = 0; i < bm->high_low_container.size; i++) { + uint32_t key = bm->high_low_container.keys[i]; + if (xhigh > key) { + size += + container_get_cardinality(bm->high_low_container.containers[i], + bm->high_low_container.typecodes[i]); + } else if (xhigh == key) { + return size + container_rank(bm->high_low_container.containers[i], + bm->high_low_container.typecodes[i], + x & 0xFFFF); + } else { + return size; + } } - const roaring_array_t *ra = &r->high_low_container; - for (int i = 0; i < ra->size; ++i) { - uint64_t *words = bitset->array + (ra->keys[i] << 10); - uint8_t type = ra->typecodes[i]; - const container_t *c = ra->containers[i]; - if (type == SHARED_CONTAINER_TYPE) { - c = container_unwrap_shared(c, &type); + return size; +} +void roaring_bitmap_rank_many(const roaring_bitmap_t *bm, const uint32_t *begin, + const uint32_t *end, uint64_t *ans) { + uint64_t size = 0; + + int i = 0; + const uint32_t *iter = begin; + while (i < bm->high_low_container.size && iter != end) { + uint32_t x = *iter; + uint32_t xhigh = x >> 16; + uint32_t key = bm->high_low_container.keys[i]; + if (xhigh > key) { + size += + container_get_cardinality(bm->high_low_container.containers[i], + bm->high_low_container.typecodes[i]); + i++; + } else if (xhigh == key) { + uint32_t consumed = container_rank_many( + bm->high_low_container.containers[i], + bm->high_low_container.typecodes[i], size, iter, end, ans); + iter += consumed; + ans += consumed; + } else { + *(ans++) = size; + iter++; } - switch (type) { - case BITSET_CONTAINER_TYPE: { - size_t max_word_index = new_array_size - (ra->keys[i] << 10); - if (max_word_index > 1024) { - max_word_index = 1024; - } - const bitset_container_t *src = const_CAST_bitset(c); - memcpy(words, src->words, max_word_index * sizeof(uint64_t)); - } break; - case ARRAY_CONTAINER_TYPE: { - const array_container_t *src = 
const_CAST_array(c); - bitset_set_list(words, src->array, src->cardinality); - } break; - case RUN_CONTAINER_TYPE: { - const run_container_t *src = const_CAST_run(c); - for (int32_t rlepos = 0; rlepos < src->n_runs; ++rlepos) { - rle16_t rle = src->runs[rlepos]; - bitset_set_lenrange(words, rle.value, rle.length); - } - } break; - default: - roaring_unreachable; + } +} + +/** + * roaring_bitmap_get_index returns the index of x, if not exsist return -1. + */ +int64_t roaring_bitmap_get_index(const roaring_bitmap_t *bm, uint32_t x) { + int64_t index = 0; + const uint16_t xhigh = x >> 16; + int32_t high_idx = ra_get_index(&bm->high_low_container, xhigh); + if (high_idx < 0) return -1; + + for (int i = 0; i < bm->high_low_container.size; i++) { + uint32_t key = bm->high_low_container.keys[i]; + if (xhigh > key) { + index += + container_get_cardinality(bm->high_low_container.containers[i], + bm->high_low_container.typecodes[i]); + } else if (xhigh == key) { + int32_t low_idx = container_get_index( + bm->high_low_container.containers[high_idx], + bm->high_low_container.typecodes[high_idx], x & 0xFFFF); + if (low_idx < 0) return -1; + return index + low_idx; + } else { + return -1; } } - return true; + return index; } -#ifdef __cplusplus -} +/** + * roaring_bitmap_smallest returns the smallest value in the set. + * Returns UINT32_MAX if the set is empty. 
+ */ +uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { + if (bm->high_low_container.size > 0) { + container_t *c = bm->high_low_container.containers[0]; + uint8_t type = bm->high_low_container.typecodes[0]; + uint32_t key = bm->high_low_container.keys[0]; + uint32_t lowvalue = container_minimum(c, type); + return lowvalue | (key << 16); + } + return UINT32_MAX; } -} // extern "C" { namespace roaring { -#endif -/* end file src/roaring.c */ -/* begin file src/roaring64.c */ -#include -#include -#include -#include - - -// For serialization / deserialization -// containers.h last to avoid conflict with ROARING_CONTAINER_T. - -#ifdef __cplusplus -using namespace ::roaring::internal; - -extern "C" { -namespace roaring { -namespace api { -#endif - -// TODO: Copy on write. -// TODO: Error on failed allocation. -typedef struct roaring64_bitmap_s { - art_t art; - uint8_t flags; -} roaring64_bitmap_t; +/** + * roaring_bitmap_smallest returns the greatest value in the set. + * Returns 0 if the set is empty. + */ +uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { + if (bm->high_low_container.size > 0) { + container_t *container = + bm->high_low_container.containers[bm->high_low_container.size - 1]; + uint8_t typecode = + bm->high_low_container.typecodes[bm->high_low_container.size - 1]; + uint32_t key = + bm->high_low_container.keys[bm->high_low_container.size - 1]; + uint32_t lowvalue = container_maximum(container, typecode); + return lowvalue | (key << 16); + } + return 0; +} -// Leaf type of the ART used to keep the high 48 bits of each entry. -typedef struct roaring64_leaf_s { - art_val_t _pad; - uint8_t typecode; +bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, + uint32_t *element) { container_t *container; -} roaring64_leaf_t; - -// Alias to make it easier to work with, since it's an internal-only type -// anyway. -typedef struct roaring64_leaf_s leaf_t; - -// Iterator struct to hold iteration state. 
-typedef struct roaring64_iterator_s { - const roaring64_bitmap_t *parent; - art_iterator_t art_it; - roaring_container_iterator_t container_it; - uint64_t high48; // Key that art_it points to. - - uint64_t value; - bool has_value; - - // If has_value is false, then the iterator is saturated. This field - // indicates the direction of saturation. If true, there are no more values - // in the forward direction. If false, there are no more values in the - // backward direction. - bool saturated_forward; -} roaring64_iterator_t; + uint8_t typecode; + uint16_t key; + uint32_t start_rank = 0; + int i = 0; + bool valid = false; + while (!valid && i < bm->high_low_container.size) { + container = bm->high_low_container.containers[i]; + typecode = bm->high_low_container.typecodes[i]; + valid = + container_select(container, typecode, &start_rank, rank, element); + i++; + } -// Splits the given uint64 key into high 48 bit and low 16 bit components. -// Expects high48_out to be of length ART_KEY_BYTES. -static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { - uint64_t tmp = croaring_htobe64(key); - memcpy(high48_out, (uint8_t *)(&tmp), ART_KEY_BYTES); - return (uint16_t)key; + if (valid) { + key = bm->high_low_container.keys[i - 1]; + *element |= (((uint32_t)key) << 16); // w/o cast, key promotes signed + return true; + } else + return false; } -// Recombines the high 48 bit and low 16 bit components into a uint64 key. -// Expects high48_out to be of length ART_KEY_BYTES. 
-static inline uint64_t combine_key(const uint8_t high48[], uint16_t low16) { - uint64_t result = 0; - memcpy((uint8_t *)(&result), high48, ART_KEY_BYTES); - return croaring_be64toh(result) | low16; -} +bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + uint64_t answer = 0; + int pos1 = 0, pos2 = 0; -static inline uint64_t minimum(uint64_t a, uint64_t b) { - return (a < b) ? a : b; -} + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = + ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + const uint16_t s2 = + ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); -static inline leaf_t *create_leaf(container_t *container, uint8_t typecode) { - leaf_t *leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - leaf->container = container; - leaf->typecode = typecode; - return leaf; + if (s1 == s2) { + uint8_t type1, type2; + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + if (container_intersect(c1, type1, c2, type2)) return true; + ++pos1; + ++pos2; + } else if (s1 < s2) { // s1 < s2 + pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + } + } + return answer != 0; } -static inline leaf_t *copy_leaf_container(const leaf_t *leaf) { - leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - result_leaf->typecode = leaf->typecode; - // get_copy_of_container modifies the typecode passed in. - result_leaf->container = get_copy_of_container( - leaf->container, &result_leaf->typecode, /*copy_on_write=*/false); - return result_leaf; +bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x, + uint64_t y) { + if (x >= y) { + // Empty range. 
+ return false; + } + roaring_uint32_iterator_t it; + roaring_iterator_init(bm, &it); + if (!roaring_uint32_iterator_move_equalorlarger(&it, (uint32_t)x)) { + // No values above x. + return false; + } + if (it.current_value >= y) { + // No values below y. + return false; + } + return true; } -static inline void free_leaf(leaf_t *leaf) { roaring_free(leaf); } +uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + uint64_t answer = 0; + int pos1 = 0, pos2 = 0; + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = + ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + const uint16_t s2 = + ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); -static inline int compare_high48(art_key_chunk_t key1[], - art_key_chunk_t key2[]) { - return art_compare_keys(key1, key2); + if (s1 == s2) { + uint8_t type1, type2; + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + answer += container_and_cardinality(c1, type1, c2, type2); + ++pos1; + ++pos2; + } else if (s1 < s2) { // s1 < s2 + pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + } + } + return answer; } -static inline bool roaring64_iterator_init_at_leaf_first( - roaring64_iterator_t *it) { - it->high48 = combine_key(it->art_it.key, 0); - leaf_t *leaf = (leaf_t *)it->art_it.value; - uint16_t low16 = 0; - it->container_it = - container_init_iterator(leaf->container, leaf->typecode, &low16); - it->value = it->high48 | low16; - return (it->has_value = true); +double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const 
uint64_t c2 = roaring_bitmap_get_cardinality(x2); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return (double)inter / (double)(c1 + c2 - inter); } -static inline bool roaring64_iterator_init_at_leaf_last( - roaring64_iterator_t *it) { - it->high48 = combine_key(it->art_it.key, 0); - leaf_t *leaf = (leaf_t *)it->art_it.value; - uint16_t low16 = 0; - it->container_it = - container_init_iterator_last(leaf->container, leaf->typecode, &low16); - it->value = it->high48 | low16; - return (it->has_value = true); +uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const uint64_t c2 = roaring_bitmap_get_cardinality(x2); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return c1 + c2 - inter; } -static inline roaring64_iterator_t *roaring64_iterator_init_at( - const roaring64_bitmap_t *r, roaring64_iterator_t *it, bool first) { - it->parent = r; - it->art_it = art_init_iterator(&r->art, first); - it->has_value = it->art_it.value != NULL; - if (it->has_value) { - if (first) { - roaring64_iterator_init_at_leaf_first(it); - } else { - roaring64_iterator_init_at_leaf_last(it); - } - } else { - it->saturated_forward = first; - } - return it; +uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return c1 - inter; } -roaring64_bitmap_t *roaring64_bitmap_create(void) { - roaring64_bitmap_t *r = - (roaring64_bitmap_t *)roaring_malloc(sizeof(roaring64_bitmap_t)); - r->art.root = NULL; - r->flags = 0; - return r; +uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const uint64_t c2 = roaring_bitmap_get_cardinality(x2); + const uint64_t inter = 
roaring_bitmap_and_cardinality(x1, x2); + return c1 + c2 - 2 * inter; } -void roaring64_bitmap_free(roaring64_bitmap_t *r) { - if (!r) { - return; - } - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); - art_iterator_next(&it); - } - art_free(&r->art); - roaring_free(r); -} +bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + /* + * the next function call involves a binary search and lots of branching. + */ + int32_t i = ra_get_index(&r->high_low_container, hb); + if (i < 0) return false; -roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) { - roaring64_bitmap_t *result = roaring64_bitmap_create(); + uint8_t typecode; + // next call ought to be cheap + container_t *container = ra_get_container_at_index(&r->high_low_container, + (uint16_t)i, &typecode); + // rest might be a tad expensive, possibly involving another round of binary + // search + return container_contains(container, val & 0xFFFF, typecode); +} - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - uint8_t result_typecode = leaf->typecode; - container_t *result_container = get_copy_of_container( - leaf->container, &result_typecode, /*copy_on_write=*/false); - leaf_t *result_leaf = create_leaf(result_container, result_typecode); - art_insert(&result->art, it.key, (art_val_t *)result_leaf); - art_iterator_next(&it); +/** + * Check whether a range of values from range_start (included) to range_end + * (excluded) is present + */ +bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, + uint64_t range_start, uint64_t range_end) { + if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { + return true; } - return result; + return roaring_bitmap_contains_range_closed(r, 
(uint32_t)range_start, + (uint32_t)(range_end - 1)); } -roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, - uint64_t step) { - if (step == 0 || max <= min) { - return NULL; +/** + * Check whether a range of values from range_start (included) to range_end + * (included) is present + */ +bool roaring_bitmap_contains_range_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end) { + if (range_start > range_end) { + return true; + } // empty range are always contained! + if (range_end == range_start) { + return roaring_bitmap_contains(r, (uint32_t)range_start); } - roaring64_bitmap_t *r = roaring64_bitmap_create(); - if (step >= (1 << 16)) { - // Only one value per container. - for (uint64_t value = min; value < max; value += step) { - roaring64_bitmap_add(r, value); - if (value > UINT64_MAX - step) { - break; - } - } - return r; + uint16_t hb_rs = (uint16_t)(range_start >> 16); + uint16_t hb_re = (uint16_t)(range_end >> 16); + const int32_t span = hb_re - hb_rs; + const int32_t hlc_sz = ra_get_size(&r->high_low_container); + if (hlc_sz < span + 1) { + return false; } - do { - uint64_t high_bits = min & 0xFFFFFFFFFFFF0000; - uint16_t container_min = min & 0xFFFF; - uint32_t container_max = (uint32_t)minimum(max - high_bits, 1 << 16); - - uint8_t typecode; - container_t *container = container_from_range( - &typecode, container_min, container_max, (uint16_t)step); - - uint8_t high48[ART_KEY_BYTES]; - split_key(min, high48); - leaf_t *leaf = create_leaf(container, typecode); - art_insert(&r->art, high48, (art_val_t *)leaf); - - uint64_t gap = container_max - container_min + step - 1; - uint64_t increment = gap - (gap % step); - if (min > UINT64_MAX - increment) { - break; + int32_t is = ra_get_index(&r->high_low_container, hb_rs); + int32_t ie = ra_get_index(&r->high_low_container, hb_re); + if ((ie < 0) || (is < 0) || ((ie - is) != span) || ie >= hlc_sz) { + return false; + } + const uint32_t lb_rs = range_start & 0xFFFF; 
+ const uint32_t lb_re = (range_end & 0xFFFF) + 1; + uint8_t type; + container_t *c = + ra_get_container_at_index(&r->high_low_container, (uint16_t)is, &type); + if (hb_rs == hb_re) { + return container_contains_range(c, lb_rs, lb_re, type); + } + if (!container_contains_range(c, lb_rs, 1 << 16, type)) { + return false; + } + c = ra_get_container_at_index(&r->high_low_container, (uint16_t)ie, &type); + if (!container_contains_range(c, 0, lb_re, type)) { + return false; + } + for (int32_t i = is + 1; i < ie; ++i) { + c = ra_get_container_at_index(&r->high_low_container, (uint16_t)i, + &type); + if (!container_is_full(c, type)) { + return false; } - min += increment; - } while (min < max); - return r; + } + return true; } -roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, - const uint64_t *vals) { - roaring64_bitmap_t *r = roaring64_bitmap_create(); - roaring64_bitmap_add_many(r, n_args, vals); - return r; +bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2) { + return (roaring_bitmap_get_cardinality(r2) > + roaring_bitmap_get_cardinality(r1) && + roaring_bitmap_is_subset(r1, r2)); } -roaring64_bitmap_t *roaring64_bitmap_of(size_t n_args, ...) { - roaring64_bitmap_t *r = roaring64_bitmap_create(); - roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; - va_list ap; - va_start(ap, n_args); - for (size_t i = 0; i < n_args; i++) { - uint64_t val = va_arg(ap, uint64_t); - roaring64_bitmap_add_bulk(r, &context, val); - } - va_end(ap); - return r; -} +/* + * FROZEN SERIALIZATION FORMAT DESCRIPTION + * + * -- (beginning must be aligned by 32 bytes) -- + * uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * + * num_bitset_containers] rle16_t[total number of rle elements in + * all run containers] uint16_t[total number of array elements in + * all array containers] uint16_t[num_containers] + * uint16_t[num_containers] uint8_t[num_containers]
+ * <header> uint32_t + * + *
<header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits) + * and the number of containers (17 bits). + * + * <counts> stores number of elements for every container. + * Its meaning depends on container type. + * For array and bitset containers, this value is the container cardinality + * minus one. For run container, it is the number of rle16_t elements (n_runs). + * + * <bitset_data>,<run_data>,<array_data> are flat arrays of elements of + * all containers of respective type. + * + * <*_data> and <keys> are kept close together because they are not accessed + * during deserialization. This may reduce IO in case of large mmapped bitmaps. + * All members have their native alignments during deserialization except + * <header>
, which is not guaranteed to be aligned by 4 bytes. + */ -static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, - uint8_t *high48, - uint16_t low16, - leaf_t *leaf) { - if (leaf != NULL) { - uint8_t typecode2; - container_t *container2 = - container_add(leaf->container, low16, leaf->typecode, &typecode2); - if (container2 != leaf->container) { - container_free(leaf->container, leaf->typecode); - leaf->container = container2; - leaf->typecode = typecode2; +size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) { + const roaring_array_t *ra = &rb->high_low_container; + size_t num_bytes = 0; + for (int32_t i = 0; i < ra->size; i++) { + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(ra->containers[i]); + num_bytes += rc->n_runs * sizeof(rle16_t); + break; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = + const_CAST_array(ra->containers[i]); + num_bytes += ac->cardinality * sizeof(uint16_t); + break; + } + default: + roaring_unreachable; } - return leaf; - } else { - array_container_t *ac = array_container_create(); - uint8_t typecode; - container_t *container = - container_add(ac, low16, ARRAY_CONTAINER_TYPE, &typecode); - assert(ac == container); - leaf = create_leaf(container, typecode); - art_insert(&r->art, high48, (art_val_t *)leaf); - return leaf; } + num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes + num_bytes += 4; // header + return num_bytes; } -void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - containerptr_roaring64_bitmap_add(r, high48, low16, leaf); +inline static void *arena_alloc(char **arena, size_t num_bytes) { + char *res = *arena; + *arena += num_bytes; + return res; } 
-bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); +void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) { + /* + * Note: we do not require user to supply a specifically aligned buffer. + * Thus we have to use memcpy() everywhere. + */ - int old_cardinality = 0; - if (leaf != NULL) { - old_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); - } - leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); - int new_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); - return old_cardinality != new_cardinality; -} + const roaring_array_t *ra = &rb->high_low_container; -void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, - roaring64_bulk_context_t *context, - uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - if (context->leaf != NULL && - compare_high48(context->high_bytes, high48) == 0) { - // We're at a container with the correct high bits. 
- uint8_t typecode2; - container_t *container2 = - container_add(context->leaf->container, low16, - context->leaf->typecode, &typecode2); - if (container2 != context->leaf->container) { - container_free(context->leaf->container, context->leaf->typecode); - context->leaf->container = container2; - context->leaf->typecode = typecode2; + size_t bitset_zone_size = 0; + size_t run_zone_size = 0; + size_t array_zone_size = 0; + for (int32_t i = 0; i < ra->size; i++) { + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + bitset_zone_size += + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(ra->containers[i]); + run_zone_size += rc->n_runs * sizeof(rle16_t); + break; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = + const_CAST_array(ra->containers[i]); + array_zone_size += ac->cardinality * sizeof(uint16_t); + break; + } + default: + roaring_unreachable; } - } else { - // We're not positioned anywhere yet or the high bits of the key - // differ. 
- leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - context->leaf = - containerptr_roaring64_bitmap_add(r, high48, low16, leaf); - memcpy(context->high_bytes, high48, ART_KEY_BYTES); } -} -void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, - const uint64_t *vals) { - if (n_args == 0) { - return; - } - const uint64_t *end = vals + n_args; - roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; - for (const uint64_t *current_val = vals; current_val != end; - current_val++) { - roaring64_bitmap_add_bulk(r, &context, *current_val); - } -} + uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size); + rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size); + uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size); + uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size); + uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size); + uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size); + uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4); -static inline void add_range_closed_at(art_t *art, uint8_t *high48, - uint16_t min, uint16_t max) { - leaf_t *leaf = (leaf_t *)art_find(art, high48); - if (leaf != NULL) { - uint8_t typecode2; - container_t *container2 = container_add_range( - leaf->container, leaf->typecode, min, max, &typecode2); - if (container2 != leaf->container) { - container_free(leaf->container, leaf->typecode); - leaf->container = container2; - leaf->typecode = typecode2; + for (int32_t i = 0; i < ra->size; i++) { + uint16_t count; + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = + const_CAST_bitset(ra->containers[i]); + memcpy(bitset_zone, bc->words, + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); + bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; + if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) { + count = (uint16_t)(bc->cardinality - 1); + } else { + count = + 
(uint16_t)(bitset_container_compute_cardinality(bc) - + 1); + } + break; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(ra->containers[i]); + size_t num_bytes = rc->n_runs * sizeof(rle16_t); + memcpy(run_zone, rc->runs, num_bytes); + run_zone += rc->n_runs; + count = (uint16_t)rc->n_runs; + break; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = + const_CAST_array(ra->containers[i]); + size_t num_bytes = ac->cardinality * sizeof(uint16_t); + memcpy(array_zone, ac->array, num_bytes); + array_zone += ac->cardinality; + count = (uint16_t)(ac->cardinality - 1); + break; + } + default: + roaring_unreachable; } - return; + memcpy(&count_zone[i], &count, 2); } - uint8_t typecode; - // container_add_range is inclusive, but `container_range_of_ones` is - // exclusive. - container_t *container = container_range_of_ones(min, max + 1, &typecode); - leaf = create_leaf(container, typecode); - art_insert(art, high48, (art_val_t *)leaf); + memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t)); + memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t)); + uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; + memcpy(header_zone, &header, 4); } -void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, - uint64_t max) { - if (min >= max) { - return; +const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, + size_t length) { + if ((uintptr_t)buf % 32 != 0) { + return NULL; } - roaring64_bitmap_add_range_closed(r, min, max - 1); -} -void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, - uint64_t max) { - if (min > max) { - return; + // cookie and num_containers + if (length < 4) { + return NULL; } - - art_t *art = &r->art; - uint8_t min_high48[ART_KEY_BYTES]; - uint16_t min_low16 = split_key(min, min_high48); - uint8_t max_high48[ART_KEY_BYTES]; - uint16_t max_low16 = split_key(max, max_high48); - if (compare_high48(min_high48, max_high48) == 0) { - // Only populate range 
within one container. - add_range_closed_at(art, min_high48, min_low16, max_low16); - return; + uint32_t header; + memcpy(&header, buf + length - 4, 4); // header may be misaligned + if ((header & 0x7FFF) != FROZEN_COOKIE) { + return NULL; } + int32_t num_containers = (header >> 15); - // Populate a range across containers. Fill intermediate containers - // entirely. - add_range_closed_at(art, min_high48, min_low16, 0xffff); - uint64_t min_high_bits = min >> 16; - uint64_t max_high_bits = max >> 16; - for (uint64_t current = min_high_bits + 1; current < max_high_bits; - ++current) { - uint8_t current_high48[ART_KEY_BYTES]; - split_key(current << 16, current_high48); - add_range_closed_at(art, current_high48, 0, 0xffff); + // typecodes, counts and keys + if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { + return NULL; } - add_range_closed_at(art, max_high48, 0, max_low16); -} + uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); + uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); + uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); -bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - if (leaf != NULL) { - return container_contains(leaf->container, low16, leaf->typecode); + // {bitset,array,run}_zone + int32_t num_bitset_containers = 0; + int32_t num_run_containers = 0; + int32_t num_array_containers = 0; + size_t bitset_zone_size = 0; + size_t run_zone_size = 0; + size_t array_zone_size = 0; + for (int32_t i = 0; i < num_containers; i++) { + switch (typecodes[i]) { + case BITSET_CONTAINER_TYPE: + num_bitset_containers++; + bitset_zone_size += + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + case RUN_CONTAINER_TYPE: + num_run_containers++; + run_zone_size += counts[i] * sizeof(rle16_t); + break; + case ARRAY_CONTAINER_TYPE: + 
num_array_containers++; + array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); + break; + default: + return NULL; + } } - return false; -} - -bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, - uint64_t max) { - if (min >= max) { - return true; + if (length != bitset_zone_size + run_zone_size + array_zone_size + + 5 * num_containers + 4) { + return NULL; } + uint64_t *bitset_zone = (uint64_t *)(buf); + rle16_t *run_zone = (rle16_t *)(buf + bitset_zone_size); + uint16_t *array_zone = (uint16_t *)(buf + bitset_zone_size + run_zone_size); - uint8_t min_high48[ART_KEY_BYTES]; - uint16_t min_low16 = split_key(min, min_high48); - uint8_t max_high48[ART_KEY_BYTES]; - uint16_t max_low16 = split_key(max, max_high48); - uint64_t max_high48_bits = (max - 1) & 0xFFFFFFFFFFFF0000; // Inclusive + size_t alloc_size = 0; + alloc_size += sizeof(roaring_bitmap_t); + alloc_size += num_containers * sizeof(container_t *); + alloc_size += num_bitset_containers * sizeof(bitset_container_t); + alloc_size += num_run_containers * sizeof(run_container_t); + alloc_size += num_array_containers * sizeof(array_container_t); - art_iterator_t it = art_lower_bound(&r->art, min_high48); - if (it.value == NULL || combine_key(it.key, 0) > min) { - return false; + char *arena = (char *)roaring_malloc(alloc_size); + if (arena == NULL) { + return NULL; } - uint64_t prev_high48_bits = min & 0xFFFFFFFFFFFF0000; - while (it.value != NULL) { - uint64_t current_high48_bits = combine_key(it.key, 0); - if (current_high48_bits > max_high48_bits) { - // We've passed the end of the range with all containers containing - // the range. - return true; - } - if (current_high48_bits - prev_high48_bits > 0x10000) { - // There is a gap in the iterator that falls in the range. 
- return false; - } - - leaf_t *leaf = (leaf_t *)it.value; - uint32_t container_min = 0; - if (compare_high48(it.key, min_high48) == 0) { - container_min = min_low16; - } - uint32_t container_max = 0xFFFF + 1; // Exclusive - if (compare_high48(it.key, max_high48) == 0) { - container_max = max_low16; - } - // For the first and last containers we use container_contains_range, - // for the intermediate containers we can use container_is_full. - if (container_min == 0 && container_max == 0xFFFF + 1) { - if (!container_is_full(leaf->container, leaf->typecode)) { - return false; + roaring_bitmap_t *rb = + (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t)); + rb->high_low_container.flags = ROARING_FLAG_FROZEN; + rb->high_low_container.allocation_size = num_containers; + rb->high_low_container.size = num_containers; + rb->high_low_container.keys = (uint16_t *)keys; + rb->high_low_container.typecodes = (uint8_t *)typecodes; + rb->high_low_container.containers = (container_t **)arena_alloc( + &arena, sizeof(container_t *) * num_containers); + // Ensure offset of high_low_container.containers is known distance used in + // C++ wrapper. sizeof(roaring_bitmap_t) is used as it is the size of the + // only allocation that precedes high_low_container.containers. If this is + // changed (new allocation or changed order), this offset will also need to + // be changed in the C++ wrapper. 
+ assert(rb == + (roaring_bitmap_t *)((char *)rb->high_low_container.containers - + sizeof(roaring_bitmap_t))); + for (int32_t i = 0; i < num_containers; i++) { + switch (typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *bitset = (bitset_container_t *)arena_alloc( + &arena, sizeof(bitset_container_t)); + bitset->words = bitset_zone; + bitset->cardinality = counts[i] + UINT32_C(1); + rb->high_low_container.containers[i] = bitset; + bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; + break; + } + case RUN_CONTAINER_TYPE: { + run_container_t *run = (run_container_t *)arena_alloc( + &arena, sizeof(run_container_t)); + run->capacity = counts[i]; + run->n_runs = counts[i]; + run->runs = run_zone; + rb->high_low_container.containers[i] = run; + run_zone += run->n_runs; + break; } - } else if (!container_contains_range(leaf->container, container_min, - container_max, leaf->typecode)) { - return false; - } - prev_high48_bits = current_high48_bits; - art_iterator_next(&it); - } - return prev_high48_bits == max_high48_bits; -} - -bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, - roaring64_bulk_context_t *context, - uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - - if (context->leaf == NULL || - art_compare_keys(context->high_bytes, high48) != 0) { - // We're not positioned anywhere yet or the high bits of the key - // differ. 
- leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - if (leaf == NULL) { - return false; - } - context->leaf = leaf; - memcpy(context->high_bytes, high48, ART_KEY_BYTES); - } - return container_contains(context->leaf->container, low16, - context->leaf->typecode); -} - -bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, - uint64_t *element) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - uint64_t start_rank = 0; - while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - uint64_t cardinality = - container_get_cardinality(leaf->container, leaf->typecode); - if (start_rank + cardinality > rank) { - uint32_t uint32_start = 0; - uint32_t uint32_rank = rank - start_rank; - uint32_t uint32_element = 0; - if (container_select(leaf->container, leaf->typecode, &uint32_start, - uint32_rank, &uint32_element)) { - *element = combine_key(it.key, (uint16_t)uint32_element); - return true; + case ARRAY_CONTAINER_TYPE: { + array_container_t *array = (array_container_t *)arena_alloc( + &arena, sizeof(array_container_t)); + array->capacity = counts[i] + UINT32_C(1); + array->cardinality = counts[i] + UINT32_C(1); + array->array = array_zone; + rb->high_low_container.containers[i] = array; + array_zone += counts[i] + UINT32_C(1); + break; } - return false; + default: + roaring_free(arena); + return NULL; } - start_rank += cardinality; - art_iterator_next(&it); } - return false; + + return rb; } -uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); +ALLOW_UNALIGNED +roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) { + char *start_of_buf = (char *)buf; + uint32_t cookie; + int32_t num_containers; + uint16_t *descriptive_headers; + uint32_t *offset_headers = NULL; + const char *run_flag_bitset = NULL; + bool hasrun = false; - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - uint64_t rank = 0; 
- while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - int compare_result = compare_high48(it.key, high48); - if (compare_result < 0) { - rank += container_get_cardinality(leaf->container, leaf->typecode); - } else if (compare_result == 0) { - return rank + - container_rank(leaf->container, leaf->typecode, low16); - } else { - return rank; + // deserialize cookie + memcpy(&cookie, buf, sizeof(uint32_t)); + buf += sizeof(uint32_t); + if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) { + memcpy(&num_containers, buf, sizeof(int32_t)); + buf += sizeof(int32_t); + descriptive_headers = (uint16_t *)buf; + buf += num_containers * 2 * sizeof(uint16_t); + offset_headers = (uint32_t *)buf; + buf += num_containers * sizeof(uint32_t); + } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) { + num_containers = (cookie >> 16) + 1; + hasrun = true; + int32_t run_flag_bitset_size = (num_containers + 7) / 8; + run_flag_bitset = buf; + buf += run_flag_bitset_size; + descriptive_headers = (uint16_t *)buf; + buf += num_containers * 2 * sizeof(uint16_t); + if (num_containers >= NO_OFFSET_THRESHOLD) { + offset_headers = (uint32_t *)buf; + buf += num_containers * sizeof(uint32_t); } - art_iterator_next(&it); + } else { + return NULL; } - return rank; -} -bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, - uint64_t *out_index) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); + // calculate total size for allocation + int32_t num_bitset_containers = 0; + int32_t num_run_containers = 0; + int32_t num_array_containers = 0; - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - uint64_t index = 0; - while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - int compare_result = compare_high48(it.key, high48); - if (compare_result < 0) { - index += container_get_cardinality(leaf->container, leaf->typecode); - } else if (compare_result == 0) { - int index16 = - container_get_index(leaf->container, leaf->typecode, 
low16); - if (index16 < 0) { - return false; + for (int32_t i = 0; i < num_containers; i++) { + uint16_t tmp; + memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp)); + uint32_t cardinality = tmp + 1; + bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); + bool isrun = false; + if (hasrun) { + if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { + isbitmap = false; + isrun = true; } - *out_index = index + index16; - return true; + } + + if (isbitmap) { + num_bitset_containers++; + } else if (isrun) { + num_run_containers++; } else { - return false; + num_array_containers++; } - art_iterator_next(&it); } - return false; -} -static inline leaf_t *containerptr_roaring64_bitmap_remove( - roaring64_bitmap_t *r, uint8_t *high48, uint16_t low16, leaf_t *leaf) { - if (leaf == NULL) { - return NULL; - } + size_t alloc_size = 0; + alloc_size += sizeof(roaring_bitmap_t); + alloc_size += num_containers * sizeof(container_t *); + alloc_size += num_bitset_containers * sizeof(bitset_container_t); + alloc_size += num_run_containers * sizeof(run_container_t); + alloc_size += num_array_containers * sizeof(array_container_t); + alloc_size += num_containers * sizeof(uint16_t); // keys + alloc_size += num_containers * sizeof(uint8_t); // typecodes - container_t *container = leaf->container; - uint8_t typecode = leaf->typecode; - uint8_t typecode2; - container_t *container2 = - container_remove(container, low16, typecode, &typecode2); - if (container2 != container) { - container_free(container, typecode); - leaf->container = container2; - leaf->typecode = typecode2; - } - if (!container_nonzero_cardinality(container2, typecode2)) { - container_free(container2, typecode2); - leaf = (leaf_t *)art_erase(&r->art, high48); - if (leaf != NULL) { - free_leaf(leaf); - } + // allocate bitmap and construct containers + char *arena = (char *)roaring_malloc(alloc_size); + if (arena == NULL) { return NULL; } - return leaf; -} -void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) { - 
art_t *art = &r->art; - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - - leaf_t *leaf = (leaf_t *)art_find(art, high48); - containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); -} + roaring_bitmap_t *rb = + (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t)); + rb->high_low_container.flags = ROARING_FLAG_FROZEN; + rb->high_low_container.allocation_size = num_containers; + rb->high_low_container.size = num_containers; + rb->high_low_container.containers = (container_t **)arena_alloc( + &arena, sizeof(container_t *) * num_containers); -bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) { - art_t *art = &r->art; - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - leaf_t *leaf = (leaf_t *)art_find(art, high48); + uint16_t *keys = + (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t)); + uint8_t *typecodes = + (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t)); - if (leaf == NULL) { - return false; - } - int old_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); - leaf = containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); - if (leaf == NULL) { - return true; - } - int new_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); - return new_cardinality != old_cardinality; -} + rb->high_low_container.keys = keys; + rb->high_low_container.typecodes = typecodes; -void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, - roaring64_bulk_context_t *context, - uint64_t val) { - art_t *art = &r->art; - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - if (context->leaf != NULL && - compare_high48(context->high_bytes, high48) == 0) { - // We're at a container with the correct high bits. 
- uint8_t typecode2; - container_t *container2 = - container_remove(context->leaf->container, low16, - context->leaf->typecode, &typecode2); - if (container2 != context->leaf->container) { - container_free(context->leaf->container, context->leaf->typecode); - context->leaf->container = container2; - context->leaf->typecode = typecode2; - } - if (!container_nonzero_cardinality(container2, typecode2)) { - leaf_t *leaf = (leaf_t *)art_erase(art, high48); - container_free(container2, typecode2); - free_leaf(leaf); + for (int32_t i = 0; i < num_containers; i++) { + uint16_t tmp; + memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp)); + int32_t cardinality = tmp + 1; + bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); + bool isrun = false; + if (hasrun) { + if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { + isbitmap = false; + isrun = true; + } } - } else { - // We're not positioned anywhere yet or the high bits of the key - // differ. - leaf_t *leaf = (leaf_t *)art_find(art, high48); - context->leaf = - containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); - memcpy(context->high_bytes, high48, ART_KEY_BYTES); - } -} - -void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, - const uint64_t *vals) { - if (n_args == 0) { - return; - } - const uint64_t *end = vals + n_args; - roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; - for (const uint64_t *current_val = vals; current_val != end; - current_val++) { - roaring64_bitmap_remove_bulk(r, &context, *current_val); - } -} -static inline void remove_range_closed_at(art_t *art, uint8_t *high48, - uint16_t min, uint16_t max) { - leaf_t *leaf = (leaf_t *)art_find(art, high48); - if (leaf == NULL) { - return; - } - uint8_t typecode2; - container_t *container2 = container_remove_range( - leaf->container, leaf->typecode, min, max, &typecode2); - if (container2 != leaf->container) { - container_free(leaf->container, leaf->typecode); - if (container2 != NULL) { - leaf->container = 
container2; - leaf->typecode = typecode2; + keys[i] = descriptive_headers[2 * i]; + + if (isbitmap) { + typecodes[i] = BITSET_CONTAINER_TYPE; + bitset_container_t *c = (bitset_container_t *)arena_alloc( + &arena, sizeof(bitset_container_t)); + c->cardinality = cardinality; + if (offset_headers != NULL) { + c->words = (uint64_t *)(start_of_buf + offset_headers[i]); + } else { + c->words = (uint64_t *)buf; + buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + } + rb->high_low_container.containers[i] = c; + } else if (isrun) { + typecodes[i] = RUN_CONTAINER_TYPE; + run_container_t *c = + (run_container_t *)arena_alloc(&arena, sizeof(run_container_t)); + c->capacity = cardinality; + uint16_t n_runs; + if (offset_headers != NULL) { + memcpy(&n_runs, start_of_buf + offset_headers[i], + sizeof(uint16_t)); + c->n_runs = n_runs; + c->runs = (rle16_t *)(start_of_buf + offset_headers[i] + + sizeof(uint16_t)); + } else { + memcpy(&n_runs, buf, sizeof(uint16_t)); + c->n_runs = n_runs; + buf += sizeof(uint16_t); + c->runs = (rle16_t *)buf; + buf += c->n_runs * sizeof(rle16_t); + } + rb->high_low_container.containers[i] = c; } else { - art_erase(art, high48); - free_leaf(leaf); + typecodes[i] = ARRAY_CONTAINER_TYPE; + array_container_t *c = (array_container_t *)arena_alloc( + &arena, sizeof(array_container_t)); + c->cardinality = cardinality; + c->capacity = cardinality; + if (offset_headers != NULL) { + c->array = (uint16_t *)(start_of_buf + offset_headers[i]); + } else { + c->array = (uint16_t *)buf; + buf += cardinality * sizeof(uint16_t); + } + rb->high_low_container.containers[i] = c; } } -} -void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min, - uint64_t max) { - if (min >= max) { - return; - } - roaring64_bitmap_remove_range_closed(r, min, max - 1); + return rb; } -void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, - uint64_t max) { - if (min > max) { - return; - } - - art_t *art = &r->art; - uint8_t 
min_high48[ART_KEY_BYTES]; - uint16_t min_low16 = split_key(min, min_high48); - uint8_t max_high48[ART_KEY_BYTES]; - uint16_t max_low16 = split_key(max, max_high48); - if (compare_high48(min_high48, max_high48) == 0) { - // Only remove a range within one container. - remove_range_closed_at(art, min_high48, min_low16, max_low16); - return; +bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { + uint32_t max_value = roaring_bitmap_maximum(r); + size_t new_array_size = (size_t)(max_value / 64 + 1); + bool resize_ok = bitset_resize(bitset, new_array_size, true); + if (!resize_ok) { + return false; } - - // Remove a range across containers. Remove intermediate containers - // entirely. - remove_range_closed_at(art, min_high48, min_low16, 0xffff); - - art_iterator_t it = art_upper_bound(art, min_high48); - while (it.value != NULL && art_compare_keys(it.key, max_high48) < 0) { - leaf_t *leaf = (leaf_t *)art_iterator_erase(art, &it); - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); + const roaring_array_t *ra = &r->high_low_container; + for (int i = 0; i < ra->size; ++i) { + uint64_t *words = bitset->array + (ra->keys[i] << 10); + uint8_t type = ra->typecodes[i]; + const container_t *c = ra->containers[i]; + if (type == SHARED_CONTAINER_TYPE) { + c = container_unwrap_shared(c, &type); + } + switch (type) { + case BITSET_CONTAINER_TYPE: { + size_t max_word_index = new_array_size - (ra->keys[i] << 10); + if (max_word_index > 1024) { + max_word_index = 1024; + } + const bitset_container_t *src = const_CAST_bitset(c); + memcpy(words, src->words, max_word_index * sizeof(uint64_t)); + } break; + case ARRAY_CONTAINER_TYPE: { + const array_container_t *src = const_CAST_array(c); + bitset_set_list(words, src->array, src->cardinality); + } break; + case RUN_CONTAINER_TYPE: { + const run_container_t *src = const_CAST_run(c); + for (int32_t rlepos = 0; rlepos < src->n_runs; ++rlepos) { + rle16_t rle = src->runs[rlepos]; + 
bitset_set_lenrange(words, rle.value, rle.length); + } + } break; + default: + roaring_unreachable; + } } - remove_range_closed_at(art, max_high48, 0, max_low16); -} - -void roaring64_bitmap_clear(roaring64_bitmap_t *r) { - roaring64_bitmap_remove_range_closed(r, 0, UINT64_MAX); + return true; } -uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - uint64_t cardinality = 0; - while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - cardinality += - container_get_cardinality(leaf->container, leaf->typecode); - art_iterator_next(&it); - } - return cardinality; +#ifdef __cplusplus } - -uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, - uint64_t min, uint64_t max) { - if (min >= max) { - return 0; - } - // Convert to a closed range - // No underflow here: passing the above condition implies min < max, so - // there is a number less than max - return roaring64_bitmap_range_closed_cardinality(r, min, max - 1); } +} // extern "C" { namespace roaring { +#endif +/* end file src/roaring.c */ +/* begin file src/roaring64.c */ +#include +#include +#include +#include +#include -uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, - uint64_t min, uint64_t max) { - if (min > max) { - return 0; - } - uint64_t cardinality = 0; - uint8_t min_high48[ART_KEY_BYTES]; - uint16_t min_low16 = split_key(min, min_high48); - uint8_t max_high48[ART_KEY_BYTES]; - uint16_t max_low16 = split_key(max, max_high48); +// For serialization / deserialization +// containers.h last to avoid conflict with ROARING_CONTAINER_T. - art_iterator_t it = art_lower_bound(&r->art, min_high48); - while (it.value != NULL) { - int max_compare_result = compare_high48(it.key, max_high48); - if (max_compare_result > 0) { - // We're outside the range. 
- break; - } +#define CROARING_ALIGN_BUF(buf, alignment) \ + (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1))) - leaf_t *leaf = (leaf_t *)it.value; - if (max_compare_result == 0) { - // We're at the max high key, add only the range up to the low - // 16 bits of max. - cardinality += - container_rank(leaf->container, leaf->typecode, max_low16); - } else { - // We're not yet at the max high key, add the full container - // range. - cardinality += - container_get_cardinality(leaf->container, leaf->typecode); - } - if (compare_high48(it.key, min_high48) == 0 && min_low16 > 0) { - // We're at the min high key, remove the range up to the low 16 - // bits of min. - cardinality -= - container_rank(leaf->container, leaf->typecode, min_low16 - 1); - } - art_iterator_next(&it); - } - return cardinality; -} +#define CROARING_BITSET_ALIGNMENT 64 -bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) { - return art_is_empty(&r->art); -} +#ifdef __cplusplus +using namespace ::roaring::internal; -uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - if (it.value == NULL) { - return UINT64_MAX; - } - leaf_t *leaf = (leaf_t *)it.value; - return combine_key(it.key, - container_minimum(leaf->container, leaf->typecode)); -} +extern "C" { +namespace roaring { +namespace api { +#endif -uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/false); - if (it.value == NULL) { - return 0; - } - leaf_t *leaf = (leaf_t *)it.value; - return combine_key(it.key, - container_maximum(leaf->container, leaf->typecode)); -} +// TODO: Copy on write. +// TODO: Error on failed allocation. 
-bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - bool has_run_container = false; - while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - uint8_t new_typecode; - // We don't need to free the existing container if a new one was - // created, convert_run_optimize does that internally. - leaf->container = convert_run_optimize(leaf->container, leaf->typecode, - &new_typecode); - leaf->typecode = new_typecode; - has_run_container |= new_typecode == RUN_CONTAINER_TYPE; - art_iterator_next(&it); - } - return has_run_container; -} +typedef struct roaring64_bitmap_s { + art_t art; + uint8_t flags; + uint64_t first_free; + uint64_t capacity; + container_t **containers; +} roaring64_bitmap_t; -/** - * (For advanced users.) - * Collect statistics about the bitmap - */ -void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, - roaring64_statistics_t *stat) { - memset(stat, 0, sizeof(*stat)); - stat->min_value = roaring64_bitmap_minimum(r); - stat->max_value = roaring64_bitmap_maximum(r); +// Leaf type of the ART used to keep the high 48 bits of each entry. 
+// Low 8 bits: typecode +// High 56 bits: container index +typedef roaring64_leaf_t leaf_t; - art_iterator_t it = art_init_iterator(&r->art, true); - while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - stat->n_containers++; - uint8_t truetype = get_container_type(leaf->container, leaf->typecode); - uint32_t card = - container_get_cardinality(leaf->container, leaf->typecode); - uint32_t sbytes = - container_size_in_bytes(leaf->container, leaf->typecode); - stat->cardinality += card; - switch (truetype) { - case BITSET_CONTAINER_TYPE: - stat->n_bitset_containers++; - stat->n_values_bitset_containers += card; - stat->n_bytes_bitset_containers += sbytes; - break; - case ARRAY_CONTAINER_TYPE: - stat->n_array_containers++; - stat->n_values_array_containers += card; - stat->n_bytes_array_containers += sbytes; - break; - case RUN_CONTAINER_TYPE: - stat->n_run_containers++; - stat->n_values_run_containers += card; - stat->n_bytes_run_containers += sbytes; - break; - default: - assert(false); - roaring_unreachable; - } - art_iterator_next(&it); - } +// Iterator struct to hold iteration state. +typedef struct roaring64_iterator_s { + const roaring64_bitmap_t *r; + art_iterator_t art_it; + roaring_container_iterator_t container_it; + uint64_t high48; // Key that art_it points to. + + uint64_t value; + bool has_value; + + // If has_value is false, then the iterator is saturated. This field + // indicates the direction of saturation. If true, there are no more values + // in the forward direction. If false, there are no more values in the + // backward direction. 
+ bool saturated_forward; +} roaring64_iterator_t; + +static inline bool is_frozen64(const roaring64_bitmap_t *r) { + return r->flags & ROARING_FLAG_FROZEN; } -static bool roaring64_leaf_internal_validate(const art_val_t *val, - const char **reason) { - leaf_t *leaf = (leaf_t *)val; - return container_internal_validate(leaf->container, leaf->typecode, reason); +// Splits the given uint64 key into high 48 bit and low 16 bit components. +// Expects high48_out to be of length ART_KEY_BYTES. +static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { + uint64_t tmp = croaring_htobe64(key); + memcpy(high48_out, (uint8_t *)(&tmp), ART_KEY_BYTES); + return (uint16_t)key; } -bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, - const char **reason) { - return art_internal_validate(&r->art, reason, - roaring64_leaf_internal_validate); +// Recombines the high 48 bit and low 16 bit components into a uint64 key. +// Expects high48_out to be of length ART_KEY_BYTES. +static inline uint64_t combine_key(const uint8_t high48[], uint16_t low16) { + uint64_t result = 0; + memcpy((uint8_t *)(&result), high48, ART_KEY_BYTES); + return croaring_be64toh(result) | low16; } -bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); +static inline uint64_t minimum(uint64_t a, uint64_t b) { + return (a < b) ? 
a : b; +} - while (it1.value != NULL && it2.value != NULL) { - if (compare_high48(it1.key, it2.key) != 0) { - return false; - } - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - if (!container_equals(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode)) { - return false; - } - art_iterator_next(&it1); - art_iterator_next(&it2); - } - return it1.value == NULL && it2.value == NULL; +static inline leaf_t create_leaf(uint64_t container_index, uint8_t typecode) { + return (container_index << 8) | typecode; } -bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); +static inline uint8_t get_typecode(leaf_t leaf) { return (uint8_t)leaf; } - while (it1.value != NULL) { - bool it2_present = it2.value != NULL; +static inline uint64_t get_index(leaf_t leaf) { return leaf >> 8; } - int compare_result = 0; - if (it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - if (!container_is_subset(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode)) { - return false; - } - art_iterator_next(&it1); - art_iterator_next(&it2); - } - } - if (!it2_present || compare_result < 0) { - return false; - } else if (compare_result > 0) { - art_iterator_lower_bound(&it2, it1.key); - } - } - return true; +static inline container_t *get_container(const roaring64_bitmap_t *r, + leaf_t leaf) { + return r->containers[get_index(leaf)]; } -bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - return roaring64_bitmap_get_cardinality(r1) < - roaring64_bitmap_get_cardinality(r2) && - roaring64_bitmap_is_subset(r1, r2); +// Replaces the container of `leaf` with the given container. 
Returns the +// modified leaf for convenience. +static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, + container_t *container, + uint8_t typecode) { + uint64_t index = get_index(*leaf); + r->containers[index] = container; + *leaf = create_leaf(index, typecode); + return *leaf; } -roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - roaring64_bitmap_t *result = roaring64_bitmap_create(); - - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); +/** + * Extends the array of container pointers. + */ +static void extend_containers(roaring64_bitmap_t *r) { + uint64_t size = r->first_free; + if (size < r->capacity) { + return; + } + uint64_t new_capacity; + if (r->capacity == 0) { + new_capacity = 2; + } else if (r->capacity < 1024) { + new_capacity = 2 * r->capacity; + } else { + new_capacity = 5 * r->capacity / 4; + } + uint64_t increase = new_capacity - r->capacity; + r->containers = (container_t **)roaring_realloc( + r->containers, new_capacity * sizeof(container_t *)); + memset(r->containers + r->capacity, 0, increase * sizeof(container_t *)); + r->capacity = new_capacity; +} - while (it1.value != NULL && it2.value != NULL) { - // Cases: - // 1. it1 < it2 -> it1++ - // 2. it1 == it1 -> output it1 & it2, it1++, it2++ - // 3. it1 > it2 -> it2++ - int compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2: iterators at the same high key position. 
- leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - result_leaf->container = container_and( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - - if (container_nonzero_cardinality(result_leaf->container, - result_leaf->typecode)) { - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); - } else { - container_free(result_leaf->container, result_leaf->typecode); - free_leaf(result_leaf); - } - art_iterator_next(&it1); - art_iterator_next(&it2); - } else if (compare_result < 0) { - // Case 1: it1 is before it2. - art_iterator_lower_bound(&it1, it2.key); - } else { - // Case 3: it2 is before it1. - art_iterator_lower_bound(&it2, it1.key); +static uint64_t next_free_container_idx(const roaring64_bitmap_t *r) { + for (uint64_t i = r->first_free + 1; i < r->capacity; ++i) { + if (r->containers[i] == NULL) { + return i; } } - return result; + return r->capacity; } -uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - uint64_t result = 0; +static uint64_t allocate_index(roaring64_bitmap_t *r) { + uint64_t first_free = r->first_free; + if (first_free == r->capacity) { + extend_containers(r); + } + r->first_free = next_free_container_idx(r); + return first_free; +} - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); +static leaf_t add_container(roaring64_bitmap_t *r, container_t *container, + uint8_t typecode) { + uint64_t index = allocate_index(r); + r->containers[index] = container; + return create_leaf(index, typecode); +} - while (it1.value != NULL && it2.value != NULL) { - // Cases: - // 1. it1 < it2 -> it1++ - // 2. it1 == it1 -> output cardinaltiy it1 & it2, it1++, it2++ - // 3. 
it1 > it2 -> it2++ - int compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - result += - container_and_cardinality(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode); - art_iterator_next(&it1); - art_iterator_next(&it2); - } else if (compare_result < 0) { - // Case 1: it1 is before it2. - art_iterator_lower_bound(&it1, it2.key); - } else { - // Case 3: it2 is before it1. - art_iterator_lower_bound(&it2, it1.key); - } +static void remove_container(roaring64_bitmap_t *r, leaf_t leaf) { + uint64_t index = get_index(leaf); + r->containers[index] = NULL; + if (index < r->first_free) { + r->first_free = index; } - return result; } -// Inplace and (modifies its first argument). -void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - if (r1 == r2) { - return; - } - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); +// Copies the container referenced by `leaf` from `r1` to `r2`. +static inline leaf_t copy_leaf_container(const roaring64_bitmap_t *r1, + roaring64_bitmap_t *r2, leaf_t leaf) { + uint8_t typecode = get_typecode(leaf); + // get_copy_of_container modifies the typecode passed in. + container_t *container = get_copy_of_container( + get_container(r1, leaf), &typecode, /*copy_on_write=*/false); + return add_container(r2, container, typecode); +} - while (it1.value != NULL) { - // Cases: - // 1. !it2_present -> erase it1 - // 2. it2_present - // a. it1 < it2 -> erase it1 - // b. it1 == it2 -> output it1 & it2, it1++, it2++ - // c. it1 > it2 -> it2++ - bool it2_present = it2.value != NULL; - int compare_result = 0; - if (it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2a: iterators at the same high key position. 
- leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; +static inline int compare_high48(art_key_chunk_t key1[], + art_key_chunk_t key2[]) { + return art_compare_keys(key1, key2); +} - // We do the computation "in place" only when c1 is not a - // shared container. Rationale: using a shared container - // safely with in place computation would require making a - // copy and then doing the computation in place which is - // likely less efficient than avoiding in place entirely and - // always generating a new container. - uint8_t typecode2; - container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { - container2 = container_and( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); - } else { - container2 = container_iand( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); - } +static inline bool roaring64_iterator_init_at_leaf_first( + roaring64_iterator_t *it) { + it->high48 = combine_key(it->art_it.key, 0); + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = 0; + it->container_it = container_init_iterator(get_container(it->r, leaf), + get_typecode(leaf), &low16); + it->value = it->high48 | low16; + return (it->has_value = true); +} - if (container2 != leaf1->container) { - container_free(leaf1->container, leaf1->typecode); - leaf1->container = container2; - leaf1->typecode = typecode2; - } - if (!container_nonzero_cardinality(container2, typecode2)) { - container_free(container2, typecode2); - art_iterator_erase(&r1->art, &it1); - free_leaf(leaf1); - } else { - // Only advance the iterator if we didn't delete the - // leaf, as erasing advances by itself. 
- art_iterator_next(&it1); - } - art_iterator_next(&it2); - } - } +static inline bool roaring64_iterator_init_at_leaf_last( + roaring64_iterator_t *it) { + it->high48 = combine_key(it->art_it.key, 0); + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = 0; + it->container_it = container_init_iterator_last(get_container(it->r, leaf), + get_typecode(leaf), &low16); + it->value = it->high48 | low16; + return (it->has_value = true); +} - if (!it2_present || compare_result < 0) { - // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t *leaf = (leaf_t *)art_iterator_erase(&r1->art, &it1); - assert(leaf != NULL); - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); - } else if (compare_result > 0) { - // Case 2c: it1 is after it2. - art_iterator_lower_bound(&it2, it1.key); +static inline roaring64_iterator_t *roaring64_iterator_init_at( + const roaring64_bitmap_t *r, roaring64_iterator_t *it, bool first) { + it->r = r; + it->art_it = art_init_iterator((art_t *)&r->art, first); + it->has_value = it->art_it.value != NULL; + if (it->has_value) { + if (first) { + roaring64_iterator_init_at_leaf_first(it); + } else { + roaring64_iterator_init_at_leaf_last(it); } + } else { + it->saturated_forward = first; } + return it; } -bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - bool intersect = false; - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); +roaring64_bitmap_t *roaring64_bitmap_create(void) { + roaring64_bitmap_t *r = + (roaring64_bitmap_t *)roaring_malloc(sizeof(roaring64_bitmap_t)); + art_init_cleared(&r->art); + r->flags = 0; + r->capacity = 0; + r->first_free = 0; + r->containers = NULL; + return r; +} - while (it1.value != NULL && it2.value != NULL) { - // Cases: - // 1. it1 < it2 -> it1++ - // 2. it1 == it1 -> intersect |= it1 & it2, it1++, it2++ - // 3. 
it1 > it2 -> it2++ - int compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - intersect |= container_intersect(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode); - art_iterator_next(&it1); - art_iterator_next(&it2); - } else if (compare_result < 0) { - // Case 1: it1 is before it2. - art_iterator_lower_bound(&it1, it2.key); +void roaring64_bitmap_free(roaring64_bitmap_t *r) { + if (!r) { + return; + } + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + if (is_frozen64(r)) { + // Only free the container itself, not the buffer-backed contents + // within. + roaring_free(get_container(r, leaf)); } else { - // Case 3: it2 is before it1. - art_iterator_lower_bound(&it2, it1.key); + container_free(get_container(r, leaf), get_typecode(leaf)); } + art_iterator_next(&it); } - return intersect; + if (!is_frozen64(r)) { + art_free(&r->art); + } + roaring_free(r->containers); + roaring_free(r); } -bool roaring64_bitmap_intersect_with_range(const roaring64_bitmap_t *r, - uint64_t min, uint64_t max) { - if (min >= max) { - return false; - } - roaring64_iterator_t it; - roaring64_iterator_init_at(r, &it, /*first=*/true); - if (!roaring64_iterator_move_equalorlarger(&it, min)) { - return false; +roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); + + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t result_typecode = get_typecode(leaf); + container_t *result_container = get_copy_of_container( + get_container(r, leaf), &result_typecode, /*copy_on_write=*/false); + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + 
art_insert(&result->art, it.key, (art_val_t)result_leaf); + art_iterator_next(&it); } - return roaring64_iterator_has_value(&it) && - roaring64_iterator_value(&it) < max; + return result; } -double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - uint64_t c1 = roaring64_bitmap_get_cardinality(r1); - uint64_t c2 = roaring64_bitmap_get_cardinality(r2); - uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); - return (double)inter / (double)(c1 + c2 - inter); +/** + * Steal the containers from a 32-bit bitmap and insert them into a 64-bit + * bitmap (with an offset) + * + * After calling this function, the original bitmap will be empty, and the + * returned bitmap will contain all the values from the original bitmap. + */ +static void move_from_roaring32_offset(roaring64_bitmap_t *dst, + roaring_bitmap_t *src, + uint32_t high_bits) { + uint64_t key_base = ((uint64_t)high_bits) << 32; + uint32_t r32_size = ra_get_size(&src->high_low_container); + for (uint32_t i = 0; i < r32_size; ++i) { + uint16_t key = ra_get_key_at_index(&src->high_low_container, i); + uint8_t typecode; + container_t *container = ra_get_container_at_index( + &src->high_low_container, (uint16_t)i, &typecode); + + uint8_t high48[ART_KEY_BYTES]; + uint64_t high48_bits = key_base | ((uint64_t)key << 16); + split_key(high48_bits, high48); + leaf_t leaf = add_container(dst, container, typecode); + art_insert(&dst->art, high48, (art_val_t)leaf); + } + // We stole all the containers, so leave behind a size of zero + src->high_low_container.size = 0; } -roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { +roaring64_bitmap_t *roaring64_bitmap_move_from_roaring32( + roaring_bitmap_t *bitmap32) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + 
move_from_roaring32_offset(result, bitmap32, 0); - while (it1.value != NULL || it2.value != NULL) { - bool it1_present = it1.value != NULL; - bool it2_present = it2.value != NULL; + return result; +} - // Cases: - // 1. it1_present && !it2_present -> output it1, it1++ - // 2. !it1_present && it2_present -> output it2, it2++ - // 3. it1_present && it2_present - // a. it1 < it2 -> output it1, it1++ - // b. it1 == it2 -> output it1 | it2, it1++, it2++ - // c. it1 > it2 -> output it2, it2++ - int compare_result = 0; - if (it1_present && it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 3b: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - result_leaf->container = container_or( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); - art_iterator_next(&it1); - art_iterator_next(&it2); +roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, + uint64_t step) { + if (step == 0 || max <= min) { + return NULL; + } + roaring64_bitmap_t *r = roaring64_bitmap_create(); + if (step >= (1 << 16)) { + // Only one value per container. + for (uint64_t value = min; value < max; value += step) { + roaring64_bitmap_add(r, value); + if (value > UINT64_MAX - step) { + break; } } - if ((it1_present && !it2_present) || compare_result < 0) { - // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); - art_iterator_next(&it1); - } else if ((!it1_present && it2_present) || compare_result > 0) { - // Cases 2 and 3c: it2 is the only iterator or is before it1. 
- leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); - art_insert(&result->art, it2.key, (art_val_t *)result_leaf); - art_iterator_next(&it2); - } + return r; } - return result; + do { + uint64_t high_bits = min & 0xFFFFFFFFFFFF0000; + uint16_t container_min = min & 0xFFFF; + uint32_t container_max = (uint32_t)minimum(max - high_bits, 1 << 16); + + uint8_t typecode; + container_t *container = container_from_range( + &typecode, container_min, container_max, (uint16_t)step); + + uint8_t high48[ART_KEY_BYTES]; + split_key(min, high48); + leaf_t leaf = add_container(r, container, typecode); + art_insert(&r->art, high48, (art_val_t)leaf); + + uint64_t gap = container_max - container_min + step - 1; + uint64_t increment = gap - (gap % step); + if (min > UINT64_MAX - increment) { + break; + } + min += increment; + } while (min < max); + return r; } -uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - uint64_t c1 = roaring64_bitmap_get_cardinality(r1); - uint64_t c2 = roaring64_bitmap_get_cardinality(r2); - uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); - return c1 + c2 - inter; +roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, + const uint64_t *vals) { + roaring64_bitmap_t *r = roaring64_bitmap_create(); + roaring64_bitmap_add_many(r, n_args, vals); + return r; } -void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - if (r1 == r2) { - return; +static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, + uint8_t *high48, + uint16_t low16, + leaf_t *leaf) { + if (leaf != NULL) { + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); + uint8_t typecode2; + container_t *container2 = + container_add(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + replace_container(r, leaf, container2, typecode2); + } + return leaf; + } else { 
+ array_container_t *ac = array_container_create(); + uint8_t typecode; + container_t *container = + container_add(ac, low16, ARRAY_CONTAINER_TYPE, &typecode); + assert(ac == container); + leaf_t new_leaf = add_container(r, container, typecode); + return (leaf_t *)art_insert(&r->art, high48, (art_val_t)new_leaf); } - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); +} - while (it1.value != NULL || it2.value != NULL) { - bool it1_present = it1.value != NULL; - bool it2_present = it2.value != NULL; +void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + containerptr_roaring64_bitmap_add(r, high48, low16, leaf); +} - // Cases: - // 1. it1_present && !it2_present -> it1++ - // 2. !it1_present && it2_present -> add it2, it2++ - // 3. it1_present && it2_present - // a. it1 < it2 -> it1++ - // b. it1 == it2 -> it1 | it2, it1++, it2++ - // c. it1 > it2 -> add it2, it2++ - int compare_result = 0; - if (it1_present && it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 3b: iterators at the same high key position. 
- leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - uint8_t typecode2; - container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { - container2 = container_or(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode, - &typecode2); - } else { - container2 = container_ior( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); - } - if (container2 != leaf1->container) { - container_free(leaf1->container, leaf1->typecode); - leaf1->container = container2; - leaf1->typecode = typecode2; - } - art_iterator_next(&it1); - art_iterator_next(&it2); - } - } - if ((it1_present && !it2_present) || compare_result < 0) { - // Cases 1 and 3a: it1 is the only iterator or is before it2. - art_iterator_next(&it1); - } else if ((!it1_present && it2_present) || compare_result > 0) { - // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); - art_iterator_insert(&r1->art, &it1, it2.key, - (art_val_t *)result_leaf); - art_iterator_next(&it2); +bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + + int old_cardinality = 0; + if (leaf != NULL) { + old_cardinality = container_get_cardinality(get_container(r, *leaf), + get_typecode(*leaf)); + } + leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); + int new_cardinality = + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + return old_cardinality != new_cardinality; +} + +void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = context->leaf; + if (leaf != NULL && compare_high48(context->high_bytes, high48) == 0) { + // We're at a container with the 
correct high bits. + uint8_t typecode1 = get_typecode(*leaf); + container_t *container1 = get_container(r, *leaf); + uint8_t typecode2; + container_t *container2 = + container_add(container1, low16, typecode1, &typecode2); + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r, leaf, container2, typecode2); } + } else { + // We're not positioned anywhere yet or the high bits of the key + // differ. + leaf = (leaf_t *)art_find(&r->art, high48); + context->leaf = + containerptr_roaring64_bitmap_add(r, high48, low16, leaf); + memcpy(context->high_bytes, high48, ART_KEY_BYTES); } } -roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - roaring64_bitmap_t *result = roaring64_bitmap_create(); +void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, + const uint64_t *vals) { + if (n_args == 0) { + return; + } + const uint64_t *end = vals + n_args; + roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; + for (const uint64_t *current_val = vals; current_val != end; + current_val++) { + roaring64_bitmap_add_bulk(r, &context, *current_val); + } +} - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); +static inline void add_range_closed_at(roaring64_bitmap_t *r, art_t *art, + uint8_t *high48, uint16_t min, + uint16_t max) { + leaf_t *leaf = (leaf_t *)art_find(art, high48); + if (leaf != NULL) { + uint8_t typecode1 = get_typecode(*leaf); + container_t *container1 = get_container(r, *leaf); + uint8_t typecode2; + container_t *container2 = + container_add_range(container1, typecode1, min, max, &typecode2); + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r, leaf, container2, typecode2); + } + return; + } + uint8_t typecode; + // container_add_range is inclusive, but `container_range_of_ones` is + // exclusive. 
+ container_t *container = container_range_of_ones(min, max + 1, &typecode); + leaf_t new_leaf = add_container(r, container, typecode); + art_insert(art, high48, (art_val_t)new_leaf); +} - while (it1.value != NULL || it2.value != NULL) { - bool it1_present = it1.value != NULL; - bool it2_present = it2.value != NULL; +void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min >= max) { + return; + } + roaring64_bitmap_add_range_closed(r, min, max - 1); +} - // Cases: - // 1. it1_present && !it2_present -> output it1, it1++ - // 2. !it1_present && it2_present -> output it2, it2++ - // 3. it1_present && it2_present - // a. it1 < it2 -> output it1, it1++ - // b. it1 == it2 -> output it1 ^ it2, it1++, it2++ - // c. it1 > it2 -> output it2, it2++ - int compare_result = 0; - if (it1_present && it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 3b: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - result_leaf->container = container_xor( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - if (container_nonzero_cardinality(result_leaf->container, - result_leaf->typecode)) { - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); - } else { - container_free(result_leaf->container, - result_leaf->typecode); - free_leaf(result_leaf); - } - art_iterator_next(&it1); - art_iterator_next(&it2); - } - } - if ((it1_present && !it2_present) || compare_result < 0) { - // Cases 1 and 3a: it1 is the only iterator or is before it2. 
- leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); - art_iterator_next(&it1); - } else if ((!it1_present && it2_present) || compare_result > 0) { - // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); - art_insert(&result->art, it2.key, (art_val_t *)result_leaf); - art_iterator_next(&it2); - } +void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min > max) { + return; + } + + art_t *art = &r->art; + uint8_t min_high48[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48); + uint8_t max_high48[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48); + if (compare_high48(min_high48, max_high48) == 0) { + // Only populate range within one container. + add_range_closed_at(r, art, min_high48, min_low16, max_low16); + return; } - return result; + + // Populate a range across containers. Fill intermediate containers + // entirely. 
+ add_range_closed_at(r, art, min_high48, min_low16, 0xffff); + uint64_t min_high_bits = min >> 16; + uint64_t max_high_bits = max >> 16; + for (uint64_t current = min_high_bits + 1; current < max_high_bits; + ++current) { + uint8_t current_high48[ART_KEY_BYTES]; + split_key(current << 16, current_high48); + add_range_closed_at(r, art, current_high48, 0, 0xffff); + } + add_range_closed_at(r, art, max_high48, 0, max_low16); } -uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - uint64_t c1 = roaring64_bitmap_get_cardinality(r1); - uint64_t c2 = roaring64_bitmap_get_cardinality(r2); - uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); - return c1 + c2 - 2 * inter; +bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + if (leaf != NULL) { + return container_contains(get_container(r, *leaf), low16, + get_typecode(*leaf)); + } + return false; } -void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - assert(r1 != r2); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); +bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min >= max) { + return true; + } - while (it1.value != NULL || it2.value != NULL) { - bool it1_present = it1.value != NULL; - bool it2_present = it2.value != NULL; + uint8_t min_high48[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48); + uint8_t max_high48[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48); + uint64_t max_high48_bits = (max - 1) & 0xFFFFFFFFFFFF0000; // Inclusive - // Cases: - // 1. it1_present && !it2_present -> it1++ - // 2. !it1_present && it2_present -> add it2, it2++ - // 3. it1_present && it2_present - // a. 
it1 < it2 -> it1++ - // b. it1 == it2 -> it1 ^ it2, it1++, it2++ - // c. it1 > it2 -> add it2, it2++ - int compare_result = 0; - if (it1_present && it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 3b: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - container_t *container1 = leaf1->container; - uint8_t typecode1 = leaf1->typecode; - uint8_t typecode2; - container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { - container2 = container_xor( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); - if (container2 != container1) { - // We only free when doing container_xor, not - // container_ixor, as ixor frees the original - // internally. - container_free(container1, typecode1); - } - } else { - container2 = container_ixor( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); - } - leaf1->container = container2; - leaf1->typecode = typecode2; + art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); + if (it.value == NULL || combine_key(it.key, 0) > min) { + return false; + } + uint64_t prev_high48_bits = min & 0xFFFFFFFFFFFF0000; + while (it.value != NULL) { + uint64_t current_high48_bits = combine_key(it.key, 0); + if (current_high48_bits > max_high48_bits) { + // We've passed the end of the range with all containers containing + // the range. + return true; + } + if (current_high48_bits - prev_high48_bits > 0x10000) { + // There is a gap in the iterator that falls in the range. + return false; + } - if (!container_nonzero_cardinality(container2, typecode2)) { - container_free(container2, typecode2); - art_iterator_erase(&r1->art, &it1); - free_leaf(leaf1); - } else { - // Only advance the iterator if we didn't delete the - // leaf, as erasing advances by itself. 
- art_iterator_next(&it1); - } - art_iterator_next(&it2); - } + leaf_t leaf = (leaf_t)*it.value; + uint32_t container_min = 0; + if (compare_high48(it.key, min_high48) == 0) { + container_min = min_low16; } - if ((it1_present && !it2_present) || compare_result < 0) { - // Cases 1 and 3a: it1 is the only iterator or is before it2. - art_iterator_next(&it1); - } else if ((!it1_present && it2_present) || compare_result > 0) { - // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); - if (it1_present) { - art_iterator_insert(&r1->art, &it1, it2.key, - (art_val_t *)result_leaf); - art_iterator_next(&it1); - } else { - art_insert(&r1->art, it2.key, (art_val_t *)result_leaf); + uint32_t container_max = 0xFFFF + 1; // Exclusive + if (compare_high48(it.key, max_high48) == 0) { + container_max = max_low16; + } + + // For the first and last containers we use container_contains_range, + // for the intermediate containers we can use container_is_full. 
+ if (container_min == 0 && container_max == 0xFFFF + 1) { + if (!container_is_full(get_container(r, leaf), + get_typecode(leaf))) { + return false; } - art_iterator_next(&it2); + } else if (!container_contains_range(get_container(r, leaf), + container_min, container_max, + get_typecode(leaf))) { + return false; } + prev_high48_bits = current_high48_bits; + art_iterator_next(&it); } + return prev_high48_bits == max_high48_bits; } -roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - roaring64_bitmap_t *result = roaring64_bitmap_create(); +bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + if (context->leaf == NULL || + art_compare_keys(context->high_bytes, high48) != 0) { + // We're not positioned anywhere yet or the high bits of the key + // differ. + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + if (leaf == NULL) { + return false; + } + context->leaf = leaf; + memcpy(context->high_bytes, high48, ART_KEY_BYTES); + } + return container_contains(get_container(r, *context->leaf), low16, + get_typecode(*context->leaf)); +} - while (it1.value != NULL) { - // Cases: - // 1. it1_present && !it2_present -> output it1, it1++ - // 2. it1_present && it2_present - // a. it1 < it2 -> output it1, it1++ - // b. it1 == it2 -> output it1 - it2, it1++, it2++ - // c. it1 > it2 -> it2++ - bool it2_present = it2.value != NULL; - int compare_result = 0; - if (it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2b: iterators at the same high key position. 
- leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - result_leaf->container = container_andnot( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - - if (container_nonzero_cardinality(result_leaf->container, - result_leaf->typecode)) { - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); - } else { - container_free(result_leaf->container, - result_leaf->typecode); - free_leaf(result_leaf); - } - art_iterator_next(&it1); - art_iterator_next(&it2); +bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, + uint64_t *element) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint64_t start_rank = 0; + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint64_t cardinality = container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); + if (start_rank + cardinality > rank) { + uint32_t uint32_start = 0; + uint32_t uint32_rank = rank - start_rank; + uint32_t uint32_element = 0; + if (container_select(get_container(r, leaf), get_typecode(leaf), + &uint32_start, uint32_rank, &uint32_element)) { + *element = combine_key(it.key, (uint16_t)uint32_element); + return true; } + return false; } - if (!it2_present || compare_result < 0) { - // Cases 1 and 2a: it1 is the only iterator or is before it2. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); - art_iterator_next(&it1); - } else if (compare_result > 0) { - // Case 2c: it1 is after it2. 
- art_iterator_next(&it2); - } + start_rank += cardinality; + art_iterator_next(&it); } - return result; + return false; } -uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - uint64_t c1 = roaring64_bitmap_get_cardinality(r1); - uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); - return c1 - inter; -} +uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); -void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint64_t rank = 0; + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + int compare_result = compare_high48(it.key, high48); + if (compare_result < 0) { + rank += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); + } else if (compare_result == 0) { + return rank + container_rank(get_container(r, leaf), + get_typecode(leaf), low16); + } else { + return rank; + } + art_iterator_next(&it); + } + return rank; +} - while (it1.value != NULL) { - // Cases: - // 1. it1_present && !it2_present -> it1++ - // 2. it1_present && it2_present - // a. it1 < it2 -> it1++ - // b. it1 == it2 -> it1 - it2, it1++, it2++ - // c. it1 > it2 -> it2++ - bool it2_present = it2.value != NULL; - int compare_result = 0; - if (it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2b: iterators at the same high key position. 
- leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - container_t *container1 = leaf1->container; - uint8_t typecode1 = leaf1->typecode; - uint8_t typecode2; - container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { - container2 = container_andnot( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); - if (container2 != container1) { - // We only free when doing container_andnot, not - // container_iandnot, as iandnot frees the original - // internally. - container_free(container1, typecode1); - } - } else { - container2 = container_iandnot( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); - } - if (container2 != container1) { - leaf1->container = container2; - leaf1->typecode = typecode2; - } +bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, + uint64_t *out_index) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); - if (!container_nonzero_cardinality(container2, typecode2)) { - container_free(container2, typecode2); - art_iterator_erase(&r1->art, &it1); - free_leaf(leaf1); - } else { - // Only advance the iterator if we didn't delete the - // leaf, as erasing advances by itself. 
- art_iterator_next(&it1); - } - art_iterator_next(&it2); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint64_t index = 0; + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + int compare_result = compare_high48(it.key, high48); + if (compare_result < 0) { + index += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); + } else if (compare_result == 0) { + int index16 = container_get_index(get_container(r, leaf), + get_typecode(leaf), low16); + if (index16 < 0) { + return false; } + *out_index = index + index16; + return true; + } else { + return false; } - if (!it2_present || compare_result < 0) { - // Cases 1 and 2a: it1 is the only iterator or is before it2. - art_iterator_next(&it1); - } else if (compare_result > 0) { - // Case 2c: it1 is after it2. - art_iterator_next(&it2); - } + art_iterator_next(&it); } + return false; } -/** - * Flips the leaf at high48 in the range [min, max), returning a new leaf with a - * new container. If the high48 key is not found in the existing bitmap, a new - * container is created. Returns null if the negation results in an empty range. - */ -static leaf_t *roaring64_flip_leaf(const roaring64_bitmap_t *r, - uint8_t high48[], uint32_t min, - uint32_t max) { - leaf_t *leaf1 = (leaf_t *)art_find(&r->art, high48); - container_t *container2; - uint8_t typecode2; - if (leaf1 == NULL) { - // No container at this key, create a full container. - container2 = container_range_of_ones(min, max, &typecode2); - } else if (min == 0 && max > 0xFFFF) { - // Flip whole container. - container2 = - container_not(leaf1->container, leaf1->typecode, &typecode2); - } else { - // Partially flip a container. - container2 = container_not_range(leaf1->container, leaf1->typecode, min, - max, &typecode2); - } - if (container_nonzero_cardinality(container2, typecode2)) { - return create_leaf(container2, typecode2); +// Returns true if a container was removed. 
+static inline bool containerptr_roaring64_bitmap_remove(roaring64_bitmap_t *r, + uint8_t *high48, + uint16_t low16, + leaf_t *leaf) { + if (leaf == NULL) { + return false; } - container_free(container2, typecode2); - return NULL; -} -/** - * Flips the leaf at high48 in the range [min, max). If the high48 key is not - * found in the bitmap, a new container is created. Deletes the leaf and - * associated container if the negation results in an empty range. - */ -static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], - uint32_t min, uint32_t max) { - leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - container_t *container2; + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); uint8_t typecode2; - if (leaf == NULL) { - // No container at this key, insert a full container. - container2 = container_range_of_ones(min, max, &typecode2); - art_insert(&r->art, high48, - (art_val_t *)create_leaf(container2, typecode2)); - return; + container_t *container2 = + container_remove(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + replace_container(r, leaf, container2, typecode2); } - - if (min == 0 && max > 0xFFFF) { - // Flip whole container. - container2 = - container_inot(leaf->container, leaf->typecode, &typecode2); - } else { - // Partially flip a container. 
- container2 = container_inot_range(leaf->container, leaf->typecode, min, - max, &typecode2); + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + bool erased = art_erase(&r->art, high48, (art_val_t *)leaf); + assert(erased); + (void)erased; + remove_container(r, *leaf); + return true; } + return false; +} - leaf->container = container2; - leaf->typecode = typecode2; +void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) { + art_t *art = &r->art; + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); - if (!container_nonzero_cardinality(leaf->container, leaf->typecode)) { - art_erase(&r->art, high48); - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); - } + leaf_t *leaf = (leaf_t *)art_find(art, high48); + containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); } -roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, - uint64_t min, uint64_t max) { - if (min >= max) { - return roaring64_bitmap_copy(r); - } - return roaring64_bitmap_flip_closed(r, min, max - 1); -} +bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) { + art_t *art = &r->art; + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(art, high48); -roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, - uint64_t min, uint64_t max) { - if (min > max) { - return roaring64_bitmap_copy(r1); + if (leaf == NULL) { + return false; } - uint8_t min_high48_key[ART_KEY_BYTES]; - uint16_t min_low16 = split_key(min, min_high48_key); - uint8_t max_high48_key[ART_KEY_BYTES]; - uint16_t max_low16 = split_key(max, max_high48_key); - uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; - uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; - - roaring64_bitmap_t *r2 = roaring64_bitmap_create(); - art_iterator_t it = art_init_iterator(&r1->art, /*first=*/true); - - // Copy 
the containers before min unchanged. - while (it.value != NULL && compare_high48(it.key, min_high48_key) < 0) { - leaf_t *leaf1 = (leaf_t *)it.value; - uint8_t typecode2 = leaf1->typecode; - container_t *container2 = get_copy_of_container( - leaf1->container, &typecode2, /*copy_on_write=*/false); - art_insert(&r2->art, it.key, - (art_val_t *)create_leaf(container2, typecode2)); - art_iterator_next(&it); + int old_cardinality = + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + if (containerptr_roaring64_bitmap_remove(r, high48, low16, leaf)) { + return true; } + int new_cardinality = + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + return new_cardinality != old_cardinality; +} - // Flip the range (including non-existent containers!) between min and max. - for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; - high48_bits++) { - uint8_t current_high48_key[ART_KEY_BYTES]; - split_key(high48_bits << 16, current_high48_key); - - uint32_t min_container = 0; - if (high48_bits == min_high48_bits) { - min_container = min_low16; - } - uint32_t max_container = 0xFFFF + 1; // Exclusive range. - if (high48_bits == max_high48_bits) { - max_container = max_low16 + 1; // Exclusive. +void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val) { + art_t *art = &r->art; + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + if (context->leaf != NULL && + compare_high48(context->high_bytes, high48) == 0) { + // We're at a container with the correct high bits. 
+ uint8_t typecode = get_typecode(*context->leaf); + container_t *container = get_container(r, *context->leaf); + uint8_t typecode2; + container_t *container2 = + container_remove(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + replace_container(r, context->leaf, container2, typecode2); } - - leaf_t *leaf = roaring64_flip_leaf(r1, current_high48_key, - min_container, max_container); - if (leaf != NULL) { - art_insert(&r2->art, current_high48_key, (art_val_t *)leaf); + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + leaf_t leaf; + bool erased = art_erase(art, high48, (art_val_t *)&leaf); + assert(erased); + (void)erased; + remove_container(r, leaf); } + } else { + // We're not positioned anywhere yet or the high bits of the key + // differ. + leaf_t *leaf = (leaf_t *)art_find(art, high48); + containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); + context->leaf = leaf; + memcpy(context->high_bytes, high48, ART_KEY_BYTES); } +} - // Copy the containers after max unchanged. 
- it = art_upper_bound(&r1->art, max_high48_key); - while (it.value != NULL) { - leaf_t *leaf1 = (leaf_t *)it.value; - uint8_t typecode2 = leaf1->typecode; - container_t *container2 = get_copy_of_container( - leaf1->container, &typecode2, /*copy_on_write=*/false); - art_insert(&r2->art, it.key, - (art_val_t *)create_leaf(container2, typecode2)); - art_iterator_next(&it); +void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, + const uint64_t *vals) { + if (n_args == 0) { + return; + } + const uint64_t *end = vals + n_args; + roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; + for (const uint64_t *current_val = vals; current_val != end; + current_val++) { + roaring64_bitmap_remove_bulk(r, &context, *current_val); } +} - return r2; +static inline void remove_range_closed_at(roaring64_bitmap_t *r, art_t *art, + uint8_t *high48, uint16_t min, + uint16_t max) { + leaf_t *leaf = (leaf_t *)art_find(art, high48); + if (leaf == NULL) { + return; + } + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); + uint8_t typecode2; + container_t *container2 = + container_remove_range(container, typecode, min, max, &typecode2); + if (container2 != container) { + container_free(container, typecode); + if (container2 != NULL) { + replace_container(r, leaf, container2, typecode2); + } else { + bool erased = art_erase(art, high48, NULL); + assert(erased); + (void)erased; + remove_container(r, *leaf); + } + } } -void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min, +void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min, uint64_t max) { if (min >= max) { return; } - roaring64_bitmap_flip_closed_inplace(r, min, max - 1); + roaring64_bitmap_remove_range_closed(r, min, max - 1); } -void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, +void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, uint64_t max) { if (min > max) { return; } - 
uint16_t min_low16 = (uint16_t)min; - uint16_t max_low16 = (uint16_t)max; - uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; - uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; - // Flip the range (including non-existent containers!) between min and max. - for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; - high48_bits++) { - uint8_t current_high48_key[ART_KEY_BYTES]; - split_key(high48_bits << 16, current_high48_key); + art_t *art = &r->art; + uint8_t min_high48[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48); + uint8_t max_high48[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48); + if (compare_high48(min_high48, max_high48) == 0) { + // Only remove a range within one container. + remove_range_closed_at(r, art, min_high48, min_low16, max_low16); + return; + } - uint32_t min_container = 0; - if (high48_bits == min_high48_bits) { - min_container = min_low16; - } - uint32_t max_container = 0xFFFF + 1; // Exclusive range. - if (high48_bits == max_high48_bits) { - max_container = max_low16 + 1; // Exclusive. - } + // Remove a range across containers. Remove intermediate containers + // entirely. + remove_range_closed_at(r, art, min_high48, min_low16, 0xffff); - roaring64_flip_leaf_inplace(r, current_high48_key, min_container, - max_container); + art_iterator_t it = art_upper_bound(art, min_high48); + while (it.value != NULL && art_compare_keys(it.key, max_high48) < 0) { + leaf_t leaf; + bool erased = art_iterator_erase(&it, (art_val_t *)&leaf); + assert(erased); + (void)erased; + container_free(get_container(r, leaf), get_typecode(leaf)); + remove_container(r, leaf); } + remove_range_closed_at(r, art, max_high48, 0, max_low16); } -// Returns the number of distinct high 32-bit entries in the bitmap. 
-static inline uint64_t count_high32(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - uint64_t high32_count = 0; - uint32_t prev_high32 = 0; +void roaring64_bitmap_clear(roaring64_bitmap_t *r) { + roaring64_bitmap_remove_range_closed(r, 0, UINT64_MAX); +} + +uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint64_t cardinality = 0; while (it.value != NULL) { - uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); - if (high32_count == 0 || prev_high32 != current_high32) { - high32_count++; - prev_high32 = current_high32; - } + leaf_t leaf = (leaf_t)*it.value; + cardinality += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); art_iterator_next(&it); } - return high32_count; + return cardinality; } -// Frees the (32-bit!) bitmap without freeing the containers. -static inline void roaring_bitmap_free_without_containers(roaring_bitmap_t *r) { - ra_clear_without_containers(&r->high_low_container); - roaring_free(r); +uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max) { + if (min >= max) { + return 0; + } + // Convert to a closed range + // No underflow here: passing the above condition implies min < max, so + // there is a number less than max + return roaring64_bitmap_range_closed_cardinality(r, min, max - 1); } -size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { - // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations - size_t size = 0; - - // Write as uint64 the distinct number of "buckets", where a bucket is - // defined as the most significant 32 bits of an element. 
- uint64_t high32_count; - size += sizeof(high32_count); - - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - uint32_t prev_high32 = 0; - roaring_bitmap_t *bitmap32 = NULL; - - // Iterate through buckets ordered by increasing keys. - while (it.value != NULL) { - uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); - if (bitmap32 == NULL || prev_high32 != current_high32) { - if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the bucket. - size += sizeof(prev_high32); +uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max) { + if (min > max) { + return 0; + } - // Write the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. - size += roaring_bitmap_portable_size_in_bytes(bitmap32); - roaring_bitmap_free_without_containers(bitmap32); - } + uint64_t cardinality = 0; + uint8_t min_high48[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48); + uint8_t max_high48[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48); - // Start a new 32-bit bitmap with the current high 32 bits. - art_iterator_t it2 = it; - uint32_t containers_with_high32 = 0; - while (it2.value != NULL && (uint32_t)(combine_key(it2.key, 0) >> - 32) == current_high32) { - containers_with_high32++; - art_iterator_next(&it2); - } - bitmap32 = - roaring_bitmap_create_with_capacity(containers_with_high32); + art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); + while (it.value != NULL) { + int max_compare_result = compare_high48(it.key, max_high48); + if (max_compare_result > 0) { + // We're outside the range. + break; + } - prev_high32 = current_high32; + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); + if (max_compare_result == 0) { + // We're at the max high key, add only the range up to the low + // 16 bits of max. 
+ cardinality += container_rank(container, typecode, max_low16); + } else { + // We're not yet at the max high key, add the full container + // range. + cardinality += container_get_cardinality(container, typecode); + } + if (compare_high48(it.key, min_high48) == 0 && min_low16 > 0) { + // We're at the min high key, remove the range up to the low 16 + // bits of min. + cardinality -= container_rank(container, typecode, min_low16 - 1); } - leaf_t *leaf = (leaf_t *)it.value; - ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high32 >> 16), leaf->container, - leaf->typecode); art_iterator_next(&it); } + return cardinality; +} - if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the bucket. - size += sizeof(prev_high32); +bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) { + return art_is_empty(&r->art); +} - // Write the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. - size += roaring_bitmap_portable_size_in_bytes(bitmap32); - roaring_bitmap_free_without_containers(bitmap32); +uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + if (it.value == NULL) { + return UINT64_MAX; } - - return size; + leaf_t leaf = (leaf_t)*it.value; + return combine_key( + it.key, container_minimum(get_container(r, leaf), get_typecode(leaf))); } -size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, - char *buf) { - // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations - if (buf == NULL) { +uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/false); + if (it.value == NULL) { return 0; } - const char *initial_buf = buf; - - // Write as uint64 the distinct number of "buckets", where a bucket is - // defined as the most significant 32 bits of an element. 
- uint64_t high32_count = count_high32(r); - memcpy(buf, &high32_count, sizeof(high32_count)); - buf += sizeof(high32_count); + leaf_t leaf = (leaf_t)*it.value; + return combine_key( + it.key, container_maximum(get_container(r, leaf), get_typecode(leaf))); +} +bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - uint32_t prev_high32 = 0; - roaring_bitmap_t *bitmap32 = NULL; - - // Iterate through buckets ordered by increasing keys. + bool has_run_container = false; while (it.value != NULL) { - uint64_t current_high48 = combine_key(it.key, 0); - uint32_t current_high32 = (uint32_t)(current_high48 >> 32); - if (bitmap32 == NULL || prev_high32 != current_high32) { - if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the bucket. - memcpy(buf, &prev_high32, sizeof(prev_high32)); - buf += sizeof(prev_high32); + leaf_t *leaf = (leaf_t *)it.value; + uint8_t new_typecode; + // We don't need to free the existing container if a new one was + // created, convert_run_optimize does that internally. + container_t *new_container = convert_run_optimize( + get_container(r, *leaf), get_typecode(*leaf), &new_typecode); + replace_container(r, leaf, new_container, new_typecode); + has_run_container |= new_typecode == RUN_CONTAINER_TYPE; + art_iterator_next(&it); + } + return has_run_container; +} - // Write the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. 
- buf += roaring_bitmap_portable_serialize(bitmap32, buf); - roaring_bitmap_free_without_containers(bitmap32); - } +static void move_to_shrink(roaring64_bitmap_t *r, leaf_t *leaf) { + uint64_t idx = get_index(*leaf); + if (idx < r->first_free) { + return; + } + r->containers[r->first_free] = get_container(r, *leaf); + r->containers[idx] = NULL; + *leaf = create_leaf(r->first_free, get_typecode(*leaf)); + r->first_free = next_free_container_idx(r); +} - // Start a new 32-bit bitmap with the current high 32 bits. - art_iterator_t it2 = it; - uint32_t containers_with_high32 = 0; - while (it2.value != NULL && - (uint32_t)combine_key(it2.key, 0) == current_high32) { - containers_with_high32++; - art_iterator_next(&it2); - } - bitmap32 = - roaring_bitmap_create_with_capacity(containers_with_high32); +static inline bool is_shrunken(const roaring64_bitmap_t *r) { + return art_is_shrunken(&r->art) && r->first_free == r->capacity; +} - prev_high32 = current_high32; - } +size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { + size_t freed = art_shrink_to_fit(&r->art); + art_iterator_t it = art_init_iterator(&r->art, true); + while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; - ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high48 >> 16), leaf->container, - leaf->typecode); + freed += container_shrink_to_fit(get_container(r, *leaf), + get_typecode(*leaf)); + move_to_shrink(r, leaf); art_iterator_next(&it); } + if (is_shrunken(r)) { + return freed; + } + uint64_t new_capacity = r->first_free; + if (new_capacity < r->capacity) { + r->containers = (container_t **)roaring_realloc( + r->containers, new_capacity * sizeof(container_t *)); + freed += (r->capacity - new_capacity) * sizeof(container_t *); + r->capacity = new_capacity; + } + return freed; +} - if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the bucket. 
- memcpy(buf, &prev_high32, sizeof(prev_high32)); - buf += sizeof(prev_high32); +/** + * (For advanced users.) + * Collect statistics about the bitmap + */ +void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, + roaring64_statistics_t *stat) { + memset(stat, 0, sizeof(*stat)); + stat->min_value = roaring64_bitmap_minimum(r); + stat->max_value = roaring64_bitmap_maximum(r); - // Write the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. - buf += roaring_bitmap_portable_serialize(bitmap32, buf); - roaring_bitmap_free_without_containers(bitmap32); + art_iterator_t it = art_init_iterator((art_t *)&r->art, true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + stat->n_containers++; + uint8_t truetype = + get_container_type(get_container(r, leaf), get_typecode(leaf)); + uint32_t card = container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); + uint32_t sbytes = + container_size_in_bytes(get_container(r, leaf), get_typecode(leaf)); + stat->cardinality += card; + switch (truetype) { + case BITSET_CONTAINER_TYPE: + stat->n_bitset_containers++; + stat->n_values_bitset_containers += card; + stat->n_bytes_bitset_containers += sbytes; + break; + case ARRAY_CONTAINER_TYPE: + stat->n_array_containers++; + stat->n_values_array_containers += card; + stat->n_bytes_array_containers += sbytes; + break; + case RUN_CONTAINER_TYPE: + stat->n_run_containers++; + stat->n_values_run_containers += card; + stat->n_bytes_run_containers += sbytes; + break; + default: + assert(false); + roaring_unreachable; + } + art_iterator_next(&it); } +} - return buf - initial_buf; +static bool roaring64_leaf_internal_validate(const art_val_t val, + const char **reason, + void *context) { + leaf_t leaf = (leaf_t)val; + roaring64_bitmap_t *r = (roaring64_bitmap_t *)context; + return container_internal_validate(get_container(r, leaf), + get_typecode(leaf), reason); } -size_t roaring64_bitmap_portable_deserialize_size(const char 
*buf, - size_t maxbytes) { - // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations - if (buf == NULL) { - return 0; - } - size_t read_bytes = 0; +bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, + const char **reason) { + return art_internal_validate(&r->art, reason, + roaring64_leaf_internal_validate, (void *)r); +} - // Read as uint64 the distinct number of "buckets", where a bucket is - // defined as the most significant 32 bits of an element. - uint64_t buckets; - if (read_bytes + sizeof(buckets) > maxbytes) { - return 0; - } - memcpy(&buckets, buf, sizeof(buckets)); - buf += sizeof(buckets); - read_bytes += sizeof(buckets); +bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - // Buckets should be 32 bits with 4 bits of zero padding. - if (buckets > UINT32_MAX) { - return 0; + while (it1.value != NULL && it2.value != NULL) { + if (compare_high48(it1.key, it2.key) != 0) { + return false; + } + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + if (!container_equals(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2))) { + return false; + } + art_iterator_next(&it1); + art_iterator_next(&it2); } + return it1.value == NULL && it2.value == NULL; +} - // Iterate through buckets ordered by increasing keys. - for (uint64_t bucket = 0; bucket < buckets; ++bucket) { - // Read as uint32 the most significant 32 bits of the bucket. 
- uint32_t high32; - if (read_bytes + sizeof(high32) > maxbytes) { - return 0; +bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + + while (it1.value != NULL) { + bool it2_present = it2.value != NULL; + + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + if (!container_is_subset( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2))) { + return false; + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } } - buf += sizeof(high32); - read_bytes += sizeof(high32); + if (!it2_present || compare_result < 0) { + return false; + } else if (compare_result > 0) { + art_iterator_lower_bound(&it2, it1.key); + } + } + return true; +} - // Read the 32-bit Roaring bitmaps representing the least significant - // bits of a set of elements. - size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( - buf, maxbytes - read_bytes); - if (bitmap32_size == 0) { - return 0; +bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + return roaring64_bitmap_get_cardinality(r1) < + roaring64_bitmap_get_cardinality(r2) && + roaring64_bitmap_is_subset(r1, r2); +} + +roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); + + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + + while (it1.value != NULL && it2.value != NULL) { + // Cases: + // 1. it1 < it2 -> it1++ + // 2. it1 == it1 -> output it1 & it2, it1++, it2++ + // 3. 
it1 > it2 -> it2++ + int compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2: iterators at the same high key position. + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_and(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + } else { + container_free(result_container, result_typecode); + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } else if (compare_result < 0) { + // Case 1: it1 is before it2. + art_iterator_lower_bound(&it1, it2.key); + } else { + // Case 3: it2 is before it1. + art_iterator_lower_bound(&it2, it1.key); } - buf += bitmap32_size; - read_bytes += bitmap32_size; } - return read_bytes; + return result; } -roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( - const char *buf, size_t maxbytes) { - // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations - if (buf == NULL) { - return NULL; - } - size_t read_bytes = 0; +uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t result = 0; - // Read as uint64 the distinct number of "buckets", where a bucket is - // defined as the most significant 32 bits of an element. - uint64_t buckets; - if (read_bytes + sizeof(buckets) > maxbytes) { - return NULL; + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + + while (it1.value != NULL && it2.value != NULL) { + // Cases: + // 1. it1 < it2 -> it1++ + // 2. it1 == it1 -> output cardinaltiy it1 & it2, it1++, it2++ + // 3. 
it1 > it2 -> it2++ + int compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2: iterators at the same high key position. + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + result += container_and_cardinality( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2)); + art_iterator_next(&it1); + art_iterator_next(&it2); + } else if (compare_result < 0) { + // Case 1: it1 is before it2. + art_iterator_lower_bound(&it1, it2.key); + } else { + // Case 3: it2 is before it1. + art_iterator_lower_bound(&it2, it1.key); + } } - memcpy(&buckets, buf, sizeof(buckets)); - buf += sizeof(buckets); - read_bytes += sizeof(buckets); + return result; +} - // Buckets should be 32 bits with 4 bits of zero padding. - if (buckets > UINT32_MAX) { - return NULL; +// Inplace and (modifies its first argument). +void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + if (r1 == r2) { + return; } + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - roaring64_bitmap_t *r = roaring64_bitmap_create(); - // Iterate through buckets ordered by increasing keys. - for (uint64_t bucket = 0; bucket < buckets; ++bucket) { - // Read as uint32 the most significant 32 bits of the bucket. - uint32_t high32; - if (read_bytes + sizeof(high32) > maxbytes) { - roaring64_bitmap_free(r); - return NULL; - } - memcpy(&high32, buf, sizeof(high32)); - buf += sizeof(high32); - read_bytes += sizeof(high32); + while (it1.value != NULL) { + // Cases: + // 1. !it2_present -> erase it1 + // 2. it2_present + // a. it1 < it2 -> erase it1 + // b. it1 == it2 -> output it1 & it2, it1++, it2++ + // c. 
it1 > it2 -> it2++ + bool it2_present = it2.value != NULL; + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2a: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; - // Read the 32-bit Roaring bitmaps representing the least significant - // bits of a set of elements. - size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( - buf, maxbytes - read_bytes); - if (bitmap32_size == 0) { - roaring64_bitmap_free(r); - return NULL; - } + // We do the computation "in place" only when c1 is not a + // shared container. Rationale: using a shared container + // safely with in place computation would require making a + // copy and then doing the computation in place which is + // likely less efficient than avoiding in place entirely and + // always generating a new container. + uint8_t typecode = get_typecode(*leaf1); + container_t *container = get_container(r1, *leaf1); + uint8_t typecode2; + container_t *container2; + if (typecode == SHARED_CONTAINER_TYPE) { + container2 = container_and(container, typecode, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } else { + container2 = container_iand( + container, typecode, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } - roaring_bitmap_t *bitmap32 = roaring_bitmap_portable_deserialize_safe( - buf, maxbytes - read_bytes); - if (bitmap32 == NULL) { - roaring64_bitmap_free(r); - return NULL; + if (container2 != container) { + container_free(container, typecode); + } + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + art_iterator_erase(&it1, NULL); + remove_container(r1, *leaf1); + } else { + if (container2 != container) { + replace_container(r1, leaf1, container2, typecode2); + } + // Only advance the iterator if we didn't delete the + // leaf, as erasing advances by itself. 
+ art_iterator_next(&it1); + } + art_iterator_next(&it2); + } } - buf += bitmap32_size; - read_bytes += bitmap32_size; - // Insert all containers of the 32-bit bitmap into the 64-bit bitmap. - uint32_t r32_size = ra_get_size(&bitmap32->high_low_container); - for (size_t i = 0; i < r32_size; ++i) { - uint16_t key16 = - ra_get_key_at_index(&bitmap32->high_low_container, (uint16_t)i); - uint8_t typecode; - container_t *container = ra_get_container_at_index( - &bitmap32->high_low_container, (uint16_t)i, &typecode); - - uint64_t high48_bits = - (((uint64_t)high32) << 32) | (((uint64_t)key16) << 16); - uint8_t high48[ART_KEY_BYTES]; - split_key(high48_bits, high48); - leaf_t *leaf = create_leaf(container, typecode); - art_insert(&r->art, high48, (art_val_t *)leaf); + if (!it2_present || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + leaf_t leaf; + bool erased = art_iterator_erase(&it1, (art_val_t *)&leaf); + assert(erased); + (void)erased; + container_free(get_container(r1, leaf), get_typecode(leaf)); + remove_container(r1, leaf); + } else if (compare_result > 0) { + // Case 2c: it1 is after it2. 
+ art_iterator_lower_bound(&it2, it1.key); } - roaring_bitmap_free_without_containers(bitmap32); } - return r; } -bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, - roaring_iterator64 iterator, void *ptr) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - while (it.value != NULL) { - uint64_t high48 = combine_key(it.key, 0); - uint64_t high32 = high48 & 0xFFFFFFFF00000000ULL; - uint32_t low32 = high48; - leaf_t *leaf = (leaf_t *)it.value; - if (!container_iterate64(leaf->container, leaf->typecode, low32, - iterator, high32, ptr)) { - return false; +bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + bool intersect = false; + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + + while (it1.value != NULL && it2.value != NULL) { + // Cases: + // 1. it1 < it2 -> it1++ + // 2. it1 == it1 -> intersect |= it1 & it2, it1++, it2++ + // 3. it1 > it2 -> it2++ + int compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2: iterators at the same high key position. + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + intersect |= container_intersect( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2)); + art_iterator_next(&it1); + art_iterator_next(&it2); + } else if (compare_result < 0) { + // Case 1: it1 is before it2. + art_iterator_lower_bound(&it1, it2.key); + } else { + // Case 3: it2 is before it1. 
+ art_iterator_lower_bound(&it2, it1.key); } - art_iterator_next(&it); } - return true; + return intersect; } -void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r, - uint64_t *out) { - roaring64_iterator_t it; // gets initialized in the next line +bool roaring64_bitmap_intersect_with_range(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max) { + if (min >= max) { + return false; + } + roaring64_iterator_t it; roaring64_iterator_init_at(r, &it, /*first=*/true); - roaring64_iterator_read(&it, out, UINT64_MAX); -} - -roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r) { - roaring64_iterator_t *it = - (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); - return roaring64_iterator_init_at(r, it, /*first=*/true); -} - -roaring64_iterator_t *roaring64_iterator_create_last( - const roaring64_bitmap_t *r) { - roaring64_iterator_t *it = - (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); - return roaring64_iterator_init_at(r, it, /*first=*/false); -} - -void roaring64_iterator_reinit(const roaring64_bitmap_t *r, - roaring64_iterator_t *it) { - roaring64_iterator_init_at(r, it, /*first=*/true); -} - -void roaring64_iterator_reinit_last(const roaring64_bitmap_t *r, - roaring64_iterator_t *it) { - roaring64_iterator_init_at(r, it, /*first=*/false); + if (!roaring64_iterator_move_equalorlarger(&it, min)) { + return false; + } + return roaring64_iterator_has_value(&it) && + roaring64_iterator_value(&it) < max; } -roaring64_iterator_t *roaring64_iterator_copy(const roaring64_iterator_t *it) { - roaring64_iterator_t *new_it = - (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); - memcpy(new_it, it, sizeof(*it)); - return new_it; +double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t c2 = roaring64_bitmap_get_cardinality(r2); + uint64_t inter = 
roaring64_bitmap_and_cardinality(r1, r2); + return (double)inter / (double)(c1 + c2 - inter); } -void roaring64_iterator_free(roaring64_iterator_t *it) { roaring_free(it); } +roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); -bool roaring64_iterator_has_value(const roaring64_iterator_t *it) { - return it->has_value; -} + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); -uint64_t roaring64_iterator_value(const roaring64_iterator_t *it) { - return it->value; -} + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; -bool roaring64_iterator_advance(roaring64_iterator_t *it) { - if (it->art_it.value == NULL) { - if (it->saturated_forward) { - return (it->has_value = false); + // Cases: + // 1. it1_present && !it2_present -> output it1, it1++ + // 2. !it1_present && it2_present -> output it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> output it1, it1++ + // b. it1 == it2 -> output it1 | it2, it1++, it2++ + // c. it1 > it2 -> output it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. 
+ leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_or(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + art_iterator_next(&it1); + art_iterator_next(&it2); + } } - roaring64_iterator_init_at(it->parent, it, /*first=*/true); - return it->has_value; - } - leaf_t *leaf = (leaf_t *)it->art_it.value; - uint16_t low16 = (uint16_t)it->value; - if (container_iterator_next(leaf->container, leaf->typecode, - &it->container_it, &low16)) { - it->value = it->high48 | low16; - return (it->has_value = true); - } - if (art_iterator_next(&it->art_it)) { - return roaring64_iterator_init_at_leaf_first(it); - } - it->saturated_forward = true; - return (it->has_value = false); -} - -bool roaring64_iterator_previous(roaring64_iterator_t *it) { - if (it->art_it.value == NULL) { - if (!it->saturated_forward) { - // Saturated backward. - return (it->has_value = false); + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. 
+ leaf_t result_leaf = + copy_leaf_container(r2, result, (leaf_t)*it2.value); + art_insert(&result->art, it2.key, (art_val_t)result_leaf); + art_iterator_next(&it2); } - roaring64_iterator_init_at(it->parent, it, /*first=*/false); - return it->has_value; - } - leaf_t *leaf = (leaf_t *)it->art_it.value; - uint16_t low16 = (uint16_t)it->value; - if (container_iterator_prev(leaf->container, leaf->typecode, - &it->container_it, &low16)) { - it->value = it->high48 | low16; - return (it->has_value = true); } - if (art_iterator_prev(&it->art_it)) { - return roaring64_iterator_init_at_leaf_last(it); - } - it->saturated_forward = false; // Saturated backward. - return (it->has_value = false); + return result; } -bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, - uint64_t val) { - uint8_t val_high48[ART_KEY_BYTES]; - uint16_t val_low16 = split_key(val, val_high48); - if (!it->has_value || it->high48 != (val & 0xFFFFFFFFFFFF0000)) { - // The ART iterator is before or after the high48 bits of `val` (or - // beyond the ART altogether), so we need to move to a leaf with a key - // equal or greater. - if (!art_iterator_lower_bound(&it->art_it, val_high48)) { - // Only smaller keys found. - it->saturated_forward = true; - return (it->has_value = false); - } - it->high48 = combine_key(it->art_it.key, 0); - // Fall through to the next if statement. - } +uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t c2 = roaring64_bitmap_get_cardinality(r2); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return c1 + c2 - inter; +} - if (it->high48 == (val & 0xFFFFFFFFFFFF0000)) { - // We're at equal high bits, check if a suitable value can be found in - // this container. 
- leaf_t *leaf = (leaf_t *)it->art_it.value; - uint16_t low16 = (uint16_t)it->value; - if (container_iterator_lower_bound(leaf->container, leaf->typecode, - &it->container_it, &low16, - val_low16)) { - it->value = it->high48 | low16; - return (it->has_value = true); - } - // Only smaller entries in this container, move to the next. - if (!art_iterator_next(&it->art_it)) { - it->saturated_forward = true; - return (it->has_value = false); - } +void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + if (r1 == r2) { + return; } + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - // We're at a leaf with high bits greater than `val`, so the first entry in - // this container is our result. - return roaring64_iterator_init_at_leaf_first(it); -} + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; -uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, - uint64_t count) { - uint64_t consumed = 0; - while (it->has_value && consumed < count) { - uint32_t container_consumed; - leaf_t *leaf = (leaf_t *)it->art_it.value; - uint16_t low16 = (uint16_t)it->value; - uint32_t container_count = UINT32_MAX; - if (count - consumed < (uint64_t)UINT32_MAX) { - container_count = count - consumed; - } - bool has_value = container_iterator_read_into_uint64( - leaf->container, leaf->typecode, &it->container_it, it->high48, buf, - container_count, &container_consumed, &low16); - consumed += container_consumed; - buf += container_consumed; - if (has_value) { - it->has_value = true; - it->value = it->high48 | low16; - assert(consumed == count); - return consumed; + // Cases: + // 1. it1_present && !it2_present -> it1++ + // 2. !it1_present && it2_present -> add it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> it1++ + // b. 
it1 == it2 -> it1 | it2, it1++, it2++ + // c. it1 > it2 -> add it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); + uint8_t typecode2; + container_t *container2; + if (get_typecode(*leaf1) == SHARED_CONTAINER_TYPE) { + container2 = container_or(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } else { + container2 = container_ior(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r1, leaf1, container2, typecode2); + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } } - it->has_value = art_iterator_next(&it->art_it); - if (it->has_value) { - roaring64_iterator_init_at_leaf_first(it); + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. 
+ leaf_t result_leaf = + copy_leaf_container(r2, r1, (leaf_t)*it2.value); + art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); + art_iterator_next(&it2); } } - return consumed; } -#ifdef __cplusplus -} // extern "C" -} // namespace roaring -} // namespace api -#endif -/* end file src/roaring64.c */ -/* begin file src/roaring_array.c */ -#include -#include -#include -#include -#include -#include - - -#ifdef __cplusplus -extern "C" { -namespace roaring { -namespace internal { -#endif +roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); -// Convention: [0,ra->size) all elements are initialized -// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); -extern inline int32_t ra_get_size(const roaring_array_t *ra); -extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; -extern inline container_t *ra_get_container_at_index(const roaring_array_t *ra, - uint16_t i, - uint8_t *typecode); + // Cases: + // 1. it1_present && !it2_present -> output it1, it1++ + // 2. !it1_present && it2_present -> output it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> output it1, it1++ + // b. it1 == it2 -> output it1 ^ it2, it1++, it2++ + // c. it1 > it2 -> output it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. 
+ leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_xor(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = add_container(result, result_container, + result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + } else { + container_free(result_container, result_typecode); + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } + } + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. 
+ leaf_t result_leaf = + copy_leaf_container(r2, result, (leaf_t)*it2.value); + art_insert(&result->art, it2.key, (art_val_t)result_leaf); + art_iterator_next(&it2); + } + } + return result; +} -extern inline void ra_unshare_container_at_index(roaring_array_t *ra, - uint16_t i); +uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t c2 = roaring64_bitmap_get_cardinality(r2); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return c1 + c2 - 2 * inter; +} -extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, - int32_t i, - uint16_t key, - container_t *c, - uint8_t typecode); +void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + assert(r1 != r2); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); -extern inline void ra_set_container_at_index(const roaring_array_t *ra, - int32_t i, container_t *c, - uint8_t typecode); + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; -static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { - // - // Note: not implemented using C's realloc(), because the memory layout is - // Struct-of-Arrays vs. Array-of-Structs: - // https://github.com/RoaringBitmap/CRoaring/issues/256 + // Cases: + // 1. it1_present && !it2_present -> it1++ + // 2. !it1_present && it2_present -> add it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> it1++ + // b. it1 == it2 -> it1 ^ it2, it1++, it2++ + // c. it1 > it2 -> add it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. 
+ leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); + uint8_t typecode2; + container_t *container2; + if (typecode1 == SHARED_CONTAINER_TYPE) { + container2 = container_xor(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + if (container2 != container1) { + // We only free when doing container_xor, not + // container_ixor, as ixor frees the original + // internally. + container_free(container1, typecode1); + } + } else { + container2 = container_ixor( + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } - if (new_capacity == 0) { - roaring_free(ra->containers); - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; - ra->allocation_size = 0; - return true; - } - const size_t memoryneeded = - new_capacity * - (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); - void *bigalloc = roaring_malloc(memoryneeded); - if (!bigalloc) return false; - void *oldbigalloc = ra->containers; - container_t **newcontainers = (container_t **)bigalloc; - uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); - uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); - assert((char *)(newtypecodes + new_capacity) == - (char *)bigalloc + memoryneeded); - if (ra->size > 0) { - memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size); - memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); - memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + bool erased = art_iterator_erase(&it1, NULL); + assert(erased); + (void)erased; + remove_container(r1, *leaf1); + } else { + if (container2 != container1) { + replace_container(r1, leaf1, container2, typecode2); + } + // Only advance the iterator if we didn't delete the + // leaf, as 
erasing advances by itself. + art_iterator_next(&it1); + } + art_iterator_next(&it2); + } + } + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. + leaf_t result_leaf = + copy_leaf_container(r2, r1, (leaf_t)*it2.value); + if (it1_present) { + art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); + art_iterator_next(&it1); + } else { + art_insert(&r1->art, it2.key, (art_val_t)result_leaf); + } + art_iterator_next(&it2); + } } - ra->containers = newcontainers; - ra->keys = newkeys; - ra->typecodes = newtypecodes; - ra->allocation_size = new_capacity; - roaring_free(oldbigalloc); - return true; } -bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { - if (!new_ra) return false; - ra_init(new_ra); +roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); - // Containers hold 64Ki elements, so 64Ki containers is enough to hold - // `0x10000 * 0x10000` (all 2^32) elements - if (cap > 0x10000) { - cap = 0x10000; - } + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - if (cap > 0) { - void *bigalloc = roaring_malloc( - cap * (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t))); - if (bigalloc == NULL) return false; - new_ra->containers = (container_t **)bigalloc; - new_ra->keys = (uint16_t *)(new_ra->containers + cap); - new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); - // Narrowing is safe because of above check - new_ra->allocation_size = (int32_t)cap; + while (it1.value != NULL) { + // Cases: + // 1. it1_present && !it2_present -> output it1, it1++ + // 2. it1_present && it2_present + // a. 
it1 < it2 -> output it1, it1++ + // b. it1 == it2 -> output it1 - it2, it1++, it2++ + // c. it1 > it2 -> it2++ + bool it2_present = it2.value != NULL; + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = container_andnot( + get_container(r1, *leaf1), get_typecode(*leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = add_container(result, result_container, + result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + } else { + container_free(result_container, result_typecode); + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } + } + if (!it2_present || compare_result < 0) { + // Cases 1 and 2a: it1 is the only iterator or is before it2. + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + art_iterator_next(&it1); + } else if (compare_result > 0) { + // Case 2c: it1 is after it2. 
+ art_iterator_next(&it2); + } } - return true; + return result; } -int ra_shrink_to_fit(roaring_array_t *ra) { - int savings = (ra->allocation_size - ra->size) * - (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); - if (!realloc_array(ra, ra->size)) { - return 0; - } - ra->allocation_size = ra->size; - return savings; +uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return c1 - inter; } -void ra_init(roaring_array_t *new_ra) { - if (!new_ra) { - return; - } - new_ra->keys = NULL; - new_ra->containers = NULL; - new_ra->typecodes = NULL; +void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - new_ra->allocation_size = 0; - new_ra->size = 0; - new_ra->flags = 0; -} + while (it1.value != NULL) { + // Cases: + // 1. it1_present && !it2_present -> it1++ + // 2. it1_present && it2_present + // a. it1 < it2 -> it1++ + // b. it1 == it2 -> it1 - it2, it1++, it2++ + // c. it1 > it2 -> it2++ + bool it2_present = it2.value != NULL; + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2b: iterators at the same high key position. 
+ leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); + uint8_t typecode2; + container_t *container2; + if (typecode1 == SHARED_CONTAINER_TYPE) { + container2 = container_andnot( + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + if (container2 != container1) { + // We only free when doing container_andnot, not + // container_iandnot, as iandnot frees the original + // internally. + container_free(container1, typecode1); + } + } else { + container2 = container_iandnot( + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } -bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write) { - ra_clear_containers(dest); // we are going to overwrite them - if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size - dest->size = 0; // <--- This is important. - return true; // output was just cleared, so they match - } - if (dest->allocation_size < source->size) { - if (!realloc_array(dest, source->size)) { - return false; - } - } - dest->size = source->size; - memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); - // we go through the containers, turning them into shared containers... 
- if (copy_on_write) { - for (int32_t i = 0; i < dest->size; ++i) { - source->containers[i] = get_copy_of_container( - source->containers[i], &source->typecodes[i], copy_on_write); - } - // we do a shallow copy to the other bitmap - memcpy(dest->containers, source->containers, - dest->size * sizeof(container_t *)); - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } else { - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - for (int32_t i = 0; i < dest->size; i++) { - dest->containers[i] = - container_clone(source->containers[i], source->typecodes[i]); - if (dest->containers[i] == NULL) { - for (int32_t j = 0; j < i; j++) { - container_free(dest->containers[j], dest->typecodes[j]); + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + bool erased = art_iterator_erase(&it1, NULL); + assert(erased); + (void)erased; + remove_container(r1, *leaf1); + } else { + if (container2 != container1) { + replace_container(r1, leaf1, container2, typecode2); + } + // Only advance the iterator if we didn't delete the + // leaf, as erasing advances by itself. + art_iterator_next(&it1); } - ra_clear_without_containers(dest); - return false; + art_iterator_next(&it2); } } + if (!it2_present || compare_result < 0) { + // Cases 1 and 2a: it1 is the only iterator or is before it2. + art_iterator_next(&it1); + } else if (compare_result > 0) { + // Case 2c: it1 is after it2. + art_iterator_next(&it2); + } } - return true; } -void ra_clear_containers(roaring_array_t *ra) { - for (int32_t i = 0; i < ra->size; ++i) { - container_free(ra->containers[i], ra->typecodes[i]); +/** + * Flips the leaf at high48 in the range [min, max), adding the result to + * `r2`. If the high48 key is not found in `r1`, a new container is created. 
+ */ +static void roaring64_flip_leaf(const roaring64_bitmap_t *r1, + roaring64_bitmap_t *r2, uint8_t high48[], + uint32_t min, uint32_t max) { + leaf_t *leaf1 = (leaf_t *)art_find(&r1->art, high48); + uint8_t typecode2; + container_t *container2; + if (leaf1 == NULL) { + // No container at this key, create a full container. + container2 = container_range_of_ones(min, max, &typecode2); + } else if (min == 0 && max > 0xFFFF) { + // Flip whole container. + container2 = container_not(get_container(r1, *leaf1), + get_typecode(*leaf1), &typecode2); + } else { + // Partially flip a container. + container2 = + container_not_range(get_container(r1, *leaf1), get_typecode(*leaf1), + min, max, &typecode2); + } + if (container_nonzero_cardinality(container2, typecode2)) { + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, high48, (art_val_t)leaf2); + } else { + container_free(container2, typecode2); } } -void ra_reset(roaring_array_t *ra) { - ra_clear_containers(ra); - ra->size = 0; - ra_shrink_to_fit(ra); -} +/** + * Flips the leaf at high48 in the range [min, max). If the high48 key is + * not found in the bitmap, a new container is created. Deletes the leaf and + * associated container if the negation results in an empty range. + */ +static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], + uint32_t min, uint32_t max) { + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + container_t *container2; + uint8_t typecode2; + if (leaf == NULL) { + // No container at this key, insert a full container. 
+ container2 = container_range_of_ones(min, max, &typecode2); + leaf_t new_leaf = add_container(r, container2, typecode2); + art_insert(&r->art, high48, (art_val_t)new_leaf); + return; + } -void ra_clear_without_containers(roaring_array_t *ra) { - roaring_free( - ra->containers); // keys and typecodes are allocated with containers - ra->size = 0; - ra->allocation_size = 0; - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; + if (min == 0 && max > 0xFFFF) { + // Flip whole container. + container2 = container_inot(get_container(r, *leaf), + get_typecode(*leaf), &typecode2); + } else { + // Partially flip a container. + container2 = container_inot_range( + get_container(r, *leaf), get_typecode(*leaf), min, max, &typecode2); + } + + if (container_nonzero_cardinality(container2, typecode2)) { + replace_container(r, leaf, container2, typecode2); + } else { + bool erased = art_erase(&r->art, high48, NULL); + assert(erased); + (void)erased; + container_free(container2, typecode2); + remove_container(r, *leaf); + } } -void ra_clear(roaring_array_t *ra) { - ra_clear_containers(ra); - ra_clear_without_containers(ra); +roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max) { + if (min >= max) { + return roaring64_bitmap_copy(r); + } + return roaring64_bitmap_flip_closed(r, min, max - 1); } -bool extend_array(roaring_array_t *ra, int32_t k) { - int32_t desired_size = ra->size + k; - const int32_t max_containers = 65536; - assert(desired_size <= max_containers); - if (desired_size > ra->allocation_size) { - int32_t new_capacity = - (ra->size < 1024) ? 
2 * desired_size : 5 * desired_size / 4; - if (new_capacity > max_containers) { - new_capacity = max_containers; - } +roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, + uint64_t min, uint64_t max) { + if (min > max) { + return roaring64_bitmap_copy(r1); + } + uint8_t min_high48_key[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48_key); + uint8_t max_high48_key[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48_key); + uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; + uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; + + roaring64_bitmap_t *r2 = roaring64_bitmap_create(); + art_iterator_t it = art_init_iterator((art_t *)&r1->art, /*first=*/true); - return realloc_array(ra, new_capacity); + // Copy the containers before min unchanged. + while (it.value != NULL && compare_high48(it.key, min_high48_key) < 0) { + leaf_t leaf1 = (leaf_t)*it.value; + uint8_t typecode2 = get_typecode(leaf1); + container_t *container2 = get_copy_of_container( + get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, it.key, (art_val_t)leaf2); + art_iterator_next(&it); } - return true; -} -void ra_append(roaring_array_t *ra, uint16_t key, container_t *c, - uint8_t typecode) { - extend_array(ra, 1); - const int32_t pos = ra->size; - - ra->keys[pos] = key; - ra->containers[pos] = c; - ra->typecodes[pos] = typecode; - ra->size++; -} + // Flip the range (including non-existent containers!) between min and + // max. 
+ for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; + high48_bits++) { + uint8_t current_high48_key[ART_KEY_BYTES]; + split_key(high48_bits << 16, current_high48_key); -void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t index, bool copy_on_write) { - extend_array(ra, 1); - const int32_t pos = ra->size; + uint32_t min_container = 0; + if (high48_bits == min_high48_bits) { + min_container = min_low16; + } + uint32_t max_container = 0xFFFF + 1; // Exclusive range. + if (high48_bits == max_high48_bits) { + max_container = max_low16 + 1; // Exclusive. + } - // old contents is junk that does not need freeing - ra->keys[pos] = sa->keys[index]; - // the shared container will be in two bitmaps - if (copy_on_write) { - sa->containers[index] = get_copy_of_container( - sa->containers[index], &sa->typecodes[index], copy_on_write); - ra->containers[pos] = sa->containers[index]; - ra->typecodes[pos] = sa->typecodes[index]; - } else { - ra->containers[pos] = - container_clone(sa->containers[index], sa->typecodes[index]); - ra->typecodes[pos] = sa->typecodes[index]; + roaring64_flip_leaf(r1, r2, current_high48_key, min_container, + max_container); } - ra->size++; -} -void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t stopping_key, bool copy_on_write) { - for (int32_t i = 0; i < sa->size; ++i) { - if (sa->keys[i] >= stopping_key) break; - ra_append_copy(ra, sa, (uint16_t)i, copy_on_write); + // Copy the containers after max unchanged. 
+ it = art_upper_bound((art_t *)&r1->art, max_high48_key); + while (it.value != NULL) { + leaf_t leaf1 = (leaf_t)*it.value; + uint8_t typecode2 = get_typecode(leaf1); + container_t *container2 = get_copy_of_container( + get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, it.key, (art_val_t)leaf2); + art_iterator_next(&it); } + + return r2; } -void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - ra->typecodes[pos] = sa->typecodes[i]; - } - ra->size++; +void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min >= max) { + return; } + roaring64_bitmap_flip_closed_inplace(r, min, max - 1); } -void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t before_start, bool copy_on_write) { - int start_location = ra_get_index(sa, before_start); - if (start_location >= 0) - ++start_location; - else - start_location = -start_location - 1; - ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); -} +void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min > max) { + return; + } + uint16_t min_low16 = (uint16_t)min; + uint16_t max_low16 = (uint16_t)max; + uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; + uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; -void 
ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index) { - extend_array(ra, end_index - start_index); + // Flip the range (including non-existent containers!) between min and + // max. + for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; + high48_bits++) { + uint8_t current_high48_key[ART_KEY_BYTES]; + split_key(high48_bits << 16, current_high48_key); - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; + uint32_t min_container = 0; + if (high48_bits == min_high48_bits) { + min_container = min_low16; + } + uint32_t max_container = 0xFFFF + 1; // Exclusive range. + if (high48_bits == max_high48_bits) { + max_container = max_low16 + 1; // Exclusive. + } - ra->keys[pos] = sa->keys[i]; - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - ra->size++; + roaring64_flip_leaf_inplace(r, current_high48_key, min_container, + max_container); } } -void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - ra->typecodes[pos] = sa->typecodes[i]; +// Returns the number of distinct high 32-bit entries in the bitmap. 
+static inline uint64_t count_high32(const roaring64_bitmap_t *r) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint64_t high32_count = 0; + uint32_t prev_high32 = 0; + while (it.value != NULL) { + uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); + if (high32_count == 0 || prev_high32 != current_high32) { + high32_count++; + prev_high32 = current_high32; } - ra->size++; + art_iterator_next(&it); } + return high32_count; } -container_t *ra_get_container(roaring_array_t *ra, uint16_t x, - uint8_t *typecode) { - int i = binarySearch(ra->keys, (int32_t)ra->size, x); - if (i < 0) return NULL; - *typecode = ra->typecodes[i]; - return ra->containers[i]; +// Frees the (32-bit!) bitmap without freeing the containers. +static inline void roaring_bitmap_free_without_containers(roaring_bitmap_t *r) { + ra_clear_without_containers(&r->high_low_container); + roaring_free(r); } -extern inline container_t *ra_get_container_at_index(const roaring_array_t *ra, - uint16_t i, - uint8_t *typecode); +size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { + // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + size_t size = 0; -extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, - uint16_t i); + // Write as uint64 the distinct number of "buckets", where a bucket is + // defined as the most significant 32 bits of an element. + uint64_t high32_count; + size += sizeof(high32_count); -extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint32_t prev_high32 = 0; + roaring_bitmap_t *bitmap32 = NULL; -extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, - int32_t pos); + // Iterate through buckets ordered by increasing keys. 
+ while (it.value != NULL) { + uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); + if (bitmap32 == NULL || prev_high32 != current_high32) { + if (bitmap32 != NULL) { + // Write as uint32 the most significant 32 bits of the + // bucket. + size += sizeof(prev_high32); -// everything skipped over is freed -int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { - while (pos < ra->size && ra->keys[pos] < x) { - container_free(ra->containers[pos], ra->typecodes[pos]); - ++pos; + // Write the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. + size += roaring_bitmap_portable_size_in_bytes(bitmap32); + roaring_bitmap_free_without_containers(bitmap32); + } + + // Start a new 32-bit bitmap with the current high 32 bits. + art_iterator_t it2 = it; + uint32_t containers_with_high32 = 0; + while (it2.value != NULL && (uint32_t)(combine_key(it2.key, 0) >> + 32) == current_high32) { + containers_with_high32++; + art_iterator_next(&it2); + } + bitmap32 = + roaring_bitmap_create_with_capacity(containers_with_high32); + + prev_high32 = current_high32; + } + leaf_t leaf = (leaf_t)*it.value; + ra_append(&bitmap32->high_low_container, + (uint16_t)(current_high32 >> 16), get_container(r, leaf), + get_typecode(leaf)); + art_iterator_next(&it); } - return pos; -} -void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, - container_t *c, uint8_t typecode) { - extend_array(ra, 1); - // May be an optimization opportunity with DIY memmove - memmove(&(ra->keys[i + 1]), &(ra->keys[i]), - sizeof(uint16_t) * (ra->size - i)); - memmove(&(ra->containers[i + 1]), &(ra->containers[i]), - sizeof(container_t *) * (ra->size - i)); - memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), - sizeof(uint8_t) * (ra->size - i)); - ra->keys[i] = key; - ra->containers[i] = c; - ra->typecodes[i] = typecode; - ra->size++; -} + if (bitmap32 != NULL) { + // Write as uint32 the most significant 32 bits of the 
bucket. + size += sizeof(prev_high32); -// note: Java routine set things to 0, enabling GC. -// Java called it "resize" but it was always used to downsize. -// Allowing upsize would break the conventions about -// valid containers below ra->size. + // Write the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. + size += roaring_bitmap_portable_size_in_bytes(bitmap32); + roaring_bitmap_free_without_containers(bitmap32); + } -void ra_downsize(roaring_array_t *ra, int32_t new_length) { - assert(new_length <= ra->size); - ra->size = new_length; + return size; } -void ra_remove_at_index(roaring_array_t *ra, int32_t i) { - memmove(&(ra->containers[i]), &(ra->containers[i + 1]), - sizeof(container_t *) * (ra->size - i - 1)); - memmove(&(ra->keys[i]), &(ra->keys[i + 1]), - sizeof(uint16_t) * (ra->size - i - 1)); - memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), - sizeof(uint8_t) * (ra->size - i - 1)); - ra->size--; -} +size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, + char *buf) { + // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + if (buf == NULL) { + return 0; + } + const char *initial_buf = buf; -void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { - container_free(ra->containers[i], ra->typecodes[i]); - ra_remove_at_index(ra, i); -} + // Write as uint64 the distinct number of "buckets", where a bucket is + // defined as the most significant 32 bits of an element. + uint64_t high32_count = count_high32(r); + memcpy(buf, &high32_count, sizeof(high32_count)); + buf += sizeof(high32_count); -// used in inplace andNot only, to slide left the containers from -// the mutated RoaringBitmap that are after the largest container of -// the argument RoaringBitmap. In use it should be followed by a call to -// downsize. 
-// -void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, - uint32_t new_begin) { - assert(begin <= end); - assert(new_begin < begin); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint32_t prev_high32 = 0; + roaring_bitmap_t *bitmap32 = NULL; - const int range = end - begin; + // Iterate through buckets ordered by increasing keys. + while (it.value != NULL) { + uint64_t current_high48 = combine_key(it.key, 0); + uint32_t current_high32 = (uint32_t)(current_high48 >> 32); + if (bitmap32 == NULL || prev_high32 != current_high32) { + if (bitmap32 != NULL) { + // Write as uint32 the most significant 32 bits of the + // bucket. + memcpy(buf, &prev_high32, sizeof(prev_high32)); + buf += sizeof(prev_high32); - // We ensure to previously have freed overwritten containers - // that are not copied elsewhere + // Write the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. + buf += roaring_bitmap_portable_serialize(bitmap32, buf); + roaring_bitmap_free_without_containers(bitmap32); + } - memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), - sizeof(container_t *) * range); - memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), - sizeof(uint16_t) * range); - memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), - sizeof(uint8_t) * range); -} + // Start a new 32-bit bitmap with the current high 32 bits. 
+ art_iterator_t it2 = it; + uint32_t containers_with_high32 = 0; + while (it2.value != NULL && + (uint32_t)combine_key(it2.key, 0) == current_high32) { + containers_with_high32++; + art_iterator_next(&it2); + } + bitmap32 = + roaring_bitmap_create_with_capacity(containers_with_high32); -void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { - if (distance > 0) { - extend_array(ra, distance); + prev_high32 = current_high32; + } + leaf_t leaf = (leaf_t)*it.value; + ra_append(&bitmap32->high_low_container, + (uint16_t)(current_high48 >> 16), get_container(r, leaf), + get_typecode(leaf)); + art_iterator_next(&it); } - int32_t srcpos = ra->size - count; - int32_t dstpos = srcpos + distance; - memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), sizeof(uint16_t) * count); - memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), - sizeof(container_t *) * count); - memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), - sizeof(uint8_t) * count); - ra->size += distance; -} -void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { - size_t ctr = 0; - for (int32_t i = 0; i < ra->size; ++i) { - int num_added = container_to_uint32_array( - ans + ctr, ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - ctr += num_added; + if (bitmap32 != NULL) { + // Write as uint32 the most significant 32 bits of the bucket. + memcpy(buf, &prev_high32, sizeof(prev_high32)); + buf += sizeof(prev_high32); + + // Write the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. 
+ buf += roaring_bitmap_portable_serialize(bitmap32, buf); + roaring_bitmap_free_without_containers(bitmap32); } -} -bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, - size_t limit, uint32_t *ans) { - size_t ctr = 0; - size_t dtr = 0; + return buf - initial_buf; +} - size_t t_limit = 0; +size_t roaring64_bitmap_portable_deserialize_size(const char *buf, + size_t maxbytes) { + // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + if (buf == NULL) { + return 0; + } + size_t read_bytes = 0; - bool first = false; - size_t first_skip = 0; + // Read as uint64 the distinct number of "buckets", where a bucket is + // defined as the most significant 32 bits of an element. + uint64_t buckets; + if (read_bytes + sizeof(buckets) > maxbytes) { + return 0; + } + memcpy(&buckets, buf, sizeof(buckets)); + buf += sizeof(buckets); + read_bytes += sizeof(buckets); - uint32_t *t_ans = NULL; - size_t cur_len = 0; + // Buckets should be 32 bits with 4 bits of zero padding. + if (buckets > UINT32_MAX) { + return 0; + } - for (int i = 0; i < ra->size; ++i) { - const container_t *c = - container_unwrap_shared(ra->containers[i], &ra->typecodes[i]); - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: - t_limit = (const_CAST_bitset(c))->cardinality; - break; - case ARRAY_CONTAINER_TYPE: - t_limit = (const_CAST_array(c))->cardinality; - break; - case RUN_CONTAINER_TYPE: - t_limit = run_container_cardinality(const_CAST_run(c)); - break; + // Iterate through buckets ordered by increasing keys. + for (uint64_t bucket = 0; bucket < buckets; ++bucket) { + // Read as uint32 the most significant 32 bits of the bucket. 
+ uint32_t high32; + if (read_bytes + sizeof(high32) > maxbytes) { + return 0; } - if (ctr + t_limit - 1 >= offset && ctr < offset + limit) { - if (!first) { - // first_skip = t_limit - (ctr + t_limit - offset); - first_skip = offset - ctr; - first = true; - t_ans = (uint32_t *)roaring_malloc(sizeof(*t_ans) * - (first_skip + limit)); - if (t_ans == NULL) { - return false; - } - memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)); - cur_len = first_skip + limit; - } - if (dtr + t_limit > cur_len) { - uint32_t *append_ans = (uint32_t *)roaring_malloc( - sizeof(*append_ans) * (cur_len + t_limit)); - if (append_ans == NULL) { - if (t_ans != NULL) roaring_free(t_ans); - return false; - } - memset(append_ans, 0, - sizeof(*append_ans) * (cur_len + t_limit)); - cur_len = cur_len + t_limit; - memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); - roaring_free(t_ans); - t_ans = append_ans; - } - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: - container_to_uint32_array(t_ans + dtr, const_CAST_bitset(c), - ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case ARRAY_CONTAINER_TYPE: - container_to_uint32_array(t_ans + dtr, const_CAST_array(c), - ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case RUN_CONTAINER_TYPE: - container_to_uint32_array(t_ans + dtr, const_CAST_run(c), - ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - } - dtr += t_limit; + buf += sizeof(high32); + read_bytes += sizeof(high32); + + // Read the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. 
+ size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( + buf, maxbytes - read_bytes); + if (bitmap32_size == 0) { + return 0; } - ctr += t_limit; - if (dtr - first_skip >= limit) break; - } - if (t_ans != NULL) { - memcpy(ans, t_ans + first_skip, limit * sizeof(uint32_t)); - free(t_ans); + buf += bitmap32_size; + read_bytes += bitmap32_size; } - return true; + return read_bytes; } -bool ra_has_run_container(const roaring_array_t *ra) { - for (int32_t k = 0; k < ra->size; ++k) { - if (get_container_type(ra->containers[k], ra->typecodes[k]) == - RUN_CONTAINER_TYPE) - return true; +roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( + const char *buf, size_t maxbytes) { + // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + if (buf == NULL) { + return NULL; } - return false; -} + size_t read_bytes = 0; + + // Read as uint64 the distinct number of "buckets", where a bucket is + // defined as the most significant 32 bits of an element. + uint64_t buckets; + if (read_bytes + sizeof(buckets) > maxbytes) { + return NULL; + } + memcpy(&buckets, buf, sizeof(buckets)); + buf += sizeof(buckets); + read_bytes += sizeof(buckets); + + // Buckets should be 32 bits with 4 bits of zero padding. + if (buckets > UINT32_MAX) { + return NULL; + } + + roaring64_bitmap_t *r = roaring64_bitmap_create(); + // Iterate through buckets ordered by increasing keys. + int64_t previous_high32 = -1; + for (uint64_t bucket = 0; bucket < buckets; ++bucket) { + // Read as uint32 the most significant 32 bits of the bucket. + uint32_t high32; + if (read_bytes + sizeof(high32) > maxbytes) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&high32, buf, sizeof(high32)); + buf += sizeof(high32); + read_bytes += sizeof(high32); + // High 32 bits must be strictly increasing. 
+ if (high32 <= previous_high32) { + roaring64_bitmap_free(r); + return NULL; + } + previous_high32 = high32; + + // Read the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. + size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( + buf, maxbytes - read_bytes); + if (bitmap32_size == 0) { + roaring64_bitmap_free(r); + return NULL; + } + + roaring_bitmap_t *bitmap32 = roaring_bitmap_portable_deserialize_safe( + buf, maxbytes - read_bytes); + if (bitmap32 == NULL) { + roaring64_bitmap_free(r); + return NULL; + } + buf += bitmap32_size; + read_bytes += bitmap32_size; -uint32_t ra_portable_header_size(const roaring_array_t *ra) { - if (ra_has_run_container(ra)) { - if (ra->size < - NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets - return 4 + (ra->size + 7) / 8 + 4 * ra->size; + // While we don't attempt to validate much, we must ensure that there + // is no duplication in the high 48 bits - inserting into the ART + // assumes (or UB) no duplicate keys. The top 32 bits must be unique + // because we check for strict increasing values of high32, but we + // must also ensure the top 16 bits within each 32-bit bitmap are also + // at least unique (we ensure they're strictly increasing as well, + // which they must be for a _valid_ bitmap, since it's cheaper to check) + int32_t last_bitmap_key = -1; + for (int i = 0; i < bitmap32->high_low_container.size; i++) { + uint16_t key = bitmap32->high_low_container.keys[i]; + if (key <= last_bitmap_key) { + roaring_bitmap_free(bitmap32); + roaring64_bitmap_free(r); + return NULL; + } + last_bitmap_key = key; } - return 4 + (ra->size + 7) / 8 + - 8 * ra->size; // - 4 because we pack the size with the cookie - } else { - return 4 + 4 + 8 * ra->size; + + // Insert all containers of the 32-bit bitmap into the 64-bit bitmap. 
+ move_from_roaring32_offset(r, bitmap32, high32); + roaring_bitmap_free(bitmap32); } + return r; } -size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { - size_t count = ra_portable_header_size(ra); - - for (int32_t k = 0; k < ra->size; ++k) { - count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); +// Returns an "element count" for the given container. This has a different +// meaning for each container type, but the purpose is the minimal information +// required to serialize the container metadata. +static inline uint32_t container_get_element_count(const container_t *c, + uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + return ((bitset_container_t *)c)->cardinality; + } + case ARRAY_CONTAINER_TYPE: { + return ((array_container_t *)c)->cardinality; + } + case RUN_CONTAINER_TYPE: { + return ((run_container_t *)c)->n_runs; + } + default: { + assert(false); + roaring_unreachable; + return 0; + } } - return count; } -// This function is endian-sensitive. 
-size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { - char *initbuf = buf; - uint32_t startOffset = 0; - bool hasrun = ra_has_run_container(ra); - if (hasrun) { - uint32_t cookie = SERIAL_COOKIE | ((uint32_t)(ra->size - 1) << 16); - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - uint32_t s = (ra->size + 7) / 8; - uint8_t *bitmapOfRunContainers = (uint8_t *)roaring_calloc(s, 1); - assert(bitmapOfRunContainers != NULL); // todo: handle - for (int32_t i = 0; i < ra->size; ++i) { - if (get_container_type(ra->containers[i], ra->typecodes[i]) == - RUN_CONTAINER_TYPE) { - bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); - } +static inline size_t container_get_frozen_size(const container_t *c, + uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); } - memcpy(buf, bitmapOfRunContainers, s); - buf += s; - roaring_free(bitmapOfRunContainers); - if (ra->size < NO_OFFSET_THRESHOLD) { - startOffset = 4 + 4 * ra->size + s; - } else { - startOffset = 4 + 8 * ra->size + s; + case ARRAY_CONTAINER_TYPE: { + return container_get_element_count(c, typecode) * sizeof(uint16_t); } - } else { // backwards compatibility - uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; - - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - memcpy(buf, &ra->size, sizeof(ra->size)); - buf += sizeof(ra->size); - - startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; - } - for (int32_t k = 0; k < ra->size; ++k) { - memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); - buf += sizeof(ra->keys[k]); - // get_cardinality returns a value in [1,1<<16], subtracting one - // we get [0,1<<16 - 1] which fits in 16 bits - uint16_t card = (uint16_t)(container_get_cardinality(ra->containers[k], - ra->typecodes[k]) - - 1); - memcpy(buf, &card, sizeof(card)); - buf += sizeof(card); - } - if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { - // writing the containers offsets - for (int32_t k = 0; k < 
ra->size; k++) { - memcpy(buf, &startOffset, sizeof(startOffset)); - buf += sizeof(startOffset); - startOffset = - startOffset + - container_size_in_bytes(ra->containers[k], ra->typecodes[k]); + case RUN_CONTAINER_TYPE: { + return container_get_element_count(c, typecode) * sizeof(rle16_t); + } + default: { + assert(false); + roaring_unreachable; + return 0; } } - for (int32_t k = 0; k < ra->size; ++k) { - buf += container_write(ra->containers[k], ra->typecodes[k], buf); - } - return buf - initbuf; } -// Quickly checks whether there is a serialized bitmap at the pointer, -// not exceeding size "maxbytes" in bytes. This function does not allocate -// memory dynamically. -// -// This function returns 0 if and only if no valid bitmap is found. -// Otherwise, it returns how many bytes are occupied. -// -size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { - size_t bytestotal = sizeof(int32_t); // for cookie - if (bytestotal > maxbytes) return 0; - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - return 0; - } - int32_t size; +uint64_t align_size(uint64_t size, uint64_t alignment) { + return (size + alignment - 1) & ~(alignment - 1); +} - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - bytestotal += sizeof(int32_t); - if (bytestotal > maxbytes) return 0; - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size > (1 << 16)) { +size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r) { + if (!is_shrunken(r)) { return 0; } - char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - bytestotal += s; - if (bytestotal > maxbytes) return 0; - bitmapOfRunContainers = (char *)buf; - buf += s; - } - bytestotal += size * 2 * sizeof(uint16_t); - if (bytestotal > maxbytes) return 0; - 
uint16_t *keyscards = (uint16_t *)buf; - buf += size * 2 * sizeof(uint16_t); - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - // skipping the offsets - bytestotal += size * 4; - if (bytestotal > maxbytes) return 0; - buf += size * 4; + // Flags. + uint64_t size = sizeof(r->flags); + // Container count. + size += sizeof(r->capacity); + // Container element counts. + size += r->capacity * sizeof(uint16_t); + // Total container sizes. + size += 3 * sizeof(uint64_t); + // ART (8 byte aligned). + size = align_size(size, 8); + size += art_size_in_bytes(&r->art); + + uint64_t total_sizes[4] = + CROARING_ZERO_INITIALIZER; // Indexed by typecode. + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + total_sizes[typecode] += + container_get_frozen_size(get_container(r, leaf), typecode); + art_iterator_next(&it); } - // Reading the containers - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2 * k + 1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if (hasrun) { - if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } + // Containers (aligned). + size = align_size(size, CROARING_BITSET_ALIGNMENT); + size += total_sizes[BITSET_CONTAINER_TYPE]; + size = align_size(size, alignof(rle16_t)); + size += total_sizes[ARRAY_CONTAINER_TYPE]; + size = align_size(size, alignof(uint16_t)); + size += total_sizes[RUN_CONTAINER_TYPE]; + // Padding to make overall size a multiple of required alignment. 
+ size = align_size(size, CROARING_BITSET_ALIGNMENT); + return size; +} + +static inline void container_frozen_serialize(const container_t *container, + uint8_t typecode, + uint64_t **bitsets, + uint16_t **arrays, + rle16_t **runs) { + size_t size = container_get_frozen_size(container, typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *bitset = (bitset_container_t *)container; + memcpy(*bitsets, bitset->words, size); + *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; + break; } - if (isbitmap) { - size_t containersize = - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - bytestotal += containersize; - if (bytestotal > maxbytes) return 0; - buf += containersize; - } else if (isrun) { - bytestotal += sizeof(uint16_t); - if (bytestotal > maxbytes) return 0; - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - buf += sizeof(uint16_t); - size_t containersize = n_runs * sizeof(rle16_t); - bytestotal += containersize; - if (bytestotal > maxbytes) return 0; - buf += containersize; - } else { - size_t containersize = thiscard * sizeof(uint16_t); - bytestotal += containersize; - if (bytestotal > maxbytes) return 0; - buf += containersize; + case ARRAY_CONTAINER_TYPE: { + array_container_t *array = (array_container_t *)container; + memcpy(*arrays, array->array, size); + *arrays += container_get_element_count(container, typecode); + break; + } + case RUN_CONTAINER_TYPE: { + run_container_t *run = (run_container_t *)container; + memcpy(*runs, run->runs, size); + *runs += container_get_element_count(container, typecode); + break; + } + default: { + assert(false); + roaring_unreachable; } } - return bytestotal; } -// This function populates answer from the content of buf (reading up to -// maxbytes bytes). The function returns false if a properly serialized bitmap -// cannot be found. If it returns true, readbytes is populated by how many bytes -// were read, we have that *readbytes <= maxbytes. 
-// -// This function is endian-sensitive. -bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, - const size_t maxbytes, size_t *readbytes) { - *readbytes = sizeof(int32_t); // for cookie - if (*readbytes > maxbytes) { - // Ran out of bytes while reading first 4 bytes. - return false; - } - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - // "I failed to find one of the right cookies. - return false; - } - int32_t size; +static inline char *pad_align(char *buf, const char *initial_buf, + size_t alignment) { + uint64_t buf_size = buf - initial_buf; + uint64_t pad = align_size(buf_size, alignment) - buf_size; + memset(buf, 0, pad); + return buf + pad; +} - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - *readbytes += sizeof(int32_t); - if (*readbytes > maxbytes) { - // Ran out of bytes while reading second part of the cookie. - return false; - } - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size < 0) { - // You cannot have a negative number of containers, the data must be - // corrupted. - return false; - } - if (size > (1 << 16)) { - // You cannot have so many containers, the data must be corrupted. - return false; +size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, + char *buf) { + if (buf == NULL) { + return 0; } - const char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - *readbytes += s; - if (*readbytes > maxbytes) { // data is corrupted? - // Ran out of bytes while reading run bitmap. 
- return false; - } - bitmapOfRunContainers = buf; - buf += s; + if (!is_shrunken(r)) { + return 0; } - uint16_t *keyscards = (uint16_t *)buf; + const char *initial_buf = buf; - *readbytes += size * 2 * sizeof(uint16_t); - if (*readbytes > maxbytes) { - // Ran out of bytes while reading key-cardinality array. - return false; - } - buf += size * 2 * sizeof(uint16_t); + // Flags. + memcpy(buf, &r->flags, sizeof(r->flags)); + buf += sizeof(r->flags); - bool is_ok = ra_init_with_capacity(answer, size); - if (!is_ok) { - // Failed to allocate memory for roaring array. Bailing out. - return false; - } + // Container count. + memcpy(buf, &r->capacity, sizeof(r->capacity)); + buf += sizeof(r->capacity); - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2 * k, sizeof(tmp)); - answer->keys[k] = tmp; + // Container element counts. + uint64_t total_sizes[4] = + CROARING_ZERO_INITIALIZER; // Indexed by typecode. + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); + + uint32_t elem_count = container_get_element_count(container, typecode); + uint16_t compressed_elem_count = (uint16_t)(elem_count - 1); + memcpy(buf, &compressed_elem_count, sizeof(compressed_elem_count)); + buf += sizeof(compressed_elem_count); + + total_sizes[typecode] += container_get_frozen_size(container, typecode); + art_iterator_next(&it); } - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - *readbytes += size * 4; - if (*readbytes > maxbytes) { // data is corrupted? - // Ran out of bytes while reading offsets. - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - // skipping the offsets - buf += size * 4; + // Total container sizes. 
+ memcpy(buf, &(total_sizes[BITSET_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); + memcpy(buf, &(total_sizes[RUN_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); + memcpy(buf, &(total_sizes[ARRAY_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); + + // ART. + buf = pad_align(buf, initial_buf, 8); + buf += art_serialize(&r->art, buf); + + // Containers (aligned). + // Runs before arrays as run elements are larger than array elements and + // smaller than bitset elements. + buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); + uint64_t *bitsets = (uint64_t *)buf; + buf += total_sizes[BITSET_CONTAINER_TYPE]; + buf = pad_align(buf, initial_buf, alignof(rle16_t)); + rle16_t *runs = (rle16_t *)buf; + buf += total_sizes[RUN_CONTAINER_TYPE]; + buf = pad_align(buf, initial_buf, alignof(uint16_t)); + uint16_t *arrays = (uint16_t *)buf; + buf += total_sizes[ARRAY_CONTAINER_TYPE]; + + it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); + container_frozen_serialize(container, typecode, &bitsets, &arrays, + &runs); + art_iterator_next(&it); } - // Reading the containers - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2 * k + 1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if (hasrun) { - if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } - if (isbitmap) { - // we check that the read is allowed - size_t containersize = - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - *readbytes += containersize; - if (*readbytes > maxbytes) { - // Running out of bytes while reading a bitset container. 
- ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - // it is now safe to read - bitset_container_t *c = bitset_container_create(); - if (c == NULL) { // memory allocation failure - // Failed to allocate memory for a bitset container. - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - answer->size++; - buf += bitset_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = BITSET_CONTAINER_TYPE; - } else if (isrun) { - // we check that the read is allowed - *readbytes += sizeof(uint16_t); - if (*readbytes > maxbytes) { - // Running out of bytes while reading a run container (header). - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - size_t containersize = n_runs * sizeof(rle16_t); - *readbytes += containersize; - if (*readbytes > maxbytes) { // data is corrupted? - // Running out of bytes while reading a run container. - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - // it is now safe to read - run_container_t *c = run_container_create(); - if (c == NULL) { // memory allocation failure - // Failed to allocate memory for a run container. - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - answer->size++; - buf += run_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = RUN_CONTAINER_TYPE; - } else { - // we check that the read is allowed - size_t containersize = thiscard * sizeof(uint16_t); - *readbytes += containersize; - if (*readbytes > maxbytes) { // data is corrupted? - // Running out of bytes while reading an array container. 
- ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - // it is now safe to read + // Padding to make overall size a multiple of required alignment. + buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); + + return buf - initial_buf; +} + +static container_t *container_frozen_view(uint8_t typecode, uint32_t elem_count, + const uint64_t **bitsets, + const uint16_t **arrays, + const rle16_t **runs) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *c = (bitset_container_t *)roaring_malloc( + sizeof(bitset_container_t)); + c->cardinality = elem_count; + c->words = (uint64_t *)*bitsets; + *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; + return (container_t *)c; + } + case ARRAY_CONTAINER_TYPE: { array_container_t *c = - array_container_create_given_capacity(thiscard); - if (c == NULL) { // memory allocation failure - // Failed to allocate memory for an array container. - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - answer->size++; - buf += array_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = ARRAY_CONTAINER_TYPE; + (array_container_t *)roaring_malloc(sizeof(array_container_t)); + c->cardinality = elem_count; + c->capacity = elem_count; + c->array = (uint16_t *)*arrays; + *arrays += elem_count; + return (container_t *)c; + } + case RUN_CONTAINER_TYPE: { + run_container_t *c = + (run_container_t *)roaring_malloc(sizeof(run_container_t)); + c->n_runs = elem_count; + c->capacity = elem_count; + c->runs = (rle16_t *)*runs; + *runs += elem_count; + return (container_t *)c; + } + default: { + assert(false); + roaring_unreachable; + return NULL; } } - return true; } -#ifdef __cplusplus -} -} -} // extern "C" { namespace roaring { namespace internal { -#endif -/* end file src/roaring_array.c */ -/* begin file src/roaring_priority_queue.c */ +roaring64_bitmap_t 
*roaring64_bitmap_frozen_view(const char *buf, + size_t maxbytes) { + if (buf == NULL) { + return NULL; + } + if ((uintptr_t)buf % CROARING_BITSET_ALIGNMENT != 0) { + return NULL; + } -#ifdef __cplusplus -using namespace ::roaring::internal; + roaring64_bitmap_t *r = roaring64_bitmap_create(); -extern "C" { -namespace roaring { -namespace api { -#endif + // Flags. + if (maxbytes < sizeof(r->flags)) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&r->flags, buf, sizeof(r->flags)); + buf += sizeof(r->flags); + maxbytes -= sizeof(r->flags); + r->flags |= ROARING_FLAG_FROZEN; -struct roaring_pq_element_s { - uint64_t size; - bool is_temporary; - roaring_bitmap_t *bitmap; -}; + // Container count. + if (maxbytes < sizeof(r->capacity)) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&r->capacity, buf, sizeof(r->capacity)); + buf += sizeof(r->capacity); + maxbytes -= sizeof(r->capacity); -typedef struct roaring_pq_element_s roaring_pq_element_t; + r->containers = + (container_t **)roaring_malloc(r->capacity * sizeof(container_t *)); -struct roaring_pq_s { - roaring_pq_element_t *elements; - uint64_t size; -}; + // Container element counts. + if (maxbytes < r->capacity * sizeof(uint16_t)) { + roaring64_bitmap_free(r); + return NULL; + } + const char *elem_counts = buf; + buf += r->capacity * sizeof(uint16_t); + maxbytes -= r->capacity * sizeof(uint16_t); -typedef struct roaring_pq_s roaring_pq_t; + // Total container sizes. + uint64_t total_sizes[4]; + if (maxbytes < sizeof(uint64_t) * 3) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&(total_sizes[BITSET_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); + memcpy(&(total_sizes[RUN_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); + memcpy(&(total_sizes[ARRAY_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); + + // ART (8 byte aligned). 
+ buf = CROARING_ALIGN_BUF(buf, 8); + size_t art_size = art_frozen_view(buf, maxbytes, &r->art); + if (art_size == 0) { + roaring64_bitmap_free(r); + return NULL; + } + buf += art_size; + maxbytes -= art_size; + + // Containers (aligned). + const char *before_containers = buf; + buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); + const uint64_t *bitsets = (const uint64_t *)buf; + buf += total_sizes[BITSET_CONTAINER_TYPE]; + buf = CROARING_ALIGN_BUF(buf, alignof(rle16_t)); + const rle16_t *runs = (const rle16_t *)buf; + buf += total_sizes[RUN_CONTAINER_TYPE]; + buf = CROARING_ALIGN_BUF(buf, alignof(uint16_t)); + const uint16_t *arrays = (const uint16_t *)buf; + buf += total_sizes[ARRAY_CONTAINER_TYPE]; + if (maxbytes < (uint64_t)(buf - before_containers)) { + roaring64_bitmap_free(r); + return NULL; + } + maxbytes -= buf - before_containers; -static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { - return t1->size < t2->size; -} + // Deserialize in ART iteration order. + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + for (size_t i = 0; it.value != NULL; ++i) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); -static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { - uint64_t i = pq->size; - pq->elements[pq->size++] = *t; - while (i > 0) { - uint64_t p = (i - 1) >> 1; - roaring_pq_element_t ap = pq->elements[p]; - if (!compare(t, &ap)) break; - pq->elements[i] = ap; - i = p; + uint16_t compressed_elem_count; + memcpy(&compressed_elem_count, elem_counts + (i * sizeof(uint16_t)), + sizeof(compressed_elem_count)); + uint32_t elem_count = (uint32_t)(compressed_elem_count) + 1; + + // The container index is unrelated to the iteration order. 
+ uint64_t index = get_index(leaf); + r->containers[index] = container_frozen_view(typecode, elem_count, + &bitsets, &arrays, &runs); + + art_iterator_next(&it); } - pq->elements[i] = *t; -} -static void pq_free(roaring_pq_t *pq) { roaring_free(pq); } + // Padding to make overall size a multiple of required alignment. + buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); -static void percolate_down(roaring_pq_t *pq, uint32_t i) { - uint32_t size = (uint32_t)pq->size; - uint32_t hsize = size >> 1; - roaring_pq_element_t ai = pq->elements[i]; - while (i < hsize) { - uint32_t l = (i << 1) + 1; - uint32_t r = l + 1; - roaring_pq_element_t bestc = pq->elements[l]; - if (r < size) { - if (compare(pq->elements + r, &bestc)) { - l = r; - bestc = pq->elements[r]; - } - } - if (!compare(&bestc, &ai)) { - break; + return r; +} + +bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, + roaring_iterator64 iterator, void *ptr) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + uint64_t high48 = combine_key(it.key, 0); + uint64_t high32 = high48 & 0xFFFFFFFF00000000ULL; + uint32_t low32 = high48; + leaf_t leaf = (leaf_t)*it.value; + if (!container_iterate64(get_container(r, leaf), get_typecode(leaf), + low32, iterator, high32, ptr)) { + return false; } - pq->elements[i] = bestc; - i = l; + art_iterator_next(&it); } - pq->elements[i] = ai; + return true; } -static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { - size_t alloc_size = - sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length; - roaring_pq_t *answer = (roaring_pq_t *)roaring_malloc(alloc_size); - answer->elements = (roaring_pq_element_t *)(answer + 1); - answer->size = length; - for (uint32_t i = 0; i < length; i++) { - answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; - answer->elements[i].is_temporary = false; - answer->elements[i].size = - roaring_bitmap_portable_size_in_bytes(arr[i]); - } - for (int32_t i = 
(length >> 1); i >= 0; i--) { - percolate_down(answer, i); - } - return answer; +void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r, + uint64_t *out) { + roaring64_iterator_t it; // gets initialized in the next line + roaring64_iterator_init_at(r, &it, /*first=*/true); + roaring64_iterator_read(&it, out, UINT64_MAX); +} + +roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r) { + roaring64_iterator_t *it = + (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); + return roaring64_iterator_init_at(r, it, /*first=*/true); } -static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { - roaring_pq_element_t ans = *pq->elements; - if (pq->size > 1) { - pq->elements[0] = pq->elements[--pq->size]; - percolate_down(pq, 0); - } else - --pq->size; - // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; - return ans; +roaring64_iterator_t *roaring64_iterator_create_last( + const roaring64_bitmap_t *r) { + roaring64_iterator_t *it = + (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); + return roaring64_iterator_init_at(r, it, /*first=*/false); } -// this function consumes and frees the inputs -static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, - roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = ra_get_size(&x1->high_low_container), - length2 = ra_get_size(&x2->high_low_container); - if (0 == length1) { - roaring_bitmap_free(x1); - return x2; - } - if (0 == length2) { - roaring_bitmap_free(x2); - return x1; - } - uint32_t neededcap = length1 > length2 ? 
length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - // todo: unsharing can be inefficient as it may create a clone where - // none - // is needed, but it has the benefit of being easy to reason about. +void roaring64_iterator_reinit(const roaring64_bitmap_t *r, + roaring64_iterator_t *it) { + roaring64_iterator_init_at(r, it, /*first=*/true); +} - ra_unshare_container_at_index(&x1->high_low_container, - (uint16_t)pos1); - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - assert(type1 != SHARED_CONTAINER_TYPE); +void roaring64_iterator_reinit_last(const roaring64_bitmap_t *r, + roaring64_iterator_t *it) { + roaring64_iterator_init_at(r, it, /*first=*/false); +} - ra_unshare_container_at_index(&x2->high_low_container, - (uint16_t)pos2); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - assert(type2 != SHARED_CONTAINER_TYPE); +roaring64_iterator_t *roaring64_iterator_copy(const roaring64_iterator_t *it) { + roaring64_iterator_t *new_it = + (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); + memcpy(new_it, it, sizeof(*it)); + return new_it; +} - container_t *c; +void roaring64_iterator_free(roaring64_iterator_t *it) { roaring_free(it); } - if ((type2 == BITSET_CONTAINER_TYPE) && - (type1 != BITSET_CONTAINER_TYPE)) { - c = container_lazy_ior(c2, type2, c1, type1, &result_type); - container_free(c1, type1); - if (c != c2) { - container_free(c2, type2); - } - } else { - c = container_lazy_ior(c1, type1, c2, type2, &result_type); - container_free(c2, type2); - if (c != c1) { - container_free(c1, type1); - } - } - // since we assume that the initial containers are non-empty, 
the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); +bool roaring64_iterator_has_value(const roaring64_iterator_t *it) { + return it->has_value; +} - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); +uint64_t roaring64_iterator_value(const roaring64_iterator_t *it) { + return it->value; +} - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); +bool roaring64_iterator_advance(roaring64_iterator_t *it) { + if (it->art_it.value == NULL) { + if (it->saturated_forward) { + return (it->has_value = false); } + roaring64_iterator_init_at(it->r, it, /*first=*/true); + return it->has_value; } - if (pos1 == length1) { - ra_append_move_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2); - } else if (pos2 == length2) { - ra_append_move_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1); + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = (uint16_t)it->value; + if (container_iterator_next(get_container(it->r, leaf), get_typecode(leaf), + &it->container_it, &low16)) { + it->value = it->high48 | low16; + return (it->has_value = true); } - ra_clear_without_containers(&x1->high_low_container); - ra_clear_without_containers(&x2->high_low_container); - roaring_free(x1); - 
roaring_free(x2); - return answer; + if (art_iterator_next(&it->art_it)) { + return roaring64_iterator_init_at_leaf_first(it); + } + it->saturated_forward = true; + return (it->has_value = false); } -/** - * Compute the union of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_or_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. - */ -roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); +bool roaring64_iterator_previous(roaring64_iterator_t *it) { + if (it->art_it.value == NULL) { + if (!it->saturated_forward) { + // Saturated backward. + return (it->has_value = false); + } + roaring64_iterator_init_at(it->r, it, /*first=*/false); + return it->has_value; } - if (number == 1) { - return roaring_bitmap_copy(x[0]); + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = (uint16_t)it->value; + if (container_iterator_prev(get_container(it->r, leaf), get_typecode(leaf), + &it->container_it, &low16)) { + it->value = it->high48 | low16; + return (it->has_value = true); } - roaring_pq_t *pq = create_pq(x, number); - while (pq->size > 1) { - roaring_pq_element_t x1 = pq_poll(pq); - roaring_pq_element_t x2 = pq_poll(pq); + if (art_iterator_prev(&it->art_it)) { + return roaring64_iterator_init_at_leaf_last(it); + } + it->saturated_forward = false; // Saturated backward. 
+ return (it->has_value = false); +} - if (x1.is_temporary && x2.is_temporary) { - roaring_bitmap_t *newb = - lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); - // should normally return a fresh new bitmap *except* that - // it can return x1.bitmap or x2.bitmap in degenerate cases - bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = temporary, .bitmap = newb}; - pq_add(pq, &newelement); - } else if (x2.is_temporary) { - roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); - x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); - pq_add(pq, &x2); - } else if (x1.is_temporary) { - roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); - x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); +bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, + uint64_t val) { + uint8_t val_high48[ART_KEY_BYTES]; + uint16_t val_low16 = split_key(val, val_high48); + if (!it->has_value || it->high48 != (val & 0xFFFFFFFFFFFF0000)) { + // The ART iterator is before or after the high48 bits of `val` (or + // beyond the ART altogether), so we need to move to a leaf with a + // key equal or greater. + if (!art_iterator_lower_bound(&it->art_it, val_high48)) { + // Only smaller keys found. + it->saturated_forward = true; + return (it->has_value = false); + } + it->high48 = combine_key(it->art_it.key, 0); + // Fall through to the next if statement. + } - pq_add(pq, &x1); - } else { - roaring_bitmap_t *newb = - roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = true, .bitmap = newb}; + if (it->high48 == (val & 0xFFFFFFFFFFFF0000)) { + // We're at equal high bits, check if a suitable value can be found + // in this container. 
+ leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = (uint16_t)it->value; + if (container_iterator_lower_bound( + get_container(it->r, leaf), get_typecode(leaf), + &it->container_it, &low16, val_low16)) { + it->value = it->high48 | low16; + return (it->has_value = true); + } + // Only smaller entries in this container, move to the next. + if (!art_iterator_next(&it->art_it)) { + it->saturated_forward = true; + return (it->has_value = false); + } + } - pq_add(pq, &newelement); + // We're at a leaf with high bits greater than `val`, so the first entry + // in this container is our result. + return roaring64_iterator_init_at_leaf_first(it); +} + +uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, + uint64_t count) { + uint64_t consumed = 0; + while (it->has_value && consumed < count) { + uint32_t container_consumed; + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = (uint16_t)it->value; + uint32_t container_count = UINT32_MAX; + if (count - consumed < (uint64_t)UINT32_MAX) { + container_count = count - consumed; + } + bool has_value = container_iterator_read_into_uint64( + get_container(it->r, leaf), get_typecode(leaf), &it->container_it, + it->high48, buf, container_count, &container_consumed, &low16); + consumed += container_consumed; + buf += container_consumed; + if (has_value) { + it->has_value = true; + it->value = it->high48 | low16; + assert(consumed == count); + return consumed; + } + it->has_value = art_iterator_next(&it->art_it); + if (it->has_value) { + roaring64_iterator_init_at_leaf_first(it); } } - roaring_pq_element_t X = pq_poll(pq); - roaring_bitmap_t *answer = X.bitmap; - roaring_bitmap_repair_after_lazy(answer); - pq_free(pq); - return answer; + return consumed; } #ifdef __cplusplus -} -} -} // extern "C" { namespace roaring { namespace api { +} // extern "C" +} // namespace roaring +} // namespace api #endif -/* end file src/roaring_priority_queue.c */ +/* end file src/roaring64.c */ diff --git 
a/pyroaring/roaring.h b/pyroaring/roaring.h index faf8587..02aa6db 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-07-30T19:32:00Z +// Created by amalgamation.sh on 2025-06-05T04:01:50Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -59,11 +59,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.1.1" +#define ROARING_VERSION "4.3.5" enum { ROARING_VERSION_MAJOR = 4, - ROARING_VERSION_MINOR = 1, - ROARING_VERSION_REVISION = 1 + ROARING_VERSION_MINOR = 3, + ROARING_VERSION_REVISION = 5 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ @@ -86,9 +86,10 @@ enum { #ifndef CROARING_INCLUDE_PORTABILITY_H_ #define CROARING_INCLUDE_PORTABILITY_H_ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE 1 -#endif // _GNU_SOURCE +// Users who need _GNU_SOURCE should define it? 
+// #ifndef _GNU_SOURCE +// #define _GNU_SOURCE 1 +// #endif // _GNU_SOURCE #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS 1 #endif // __STDC_FORMAT_MACROS @@ -118,21 +119,6 @@ enum { #define CROARING_REGULAR_VISUAL_STUDIO 0 #endif -#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) -#undef _POSIX_C_SOURCE -#endif - -#ifndef _POSIX_C_SOURCE -#define _POSIX_C_SOURCE 200809L -#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) -#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) -#define _XOPEN_SOURCE 700 -#endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) - -#ifdef __illumos__ -#define __EXTENSIONS__ -#endif - #include #include #include // will provide posix_memalign with _POSIX_C_SOURCE as defined above @@ -655,6 +641,8 @@ static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { #if defined(__GNUC__) || defined(__clang__) #define CROARING_DEPRECATED __attribute__((deprecated)) +#elif defined(_MSC_VER) +#define CROARING_DEPRECATED __declspec(deprecated) #else #define CROARING_DEPRECATED #endif // defined(__GNUC__) || defined(__clang__) @@ -669,6 +657,12 @@ static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { { 0 } #endif +#if defined(__cplusplus) +#define CROARING_STATIC_ASSERT(x, y) static_assert(x, y) +#else +#define CROARING_STATIC_ASSERT(x, y) _Static_assert(x, y) +#endif + // We need portability.h to be included first, // but we also always want isadetection.h to be // included (right after). @@ -965,7 +959,10 @@ inline bool bitset_get(const bitset_t *bitset, size_t i) { /* Count number of bits set. */ size_t bitset_count(const bitset_t *bitset); -/* Find the index of the first bit set. Or zero if the bitset is empty. */ +/* Returns true if no bit is set. */ +bool bitset_empty(const bitset_t *bitset); + +/* Find the index of the first bit set. Or SIZE_MAX if the bitset is empty. 
*/ size_t bitset_minimum(const bitset_t *bitset); /* Find the index of the last bit set. Or zero if the bitset is empty. */ @@ -1067,7 +1064,8 @@ inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer, return 0; // nothing more to iterate over } uint64_t w = bitset->array[x]; - w >>= (*startfrom & 63); + // unset low bits inside the word less than *startfrom + w &= ~((UINT64_C(1) << (*startfrom & 63)) - 1); size_t howmany = 0; size_t base = x << 6; while (howmany < capacity) { @@ -1188,12 +1186,14 @@ inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) { /** * Add all the values between min (included) and max (excluded) that are at a * distance k*step from min. + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, uint32_t step); /** * Creates a new bitmap from a pointer of uint32_t integers + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); @@ -1217,6 +1217,11 @@ inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, bool cow) { } } +/** + * Return a copy of the bitmap with all values shifted by offset. + * The returned pointer may be NULL in case of errors. The caller is responsible + * for freeing the return bitmap. + */ roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, int64_t offset); /** @@ -1275,6 +1280,7 @@ CROARING_DEPRECATED roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); /** * Copies a bitmap (this does memory allocation). * The caller is responsible for memory management. + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); @@ -1306,6 +1312,7 @@ void roaring_bitmap_printf(const roaring_bitmap_t *r); * bitmaps, two-by-two, it is best to start with the smallest bitmap. * You may also rely on roaring_bitmap_and_inplace to avoid creating * many temporary bitmaps. 
+ * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -1368,6 +1375,7 @@ void roaring_bitmap_and_inplace(roaring_bitmap_t *r1, /** * Computes the union between two bitmaps and returns new bitmap. The caller is * responsible for memory management. + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -1383,6 +1391,7 @@ void roaring_bitmap_or_inplace(roaring_bitmap_t *r1, * Compute the union of 'number' bitmaps. * Caller is responsible for freeing the result. * See also `roaring_bitmap_or_many_heap()` + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_or_many(size_t number, const roaring_bitmap_t **rs); @@ -1398,6 +1407,7 @@ roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, /** * Computes the symmetric difference (xor) between two bitmaps * and returns new bitmap. The caller is responsible for memory management. + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -1411,6 +1421,7 @@ void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1, /** * Compute the xor of 'number' bitmaps. * Caller is responsible for freeing the result. + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, const roaring_bitmap_t **rs); @@ -1418,6 +1429,7 @@ roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, /** * Computes the difference (andnot) between two bitmaps and returns new bitmap. * Caller is responsible for freeing the result. + * The returned pointer may be NULL in case of errors. 
*/ roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -1512,7 +1524,9 @@ void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, */ inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max) { - if (max <= min) return; + if (max <= min || min > (uint64_t)UINT32_MAX + 1) { + return; + } roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } @@ -1532,7 +1546,9 @@ void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, */ inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max) { - if (max <= min) return; + if (max <= min || min > (uint64_t)UINT32_MAX + 1) { + return; + } roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } @@ -1560,6 +1576,14 @@ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val); bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); +/** + * Check whether a range of values from range_start (included) + * to range_end (included) is present + */ +bool roaring_bitmap_contains_range_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end); + /** * Check if an items is present, using context from a previous insert or search * for speed optimization. @@ -1591,6 +1615,12 @@ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); +/** + * Returns the number of elements in the range [range_start, range_end]. + */ +uint64_t roaring_bitmap_range_cardinality_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end); /** * Returns true if the bitmap is empty (cardinality is zero). */ @@ -1677,6 +1707,10 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); * This function is endian-sensitive. 
If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * When serializing data to a file, we recommend that you also use + * checksums so that, at deserialization, you can be confident + * that you are recovering the correct data. */ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); @@ -1689,6 +1723,8 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); @@ -1705,6 +1741,8 @@ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); * The difference with `roaring_bitmap_deserialize()` is that this function * checks that the input buffer is a valid bitmap. If the buffer is too small, * NULL is returned. + * + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, size_t maxbytes); @@ -1729,6 +1767,8 @@ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); @@ -1740,7 +1780,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * https://github.com/RoaringBitmap/RoaringFormatSpec * * The function itself is safe in the sense that it will not cause buffer - * overflows. 
However, for correct operations, it is assumed that the bitmap + * overflows: it will not read beyond the scope of the provided buffer + * (buf,maxbytes). + * + * However, for correct operations, it is assumed that the bitmap * read was once serialized from a valid bitmap (i.e., it follows the format * specification). If you provided an incorrect input (garbage), then the bitmap * read may not be in a valid state and following operations may not lead to @@ -1749,13 +1792,20 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * order. This is is guaranteed to happen when serializing an existing bitmap, * but not for random inputs. * - * You may use roaring_bitmap_internal_validate to check the validity of the - * bitmap prior to using it. You may also use other strategies to check for - * corrupted inputs (e.g., checksums). + * If the source is untrusted, you should call + * roaring_bitmap_internal_validate to check the validity of the + * bitmap prior to using it. Only after calling roaring_bitmap_internal_validate + * is the bitmap considered safe for use. + * + * We also recommend that you use checksums to check that serialized data + * corresponds to the serialized bitmap. The CRoaring library does not provide + * checksumming. * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); @@ -1779,6 +1829,8 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * The returned pointer may be NULL in case of errors. 
*/ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); @@ -1813,6 +1865,10 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * When serializing data to a file, we recommend that you also use + * checksums so that, at deserialization, you can be confident + * that you are recovering the correct data. */ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); @@ -1847,6 +1903,10 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * When serializing data to a file, we recommend that you also use + * checksums so that, at deserialization, you can be confident + * that you are recovering the correct data. */ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); @@ -1920,6 +1980,8 @@ bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, * * `bitsetconversion` is a flag which determines whether container-container * operations force a bitset conversion. + * + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2, @@ -1955,6 +2017,8 @@ void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1); * * It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on * the result. + * + * The returned pointer may be NULL in case of errors. 
*/ roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -1971,10 +2035,20 @@ void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, * Compute the negation of the bitmap in the interval [range_start, range_end). * The number of negated values is range_end - range_start. * Areas outside the range are passed through unchanged. + * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); +/** + * Compute the negation of the bitmap in the interval [range_start, range_end]. + * The number of negated values is range_end - range_start + 1. + * Areas outside the range are passed through unchanged. + * The returned pointer may be NULL in case of errors. + */ +roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, + uint32_t range_start, + uint32_t range_end); /** * compute (in place) the negation of the roaring bitmap within a specified * interval: [range_start, range_end). The number of negated values is @@ -1984,6 +2058,16 @@ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); +/** + * compute (in place) the negation of the roaring bitmap within a specified + * interval: [range_start, range_end]. The number of negated values is + * range_end - range_start + 1. + * Areas outside the range are passed through unchanged. + */ +void roaring_bitmap_flip_inplace_closed(roaring_bitmap_t *r1, + uint32_t range_start, + uint32_t range_end); + /** * Selects the element at index 'rank' where the smallest element is at index 0. 
* If the size of the roaring bitmap is strictly greater than rank, then this @@ -2255,17 +2339,21 @@ CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator( using namespace ::roaring::api; #endif #endif + +// roaring64 will include roaring.h, but we would +// prefer to avoid having our users include roaring64.h +// in addition to roaring.h. /* end file include/roaring/roaring.h */ /* begin file include/roaring/memory.h */ #ifndef INCLUDE_ROARING_MEMORY_H_ #define INCLUDE_ROARING_MEMORY_H_ +#include // for size_t + #ifdef __cplusplus extern "C" { #endif -#include // for size_t - typedef void* (*roaring_malloc_p)(size_t); typedef void* (*roaring_realloc_p)(void*, size_t); typedef void* (*roaring_calloc_p)(size_t, size_t); @@ -2313,7 +2401,7 @@ namespace api { #endif typedef struct roaring64_bitmap_s roaring64_bitmap_t; -typedef struct roaring64_leaf_s roaring64_leaf_t; +typedef uint64_t roaring64_leaf_t; typedef struct roaring64_iterator_s roaring64_iterator_t; /** @@ -2335,12 +2423,14 @@ typedef struct roaring64_bulk_context_s { /** * Dynamically allocates a new bitmap (initially empty). * Client is responsible for calling `roaring64_bitmap_free()`. + * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_create(void); void roaring64_bitmap_free(roaring64_bitmap_t *r); /** * Returns a copy of a bitmap. + * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r); @@ -2389,9 +2479,18 @@ roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, &((const uint64_t[]){0, __VA_ARGS__})[1]) #endif +/** + * Create a new bitmap by moving containers from a 32 bit roaring bitmap. + * + * After calling this function, the original bitmap will be empty, and the + * returned bitmap will contain all the values from the original bitmap. 
+ */ +roaring64_bitmap_t *roaring64_bitmap_move_from_roaring32(roaring_bitmap_t *r); + /** * Create a new bitmap containing all the values in [min, max) that are at a * distance k*step from min. + * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, uint64_t step); @@ -2600,6 +2699,12 @@ uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r); */ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r); +/** + * Shrinks internal arrays to eliminate any unused capacity. Returns the number + * of bytes freed. + */ +size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r); + /** * (For advanced users.) * Collect statistics about the bitmap @@ -2648,6 +2753,8 @@ bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, * bitmaps, two-by-two, it is best to start with the smallest bitmaps. You may * also rely on roaring64_bitmap_and_inplace to avoid creating many temporary * bitmaps. + * + * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); @@ -2692,6 +2799,7 @@ double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, /** * Computes the union between two bitmaps and returns new bitmap. The caller is * responsible for free-ing the result. + * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); @@ -2711,6 +2819,7 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, /** * Computes the symmetric difference (xor) between two bitmaps and returns a new * bitmap. The caller is responsible for free-ing the result. + * The returned pointer may be NULL in case of errors. 
*/ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); @@ -2731,6 +2840,7 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, /** * Computes the difference (andnot) between two bitmaps and returns a new * bitmap. The caller is responsible for free-ing the result. + * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); @@ -2752,6 +2862,7 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, * Compute the negation of the bitmap in the interval [min, max). * The number of negated values is `max - min`. Areas outside the range are * passed through unchanged. + * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, uint64_t min, uint64_t max); @@ -2760,6 +2871,7 @@ roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, * Compute the negation of the bitmap in the interval [min, max]. * The number of negated values is `max - min + 1`. Areas outside the range are * passed through unchanged. + * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r, uint64_t min, uint64_t max); @@ -2799,6 +2911,10 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * When serializing data to a file, we recommend that you also use + * checksums so that, at deserialization, you can be confident + * that you are recovering the correct data. 
*/ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, char *buf); @@ -2813,14 +2929,17 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes); /** - * Read a bitmap from a serialized buffer safely (reading up to maxbytes). + * Read a bitmap from a serialized buffer (reading up to maxbytes). * In case of failure, NULL is returned. * * This is meant to be compatible with other languages * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations * * The function itself is safe in the sense that it will not cause buffer - * overflows. However, for correct operations, it is assumed that the bitmap + * overflows: it will not read beyond the scope of the provided buffer + * (buf,maxbytes). + * + * However, for correct operations, it is assumed that the bitmap * read was once serialized from a valid bitmap (i.e., it follows the format * specification). If you provided an incorrect input (garbage), then the bitmap * read may not be in a valid state and following operations may not lead to @@ -2829,6 +2948,15 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, * order. This is is guaranteed to happen when serializing an existing bitmap, * but not for random inputs. * + * If the source is untrusted, you should call + * roaring64_bitmap_internal_validate to check the validity of the + * bitmap prior to using it. Only after calling + * roaring64_bitmap_internal_validate is the bitmap considered safe for use. + * + * We also recommend that you use checksums to check that serialized data + * corresponds to the serialized bitmap. The CRoaring library does not provide + * checksumming. + * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. 
@@ -2836,6 +2964,53 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); +/** + * Returns the number of bytes required to serialize this bitmap in a "frozen" + * format. This is not compatible with any other serialization formats. + * + * `roaring64_bitmap_shrink_to_fit()` must be called before this method. + */ +size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r); + +/** + * Serializes the bitmap in a "frozen" format. The given buffer must be at least + * `roaring64_bitmap_frozen_size_in_bytes()` in size. Returns the number of + * bytes used for serialization. + * + * `roaring64_bitmap_shrink_to_fit()` must be called before this method. + * + * The frozen format is optimized for speed of (de)serialization, as well as + * allowing the user to create a bitmap based on a memory mapped file, which is + * possible because the format mimics the memory layout of the bitmap. + * + * Because the format mimics the memory layout of the bitmap, the format is not + * fixed across releases of Roaring Bitmaps, and may change in future releases. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. + */ +size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, + char *buf); + +/** + * Creates a readonly bitmap that is a view of the given buffer. The buffer + * must be created with `roaring64_bitmap_frozen_serialize()`, and must be + * aligned by 64 bytes. + * + * Returns NULL if deserialization fails. + * + * The returned bitmap must only be used in a readonly manner. The bitmap must + * be freed using `roaring64_bitmap_free()` as normal. The backing buffer must + * only be freed after the bitmap. + * + * This function is endian-sensitive. 
If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. + */ +roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, + size_t maxbytes); + /** * Iterate over the bitmap elements. The function `iterator` is called once for * all the values with `ptr` (can be NULL) as the second parameter of each call. From 1cbd3c83518ccfd0bb90332e49236bd206230e40 Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Sat, 28 Jun 2025 22:03:42 +0200 Subject: [PATCH 02/11] Version v4.3.0 --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 15288 ++++++++++++++++--------------- pyroaring/roaring.h | 52 +- 3 files changed, 7653 insertions(+), 7689 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index ae31b3f..007b74b 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.3.5" \ No newline at end of file +__croaring_version__ = "v4.3.0" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index a914df2..2e76a6d 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2025-06-05T04:01:50Z +// Created by amalgamation.sh on 2025-02-28T15:35:21Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -1494,7 +1494,7 @@ bool array_container_validate(const array_container_t *v, const char **reason); * Return the serialized size in bytes of a container having cardinality "card". 
*/ static inline int32_t array_container_serialized_size_in_bytes(int32_t card) { - return card * sizeof(uint16_t); + return card * 2 + 2; } /** @@ -9386,6 +9386,10 @@ static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], return art_to_ref(index, CROARING_ART_LEAF_TYPE); } +static inline void art_leaf_clear(art_leaf_t *leaf, art_ref_t next_free) { + leaf->next_free = next_free; +} + static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size); static art_node16_t *art_node16_create(art_t *art, @@ -9417,6 +9421,11 @@ static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], return node; } +static inline void art_node4_clear(art_node4_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} + static inline art_ref_t art_node4_find_child(const art_node4_t *node, art_key_chunk_t key) { for (size_t i = 0; i < node->count; ++i) { @@ -9619,6 +9628,11 @@ static art_node16_t *art_node16_create(art_t *art, return node; } +static inline void art_node16_clear(art_node16_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} + static inline art_ref_t art_node16_find_child(const art_node16_t *node, art_key_chunk_t key) { for (size_t i = 0; i < node->count; ++i) { @@ -9803,6 +9817,11 @@ static art_node48_t *art_node48_create(art_t *art, return node; } +static inline void art_node48_clear(art_node48_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} + static inline art_ref_t art_node48_find_child(const art_node48_t *node, art_key_chunk_t key) { uint8_t val_idx = node->keys[key]; @@ -10006,6 +10025,11 @@ static art_node256_t *art_node256_create(art_t *art, return node; } +static inline void art_node256_clear(art_node256_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} + static inline art_ref_t art_node256_find_child(const art_node256_t *node, art_key_chunk_t key) { return 
node->children[key]; @@ -11433,14 +11457,14 @@ bool art_internal_validate(const art_t *art, const char **reason, return art_internal_validate_at(art, art->root, validator); } -CROARING_STATIC_ASSERT(alignof(art_leaf_t) == alignof(art_node4_t), - "Serialization assumes node type alignment is equal"); -CROARING_STATIC_ASSERT(alignof(art_leaf_t) == alignof(art_node16_t), - "Serialization assumes node type alignment is equal"); -CROARING_STATIC_ASSERT(alignof(art_leaf_t) == alignof(art_node48_t), - "Serialization assumes node type alignment is equal"); -CROARING_STATIC_ASSERT(alignof(art_leaf_t) == alignof(art_node256_t), - "Serialization assumes node type alignment is equal"); +_Static_assert(alignof(art_leaf_t) == alignof(art_node4_t), + "Serialization assumes node type alignment is equal"); +_Static_assert(alignof(art_leaf_t) == alignof(art_node16_t), + "Serialization assumes node type alignment is equal"); +_Static_assert(alignof(art_leaf_t) == alignof(art_node48_t), + "Serialization assumes node type alignment is equal"); +_Static_assert(alignof(art_leaf_t) == alignof(art_node256_t), + "Serialization assumes node type alignment is equal"); size_t art_size_in_bytes(const art_t *art) { if (!art_is_shrunken(art)) { @@ -11512,8 +11536,8 @@ size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art) { if (maxbytes < sizeof(art->capacities)) { return 0; } - CROARING_STATIC_ASSERT(sizeof(art->first_free) == sizeof(art->capacities), - "first_free is read from capacities"); + _Static_assert(sizeof(art->first_free) == sizeof(art->capacities), + "first_free is read from capacities"); memcpy(art->first_free, buf, sizeof(art->capacities)); memcpy(art->capacities, buf, sizeof(art->capacities)); buf += sizeof(art->capacities); @@ -11548,1643 +11572,1643 @@ size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art) { } // namespace internal #endif /* end file src/art/art.c */ -/* begin file src/bitset_util.c */ -#include +/* begin file src/bitset.c */ +#include 
#include #include #include #include -#if CROARING_IS_X64 -#ifndef CROARING_COMPILER_SUPPORTS_AVX512 -#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." -#endif // CROARING_COMPILER_SUPPORTS_AVX512 -#endif -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wuninitialized" -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif #ifdef __cplusplus -using namespace ::roaring::internal; extern "C" { namespace roaring { -namespace api { +namespace internal { #endif -#if CROARING_IS_X64 -static uint8_t lengthTable[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; -#endif +extern inline void bitset_print(const bitset_t *b); +extern inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator, + void *ptr); +extern inline size_t bitset_next_set_bits(const bitset_t *bitset, + size_t *buffer, size_t capacity, + size_t *startfrom); +extern inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag); +extern inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i); +extern inline void bitset_set(bitset_t *bitset, size_t i); +extern inline bool bitset_get(const bitset_t *bitset, size_t i); +extern inline size_t 
bitset_size_in_words(const bitset_t *bitset); +extern inline size_t bitset_size_in_bits(const bitset_t *bitset); +extern inline size_t bitset_size_in_bytes(const bitset_t *bitset); -#if CROARING_IS_X64 -ALIGNED(32) -static uint32_t vecDecodeTable[256][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ - {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ - {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ - {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ - {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ - {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ - {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ - {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ - {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ - {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ - {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ - {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ - {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ - {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ - {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ - {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ - {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ - {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ - {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ - {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ - {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ - {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ - {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ - {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ - {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ - {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ - {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ - {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ - {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ - {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ - {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ - {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ - {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ - {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ - {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 
(00100010) */ - {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ - {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ - {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ - {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ - {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ - {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ - {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ - {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ - {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ - {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ - {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ - {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ - {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ - {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ - {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ - {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ - {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ - {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ - {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ - {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ - {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ - {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ - {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ - {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ - {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ - {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ - {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ - {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ - {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ - {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ - {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ - {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ - {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ - {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ - {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ - {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ - {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ - {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ - {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ - {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A 
(01001010) */ - {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ - {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ - {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ - {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ - {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ - {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ - {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ - {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ - {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ - {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ - {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ - {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ - {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ - {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ - {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ - {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ - {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ - {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ - {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ - {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ - {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ - {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ - {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ - {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ - {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ - {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ - {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ - {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ - {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ - {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ - {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ - {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ - {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ - {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ - {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ - {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ - {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ - {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ - {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ - {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 
(01110010) */ - {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ - {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ - {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ - {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ - {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ - {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ - {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ - {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ - {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ - {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ - {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ - {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ - {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ - {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ - {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ - {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ - {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ - {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ - {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ - {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ - {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ - {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ - {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ - {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ - {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ - {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ - {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ - {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ - {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ - {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ - {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ - {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ - {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ - {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ - {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ - {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ - {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ - {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ - {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ - {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A 
(10011010) */ - {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ - {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ - {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ - {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ - {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ - {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ - {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ - {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ - {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ - {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ - {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ - {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ - {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ - {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ - {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ - {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ - {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ - {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ - {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ - {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ - {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ - {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ - {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ - {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ - {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ - {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ - {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ - {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ - {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ - {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ - {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ - {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ - {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ - {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ - {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ - {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ - {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ - {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ - {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ - {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 
(11000010) */ - {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ - {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ - {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ - {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ - {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ - {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ - {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ - {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ - {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ - {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ - {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ - {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ - {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ - {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ - {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ - {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ - {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ - {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ - {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ - {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ - {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ - {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ - {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ - {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ - {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ - {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ - {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ - {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ - {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ - {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ - {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ - {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ - {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ - {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ - {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ - {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ - {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ - {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ - {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ - {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA 
(11101010) */ - {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ - {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ - {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ - {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ - {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ - {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ - {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ - {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ - {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ - {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ - {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ - {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ - {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ - {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ - {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ - {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ - {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ - {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ - {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ - {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ - {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ -}; +/* Create a new bitset. Return NULL in case of failure. */ +bitset_t *bitset_create(void) { + bitset_t *bitset = NULL; + /* Allocate the bitset itself. */ + if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { + return NULL; + } + bitset->array = NULL; + bitset->arraysize = 0; + bitset->capacity = 0; + return bitset; +} -#endif // #if CROARING_IS_X64 +/* Create a new bitset able to contain size bits. Return NULL in case of + * failure. */ +bitset_t *bitset_create_with_capacity(size_t size) { + bitset_t *bitset = NULL; + /* Allocate the bitset itself. 
*/ + if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { + return NULL; + } + bitset->arraysize = + (size + sizeof(uint64_t) * 8 - 1) / (sizeof(uint64_t) * 8); + bitset->capacity = bitset->arraysize; + if ((bitset->array = (uint64_t *)roaring_calloc( + bitset->arraysize, sizeof(uint64_t))) == NULL) { + roaring_free(bitset); + return NULL; + } + return bitset; +} -#if CROARING_IS_X64 -// same as vecDecodeTable but in 16 bits -ALIGNED(32) -static uint16_t vecDecodeTable_uint16[256][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ - {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ - {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ - {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ - {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ - {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ - {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ - {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ - {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ - {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ - {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ - {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ - {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ - {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ - {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ - {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ - {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ - {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ - {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ - {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ - {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ - {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ - {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ - {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ - {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ - {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ - {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ - {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ - {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ - {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D 
(00011101) */ - {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ - {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ - {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ - {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ - {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ - {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ - {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ - {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ - {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ - {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ - {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ - {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ - {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ - {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ - {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ - {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ - {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ - {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ - {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ - {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ - {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ - {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ - {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ - {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ - {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ - {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ - {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ - {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ - {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ - {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ - {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ - {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ - {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ - {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ - {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ - {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ - {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ - {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ - {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ - {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 
(01000101) */ - {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ - {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ - {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ - {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ - {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ - {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ - {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ - {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ - {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ - {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ - {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ - {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ - {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ - {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ - {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ - {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ - {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ - {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ - {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ - {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ - {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ - {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ - {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ - {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ - {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ - {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ - {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ - {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ - {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ - {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ - {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ - {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ - {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ - {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ - {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ - {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ - {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ - {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ - {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ - {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D 
(01101101) */ - {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ - {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ - {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ - {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ - {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ - {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ - {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ - {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ - {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ - {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ - {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ - {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ - {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ - {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ - {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ - {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ - {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ - {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ - {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ - {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ - {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ - {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ - {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ - {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ - {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ - {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ - {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ - {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ - {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ - {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ - {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ - {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ - {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ - {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ - {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ - {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ - {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ - {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ - {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ - {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 
(10010101) */ - {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ - {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ - {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ - {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ - {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ - {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ - {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ - {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ - {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ - {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ - {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ - {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ - {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ - {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ - {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ - {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ - {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ - {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ - {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ - {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ - {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ - {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ - {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ - {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ - {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ - {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ - {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ - {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ - {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ - {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ - {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ - {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ - {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ - {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ - {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ - {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ - {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ - {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ - {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ - {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD 
(10111101) */ - {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ - {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ - {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ - {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ - {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ - {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ - {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ - {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ - {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ - {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ - {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ - {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ - {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ - {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ - {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ - {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ - {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ - {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ - {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ - {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ - {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ - {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ - {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ - {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ - {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ - {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ - {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ - {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ - {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ - {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ - {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ - {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ - {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ - {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ - {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ - {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ - {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ - {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ - {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ - {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 
(11100101) */ - {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ - {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ - {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ - {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ - {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ - {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ - {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ - {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ - {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ - {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ - {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ - {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ - {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ - {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ - {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ - {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ - {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ - {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ - {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ - {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ - {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ - {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ - {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ - {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ - {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ - {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ -}; - -#endif - -#if CROARING_IS_X64 -#if CROARING_COMPILER_SUPPORTS_AVX512 -CROARING_TARGET_AVX512 -const uint8_t vbmi2_table[64] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; -size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, - uint32_t *vout, size_t outcapacity, - uint32_t base) { - uint32_t *out = (uint32_t *)vout; - uint32_t *initout = out; - uint32_t *safeout = out + outcapacity; - __m512i base_v = _mm512_set1_epi32(base); - __m512i 
index_table = _mm512_loadu_si512(vbmi2_table); - size_t i = 0; - - for (; (i < length) && ((out + 64) < safeout); i += 1) { - uint64_t v = words[i]; - __m512i vec = _mm512_maskz_compress_epi8(v, index_table); - - uint8_t advance = (uint8_t)roaring_hamming(v); +/* Create a copy */ +bitset_t *bitset_copy(const bitset_t *bitset) { + bitset_t *copy = NULL; + /* Allocate the bitset itself. */ + if ((copy = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { + return NULL; + } + memcpy(copy, bitset, sizeof(bitset_t)); + copy->capacity = copy->arraysize; + if ((copy->array = (uint64_t *)roaring_malloc(sizeof(uint64_t) * + bitset->arraysize)) == NULL) { + roaring_free(copy); + return NULL; + } + memcpy(copy->array, bitset->array, sizeof(uint64_t) * bitset->arraysize); + return copy; +} - __m512i vbase = - _mm512_add_epi32(base_v, _mm512_set1_epi32((int)(i * 64))); - __m512i r1 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 0)); - __m512i r2 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 1)); - __m512i r3 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 2)); - __m512i r4 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 3)); +void bitset_clear(bitset_t *bitset) { + memset(bitset->array, 0, sizeof(uint64_t) * bitset->arraysize); +} - r1 = _mm512_add_epi32(r1, vbase); - r2 = _mm512_add_epi32(r2, vbase); - r3 = _mm512_add_epi32(r3, vbase); - r4 = _mm512_add_epi32(r4, vbase); - _mm512_storeu_si512((__m512i *)out, r1); - _mm512_storeu_si512((__m512i *)(out + 16), r2); - _mm512_storeu_si512((__m512i *)(out + 32), r3); - _mm512_storeu_si512((__m512i *)(out + 48), r4); +void bitset_fill(bitset_t *bitset) { + memset(bitset->array, 0xff, sizeof(uint64_t) * bitset->arraysize); +} - out += advance; +void bitset_shift_left(bitset_t *bitset, size_t s) { + size_t extra_words = s / 64; + int inword_shift = s % 64; + size_t as = bitset->arraysize; + if (inword_shift == 0) { + bitset_resize(bitset, as + extra_words, false); + // could be done with a memmove + for 
(size_t i = as + extra_words; i > extra_words; i--) { + bitset->array[i - 1] = bitset->array[i - 1 - extra_words]; + } + } else { + bitset_resize(bitset, as + extra_words + 1, true); + bitset->array[as + extra_words] = + bitset->array[as - 1] >> (64 - inword_shift); + for (size_t i = as + extra_words; i >= extra_words + 2; i--) { + bitset->array[i - 1] = + (bitset->array[i - 1 - extra_words] << inword_shift) | + (bitset->array[i - 2 - extra_words] >> (64 - inword_shift)); + } + bitset->array[extra_words] = bitset->array[0] << inword_shift; + } + for (size_t i = 0; i < extra_words; i++) { + bitset->array[i] = 0; } +} - base += i * 64; +void bitset_shift_right(bitset_t *bitset, size_t s) { + size_t extra_words = s / 64; + int inword_shift = s % 64; + size_t as = bitset->arraysize; + if (inword_shift == 0) { + // could be done with a memmove + for (size_t i = 0; i < as - extra_words; i++) { + bitset->array[i] = bitset->array[i + extra_words]; + } + bitset_resize(bitset, as - extra_words, false); - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = words[i]; - while ((w != 0) && (out < safeout)) { - int r = - roaring_trailing_zeroes(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - out++; - w &= (w - 1); + } else { + for (size_t i = 0; i + extra_words + 1 < as; i++) { + bitset->array[i] = + (bitset->array[i + extra_words] >> inword_shift) | + (bitset->array[i + extra_words + 1] << (64 - inword_shift)); } - base += 64; + bitset->array[as - extra_words - 1] = + (bitset->array[as - 1] >> inword_shift); + bitset_resize(bitset, as - extra_words, false); } - - return out - initout; } -// Reference: -// https://lemire.me/blog/2022/05/10/faster-bitset-decoding-using-intel-avx-512/ -size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array, - size_t length, uint16_t *vout, - size_t capacity, uint16_t base) { - uint16_t *out = (uint16_t *)vout; - uint16_t *initout 
= out; - uint16_t *safeout = vout + capacity; - - __m512i base_v = _mm512_set1_epi16(base); - __m512i index_table = _mm512_loadu_si512(vbmi2_table); - size_t i = 0; - - for (; (i < length) && ((out + 64) < safeout); i++) { - uint64_t v = array[i]; - __m512i vec = _mm512_maskz_compress_epi8(v, index_table); - - uint8_t advance = (uint8_t)roaring_hamming(v); - - __m512i vbase = - _mm512_add_epi16(base_v, _mm512_set1_epi16((short)(i * 64))); - __m512i r1 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec, 0)); - __m512i r2 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec, 1)); - - r1 = _mm512_add_epi16(r1, vbase); - r2 = _mm512_add_epi16(r2, vbase); - - _mm512_storeu_si512((__m512i *)out, r1); - _mm512_storeu_si512((__m512i *)(out + 32), r2); - out += advance; +/* Free memory. */ +void bitset_free(bitset_t *bitset) { + if (bitset == NULL) { + return; } + roaring_free(bitset->array); + roaring_free(bitset); +} - base += i * 64; - - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = array[i]; - while ((w != 0) && (out < safeout)) { - int r = - roaring_trailing_zeroes(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out, &val, sizeof(uint16_t)); - out++; - w &= (w - 1); +/* Resize the bitset so that it can support newarraysize * 64 bits. Return true + * in case of success, false for failure. */ +bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes) { + if (newarraysize > SIZE_MAX / 64) { + return false; + } + size_t smallest = + newarraysize < bitset->arraysize ? 
newarraysize : bitset->arraysize; + if (bitset->capacity < newarraysize) { + uint64_t *newarray; + size_t newcapacity = bitset->capacity; + if (newcapacity == 0) { + newcapacity = 1; } - base += 64; + while (newcapacity < newarraysize) { + newcapacity *= 2; + } + if ((newarray = (uint64_t *)roaring_realloc( + bitset->array, sizeof(uint64_t) * newcapacity)) == NULL) { + return false; + } + bitset->capacity = newcapacity; + bitset->array = newarray; } + if (padwithzeroes && (newarraysize > smallest)) + memset(bitset->array + smallest, 0, + sizeof(uint64_t) * (newarraysize - smallest)); + bitset->arraysize = newarraysize; + return true; // success! +} - return out - initout; +size_t bitset_count(const bitset_t *bitset) { + size_t card = 0; + size_t k = 0; + for (; k + 7 < bitset->arraysize; k += 8) { + card += roaring_hamming(bitset->array[k]); + card += roaring_hamming(bitset->array[k + 1]); + card += roaring_hamming(bitset->array[k + 2]); + card += roaring_hamming(bitset->array[k + 3]); + card += roaring_hamming(bitset->array[k + 4]); + card += roaring_hamming(bitset->array[k + 5]); + card += roaring_hamming(bitset->array[k + 6]); + card += roaring_hamming(bitset->array[k + 7]); + } + for (; k + 3 < bitset->arraysize; k += 4) { + card += roaring_hamming(bitset->array[k]); + card += roaring_hamming(bitset->array[k + 1]); + card += roaring_hamming(bitset->array[k + 2]); + card += roaring_hamming(bitset->array[k + 3]); + } + for (; k < bitset->arraysize; k++) { + card += roaring_hamming(bitset->array[k]); + } + return card; } -CROARING_UNTARGET_AVX512 -#endif -CROARING_TARGET_AVX2 -size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, - uint32_t *out, size_t outcapacity, - uint32_t base) { - uint32_t *initout = out; - __m256i baseVec = _mm256_set1_epi32(base - 1); - __m256i incVec = _mm256_set1_epi32(64); - __m256i add8 = _mm256_set1_epi32(8); - uint32_t *safeout = out + outcapacity; - size_t i = 0; - for (; (i < length) && (out + 64 <= safeout); 
++i) { - uint64_t w = words[i]; - if (w == 0) { - baseVec = _mm256_add_epi32(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m256i vecA = - _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]); - __m256i vecB = - _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm256_add_epi32(baseVec, vecA); - baseVec = _mm256_add_epi32(baseVec, add8); - vecB = _mm256_add_epi32(baseVec, vecB); - baseVec = _mm256_add_epi32(baseVec, add8); - _mm256_storeu_si256((__m256i *)out, vecA); - out += advanceA; - _mm256_storeu_si256((__m256i *)out, vecB); - out += advanceB; - } - } +bool bitset_inplace_union(bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + for (size_t k = 0; k < minlength; ++k) { + b1->array[k] |= b2->array[k]; } - base += i * 64; - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = words[i]; - while ((w != 0) && (out < safeout)) { - int r = - roaring_trailing_zeroes(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - out++; - w &= (w - 1); - } - base += 64; + if (b2->arraysize > b1->arraysize) { + size_t oldsize = b1->arraysize; + if (!bitset_resize(b1, b2->arraysize, false)) return false; + memcpy(b1->array + oldsize, b2->array + oldsize, + (b2->arraysize - oldsize) * sizeof(uint64_t)); } - return out - initout; + return true; } -CROARING_UNTARGET_AVX2 -#endif // CROARING_IS_X64 -size_t bitset_extract_setbits(const uint64_t *words, size_t length, - uint32_t *out, uint32_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = words[i]; - while (w != 0) { - int r = - roaring_trailing_zeroes(w); // on x64, should 
compile to TZCNT - uint32_t val = r + base; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - w &= (w - 1); +bool bitset_empty(const bitset_t *bitset) { + for (size_t k = 0; k < bitset->arraysize; k++) { + if (bitset->array[k] != 0) { + return false; } - base += 64; } - return outpos; + return true; } -size_t bitset_extract_intersection_setbits_uint16( - const uint64_t *__restrict__ words1, const uint64_t *__restrict__ words2, - size_t length, uint16_t *out, uint16_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = words1[i] & words2[i]; - while (w != 0) { - int r = roaring_trailing_zeroes(w); - out[outpos++] = (uint16_t)(r + base); - w &= (w - 1); +size_t bitset_minimum(const bitset_t *bitset) { + for (size_t k = 0; k < bitset->arraysize; k++) { + uint64_t w = bitset->array[k]; + if (w != 0) { + return roaring_trailing_zeroes(w) + k * 64; } - base += 64; } - return outpos; + return SIZE_MAX; } -#if CROARING_IS_X64 -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out" as 16-bit integers, values start at "base" (can - *be set to zero). - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - * - * This function uses SSE decoding. 
- */ -CROARING_TARGET_AVX2 -size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, - uint16_t *out, size_t outcapacity, - uint16_t base) { - uint16_t *initout = out; - __m128i baseVec = _mm_set1_epi16(base - 1); - __m128i incVec = _mm_set1_epi16(64); - __m128i add8 = _mm_set1_epi16(8); - uint16_t *safeout = out + outcapacity; - const int numberofbytes = 2; // process two bytes at a time - size_t i = 0; - for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { - uint64_t w = words[i]; - if (w == 0) { - baseVec = _mm_add_epi16(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m128i vecA = _mm_loadu_si128( - (const __m128i *)vecDecodeTable_uint16[byteA]); - __m128i vecB = _mm_loadu_si128( - (const __m128i *)vecDecodeTable_uint16[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm_add_epi16(baseVec, vecA); - baseVec = _mm_add_epi16(baseVec, add8); - vecB = _mm_add_epi16(baseVec, vecB); - baseVec = _mm_add_epi16(baseVec, add8); - _mm_storeu_si128((__m128i *)out, vecA); - out += advanceA; - _mm_storeu_si128((__m128i *)out, vecB); - out += advanceB; - } - } +bool bitset_grow(bitset_t *bitset, size_t newarraysize) { + if (newarraysize < bitset->arraysize) { + return false; } - base += (uint16_t)(i * 64); - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = words[i]; - while ((w != 0) && (out < safeout)) { - int r = roaring_trailing_zeroes(w); - *out = (uint16_t)(r + base); - out++; - w &= (w - 1); + if (newarraysize > SIZE_MAX / 64) { + return false; + } + if (bitset->capacity < newarraysize) { + uint64_t *newarray; + size_t newcapacity = (UINT64_C(0xFFFFFFFFFFFFFFFF) >> + roaring_leading_zeroes(newarraysize)) + + 1; + while (newcapacity < newarraysize) { + newcapacity *= 2; } - base += 64; + if ((newarray = (uint64_t *)roaring_realloc( + bitset->array, sizeof(uint64_t) * 
newcapacity)) == NULL) { + return false; + } + bitset->capacity = newcapacity; + bitset->array = newarray; } - return out - initout; + memset(bitset->array + bitset->arraysize, 0, + sizeof(uint64_t) * (newarraysize - bitset->arraysize)); + bitset->arraysize = newarraysize; + return true; // success! } -CROARING_UNTARGET_AVX2 -#endif -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base" (can be set to zero). - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, - uint16_t *out, uint16_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = words[i]; - while (w != 0) { - int r = roaring_trailing_zeroes(w); - out[outpos++] = (uint16_t)(r + base); - w &= (w - 1); +size_t bitset_maximum(const bitset_t *bitset) { + for (size_t k = bitset->arraysize; k > 0; k--) { + uint64_t w = bitset->array[k - 1]; + if (w != 0) { + return 63 - roaring_leading_zeroes(w) + (k - 1) * 64; } - base += 64; } - return outpos; + return 0; } -#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64) +/* Returns true if bitsets share no common elements, false otherwise. + * + * Performs early-out if common element found. */ +bool bitsets_disjoint(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; -static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, - uint64_t card, - const uint16_t *list, - uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // TODO: could unroll for performance, see bitset_set_list - // bts is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)\n" - "sbb $-1, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [words] "r"(words), [shift] "r"(shift)); - return card; + for (size_t k = 0; k < minlength; k++) { + if ((b1->array[k] & b2->array[k]) != 0) return false; + } + return true; } -static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, - uint64_t length) { - uint64_t pos; - const uint16_t *end = list + length; +/* Returns true if bitsets contain at least 1 common element, false if they are + * disjoint. + * + * Performs early-out if common element found. */ +bool bitsets_intersect(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; - uint64_t shift = 6; - uint64_t offset; - uint64_t load; - for (; list + 3 < end; list += 4) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[1]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[2]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[3]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + for (size_t k = 0; k < minlength; k++) { + if ((b1->array[k] & b2->array[k]) != 0) return true; } + return false; +} - while (list != end) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); - list++; +/* Returns true if b has any bits set in or after b->array[starting_loc]. 
*/ +static bool any_bits_set(const bitset_t *b, size_t starting_loc) { + if (starting_loc >= b->arraysize) { + return false; + } + for (size_t k = starting_loc; k < b->arraysize; k++) { + if (b->array[k] != 0) return true; } + return false; } -static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, - const uint16_t *list, - uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // btr is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[words],%[offset],8), %[load]\n" - "btr %[pos], %[load]\n" - "mov %[load], (%[words],%[offset],8)\n" - "sbb $0, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [words] "r"(words), [shift] "r"(shift) - : - /* clobbers */ "memory"); - return card; +/* Returns true if b1 has all of b2's bits set. + * + * Performs early out if a bit is found in b2 that is not found in b1. 
*/ +bool bitset_contains_all(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t min_size = b1->arraysize; + if (b1->arraysize > b2->arraysize) { + min_size = b2->arraysize; + } + for (size_t k = 0; k < min_size; k++) { + if ((b1->array[k] & b2->array[k]) != b2->array[k]) { + return false; + } + } + if (b2->arraysize > b1->arraysize) { + /* Need to check if b2 has any bits set beyond b1's array */ + return !any_bits_set(b2, b1->arraysize); + } + return true; } -static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, - const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load & ~(UINT64_C(1) << index); - card -= (load ^ newload) >> index; - words[offset] = newload; - list++; +size_t bitset_union_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t answer = 0; + size_t minlength = + b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; + size_t k = 0; + for (; k + 3 < minlength; k += 4) { + answer += roaring_hamming(b1->array[k] | b2->array[k]); + answer += roaring_hamming(b1->array[k + 1] | b2->array[k + 1]); + answer += roaring_hamming(b1->array[k + 2] | b2->array[k + 2]); + answer += roaring_hamming(b1->array[k + 3] | b2->array[k + 3]); } - return card; + for (; k < minlength; ++k) { + answer += roaring_hamming(b1->array[k] | b2->array[k]); + } + if (b2->arraysize > b1->arraysize) { + // k is equal to b1->arraysize + for (; k + 3 < b2->arraysize; k += 4) { + answer += roaring_hamming(b2->array[k]); + answer += roaring_hamming(b2->array[k + 1]); + answer += roaring_hamming(b2->array[k + 2]); + answer += roaring_hamming(b2->array[k + 3]); + } + for (; k < b2->arraysize; ++k) { + answer += roaring_hamming(b2->array[k]); + } + } else { + // k is equal to b2->arraysize + for (; k + 3 < b1->arraysize; k += 4) { + answer += roaring_hamming(b1->array[k]); + answer += roaring_hamming(b1->array[k + 1]); + answer += roaring_hamming(b1->array[k + 2]); + answer += roaring_hamming(b1->array[k + 3]); + } + for (; k < b1->arraysize; ++k) { + answer += roaring_hamming(b1->array[k]); + } + } + return answer; } -static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, - uint64_t card, - const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - card += (load ^ newload) >> index; - words[offset] = newload; - list++; - } - return card; -} +void bitset_inplace_intersection(bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; + size_t k = 0; + for (; k < minlength; ++k) { + b1->array[k] &= b2->array[k]; + } + for (; k < b1->arraysize; ++k) { + b1->array[k] = 0; // memset could, maybe, be a tiny bit faster + } +} + +size_t bitset_intersection_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t answer = 0; + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + for (size_t k = 0; k < minlength; ++k) { + answer += roaring_hamming(b1->array[k] & b2->array[k]); + } + return answer; +} + +void bitset_inplace_difference(bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + size_t k = 0; + for (; k < minlength; ++k) { + b1->array[k] &= ~(b2->array[k]); + } +} + +size_t bitset_difference_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + size_t k = 0; + size_t answer = 0; + for (; k < minlength; ++k) { + answer += roaring_hamming(b1->array[k] & ~(b2->array[k])); + } + for (; k < b1->arraysize; ++k) { + answer += roaring_hamming(b1->array[k]); + } + return answer; +} + +bool bitset_inplace_symmetric_difference( + bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; + size_t k = 0; + for (; k < minlength; ++k) { + b1->array[k] ^= b2->array[k]; + } + if (b2->arraysize > b1->arraysize) { + size_t oldsize = b1->arraysize; + if (!bitset_resize(b1, b2->arraysize, false)) return false; + memcpy(b1->array + oldsize, b2->array + oldsize, + (b2->arraysize - oldsize) * sizeof(uint64_t)); + } + return true; +} + +size_t bitset_symmetric_difference_count( + const bitset_t *CROARING_CBITSET_RESTRICT b1, + const bitset_t *CROARING_CBITSET_RESTRICT b2) { + size_t minlength = + b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + size_t k = 0; + size_t answer = 0; + for (; k < minlength; ++k) { + answer += roaring_hamming(b1->array[k] ^ b2->array[k]); + } + if (b2->arraysize > b1->arraysize) { + for (; k < b2->arraysize; ++k) { + answer += roaring_hamming(b2->array[k]); + } + } else { + for (; k < b1->arraysize; ++k) { + answer += roaring_hamming(b1->array[k]); + } + } + return answer; +} + +bool bitset_trim(bitset_t *bitset) { + size_t newsize = bitset->arraysize; + while (newsize > 0) { + if (bitset->array[newsize - 1] == 0) + newsize -= 1; + else + break; + } + if (bitset->capacity == newsize) return true; // nothing to do + uint64_t *newarray; + if ((newarray = (uint64_t *)roaring_realloc( + bitset->array, sizeof(uint64_t) * newsize)) == NULL) { + return false; + } + bitset->array = newarray; + bitset->capacity = newsize; + bitset->arraysize = newsize; + return true; +} + +#ifdef __cplusplus +} +} +} // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/bitset.c */ +/* begin file src/bitset_util.c */ +#include +#include +#include +#include +#include + + +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." 
+#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wuninitialized" +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif +#ifdef __cplusplus +using namespace ::roaring::internal; +extern "C" { +namespace roaring { +namespace api { +#endif + +#if CROARING_IS_X64 +static uint8_t lengthTable[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, + 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; +#endif + +#if CROARING_IS_X64 +ALIGNED(32) +static uint32_t vecDecodeTable[256][8] = { + {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ + {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ + {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ + {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ + {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ + {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ + {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ + {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ + {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ + {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ + {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ + {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ + {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ + {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ 
+ {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ + {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ + {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ + {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ + {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ + {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ + {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ + {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ + {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ + {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ + {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ + {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ + {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ + {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ + {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ + {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ + {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ + {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ + {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ + {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ + {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ + {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ + {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ + {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ + {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ + {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ + {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ + {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ + {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ + {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ + {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ + {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ + {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ + {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ + {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ + {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ + {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ + {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ + {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ + {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ 
+ {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ + {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ + {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ + {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ + {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ + {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ + {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ + {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ + {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ + {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ + {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ + {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ + {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ + {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ + {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ + {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ + {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ + {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ + {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ + {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ + {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ + {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ + {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ + {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ + {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ + {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ + {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ + {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ + {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ + {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ + {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ + {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ + {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ + {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ + {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ + {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ + {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ + {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ + {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ + {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ 
+ {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ + {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ + {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ + {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ + {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ + {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ + {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ + {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ + {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ + {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ + {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ + {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ + {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ + {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ + {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ + {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ + {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ + {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ + {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ + {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ + {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ + {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ + {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ + {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ + {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ + {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ + {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ + {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ + {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ + {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ + {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ + {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ + {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ + {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ + {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ + {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ + {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ + {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ + {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ + {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ 
+ {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ + {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ + {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ + {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ + {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ + {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ + {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ + {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ + {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ + {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ + {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ + {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ + {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ + {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ + {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ + {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ + {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ + {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ + {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ + {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ + {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ + {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ + {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ + {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ + {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ + {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ + {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ + {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ + {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ + {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ + {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ + {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ + {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ + {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ + {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ + {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ + {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ + {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ + {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ + {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ 
+ {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ + {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ + {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ + {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ + {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ + {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ + {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ + {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ + {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ + {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ + {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ + {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ + {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ + {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ + {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ + {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ + {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ + {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ + {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ + {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ + {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ + {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ + {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ + {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ + {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ + {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ + {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ + {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ + {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ + {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ + {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ + {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ + {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ + {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ + {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ + {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ + {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ + {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ + {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ + {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ 
+ {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ + {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ + {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ + {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ + {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ + {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ + {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ + {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ + {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ + {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ + {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ + {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ + {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ + {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ + {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ + {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ + {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ + {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ + {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ + {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ + {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ + {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ + {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ + {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ + {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ + {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ + {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ + {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ + {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ + {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ + {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ + {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ + {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ + {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ + {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ + {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ + {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ + {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ + {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ + {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ 
+ {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ + {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ +}; + +#endif // #if CROARING_IS_X64 + +#if CROARING_IS_X64 +// same as vecDecodeTable but in 16 bits +ALIGNED(32) +static uint16_t vecDecodeTable_uint16[256][8] = { + {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ + {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ + {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ + {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ + {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ + {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ + {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ + {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ + {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ + {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ + {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ + {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ + {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ + {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ + {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ + {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ + {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ + {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ + {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ + {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ + {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ + {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ + {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ + {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ + {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ + {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ + {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ + {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ + {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ + {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ + {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ + {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ + {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ + {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ + {2, 6, 0, 0, 0, 0, 0, 0}, /* 
0x22 (00100010) */ + {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ + {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ + {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ + {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ + {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ + {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ + {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ + {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ + {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ + {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ + {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ + {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ + {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ + {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ + {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ + {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ + {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ + {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ + {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ + {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ + {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ + {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ + {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ + {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ + {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ + {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ + {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ + {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ + {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ + {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ + {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ + {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ + {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ + {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ + {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ + {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ + {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ + {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ + {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ + {2, 4, 7, 0, 0, 0, 0, 0}, /* 
0x4A (01001010) */ + {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ + {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ + {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ + {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ + {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ + {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ + {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ + {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ + {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ + {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ + {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ + {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ + {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ + {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ + {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ + {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ + {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ + {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ + {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ + {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ + {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ + {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ + {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ + {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ + {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ + {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ + {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ + {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ + {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ + {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ + {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ + {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ + {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ + {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ + {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ + {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ + {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ + {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ + {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ + {2, 5, 6, 7, 0, 0, 0, 0}, /* 
0x72 (01110010) */ + {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ + {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ + {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ + {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ + {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ + {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ + {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ + {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ + {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ + {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ + {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ + {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ + {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ + {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ + {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ + {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ + {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ + {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ + {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ + {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ + {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ + {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ + {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ + {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ + {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ + {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ + {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ + {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ + {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ + {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ + {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ + {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ + {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ + {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ + {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ + {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ + {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ + {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ + {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ + {2, 4, 5, 8, 0, 0, 0, 0}, /* 
0x9A (10011010) */ + {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ + {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ + {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ + {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ + {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ + {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ + {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ + {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ + {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ + {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ + {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ + {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ + {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ + {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ + {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ + {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ + {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ + {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ + {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ + {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ + {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ + {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ + {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ + {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ + {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ + {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ + {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ + {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ + {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ + {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ + {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ + {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ + {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ + {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ + {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ + {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ + {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ + {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ + {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ + {2, 7, 8, 0, 0, 0, 0, 0}, /* 
0xC2 (11000010) */ + {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ + {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ + {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ + {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ + {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ + {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ + {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ + {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ + {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ + {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ + {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ + {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ + {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ + {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ + {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ + {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ + {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ + {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ + {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ + {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ + {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ + {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ + {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ + {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ + {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ + {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ + {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ + {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ + {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ + {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ + {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ + {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ + {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ + {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ + {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ + {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ + {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ + {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ + {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ + {2, 4, 6, 7, 8, 0, 0, 0}, /* 
0xEA (11101010) */ + {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ + {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ + {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ + {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ + {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ + {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ + {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ + {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ + {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ + {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ + {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ + {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ + {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ + {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ + {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ + {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ + {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ + {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ + {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ + {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ + {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ +}; + +#endif -static inline void _scalar_bitset_set_list(uint64_t *words, - const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - words[offset] = newload; - list++; - } -} +#if CROARING_IS_X64 +#if CROARING_COMPILER_SUPPORTS_AVX512 +CROARING_TARGET_AVX512 +const uint8_t vbmi2_table[64] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; +size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, + uint32_t *vout, size_t outcapacity, + uint32_t base) { + uint32_t *out = (uint32_t 
*)vout; + uint32_t *initout = out; + uint32_t *safeout = out + outcapacity; + __m512i base_v = _mm512_set1_epi32(base); + __m512i index_table = _mm512_loadu_si512(vbmi2_table); + size_t i = 0; -uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, - uint64_t length) { - if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { - return _asm_bitset_clear_list(words, card, list, length); - } else { - return _scalar_bitset_clear_list(words, card, list, length); - } -} + for (; (i < length) && ((out + 64) < safeout); i += 1) { + uint64_t v = words[i]; + __m512i vec = _mm512_maskz_compress_epi8(v, index_table); -uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { - return _asm_bitset_set_list_withcard(words, card, list, length); - } else { - return _scalar_bitset_set_list_withcard(words, card, list, length); - } -} + uint8_t advance = (uint8_t)roaring_hamming(v); -void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { - if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { - _asm_bitset_set_list(words, list, length); - } else { - _scalar_bitset_set_list(words, list, length); - } -} -#else -uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load & ~(UINT64_C(1) << index); - card -= (load ^ newload) >> index; - words[offset] = newload; - list++; - } - return card; -} + __m512i vbase = + _mm512_add_epi32(base_v, _mm512_set1_epi32((int)(i * 64))); + __m512i r1 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 0)); + __m512i r2 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 1)); + __m512i r3 = 
_mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 2)); + __m512i r4 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec, 3)); -uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - card += (load ^ newload) >> index; - words[offset] = newload; - list++; - } - return card; -} + r1 = _mm512_add_epi32(r1, vbase); + r2 = _mm512_add_epi32(r2, vbase); + r3 = _mm512_add_epi32(r3, vbase); + r4 = _mm512_add_epi32(r4, vbase); + _mm512_storeu_si512((__m512i *)out, r1); + _mm512_storeu_si512((__m512i *)(out + 16), r2); + _mm512_storeu_si512((__m512i *)(out + 32), r3); + _mm512_storeu_si512((__m512i *)(out + 48), r4); -void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load | (UINT64_C(1) << index); - words[offset] = newload; - list++; + out += advance; } -} - -#endif -/* flip specified bits */ -/* TODO: consider whether worthwhile to make an asm version */ + base += i * 64; -uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load ^ (UINT64_C(1) << index); - // todo: is a branch here all that bad? 
- card += - (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 - words[offset] = newload; - list++; + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = words[i]; + while ((w != 0) && (out < safeout)) { + int r = + roaring_trailing_zeroes(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + out++; + w &= (w - 1); + } + base += 64; } - return card; -} -void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *list; - offset = pos >> 6; - index = pos % 64; - load = words[offset]; - newload = load ^ (UINT64_C(1) << index); - words[offset] = newload; - list++; - } + return out - initout; } -#ifdef __cplusplus -} -} -} // extern "C" { namespace roaring { namespace api { -#endif -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif -/* end file src/bitset_util.c */ -/* begin file src/bitset.c */ -#include -#include -#include -#include -#include +// Reference: +// https://lemire.me/blog/2022/05/10/faster-bitset-decoding-using-intel-avx-512/ +size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array, + size_t length, uint16_t *vout, + size_t capacity, uint16_t base) { + uint16_t *out = (uint16_t *)vout; + uint16_t *initout = out; + uint16_t *safeout = vout + capacity; + + __m512i base_v = _mm512_set1_epi16(base); + __m512i index_table = _mm512_loadu_si512(vbmi2_table); + size_t i = 0; + for (; (i < length) && ((out + 64) < safeout); i++) { + uint64_t v = array[i]; + __m512i vec = _mm512_maskz_compress_epi8(v, index_table); -#ifdef __cplusplus -extern "C" { -namespace roaring { -namespace internal { -#endif + uint8_t advance = (uint8_t)roaring_hamming(v); -extern inline void bitset_print(const bitset_t *b); -extern inline bool bitset_for_each(const bitset_t *b, bitset_iterator 
iterator, - void *ptr); -extern inline size_t bitset_next_set_bits(const bitset_t *bitset, - size_t *buffer, size_t capacity, - size_t *startfrom); -extern inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag); -extern inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i); -extern inline void bitset_set(bitset_t *bitset, size_t i); -extern inline bool bitset_get(const bitset_t *bitset, size_t i); -extern inline size_t bitset_size_in_words(const bitset_t *bitset); -extern inline size_t bitset_size_in_bits(const bitset_t *bitset); -extern inline size_t bitset_size_in_bytes(const bitset_t *bitset); + __m512i vbase = + _mm512_add_epi16(base_v, _mm512_set1_epi16((short)(i * 64))); + __m512i r1 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec, 0)); + __m512i r2 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec, 1)); + + r1 = _mm512_add_epi16(r1, vbase); + r2 = _mm512_add_epi16(r2, vbase); -/* Create a new bitset. Return NULL in case of failure. */ -bitset_t *bitset_create(void) { - bitset_t *bitset = NULL; - /* Allocate the bitset itself. */ - if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { - return NULL; + _mm512_storeu_si512((__m512i *)out, r1); + _mm512_storeu_si512((__m512i *)(out + 32), r2); + out += advance; } - bitset->array = NULL; - bitset->arraysize = 0; - bitset->capacity = 0; - return bitset; -} -/* Create a new bitset able to contain size bits. Return NULL in case of - * failure. */ -bitset_t *bitset_create_with_capacity(size_t size) { - bitset_t *bitset = NULL; - /* Allocate the bitset itself. 
*/ - if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { - return NULL; - } - bitset->arraysize = - (size + sizeof(uint64_t) * 8 - 1) / (sizeof(uint64_t) * 8); - bitset->capacity = bitset->arraysize; - if ((bitset->array = (uint64_t *)roaring_calloc( - bitset->arraysize, sizeof(uint64_t))) == NULL) { - roaring_free(bitset); - return NULL; - } - return bitset; -} + base += i * 64; -/* Create a copy */ -bitset_t *bitset_copy(const bitset_t *bitset) { - bitset_t *copy = NULL; - /* Allocate the bitset itself. */ - if ((copy = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { - return NULL; - } - memcpy(copy, bitset, sizeof(bitset_t)); - copy->capacity = copy->arraysize; - if ((copy->array = (uint64_t *)roaring_malloc(sizeof(uint64_t) * - bitset->arraysize)) == NULL) { - roaring_free(copy); - return NULL; + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = array[i]; + while ((w != 0) && (out < safeout)) { + int r = + roaring_trailing_zeroes(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out, &val, sizeof(uint16_t)); + out++; + w &= (w - 1); + } + base += 64; } - memcpy(copy->array, bitset->array, sizeof(uint64_t) * bitset->arraysize); - return copy; -} - -void bitset_clear(bitset_t *bitset) { - memset(bitset->array, 0, sizeof(uint64_t) * bitset->arraysize); -} -void bitset_fill(bitset_t *bitset) { - memset(bitset->array, 0xff, sizeof(uint64_t) * bitset->arraysize); + return out - initout; } +CROARING_UNTARGET_AVX512 +#endif -void bitset_shift_left(bitset_t *bitset, size_t s) { - size_t extra_words = s / 64; - int inword_shift = s % 64; - size_t as = bitset->arraysize; - if (inword_shift == 0) { - bitset_resize(bitset, as + extra_words, false); - // could be done with a memmove - for (size_t i = as + extra_words; i > extra_words; i--) { - bitset->array[i - 1] = bitset->array[i - 1 - extra_words]; - } - } else { - bitset_resize(bitset, as + extra_words + 1, true); - bitset->array[as + extra_words] = - 
bitset->array[as - 1] >> (64 - inword_shift); - for (size_t i = as + extra_words; i >= extra_words + 2; i--) { - bitset->array[i - 1] = - (bitset->array[i - 1 - extra_words] << inword_shift) | - (bitset->array[i - 2 - extra_words] >> (64 - inword_shift)); +CROARING_TARGET_AVX2 +size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, + uint32_t *out, size_t outcapacity, + uint32_t base) { + uint32_t *initout = out; + __m256i baseVec = _mm256_set1_epi32(base - 1); + __m256i incVec = _mm256_set1_epi32(64); + __m256i add8 = _mm256_set1_epi32(8); + uint32_t *safeout = out + outcapacity; + size_t i = 0; + for (; (i < length) && (out + 64 <= safeout); ++i) { + uint64_t w = words[i]; + if (w == 0) { + baseVec = _mm256_add_epi32(baseVec, incVec); + } else { + for (int k = 0; k < 4; ++k) { + uint8_t byteA = (uint8_t)w; + uint8_t byteB = (uint8_t)(w >> 8); + w >>= 16; + __m256i vecA = + _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]); + __m256i vecB = + _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]); + uint8_t advanceA = lengthTable[byteA]; + uint8_t advanceB = lengthTable[byteB]; + vecA = _mm256_add_epi32(baseVec, vecA); + baseVec = _mm256_add_epi32(baseVec, add8); + vecB = _mm256_add_epi32(baseVec, vecB); + baseVec = _mm256_add_epi32(baseVec, add8); + _mm256_storeu_si256((__m256i *)out, vecA); + out += advanceA; + _mm256_storeu_si256((__m256i *)out, vecB); + out += advanceB; + } } - bitset->array[extra_words] = bitset->array[0] << inword_shift; } - for (size_t i = 0; i < extra_words; i++) { - bitset->array[i] = 0; + base += i * 64; + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = words[i]; + while ((w != 0) && (out < safeout)) { + int r = + roaring_trailing_zeroes(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + out++; + w &= (w - 1); + } + base += 64; } + return out - initout; } +CROARING_UNTARGET_AVX2 +#endif // 
CROARING_IS_X64 -void bitset_shift_right(bitset_t *bitset, size_t s) { - size_t extra_words = s / 64; - int inword_shift = s % 64; - size_t as = bitset->arraysize; - if (inword_shift == 0) { - // could be done with a memmove - for (size_t i = 0; i < as - extra_words; i++) { - bitset->array[i] = bitset->array[i + extra_words]; - } - bitset_resize(bitset, as - extra_words, false); - - } else { - for (size_t i = 0; i + extra_words + 1 < as; i++) { - bitset->array[i] = - (bitset->array[i + extra_words] >> inword_shift) | - (bitset->array[i + extra_words + 1] << (64 - inword_shift)); +size_t bitset_extract_setbits(const uint64_t *words, size_t length, + uint32_t *out, uint32_t base) { + int outpos = 0; + for (size_t i = 0; i < length; ++i) { + uint64_t w = words[i]; + while (w != 0) { + int r = + roaring_trailing_zeroes(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out + outpos, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + outpos++; + w &= (w - 1); } - bitset->array[as - extra_words - 1] = - (bitset->array[as - 1] >> inword_shift); - bitset_resize(bitset, as - extra_words, false); + base += 64; } + return outpos; } -/* Free memory. */ -void bitset_free(bitset_t *bitset) { - if (bitset == NULL) { - return; +size_t bitset_extract_intersection_setbits_uint16( + const uint64_t *__restrict__ words1, const uint64_t *__restrict__ words2, + size_t length, uint16_t *out, uint16_t base) { + int outpos = 0; + for (size_t i = 0; i < length; ++i) { + uint64_t w = words1[i] & words2[i]; + while (w != 0) { + int r = roaring_trailing_zeroes(w); + out[outpos++] = (uint16_t)(r + base); + w &= (w - 1); + } + base += 64; } - roaring_free(bitset->array); - roaring_free(bitset); + return outpos; } -/* Resize the bitset so that it can support newarraysize * 64 bits. Return true - * in case of success, false for failure. 
*/ -bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes) { - if (newarraysize > SIZE_MAX / 64) { - return false; - } - size_t smallest = - newarraysize < bitset->arraysize ? newarraysize : bitset->arraysize; - if (bitset->capacity < newarraysize) { - uint64_t *newarray; - size_t newcapacity = bitset->capacity; - if (newcapacity == 0) { - newcapacity = 1; - } - while (newcapacity < newarraysize) { - newcapacity *= 2; +#if CROARING_IS_X64 +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out" as 16-bit integers, values start at "base" (can + *be set to zero). + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. + * + * This function uses SSE decoding. + */ +CROARING_TARGET_AVX2 +size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, + uint16_t *out, size_t outcapacity, + uint16_t base) { + uint16_t *initout = out; + __m128i baseVec = _mm_set1_epi16(base - 1); + __m128i incVec = _mm_set1_epi16(64); + __m128i add8 = _mm_set1_epi16(8); + uint16_t *safeout = out + outcapacity; + const int numberofbytes = 2; // process two bytes at a time + size_t i = 0; + for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { + uint64_t w = words[i]; + if (w == 0) { + baseVec = _mm_add_epi16(baseVec, incVec); + } else { + for (int k = 0; k < 4; ++k) { + uint8_t byteA = (uint8_t)w; + uint8_t byteB = (uint8_t)(w >> 8); + w >>= 16; + __m128i vecA = _mm_loadu_si128( + (const __m128i *)vecDecodeTable_uint16[byteA]); + __m128i vecB = _mm_loadu_si128( + (const __m128i *)vecDecodeTable_uint16[byteB]); + uint8_t advanceA = lengthTable[byteA]; + uint8_t advanceB = lengthTable[byteB]; + vecA = _mm_add_epi16(baseVec, vecA); + baseVec = _mm_add_epi16(baseVec, add8); + vecB = _mm_add_epi16(baseVec, vecB); + baseVec = _mm_add_epi16(baseVec, add8); + _mm_storeu_si128((__m128i *)out, vecA); + out 
+= advanceA; + _mm_storeu_si128((__m128i *)out, vecB); + out += advanceB; + } } - if ((newarray = (uint64_t *)roaring_realloc( - bitset->array, sizeof(uint64_t) * newcapacity)) == NULL) { - return false; + } + base += (uint16_t)(i * 64); + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = words[i]; + while ((w != 0) && (out < safeout)) { + int r = roaring_trailing_zeroes(w); + *out = (uint16_t)(r + base); + out++; + w &= (w - 1); } - bitset->capacity = newcapacity; - bitset->array = newarray; + base += 64; } - if (padwithzeroes && (newarraysize > smallest)) - memset(bitset->array + smallest, 0, - sizeof(uint64_t) * (newarraysize - smallest)); - bitset->arraysize = newarraysize; - return true; // success! + return out - initout; } +CROARING_UNTARGET_AVX2 +#endif -size_t bitset_count(const bitset_t *bitset) { - size_t card = 0; - size_t k = 0; - for (; k + 7 < bitset->arraysize; k += 8) { - card += roaring_hamming(bitset->array[k]); - card += roaring_hamming(bitset->array[k + 1]); - card += roaring_hamming(bitset->array[k + 2]); - card += roaring_hamming(bitset->array[k + 3]); - card += roaring_hamming(bitset->array[k + 4]); - card += roaring_hamming(bitset->array[k + 5]); - card += roaring_hamming(bitset->array[k + 6]); - card += roaring_hamming(bitset->array[k + 7]); - } - for (; k + 3 < bitset->arraysize; k += 4) { - card += roaring_hamming(bitset->array[k]); - card += roaring_hamming(bitset->array[k + 1]); - card += roaring_hamming(bitset->array[k + 2]); - card += roaring_hamming(bitset->array[k + 3]); - } - for (; k < bitset->arraysize; k++) { - card += roaring_hamming(bitset->array[k]); +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out", values start at "base" (can be set to zero). + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. 
+ */ +size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, + uint16_t *out, uint16_t base) { + int outpos = 0; + for (size_t i = 0; i < length; ++i) { + uint64_t w = words[i]; + while (w != 0) { + int r = roaring_trailing_zeroes(w); + out[outpos++] = (uint16_t)(r + base); + w &= (w - 1); + } + base += 64; } - return card; + return outpos; } -bool bitset_inplace_union(bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - for (size_t k = 0; k < minlength; ++k) { - b1->array[k] |= b2->array[k]; - } - if (b2->arraysize > b1->arraysize) { - size_t oldsize = b1->arraysize; - if (!bitset_resize(b1, b2->arraysize, false)) return false; - memcpy(b1->array + oldsize, b2->array + oldsize, - (b2->arraysize - oldsize) * sizeof(uint64_t)); - } - return true; -} +#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64) -bool bitset_empty(const bitset_t *bitset) { - for (size_t k = 0; k < bitset->arraysize; k++) { - if (bitset->array[k] != 0) { - return false; - } - } - return true; +static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, + uint64_t card, + const uint16_t *list, + uint64_t length) { + uint64_t offset, load, pos; + uint64_t shift = 6; + const uint16_t *end = list + length; + if (!length) return card; + // TODO: could unroll for performance, see bitset_set_list + // bts is not available as an intrinsic in GCC + __asm volatile( + "1:\n" + "movzwq (%[list]), %[pos]\n" + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)\n" + "sbb $-1, %[card]\n" + "add $2, %[list]\n" + "cmp %[list], %[end]\n" + "jnz 1b" + : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), + [pos] "=&r"(pos), [offset] "=&r"(offset) + : [end] "r"(end), [words] "r"(words), [shift] "r"(shift)); + return card; } -size_t 
bitset_minimum(const bitset_t *bitset) { - for (size_t k = 0; k < bitset->arraysize; k++) { - uint64_t w = bitset->array[k]; - if (w != 0) { - return roaring_trailing_zeroes(w) + k * 64; - } - } - return SIZE_MAX; -} +static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, + uint64_t length) { + uint64_t pos; + const uint16_t *end = list + length; -bool bitset_grow(bitset_t *bitset, size_t newarraysize) { - if (newarraysize < bitset->arraysize) { - return false; - } - if (newarraysize > SIZE_MAX / 64) { - return false; - } - if (bitset->capacity < newarraysize) { - uint64_t *newarray; - size_t newcapacity = (UINT64_C(0xFFFFFFFFFFFFFFFF) >> - roaring_leading_zeroes(newarraysize)) + - 1; - while (newcapacity < newarraysize) { - newcapacity *= 2; - } - if ((newarray = (uint64_t *)roaring_realloc( - bitset->array, sizeof(uint64_t) * newcapacity)) == NULL) { - return false; - } - bitset->capacity = newcapacity; - bitset->array = newarray; + uint64_t shift = 6; + uint64_t offset; + uint64_t load; + for (; list + 3 < end; list += 4) { + pos = list[0]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[1]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[2]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[3]; + __asm volatile( + "shrx %[shift], %[pos], 
%[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); } - memset(bitset->array + bitset->arraysize, 0, - sizeof(uint64_t) * (newarraysize - bitset->arraysize)); - bitset->arraysize = newarraysize; - return true; // success! -} -size_t bitset_maximum(const bitset_t *bitset) { - for (size_t k = bitset->arraysize; k > 0; k--) { - uint64_t w = bitset->array[k - 1]; - if (w != 0) { - return 63 - roaring_leading_zeroes(w) + (k - 1) * 64; - } + while (list != end) { + pos = list[0]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); + list++; } - return 0; } -/* Returns true if bitsets share no common elements, false otherwise. - * - * Performs early-out if common element found. */ -bool bitsets_disjoint(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; +static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, + const uint16_t *list, + uint64_t length) { + uint64_t offset, load, pos; + uint64_t shift = 6; + const uint16_t *end = list + length; + if (!length) return card; + // btr is not available as an intrinsic in GCC + __asm volatile( + "1:\n" + "movzwq (%[list]), %[pos]\n" + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[words],%[offset],8), %[load]\n" + "btr %[pos], %[load]\n" + "mov %[load], (%[words],%[offset],8)\n" + "sbb $0, %[card]\n" + "add $2, %[list]\n" + "cmp %[list], %[end]\n" + "jnz 1b" + : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), + [pos] "=&r"(pos), [offset] "=&r"(offset) + : [end] "r"(end), [words] "r"(words), [shift] "r"(shift) + : + /* clobbers */ "memory"); + return card; +} - for (size_t k = 0; k < minlength; k++) { - if ((b1->array[k] & b2->array[k]) != 0) return false; +static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, + const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load & ~(UINT64_C(1) << index); + card -= (load ^ newload) >> index; + words[offset] = newload; + list++; } - return true; + return card; } -/* Returns true if bitsets contain at least 1 common element, false if they are - * disjoint. - * - * Performs early-out if common element found. */ -bool bitsets_intersect(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; - - for (size_t k = 0; k < minlength; k++) { - if ((b1->array[k] & b2->array[k]) != 0) return true; +static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, + uint64_t card, + const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + card += (load ^ newload) >> index; + words[offset] = newload; + list++; } - return false; + return card; } -/* Returns true if b has any bits set in or after b->array[starting_loc]. */ -static bool any_bits_set(const bitset_t *b, size_t starting_loc) { - if (starting_loc >= b->arraysize) { - return false; - } - for (size_t k = starting_loc; k < b->arraysize; k++) { - if (b->array[k] != 0) return true; +static inline void _scalar_bitset_set_list(uint64_t *words, + const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + words[offset] = newload; + list++; } - return false; } -/* Returns true if b1 has all of b2's bits set. - * - * Performs early out if a bit is found in b2 that is not found in b1. 
*/ -bool bitset_contains_all(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t min_size = b1->arraysize; - if (b1->arraysize > b2->arraysize) { - min_size = b2->arraysize; - } - for (size_t k = 0; k < min_size; k++) { - if ((b1->array[k] & b2->array[k]) != b2->array[k]) { - return false; - } - } - if (b2->arraysize > b1->arraysize) { - /* Need to check if b2 has any bits set beyond b1's array */ - return !any_bits_set(b2, b1->arraysize); +uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, + uint64_t length) { + if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { + return _asm_bitset_clear_list(words, card, list, length); + } else { + return _scalar_bitset_clear_list(words, card, list, length); } - return true; } -size_t bitset_union_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t answer = 0; - size_t minlength = - b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; - size_t k = 0; - for (; k + 3 < minlength; k += 4) { - answer += roaring_hamming(b1->array[k] | b2->array[k]); - answer += roaring_hamming(b1->array[k + 1] | b2->array[k + 1]); - answer += roaring_hamming(b1->array[k + 2] | b2->array[k + 2]); - answer += roaring_hamming(b1->array[k + 3] | b2->array[k + 3]); - } - for (; k < minlength; ++k) { - answer += roaring_hamming(b1->array[k] | b2->array[k]); - } - if (b2->arraysize > b1->arraysize) { - // k is equal to b1->arraysize - for (; k + 3 < b2->arraysize; k += 4) { - answer += roaring_hamming(b2->array[k]); - answer += roaring_hamming(b2->array[k + 1]); - answer += roaring_hamming(b2->array[k + 2]); - answer += roaring_hamming(b2->array[k + 3]); - } - for (; k < b2->arraysize; ++k) { - answer += roaring_hamming(b2->array[k]); - } +uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { + return _asm_bitset_set_list_withcard(words, card, list, length); } else { - // k is equal to b2->arraysize - for (; k + 3 < b1->arraysize; k += 4) { - answer += roaring_hamming(b1->array[k]); - answer += roaring_hamming(b1->array[k + 1]); - answer += roaring_hamming(b1->array[k + 2]); - answer += roaring_hamming(b1->array[k + 3]); - } - for (; k < b1->arraysize; ++k) { - answer += roaring_hamming(b1->array[k]); - } + return _scalar_bitset_set_list_withcard(words, card, list, length); } - return answer; } -void bitset_inplace_intersection(bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; - size_t k = 0; - for (; k < minlength; ++k) { - b1->array[k] &= b2->array[k]; - } - for (; k < b1->arraysize; ++k) { - b1->array[k] = 0; // memset could, maybe, be a tiny bit faster +void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { + if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { + _asm_bitset_set_list(words, list, length); + } else { + _scalar_bitset_set_list(words, list, length); } } - -size_t bitset_intersection_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t answer = 0; - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - for (size_t k = 0; k < minlength; ++k) { - answer += roaring_hamming(b1->array[k] & b2->array[k]); +#else +uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load & ~(UINT64_C(1) << index); + card -= (load ^ newload) >> index; + words[offset] = newload; + list++; } - return answer; + return card; } -void bitset_inplace_difference(bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; - size_t k = 0; - for (; k < minlength; ++k) { - b1->array[k] &= ~(b2->array[k]); +uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + card += (load ^ newload) >> index; + words[offset] = newload; + list++; } + return card; } -size_t bitset_difference_count(const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - size_t k = 0; - size_t answer = 0; - for (; k < minlength; ++k) { - answer += roaring_hamming(b1->array[k] & ~(b2->array[k])); - } - for (; k < b1->arraysize; ++k) { - answer += roaring_hamming(b1->array[k]); +void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load | (UINT64_C(1) << index); + words[offset] = newload; + list++; } - return answer; } -bool bitset_inplace_symmetric_difference( - bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; - size_t k = 0; - for (; k < minlength; ++k) { - b1->array[k] ^= b2->array[k]; - } - if (b2->arraysize > b1->arraysize) { - size_t oldsize = b1->arraysize; - if (!bitset_resize(b1, b2->arraysize, false)) return false; - memcpy(b1->array + oldsize, b2->array + oldsize, - (b2->arraysize - oldsize) * sizeof(uint64_t)); - } - return true; -} +#endif -size_t bitset_symmetric_difference_count( - const bitset_t *CROARING_CBITSET_RESTRICT b1, - const bitset_t *CROARING_CBITSET_RESTRICT b2) { - size_t minlength = - b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; - size_t k = 0; - size_t answer = 0; - for (; k < minlength; ++k) { - answer += roaring_hamming(b1->array[k] ^ b2->array[k]); - } - if (b2->arraysize > b1->arraysize) { - for (; k < b2->arraysize; ++k) { - answer += roaring_hamming(b2->array[k]); - } - } else { - for (; k < b1->arraysize; ++k) { - answer += roaring_hamming(b1->array[k]); - } +/* flip specified bits */ +/* TODO: consider whether worthwhile to make an asm version */ + +uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load ^ (UINT64_C(1) << index); + // todo: is a branch here all that bad? 
+ card += + (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 + words[offset] = newload; + list++; } - return answer; + return card; } -bool bitset_trim(bitset_t *bitset) { - size_t newsize = bitset->arraysize; - while (newsize > 0) { - if (bitset->array[newsize - 1] == 0) - newsize -= 1; - else - break; - } - if (bitset->capacity == newsize) return true; // nothing to do - uint64_t *newarray; - if ((newarray = (uint64_t *)roaring_realloc( - bitset->array, sizeof(uint64_t) * newsize)) == NULL) { - return false; +void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *list; + offset = pos >> 6; + index = pos % 64; + load = words[offset]; + newload = load ^ (UINT64_C(1) << index); + words[offset] = newload; + list++; } - bitset->array = newarray; - bitset->capacity = newsize; - bitset->arraysize = newsize; - return true; } #ifdef __cplusplus } } -} // extern "C" { namespace roaring { namespace internal { +} // extern "C" { namespace roaring { namespace api { #endif -/* end file src/bitset.c */ +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif +/* end file src/bitset_util.c */ /* begin file src/containers/array.c */ /* * array.c @@ -18457,29 +18481,27 @@ void run_container_offset(const run_container_t *c, container_t **loc, run_container_t *lo = NULL, *hi = NULL; bool split; - unsigned int lo_cap, hi_cap; + int lo_cap, hi_cap; int top, pivot; top = (1 << 16) - offset; pivot = run_container_index_equalorlarger(c, top); - // pivot is the index of the first run that is >= top or -1 if no such run - if (pivot >= 0) { - split = c->runs[pivot].value < top; - lo_cap = pivot + (split ? 
1 : 0); - hi_cap = c->n_runs - pivot; - } else { - // here pivot < 0 + if (pivot == -1) { split = false; lo_cap = c->n_runs; hi_cap = 0; + } else { + split = c->runs[pivot].value < top; + lo_cap = pivot + (split ? 1 : 0); + hi_cap = c->n_runs - pivot; } if (loc && lo_cap) { lo = run_container_create_given_capacity(lo_cap); memcpy(lo->runs, c->runs, lo_cap * sizeof(rle16_t)); lo->n_runs = lo_cap; - for (unsigned int i = 0; i < lo_cap; ++i) { + for (int i = 0; i < lo_cap; ++i) { lo->runs[i].value += offset; } *loc = (container_t *)lo; @@ -18489,7 +18511,7 @@ void run_container_offset(const run_container_t *c, container_t **loc, hi = run_container_create_given_capacity(hi_cap); memcpy(hi->runs, c->runs + pivot, hi_cap * sizeof(rle16_t)); hi->n_runs = hi_cap; - for (unsigned int i = 0; i < hi_cap; ++i) { + for (int i = 0; i < hi_cap; ++i) { hi->runs[i].value += offset; } *hic = (container_t *)hi; @@ -19873,1046 +19895,1278 @@ void* roaring_aligned_malloc(size_t alignment, size_t size) { void roaring_aligned_free(void* p) { global_memory_hook.aligned_free(p); } /* end file src/memory.c */ -/* begin file src/roaring_array.c */ +/* begin file src/roaring.c */ #include #include -#include +#include +#include +#include #include -#include #include +// Include after roaring.h + #ifdef __cplusplus +using namespace ::roaring::internal; + extern "C" { namespace roaring { -namespace internal { +namespace api { #endif -// Convention: [0,ra->size) all elements are initialized -// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing - -extern inline int32_t ra_get_size(const roaring_array_t *ra); -extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); +#define CROARING_SERIALIZATION_ARRAY_UINT32 1 +#define CROARING_SERIALIZATION_CONTAINER 2 +extern inline int roaring_trailing_zeroes(unsigned long long input_num); +extern inline int roaring_leading_zeroes(unsigned long long input_num); +extern inline void 
roaring_bitmap_init_cleared(roaring_bitmap_t *r); +extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r); +extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, + bool cow); +extern inline roaring_bitmap_t *roaring_bitmap_create(void); +extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, + uint64_t max); +extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, + uint64_t min, uint64_t max); -extern inline container_t *ra_get_container_at_index(const roaring_array_t *ra, - uint16_t i, - uint8_t *typecode); +static inline bool is_cow(const roaring_bitmap_t *r) { + return r->high_low_container.flags & ROARING_FLAG_COW; +} +static inline bool is_frozen(const roaring_bitmap_t *r) { + return r->high_low_container.flags & ROARING_FLAG_FROZEN; +} -extern inline void ra_unshare_container_at_index(roaring_array_t *ra, - uint16_t i); +// this is like roaring_bitmap_add, but it populates pointer arguments in such a +// way +// that we can recover the container touched, which, in turn can be used to +// accelerate some functions (when you repeatedly need to add to the same +// container) +static inline container_t *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, + uint32_t val, + uint8_t *type, + int *index) { + roaring_array_t *ra = &r->high_low_container; -extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, - int32_t i, - uint16_t key, - container_t *c, - uint8_t typecode); + uint16_t hb = val >> 16; + const int i = ra_get_index(ra, hb); + if (i >= 0) { + ra_unshare_container_at_index(ra, (uint16_t)i); + container_t *c = ra_get_container_at_index(ra, (uint16_t)i, type); + uint8_t new_type = *type; + container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type); + *index = i; + if (c2 != c) { + container_free(c, *type); + ra_set_container_at_index(ra, i, c2, new_type); + *type = new_type; + return c2; + } else { + return c; + } + } else { + array_container_t 
*new_ac = array_container_create(); + container_t *c = + container_add(new_ac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, type); + // we could just assume that it stays an array container + ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type); + *index = -i - 1; + return c; + } +} -extern inline void ra_set_container_at_index(const roaring_array_t *ra, - int32_t i, container_t *c, - uint8_t typecode); +roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { + roaring_bitmap_t *ans = + (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); + if (!ans) { + return NULL; + } + bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); + if (!is_ok) { + roaring_free(ans); + return NULL; + } + return ans; +} -static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { - // - // Note: not implemented using C's realloc(), because the memory layout is - // Struct-of-Arrays vs. Array-of-Structs: - // https://github.com/RoaringBitmap/CRoaring/issues/256 +bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) { + return ra_init_with_capacity(&r->high_low_container, cap); +} - if (new_capacity == 0) { - roaring_free(ra->containers); - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; - ra->allocation_size = 0; - return true; - } - const size_t memoryneeded = - new_capacity * - (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); - void *bigalloc = roaring_malloc(memoryneeded); - if (!bigalloc) return false; - void *oldbigalloc = ra->containers; - container_t **newcontainers = (container_t **)bigalloc; - uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); - uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); - assert((char *)(newtypecodes + new_capacity) == - (char *)bigalloc + memoryneeded); - if (ra->size > 0) { - memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size); - memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); - memcpy(newtypecodes, ra->typecodes, 
sizeof(uint8_t) * ra->size); +static inline void add_bulk_impl(roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val) { + uint16_t key = val >> 16; + if (context->container == NULL || context->key != key) { + uint8_t typecode; + int idx; + context->container = + containerptr_roaring_bitmap_add(r, val, &typecode, &idx); + context->typecode = typecode; + context->idx = idx; + context->key = key; + } else { + // no need to seek the container, it is at hand + // because we already have the container at hand, we can do the + // insertion directly, bypassing the roaring_bitmap_add call + uint8_t new_typecode; + container_t *container2 = container_add( + context->container, val & 0xFFFF, context->typecode, &new_typecode); + if (container2 != context->container) { + // rare instance when we need to change the container type + container_free(context->container, context->typecode); + ra_set_container_at_index(&r->high_low_container, context->idx, + container2, new_typecode); + context->typecode = new_typecode; + context->container = container2; + } } - ra->containers = newcontainers; - ra->keys = newkeys; - ra->typecodes = newtypecodes; - ra->allocation_size = new_capacity; - roaring_free(oldbigalloc); - return true; } -bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { - if (!new_ra) return false; - ra_init(new_ra); +void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals) { + uint32_t val; + const uint32_t *start = vals; + const uint32_t *end = vals + n_args; + const uint32_t *current_val = start; - // Containers hold 64Ki elements, so 64Ki containers is enough to hold - // `0x10000 * 0x10000` (all 2^32) elements - if (cap > 0x10000) { - cap = 0x10000; + if (n_args == 0) { + return; } - if (cap > 0) { - void *bigalloc = roaring_malloc( - cap * (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t))); - if (bigalloc == NULL) return false; - new_ra->containers = (container_t **)bigalloc; - new_ra->keys = 
(uint16_t *)(new_ra->containers + cap); - new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); - // Narrowing is safe because of above check - new_ra->allocation_size = (int32_t)cap; + uint8_t typecode; + int idx; + container_t *container; + val = *current_val; + container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx); + roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), + typecode}; + + for (; current_val != end; current_val++) { + memcpy(&val, current_val, sizeof(val)); + add_bulk_impl(r, &context, val); + } +} + +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, + roaring_bulk_context_t *context, uint32_t val) { + add_bulk_impl(r, context, val); +} + +bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val) { + uint16_t key = val >> 16; + if (context->container == NULL || context->key != key) { + int32_t start_idx = -1; + if (context->container != NULL && context->key < key) { + start_idx = context->idx; + } + int idx = ra_advance_until(&r->high_low_container, key, start_idx); + if (idx == ra_get_size(&r->high_low_container)) { + return false; + } + uint8_t typecode; + context->container = ra_get_container_at_index( + &r->high_low_container, (uint16_t)idx, &typecode); + context->typecode = typecode; + context->idx = idx; + context->key = + ra_get_key_at_index(&r->high_low_container, (uint16_t)idx); + // ra_advance_until finds the next key >= the target, we found a later + // container. 
+ if (context->key != key) { + return false; + } } - return true; + // context is now set up + return container_contains(context->container, val & 0xFFFF, + context->typecode); } -int ra_shrink_to_fit(roaring_array_t *ra) { - int savings = (ra->allocation_size - ra->size) * - (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); - if (!realloc_array(ra, ra->size)) { - return 0; - } - ra->allocation_size = ra->size; - return savings; +roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { + roaring_bitmap_t *answer = roaring_bitmap_create(); + roaring_bitmap_add_many(answer, n_args, vals); + return answer; } -void ra_init(roaring_array_t *new_ra) { - if (!new_ra) { - return; +roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { + // todo: could be greatly optimized but we do not expect this call to ever + // include long lists + roaring_bitmap_t *answer = roaring_bitmap_create(); + roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; + va_list ap; + va_start(ap, n_args); + for (size_t i = 0; i < n_args; i++) { + uint32_t val = va_arg(ap, uint32_t); + roaring_bitmap_add_bulk(answer, &context, val); } - new_ra->keys = NULL; - new_ra->containers = NULL; - new_ra->typecodes = NULL; + va_end(ap); + return answer; +} - new_ra->allocation_size = 0; - new_ra->size = 0; - new_ra->flags = 0; +static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { + return (a < b) ? a : b; } -bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write) { - ra_clear_containers(dest); // we are going to overwrite them - if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size - dest->size = 0; // <--- This is important. 
- return true; // output was just cleared, so they match - } - if (dest->allocation_size < source->size) { - if (!realloc_array(dest, source->size)) { - return false; - } +roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, + uint32_t step) { + if (max >= UINT64_C(0x100000000)) { + max = UINT64_C(0x100000000); } - dest->size = source->size; - memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); - // we go through the containers, turning them into shared containers... - if (copy_on_write) { - for (int32_t i = 0; i < dest->size; ++i) { - source->containers[i] = get_copy_of_container( - source->containers[i], &source->typecodes[i], copy_on_write); - } - // we do a shallow copy to the other bitmap - memcpy(dest->containers, source->containers, - dest->size * sizeof(container_t *)); - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } else { - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - for (int32_t i = 0; i < dest->size; i++) { - dest->containers[i] = - container_clone(source->containers[i], source->typecodes[i]); - if (dest->containers[i] == NULL) { - for (int32_t j = 0; j < i; j++) { - container_free(dest->containers[j], dest->typecodes[j]); - } - ra_clear_without_containers(dest); - return false; - } + if (step == 0) return NULL; + if (max <= min) return NULL; + roaring_bitmap_t *answer = roaring_bitmap_create(); + if (step >= (1 << 16)) { + for (uint32_t value = (uint32_t)min; value < max; value += step) { + roaring_bitmap_add(answer, value); } + return answer; } - return true; + uint64_t min_tmp = min; + do { + uint32_t key = (uint32_t)min_tmp >> 16; + uint32_t container_min = min_tmp & 0xFFFF; + uint32_t container_max = + (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); + uint8_t type; + container_t *container = container_from_range( + &type, container_min, container_max, (uint16_t)step); + ra_append(&answer->high_low_container, (uint16_t)key, container, type); + 
uint32_t gap = container_max - container_min + step - 1; + min_tmp += gap - (gap % step); + } while (min_tmp < max); + // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step + return answer; } -void ra_clear_containers(roaring_array_t *ra) { - for (int32_t i = 0; i < ra->size; ++i) { - container_free(ra->containers[i], ra->typecodes[i]); +void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, + uint32_t max) { + if (min > max) { + return; } -} -void ra_reset(roaring_array_t *ra) { - ra_clear_containers(ra); - ra->size = 0; - ra_shrink_to_fit(ra); -} + roaring_array_t *ra = &r->high_low_container; -void ra_clear_without_containers(roaring_array_t *ra) { - roaring_free( - ra->containers); // keys and typecodes are allocated with containers - ra->size = 0; - ra->allocation_size = 0; - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; -} + uint32_t min_key = min >> 16; + uint32_t max_key = max >> 16; -void ra_clear(roaring_array_t *ra) { - ra_clear_containers(ra); - ra_clear_without_containers(ra); -} + int32_t num_required_containers = max_key - min_key + 1; + int32_t suffix_length = + count_greater(ra->keys, ra->size, (uint16_t)max_key); + int32_t prefix_length = + count_less(ra->keys, ra->size - suffix_length, (uint16_t)min_key); + int32_t common_length = ra->size - prefix_length - suffix_length; -bool extend_array(roaring_array_t *ra, int32_t k) { - int32_t desired_size = ra->size + k; - const int32_t max_containers = 65536; - assert(desired_size <= max_containers); - if (desired_size > ra->allocation_size) { - int32_t new_capacity = - (ra->size < 1024) ? 
2 * desired_size : 5 * desired_size / 4; - if (new_capacity > max_containers) { - new_capacity = max_containers; - } + if (num_required_containers > common_length) { + ra_shift_tail(ra, suffix_length, + num_required_containers - common_length); + } - return realloc_array(ra, new_capacity); + int32_t src = prefix_length + common_length - 1; + int32_t dst = ra->size - suffix_length - 1; + for (uint32_t key = max_key; key != min_key - 1; + key--) { // beware of min_key==0 + uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; + uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; + container_t *new_container; + uint8_t new_type; + + if (src >= 0 && ra->keys[src] == key) { + ra_unshare_container_at_index(ra, (uint16_t)src); + new_container = + container_add_range(ra->containers[src], ra->typecodes[src], + container_min, container_max, &new_type); + if (new_container != ra->containers[src]) { + container_free(ra->containers[src], ra->typecodes[src]); + } + src--; + } else { + new_container = container_from_range(&new_type, container_min, + container_max + 1, 1); + } + ra_replace_key_and_container_at_index(ra, dst, (uint16_t)key, + new_container, new_type); + dst--; } - return true; } -void ra_append(roaring_array_t *ra, uint16_t key, container_t *c, - uint8_t typecode) { - extend_array(ra, 1); - const int32_t pos = ra->size; +void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, + uint32_t max) { + if (min > max) { + return; + } - ra->keys[pos] = key; - ra->containers[pos] = c; - ra->typecodes[pos] = typecode; - ra->size++; -} + roaring_array_t *ra = &r->high_low_container; -void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t index, bool copy_on_write) { - extend_array(ra, 1); - const int32_t pos = ra->size; + uint32_t min_key = min >> 16; + uint32_t max_key = max >> 16; - // old contents is junk that does not need freeing - ra->keys[pos] = sa->keys[index]; - // the shared container will be in 
two bitmaps - if (copy_on_write) { - sa->containers[index] = get_copy_of_container( - sa->containers[index], &sa->typecodes[index], copy_on_write); - ra->containers[pos] = sa->containers[index]; - ra->typecodes[pos] = sa->typecodes[index]; - } else { - ra->containers[pos] = - container_clone(sa->containers[index], sa->typecodes[index]); - ra->typecodes[pos] = sa->typecodes[index]; + int32_t src = count_less(ra->keys, ra->size, (uint16_t)min_key); + int32_t dst = src; + while (src < ra->size && ra->keys[src] <= max_key) { + uint32_t container_min = + (min_key == ra->keys[src]) ? (min & 0xffff) : 0; + uint32_t container_max = + (max_key == ra->keys[src]) ? (max & 0xffff) : 0xffff; + ra_unshare_container_at_index(ra, (uint16_t)src); + container_t *new_container; + uint8_t new_type; + new_container = + container_remove_range(ra->containers[src], ra->typecodes[src], + container_min, container_max, &new_type); + if (new_container != ra->containers[src]) { + container_free(ra->containers[src], ra->typecodes[src]); + } + if (new_container) { + ra_replace_key_and_container_at_index(ra, dst, ra->keys[src], + new_container, new_type); + dst++; + } + src++; + } + if (src > dst) { + ra_shift_tail(ra, ra->size - src, dst - src); } - ra->size++; } -void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t stopping_key, bool copy_on_write) { - for (int32_t i = 0; i < sa->size; ++i) { - if (sa->keys[i] >= stopping_key) break; - ra_append_copy(ra, sa, (uint16_t)i, copy_on_write); - } -} +void roaring_bitmap_printf(const roaring_bitmap_t *r) { + const roaring_array_t *ra = &r->high_low_container; -void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = 
get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - ra->typecodes[pos] = sa->typecodes[i]; + printf("{"); + for (int i = 0; i < ra->size; ++i) { + container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + + if (i + 1 < ra->size) { + printf(","); } - ra->size++; } + printf("}"); } -void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t before_start, bool copy_on_write) { - int start_location = ra_get_index(sa, before_start); - if (start_location >= 0) - ++start_location; - else - start_location = -start_location - 1; - ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); -} - -void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index) { - extend_array(ra, end_index - start_index); +void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) { + const roaring_array_t *ra = &r->high_low_container; - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; + printf("{"); + for (int i = 0; i < ra->size; ++i) { + printf("%d: %s (%d)", ra->keys[i], + get_full_container_name(ra->containers[i], ra->typecodes[i]), + container_get_cardinality(ra->containers[i], ra->typecodes[i])); + if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) { + printf("(shared count = %" PRIu32 " )", + croaring_refcount_get( + &(CAST_shared(ra->containers[i])->counter))); + } - ra->keys[pos] = sa->keys[i]; - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - ra->size++; + if (i + 1 < ra->size) { + printf(", "); + } } + printf("}"); } -void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - 
start_index); +/** + * (For advanced users.) + * Collect statistics about the bitmap + */ +void roaring_bitmap_statistics(const roaring_bitmap_t *r, + roaring_statistics_t *stat) { + const roaring_array_t *ra = &r->high_low_container; - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - ra->typecodes[pos] = sa->typecodes[i]; + memset(stat, 0, sizeof(*stat)); + stat->n_containers = ra->size; + stat->min_value = roaring_bitmap_minimum(r); + stat->max_value = roaring_bitmap_maximum(r); + + for (int i = 0; i < ra->size; ++i) { + uint8_t truetype = + get_container_type(ra->containers[i], ra->typecodes[i]); + uint32_t card = + container_get_cardinality(ra->containers[i], ra->typecodes[i]); + uint32_t sbytes = + container_size_in_bytes(ra->containers[i], ra->typecodes[i]); + stat->cardinality += card; + switch (truetype) { + case BITSET_CONTAINER_TYPE: + stat->n_bitset_containers++; + stat->n_values_bitset_containers += card; + stat->n_bytes_bitset_containers += sbytes; + break; + case ARRAY_CONTAINER_TYPE: + stat->n_array_containers++; + stat->n_values_array_containers += card; + stat->n_bytes_array_containers += sbytes; + break; + case RUN_CONTAINER_TYPE: + stat->n_run_containers++; + stat->n_values_run_containers += card; + stat->n_bytes_run_containers += sbytes; + break; + default: + assert(false); + roaring_unreachable; } - ra->size++; } } -container_t *ra_get_container(roaring_array_t *ra, uint16_t x, - uint8_t *typecode) { - int i = binarySearch(ra->keys, (int32_t)ra->size, x); - if (i < 0) return NULL; - *typecode = ra->typecodes[i]; - return ra->containers[i]; -} - -extern inline container_t 
*ra_get_container_at_index(const roaring_array_t *ra, - uint16_t i, - uint8_t *typecode); - -extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, - uint16_t i); +/* + * Checks that: + * - Array containers are sorted and contain no duplicates + * - Range containers are sorted and contain no overlapping ranges + * - Roaring containers are sorted by key and there are no duplicate keys + * - The correct container type is use for each container (e.g. bitmaps aren't + * used for small containers) + */ +bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, + const char **reason) { + const char *reason_local; + if (reason == NULL) { + // Always allow assigning through *reason + reason = &reason_local; + } + *reason = NULL; + const roaring_array_t *ra = &r->high_low_container; + if (ra->size < 0) { + *reason = "negative size"; + return false; + } + if (ra->allocation_size < 0) { + *reason = "negative allocation size"; + return false; + } + if (ra->size > ra->allocation_size) { + *reason = "more containers than allocated space"; + return false; + } + if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) { + *reason = "invalid flags"; + return false; + } + if (ra->size == 0) { + return true; + } -extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); + if (ra->keys == NULL) { + *reason = "keys is NULL"; + return false; + } + if (ra->typecodes == NULL) { + *reason = "typecodes is NULL"; + return false; + } + if (ra->containers == NULL) { + *reason = "containers is NULL"; + return false; + } -extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, - int32_t pos); + uint32_t prev_key = ra->keys[0]; + for (int32_t i = 1; i < ra->size; ++i) { + if (ra->keys[i] <= prev_key) { + *reason = "keys not strictly increasing"; + return false; + } + prev_key = ra->keys[i]; + } -// everything skipped over is freed -int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { - while (pos < ra->size 
&& ra->keys[pos] < x) { - container_free(ra->containers[pos], ra->typecodes[pos]); - ++pos; + for (int32_t i = 0; i < ra->size; ++i) { + if (!container_internal_validate(ra->containers[i], ra->typecodes[i], + reason)) { + // reason should already be set + if (*reason == NULL) { + *reason = "container failed to validate but no reason given"; + } + return false; + } } - return pos; -} -void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, - container_t *c, uint8_t typecode) { - extend_array(ra, 1); - // May be an optimization opportunity with DIY memmove - memmove(&(ra->keys[i + 1]), &(ra->keys[i]), - sizeof(uint16_t) * (ra->size - i)); - memmove(&(ra->containers[i + 1]), &(ra->containers[i]), - sizeof(container_t *) * (ra->size - i)); - memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), - sizeof(uint8_t) * (ra->size - i)); - ra->keys[i] = key; - ra->containers[i] = c; - ra->typecodes[i] = typecode; - ra->size++; + return true; } -// note: Java routine set things to 0, enabling GC. -// Java called it "resize" but it was always used to downsize. -// Allowing upsize would break the conventions about -// valid containers below ra->size. 
- -void ra_downsize(roaring_array_t *ra, int32_t new_length) { - assert(new_length <= ra->size); - ra->size = new_length; +roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { + roaring_bitmap_t *ans = + (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); + if (!ans) { + return NULL; + } + if (!ra_init_with_capacity( // allocation of list of containers can fail + &ans->high_low_container, r->high_low_container.size)) { + roaring_free(ans); + return NULL; + } + if (!ra_overwrite( // memory allocation of individual containers may fail + &r->high_low_container, &ans->high_low_container, is_cow(r))) { + roaring_bitmap_free(ans); // overwrite should leave in freeable state + return NULL; + } + roaring_bitmap_set_copy_on_write(ans, is_cow(r)); + return ans; } -void ra_remove_at_index(roaring_array_t *ra, int32_t i) { - memmove(&(ra->containers[i]), &(ra->containers[i + 1]), - sizeof(container_t *) * (ra->size - i - 1)); - memmove(&(ra->keys[i]), &(ra->keys[i + 1]), - sizeof(uint16_t) * (ra->size - i - 1)); - memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), - sizeof(uint8_t) * (ra->size - i - 1)); - ra->size--; +bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, + const roaring_bitmap_t *src) { + roaring_bitmap_set_copy_on_write(dest, is_cow(src)); + return ra_overwrite(&src->high_low_container, &dest->high_low_container, + is_cow(src)); } -void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { - container_free(ra->containers[i], ra->typecodes[i]); - ra_remove_at_index(ra, i); +void roaring_bitmap_free(const roaring_bitmap_t *r) { + if (r == NULL) { + return; + } + if (!is_frozen(r)) { + ra_clear((roaring_array_t *)&r->high_low_container); + } + roaring_free((roaring_bitmap_t *)r); } -// used in inplace andNot only, to slide left the containers from -// the mutated RoaringBitmap that are after the largest container of -// the argument RoaringBitmap. In use it should be followed by a call to -// downsize. 
-// -void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, - uint32_t new_begin) { - assert(begin <= end); - assert(new_begin < begin); - - const int range = end - begin; - - // We ensure to previously have freed overwritten containers - // that are not copied elsewhere - - memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), - sizeof(container_t *) * range); - memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), - sizeof(uint16_t) * range); - memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), - sizeof(uint8_t) * range); +void roaring_bitmap_clear(roaring_bitmap_t *r) { + ra_reset(&r->high_low_container); } -void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { - if (distance > 0) { - extend_array(ra, distance); - } - int32_t srcpos = ra->size - count; - int32_t dstpos = srcpos + distance; - memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), sizeof(uint16_t) * count); - memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), - sizeof(container_t *) * count); - memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), - sizeof(uint8_t) * count); - ra->size += distance; -} +void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { + roaring_array_t *ra = &r->high_low_container; -void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { - size_t ctr = 0; - for (int32_t i = 0; i < ra->size; ++i) { - int num_added = container_to_uint32_array( - ans + ctr, ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - ctr += num_added; + const uint16_t hb = val >> 16; + const int i = ra_get_index(ra, hb); + uint8_t typecode; + if (i >= 0) { + ra_unshare_container_at_index(ra, (uint16_t)i); + container_t *container = + ra_get_container_at_index(ra, (uint16_t)i, &typecode); + uint8_t newtypecode = typecode; + container_t *container2 = + container_add(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + 
ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } + } else { + array_container_t *newac = array_container_create(); + container_t *container = + container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode); + // we could just assume that it stays an array container + ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, + container, typecode); } } -bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, - size_t limit, uint32_t *ans) { - size_t ctr = 0; - size_t dtr = 0; +bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + const int i = ra_get_index(&r->high_low_container, hb); + uint8_t typecode; + bool result = false; + if (i >= 0) { + ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); + container_t *container = ra_get_container_at_index( + &r->high_low_container, (uint16_t)i, &typecode); - size_t t_limit = 0; + const int oldCardinality = + container_get_cardinality(container, typecode); - bool first = false; - size_t first_skip = 0; + uint8_t newtypecode = typecode; + container_t *container2 = + container_add(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + result = true; + } else { + const int newCardinality = + container_get_cardinality(container, newtypecode); - uint32_t *t_ans = NULL; - size_t cur_len = 0; + result = oldCardinality != newCardinality; + } + } else { + array_container_t *newac = array_container_create(); + container_t *container = + container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode); + // we could just assume that it stays an array container + ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, + container, typecode); + result = true; + } - for (int i = 0; i < ra->size; ++i) { - const container_t *c = - 
container_unwrap_shared(ra->containers[i], &ra->typecodes[i]); - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: - t_limit = (const_CAST_bitset(c))->cardinality; - break; - case ARRAY_CONTAINER_TYPE: - t_limit = (const_CAST_array(c))->cardinality; - break; - case RUN_CONTAINER_TYPE: - t_limit = run_container_cardinality(const_CAST_run(c)); - break; + return result; +} + +void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + const int i = ra_get_index(&r->high_low_container, hb); + uint8_t typecode; + if (i >= 0) { + ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); + container_t *container = ra_get_container_at_index( + &r->high_low_container, (uint16_t)i, &typecode); + uint8_t newtypecode = typecode; + container_t *container2 = + container_remove(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); } - if (ctr + t_limit - 1 >= offset && ctr < offset + limit) { - if (!first) { - // first_skip = t_limit - (ctr + t_limit - offset); - first_skip = offset - ctr; - first = true; - t_ans = (uint32_t *)roaring_malloc(sizeof(*t_ans) * - (first_skip + limit)); - if (t_ans == NULL) { - return false; - } - memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)); - cur_len = first_skip + limit; - } - if (dtr + t_limit > cur_len) { - uint32_t *append_ans = (uint32_t *)roaring_malloc( - sizeof(*append_ans) * (cur_len + t_limit)); - if (append_ans == NULL) { - if (t_ans != NULL) roaring_free(t_ans); - return false; - } - memset(append_ans, 0, - sizeof(*append_ans) * (cur_len + t_limit)); - cur_len = cur_len + t_limit; - memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); - roaring_free(t_ans); - t_ans = append_ans; - } - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: - container_to_uint32_array(t_ans + dtr, const_CAST_bitset(c), - ra->typecodes[i], 
- ((uint32_t)ra->keys[i]) << 16); - break; - case ARRAY_CONTAINER_TYPE: - container_to_uint32_array(t_ans + dtr, const_CAST_array(c), - ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case RUN_CONTAINER_TYPE: - container_to_uint32_array(t_ans + dtr, const_CAST_run(c), - ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - } - dtr += t_limit; + if (container_get_cardinality(container2, newtypecode) != 0) { + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } else { + ra_remove_at_index_and_free(&r->high_low_container, i); } - ctr += t_limit; - if (dtr - first_skip >= limit) break; } - if (t_ans != NULL) { - memcpy(ans, t_ans + first_skip, limit * sizeof(uint32_t)); - free(t_ans); - } - return true; } -bool ra_has_run_container(const roaring_array_t *ra) { - for (int32_t k = 0; k < ra->size; ++k) { - if (get_container_type(ra->containers[k], ra->typecodes[k]) == - RUN_CONTAINER_TYPE) - return true; +bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + const int i = ra_get_index(&r->high_low_container, hb); + uint8_t typecode; + bool result = false; + if (i >= 0) { + ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); + container_t *container = ra_get_container_at_index( + &r->high_low_container, (uint16_t)i, &typecode); + + const int oldCardinality = + container_get_cardinality(container, typecode); + + uint8_t newtypecode = typecode; + container_t *container2 = + container_remove(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } + + const int newCardinality = + container_get_cardinality(container2, newtypecode); + + if (newCardinality != 0) { + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + } else { + 
ra_remove_at_index_and_free(&r->high_low_container, i); + } + + result = oldCardinality != newCardinality; } - return false; + return result; } -uint32_t ra_portable_header_size(const roaring_array_t *ra) { - if (ra_has_run_container(ra)) { - if (ra->size < - NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets - return 4 + (ra->size + 7) / 8 + 4 * ra->size; +void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals) { + if (n_args == 0 || r->high_low_container.size == 0) { + return; + } + int32_t pos = + -1; // position of the container used in the previous iteration + for (size_t i = 0; i < n_args; i++) { + uint16_t key = (uint16_t)(vals[i] >> 16); + if (pos < 0 || key != r->high_low_container.keys[pos]) { + pos = ra_get_index(&r->high_low_container, key); + } + if (pos >= 0) { + uint8_t new_typecode; + container_t *new_container; + new_container = container_remove( + r->high_low_container.containers[pos], vals[i] & 0xffff, + r->high_low_container.typecodes[pos], &new_typecode); + if (new_container != r->high_low_container.containers[pos]) { + container_free(r->high_low_container.containers[pos], + r->high_low_container.typecodes[pos]); + ra_replace_key_and_container_at_index(&r->high_low_container, + pos, key, new_container, + new_typecode); + } + if (!container_nonzero_cardinality(new_container, new_typecode)) { + container_free(new_container, new_typecode); + ra_remove_at_index(&r->high_low_container, pos); + pos = -1; + } } - return 4 + (ra->size + 7) / 8 + - 8 * ra->size; // - 4 because we pack the size with the cookie - } else { - return 4 + 4 + 8 * ra->size; } } -size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { - size_t count = ra_portable_header_size(ra); +// there should be some SIMD optimizations possible here +roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = 
x2->high_low_container.size; + uint32_t neededcap = length1 > length2 ? length2 : length1; + roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - for (int32_t k = 0; k < ra->size; ++k) { - count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); - } - return count; -} + int pos1 = 0, pos2 = 0; -// This function is endian-sensitive. -size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { - char *initbuf = buf; - uint32_t startOffset = 0; - bool hasrun = ra_has_run_container(ra); - if (hasrun) { - uint32_t cookie = SERIAL_COOKIE | ((uint32_t)(ra->size - 1) << 16); - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - uint32_t s = (ra->size + 7) / 8; - uint8_t *bitmapOfRunContainers = (uint8_t *)roaring_calloc(s, 1); - assert(bitmapOfRunContainers != NULL); // todo: handle - for (int32_t i = 0; i < ra->size; ++i) { - if (get_container_type(ra->containers[i], ra->typecodes[i]) == - RUN_CONTAINER_TYPE) { - bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); - } - } - memcpy(buf, bitmapOfRunContainers, s); - buf += s; - roaring_free(bitmapOfRunContainers); - if (ra->size < NO_OFFSET_THRESHOLD) { - startOffset = 4 + 4 * ra->size + s; - } else { - startOffset = 4 + 8 * ra->size + s; - } - } else { // backwards compatibility - uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = + ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + const uint16_t s2 = + ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - memcpy(buf, &ra->size, sizeof(ra->size)); - buf += sizeof(ra->size); + if (s1 == s2) { + uint8_t type1, type2; + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, 
&type2); + container_t *c = container_and(c1, type1, c2, type2, &result_type); - startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; - } - for (int32_t k = 0; k < ra->size; ++k) { - memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); - buf += sizeof(ra->keys[k]); - // get_cardinality returns a value in [1,1<<16], subtracting one - // we get [0,1<<16 - 1] which fits in 16 bits - uint16_t card = (uint16_t)(container_get_cardinality(ra->containers[k], - ra->typecodes[k]) - - 1); - memcpy(buf, &card, sizeof(card)); - buf += sizeof(card); - } - if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { - // writing the containers offsets - for (int32_t k = 0; k < ra->size; k++) { - memcpy(buf, &startOffset, sizeof(startOffset)); - buf += sizeof(startOffset); - startOffset = - startOffset + - container_size_in_bytes(ra->containers[k], ra->typecodes[k]); + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, result_type); // otherwise: memory leak! + } + ++pos1; + ++pos2; + } else if (s1 < s2) { // s1 < s2 + pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); } } - for (int32_t k = 0; k < ra->size; ++k) { - buf += container_write(ra->containers[k], ra->typecodes[k], buf); - } - return buf - initbuf; + return answer; } -// Quickly checks whether there is a serialized bitmap at the pointer, -// not exceeding size "maxbytes" in bytes. This function does not allocate -// memory dynamically. -// -// This function returns 0 if and only if no valid bitmap is found. -// Otherwise, it returns how many bytes are occupied. 
-// -size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { - size_t bytestotal = sizeof(int32_t); // for cookie - if (bytestotal > maxbytes) return 0; - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - return 0; +/** + * Compute the union of 'number' bitmaps. + */ +roaring_bitmap_t *roaring_bitmap_or_many(size_t number, + const roaring_bitmap_t **x) { + if (number == 0) { + return roaring_bitmap_create(); } - int32_t size; - - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - bytestotal += sizeof(int32_t); - if (bytestotal > maxbytes) return 0; - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); + if (number == 1) { + return roaring_bitmap_copy(x[0]); } - if (size > (1 << 16) || size < 0) { - return 0; + roaring_bitmap_t *answer = + roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); + for (size_t i = 2; i < number; i++) { + roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); } - char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - bytestotal += s; - if (bytestotal > maxbytes) return 0; - bitmapOfRunContainers = (char *)buf; - buf += s; + roaring_bitmap_repair_after_lazy(answer); + return answer; +} + +/** + * Compute the xor of 'number' bitmaps. 
+ */ +roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, + const roaring_bitmap_t **x) { + if (number == 0) { + return roaring_bitmap_create(); } - bytestotal += size * 2 * sizeof(uint16_t); - if (bytestotal > maxbytes) return 0; - uint16_t *keyscards = (uint16_t *)buf; - buf += size * 2 * sizeof(uint16_t); - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - // skipping the offsets - bytestotal += size * 4; - if (bytestotal > maxbytes) return 0; - buf += size * 4; + if (number == 1) { + return roaring_bitmap_copy(x[0]); } - // Reading the containers - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2 * k + 1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if (hasrun) { - if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; + roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); + for (size_t i = 2; i < number; i++) { + roaring_bitmap_lazy_xor_inplace(answer, x[i]); + } + roaring_bitmap_repair_after_lazy(answer); + return answer; +} + +// inplace and (modifies its first argument). +void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + if (x1 == x2) return; + int pos1 = 0, pos2 = 0, intersection_size = 0; + const int length1 = ra_get_size(&x1->high_low_container); + const int length2 = ra_get_size(&x2->high_low_container); + + // any skipped-over or newly emptied containers in x1 + // have to be freed. 
+ while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = + ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + const uint16_t s2 = + ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + if (s1 == s2) { + uint8_t type1, type2, result_type; + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + + // We do the computation "in place" only when c1 is not a shared + // container. Rationale: using a shared container safely with in + // place computation would require making a copy and then doing the + // computation in place which is likely less efficient than avoiding + // in place entirely and always generating a new container. + container_t *c = + (type1 == SHARED_CONTAINER_TYPE) + ? container_and(c1, type1, c2, type2, &result_type) + : container_iand(c1, type1, c2, type2, &result_type); + + if (c != c1) { // in this instance a new container was created, and + // we need to free the old one + container_free(c1, type1); } - } - if (isbitmap) { - size_t containersize = - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - bytestotal += containersize; - if (bytestotal > maxbytes) return 0; - buf += containersize; - } else if (isrun) { - bytestotal += sizeof(uint16_t); - if (bytestotal > maxbytes) return 0; - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - buf += sizeof(uint16_t); - size_t containersize = n_runs * sizeof(rle16_t); - bytestotal += containersize; - if (bytestotal > maxbytes) return 0; - buf += containersize; - } else { - size_t containersize = thiscard * sizeof(uint16_t); - bytestotal += containersize; - if (bytestotal > maxbytes) return 0; - buf += containersize; + if (container_nonzero_cardinality(c, result_type)) { + ra_replace_key_and_container_at_index(&x1->high_low_container, + intersection_size, s1, c, + result_type); + intersection_size++; + } else { + 
container_free(c, result_type); + } + ++pos1; + ++pos2; + } else if (s1 < s2) { + pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); } } - return bytestotal; + + // if we ended early because x2 ran out, then all remaining in x1 should be + // freed + while (pos1 < length1) { + container_free(x1->high_low_container.containers[pos1], + x1->high_low_container.typecodes[pos1]); + ++pos1; + } + + // all containers after this have either been copied or freed + ra_downsize(&x1->high_low_container, intersection_size); } -// This function populates answer from the content of buf (reading up to -// maxbytes bytes). The function returns false if a properly serialized bitmap -// cannot be found. If it returns true, readbytes is populated by how many bytes -// were read, we have that *readbytes <= maxbytes. -// -// This function is endian-sensitive. -bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, - const size_t maxbytes, size_t *readbytes) { - *readbytes = sizeof(int32_t); // for cookie - if (*readbytes > maxbytes) { - // Ran out of bytes while reading first 4 bytes. - return false; +roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + return roaring_bitmap_copy(x2); } - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - // "I failed to find one of the right cookies. 
- return false; + if (0 == length2) { + return roaring_bitmap_copy(x1); } - int32_t size; + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + container_t *c = container_or(c1, type1, c2, type2, &result_type); - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - *readbytes += sizeof(int32_t); - if (*readbytes > maxbytes) { - // Ran out of bytes while reading second part of the cookie. - return false; + // since we assume that the initial containers are non-empty, the + // result here + // can only be non-empty + ra_append(&answer->high_low_container, s1, c, result_type); + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + // c1 = container_clone(c1, type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); + } + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + // c2 = 
container_clone(c2, type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size < 0) { - // You cannot have a negative number of containers, the data must be - // corrupted. - return false; - } - if (size > (1 << 16)) { - // You cannot have so many containers, the data must be corrupted. - return false; } - const char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - *readbytes += s; - if (*readbytes > maxbytes) { // data is corrupted? - // Ran out of bytes while reading run bitmap. - return false; - } - bitmapOfRunContainers = buf; - buf += s; + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); } - uint16_t *keyscards = (uint16_t *)buf; + return answer; +} - *readbytes += size * 2 * sizeof(uint16_t); - if (*readbytes > maxbytes) { - // Ran out of bytes while reading key-cardinality array. - return false; +// inplace or (modifies its first argument). 
+void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; + + if (0 == length2) return; + + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; } - buf += size * 2 * sizeof(uint16_t); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + if (!container_is_full(c1, type1)) { + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, (uint16_t)pos2, &type2); + container_t *c = + (type1 == SHARED_CONTAINER_TYPE) + ? container_or(c1, type1, c2, type2, &result_type) + : container_ior(c1, type1, c2, type2, &result_type); - bool is_ok = ra_init_with_capacity(answer, size); - if (!is_ok) { - // Failed to allocate memory for roaring array. Bailing out. 
- return false; + if (c != c1) { // in this instance a new container was created, + // and we need to free the old one + container_free(c1, type1); + } + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); + } + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + + // container_t *c2_clone = container_clone(c2, type2); + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, is_cow(x2)); } +} - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2 * k, sizeof(tmp)); - answer->keys[k] = tmp; +roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + return roaring_bitmap_copy(x2); } - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - *readbytes += size * 4; - if (*readbytes > maxbytes) { // data is corrupted? - // Ran out of bytes while reading offsets. 
- ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - - // skipping the offsets - buf += size * 4; + if (0 == length2) { + return roaring_bitmap_copy(x1); } - // Reading the containers - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2 * k + 1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if (hasrun) { - if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } - if (isbitmap) { - // we check that the read is allowed - size_t containersize = - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - *readbytes += containersize; - if (*readbytes > maxbytes) { - // Running out of bytes while reading a bitset container. - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - // it is now safe to read - bitset_container_t *c = bitset_container_create(); - if (c == NULL) { // memory allocation failure - // Failed to allocate memory for a bitset container. - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - answer->size++; - buf += bitset_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = BITSET_CONTAINER_TYPE; - } else if (isrun) { - // we check that the read is allowed - *readbytes += sizeof(uint16_t); - if (*readbytes > maxbytes) { - // Running out of bytes while reading a run container (header). - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - size_t containersize = n_runs * sizeof(rle16_t); - *readbytes += containersize; - if (*readbytes > maxbytes) { // data is corrupted? - // Running out of bytes while reading a run container. 
- ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; - } - // it is now safe to read + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + container_t *c = container_xor(c1, type1, c2, type2, &result_type); - run_container_t *c = run_container_create(); - if (c == NULL) { // memory allocation failure - // Failed to allocate memory for a run container. - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, result_type); } - answer->size++; - buf += run_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = RUN_CONTAINER_TYPE; - } else { - // we check that the read is allowed - size_t containersize = thiscard * sizeof(uint16_t); - *readbytes += containersize; - if (*readbytes > maxbytes) { // data is corrupted? - // Running out of bytes while reading an array container. 
- ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); } - // it is now safe to read - array_container_t *c = - array_container_create_given_capacity(thiscard); - if (c == NULL) { // memory allocation failure - // Failed to allocate memory for an array container. - ra_clear(answer); // we need to clear the containers already - // allocated, and the roaring array - return false; + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); } - answer->size++; - buf += array_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = ARRAY_CONTAINER_TYPE; + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } } - return true; + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); + } + return 
answer; } -#ifdef __cplusplus -} -} -} // extern "C" { namespace roaring { namespace internal { -#endif -/* end file src/roaring_array.c */ -/* begin file src/roaring_priority_queue.c */ +// inplace xor (modifies its first argument). -#ifdef __cplusplus -using namespace ::roaring::internal; +void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + assert(x1 != x2); + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; -extern "C" { -namespace roaring { -namespace api { -#endif + if (0 == length2) return; -struct roaring_pq_element_s { - uint64_t size; - bool is_temporary; - roaring_bitmap_t *bitmap; -}; + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; + } -typedef struct roaring_pq_element_s roaring_pq_element_t; + // XOR can have new containers inserted from x2, but can also + // lose containers when x1 and x2 are nonempty and identical. -struct roaring_pq_s { - roaring_pq_element_t *elements; - uint64_t size; -}; + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); -typedef struct roaring_pq_s roaring_pq_t; + // We do the computation "in place" only when c1 is not a shared + // container. Rationale: using a shared container safely with in + // place computation would require making a copy and then doing the + // computation in place which is likely less efficient than avoiding + // in place entirely and always generating a new container. 
+ + container_t *c; + if (type1 == SHARED_CONTAINER_TYPE) { + c = container_xor(c1, type1, c2, type2, &result_type); + shared_container_free(CAST_shared(c1)); // so release + } else { + c = container_ixor(c1, type1, c2, type2, &result_type); + } + + if (container_nonzero_cardinality(c, result_type)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); + ++pos1; + } else { + container_free(c, result_type); + ra_remove_at_index(&x1->high_low_container, pos1); + --length1; + } + + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } -static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { - return t1->size < t2->size; + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, is_cow(x2)); + } } -static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { - uint64_t i = pq->size; - pq->elements[pq->size++] = *t; - while (i > 0) { - uint64_t p = (i - 1) >> 1; - roaring_pq_element_t ap = pq->elements[p]; - if (!compare(t, &ap)) break; - pq->elements[i] = ap; - i = p; +roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t 
result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); + roaring_bitmap_set_copy_on_write(empty_bitmap, + is_cow(x1) || is_cow(x2)); + return empty_bitmap; } - pq->elements[i] = *t; -} + if (0 == length2) { + return roaring_bitmap_copy(x1); + } + roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); -static void pq_free(roaring_pq_t *pq) { roaring_free(pq); } + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = 0; + uint16_t s2 = 0; + while (true) { + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); -static void percolate_down(roaring_pq_t *pq, uint32_t i) { - uint32_t size = (uint32_t)pq->size; - uint32_t hsize = size >> 1; - roaring_pq_element_t ai = pq->elements[i]; - while (i < hsize) { - uint32_t l = (i << 1) + 1; - uint32_t r = l + 1; - roaring_pq_element_t bestc = pq->elements[l]; - if (r < size) { - if (compare(pq->elements + r, &bestc)) { - l = r; - bestc = pq->elements[r]; + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + container_t *c = + container_andnot(c1, type1, c2, type2, &result_type); + + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, result_type); } + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + } else if (s1 < s2) { // s1 < s2 + const int next_pos1 = + ra_advance_until(&x1->high_low_container, s2, pos1); + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, next_pos1, + is_cow(x1)); + // TODO : 
perhaps some of the copy_on_write should be based on + // answer rather than x1 (more stringent?). Many similar cases + pos1 = next_pos1; + if (pos1 == length1) break; + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + if (pos2 == length2) break; } - if (!compare(&bestc, &ai)) { - break; - } - pq->elements[i] = bestc; - i = l; - } - pq->elements[i] = ai; -} - -static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { - size_t alloc_size = - sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length; - roaring_pq_t *answer = (roaring_pq_t *)roaring_malloc(alloc_size); - answer->elements = (roaring_pq_element_t *)(answer + 1); - answer->size = length; - for (uint32_t i = 0; i < length; i++) { - answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; - answer->elements[i].is_temporary = false; - answer->elements[i].size = - roaring_bitmap_portable_size_in_bytes(arr[i]); } - for (int32_t i = (length >> 1); i >= 0; i--) { - percolate_down(answer, i); + if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); } return answer; } -static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { - roaring_pq_element_t ans = *pq->elements; - if (pq->size > 1) { - pq->elements[0] = pq->elements[--pq->size]; - percolate_down(pq, 0); - } else - --pq->size; - // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; - return ans; -} +// inplace andnot (modifies its first argument). 
+ +void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + assert(x1 != x2); -// this function consumes and frees the inputs -static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, - roaring_bitmap_t *x2) { uint8_t result_type = 0; - const int length1 = ra_get_size(&x1->high_low_container), - length2 = ra_get_size(&x2->high_low_container); + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; + int intersection_size = 0; + + if (0 == length2) return; + if (0 == length1) { - roaring_bitmap_free(x1); - return x2; - } - if (0 == length2) { - roaring_bitmap_free(x2); - return x1; + roaring_bitmap_clear(x1); + return; } - uint32_t neededcap = length1 > length2 ? length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); + int pos1 = 0, pos2 = 0; uint8_t type1, type2; uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); while (true) { if (s1 == s2) { - // todo: unsharing can be inefficient as it may create a clone where - // none - // is needed, but it has the benefit of being easy to reason about. - - ra_unshare_container_at_index(&x1->high_low_container, - (uint16_t)pos1); container_t *c1 = ra_get_container_at_index(&x1->high_low_container, (uint16_t)pos1, &type1); - assert(type1 != SHARED_CONTAINER_TYPE); - - ra_unshare_container_at_index(&x2->high_low_container, - (uint16_t)pos2); container_t *c2 = ra_get_container_at_index(&x2->high_low_container, (uint16_t)pos2, &type2); - assert(type2 != SHARED_CONTAINER_TYPE); + + // We do the computation "in place" only when c1 is not a shared + // container. 
Rationale: using a shared container safely with in + // place computation would require making a copy and then doing the + // computation in place which is likely less efficient than avoiding + // in place entirely and always generating a new container. container_t *c; + if (type1 == SHARED_CONTAINER_TYPE) { + c = container_andnot(c1, type1, c2, type2, &result_type); + shared_container_free(CAST_shared(c1)); // release + } else { + c = container_iandnot(c1, type1, c2, type2, &result_type); + } - if ((type2 == BITSET_CONTAINER_TYPE) && - (type1 != BITSET_CONTAINER_TYPE)) { - c = container_lazy_ior(c2, type2, c1, type1, &result_type); - container_free(c1, type1); - if (c != c2) { - container_free(c2, type2); - } + if (container_nonzero_cardinality(c, result_type)) { + ra_replace_key_and_container_at_index(&x1->high_low_container, + intersection_size++, s1, + c, result_type); } else { - c = container_lazy_ior(c1, type1, c2, type2, &result_type); - container_free(c2, type2); - if (c != c1) { - container_free(c1, type1); - } + container_free(c, result_type); } - // since we assume that the initial containers are non-empty, the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, result_type); + ++pos1; ++pos2; if (pos1 == length1) break; @@ -20921,183 +21175,238 @@ static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - ra_append(&answer->high_low_container, s1, c1, type1); + if (pos1 != intersection_size) { + container_t *c1 = ra_get_container_at_index( + &x1->high_low_container, (uint16_t)pos1, &type1); + + ra_replace_key_and_container_at_index( + &x1->high_low_container, intersection_size, s1, c1, type1); + } + intersection_size++; pos1++; if (pos1 == length1) break; s1 = ra_get_key_at_index(&x1->high_low_container, 
(uint16_t)pos1); } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); if (pos2 == length2) break; s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } } - if (pos1 == length1) { - ra_append_move_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2); - } else if (pos2 == length2) { - ra_append_move_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1); + + if (pos1 < length1) { + // all containers between intersection_size and + // pos1 are junk. However, they have either been moved + // (thus still referenced) or involved in an iandnot + // that will clean up all containers that could not be reused. + // Thus we should not free the junk containers between + // intersection_size and pos1. + if (pos1 > intersection_size) { + // left slide of remaining items + ra_copy_range(&x1->high_low_container, pos1, length1, + intersection_size); + } + // else current placement is fine + intersection_size += (length1 - pos1); } - ra_clear_without_containers(&x1->high_low_container); - ra_clear_without_containers(&x2->high_low_container); - roaring_free(x1); - roaring_free(x2); - return answer; + ra_downsize(&x1->high_low_container, intersection_size); +} + +uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) { + const roaring_array_t *ra = &r->high_low_container; + + uint64_t card = 0; + for (int i = 0; i < ra->size; ++i) + card += container_get_cardinality(ra->containers[i], ra->typecodes[i]); + return card; } -/** - * Compute the union of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_or_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. 
- */ -roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); +uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, + uint64_t range_start, + uint64_t range_end) { + if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { + return 0; } - if (number == 1) { - return roaring_bitmap_copy(x[0]); + return roaring_bitmap_range_cardinality_closed(r, (uint32_t)range_start, + (uint32_t)(range_end - 1)); +} + +uint64_t roaring_bitmap_range_cardinality_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end) { + const roaring_array_t *ra = &r->high_low_container; + + if (range_start > range_end) { + return 0; } - roaring_pq_t *pq = create_pq(x, number); - while (pq->size > 1) { - roaring_pq_element_t x1 = pq_poll(pq); - roaring_pq_element_t x2 = pq_poll(pq); - if (x1.is_temporary && x2.is_temporary) { - roaring_bitmap_t *newb = - lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); - // should normally return a fresh new bitmap *except* that - // it can return x1.bitmap or x2.bitmap in degenerate cases - bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = temporary, .bitmap = newb}; - pq_add(pq, &newelement); - } else if (x2.is_temporary) { - roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); - x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); - pq_add(pq, &x2); - } else if (x1.is_temporary) { - roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); - x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); + // now we have: 0 <= range_start <= range_end <= UINT32_MAX - pq_add(pq, &x1); + uint16_t minhb = (uint16_t)(range_start >> 16); + uint16_t maxhb = (uint16_t)(range_end >> 16); + + uint64_t card = 0; + + int i = ra_get_index(ra, minhb); + if (i >= 0) { + 
if (minhb == maxhb) { + card += container_rank(ra->containers[i], ra->typecodes[i], + range_end & 0xffff); } else { - roaring_bitmap_t *newb = - roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = true, .bitmap = newb}; + card += + container_get_cardinality(ra->containers[i], ra->typecodes[i]); + } + if ((range_start & 0xffff) != 0) { + card -= container_rank(ra->containers[i], ra->typecodes[i], + (range_start & 0xffff) - 1); + } + i++; + } else { + i = -i - 1; + } - pq_add(pq, &newelement); + for (; i < ra->size; i++) { + uint16_t key = ra->keys[i]; + if (key < maxhb) { + card += + container_get_cardinality(ra->containers[i], ra->typecodes[i]); + } else if (key == maxhb) { + card += container_rank(ra->containers[i], ra->typecodes[i], + range_end & 0xffff); + break; + } else { + break; } } - roaring_pq_element_t X = pq_poll(pq); - roaring_bitmap_t *answer = X.bitmap; - roaring_bitmap_repair_after_lazy(answer); - pq_free(pq); - return answer; -} -#ifdef __cplusplus -} + return card; } -} // extern "C" { namespace roaring { namespace api { -#endif -/* end file src/roaring_priority_queue.c */ -/* begin file src/roaring.c */ -#include -#include -#include -#include -#include -#include -#include - - -// Include after roaring.h -#ifdef __cplusplus -using namespace ::roaring::internal; +bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) { + return r->high_low_container.size == 0; +} -extern "C" { -namespace roaring { -namespace api { -#endif +void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) { + ra_to_uint32_array(&r->high_low_container, ans); +} -#define CROARING_SERIALIZATION_ARRAY_UINT32 1 -#define CROARING_SERIALIZATION_CONTAINER 2 -extern inline int roaring_trailing_zeroes(unsigned long long input_num); -extern inline int roaring_leading_zeroes(unsigned long long input_num); -extern inline void 
roaring_bitmap_init_cleared(roaring_bitmap_t *r); -extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r); -extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, - bool cow); -extern inline roaring_bitmap_t *roaring_bitmap_create(void); -extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, - uint64_t max); -extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, - uint64_t min, uint64_t max); +bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset, + size_t limit, uint32_t *ans) { + return ra_range_uint32_array(&r->high_low_container, offset, limit, ans); +} -static inline bool is_cow(const roaring_bitmap_t *r) { - return r->high_low_container.flags & ROARING_FLAG_COW; +/** convert array and bitmap containers to run containers when it is more + * efficient; + * also convert from run containers when more space efficient. Returns + * true if the result has at least one run container. + */ +bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { + bool answer = false; + for (int i = 0; i < r->high_low_container.size; i++) { + uint8_t type_original, type_after; + ra_unshare_container_at_index( + &r->high_low_container, + (uint16_t)i); // TODO: this introduces extra cloning! 
+ container_t *c = ra_get_container_at_index(&r->high_low_container, + (uint16_t)i, &type_original); + container_t *c1 = convert_run_optimize(c, type_original, &type_after); + if (type_after == RUN_CONTAINER_TYPE) { + answer = true; + } + ra_set_container_at_index(&r->high_low_container, i, c1, type_after); + } + return answer; } -static inline bool is_frozen(const roaring_bitmap_t *r) { - return r->high_low_container.flags & ROARING_FLAG_FROZEN; + +size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { + size_t answer = 0; + for (int i = 0; i < r->high_low_container.size; i++) { + uint8_t type_original; + container_t *c = ra_get_container_at_index(&r->high_low_container, + (uint16_t)i, &type_original); + answer += container_shrink_to_fit(c, type_original); + } + answer += ra_shrink_to_fit(&r->high_low_container); + return answer; } -// this is like roaring_bitmap_add, but it populates pointer arguments in such a -// way -// that we can recover the container touched, which, in turn can be used to -// accelerate some functions (when you repeatedly need to add to the same -// container) -static inline container_t *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, - uint32_t val, - uint8_t *type, - int *index) { - roaring_array_t *ra = &r->high_low_container; +/** + * Remove run-length encoding even when it is more space efficient + * return whether a change was applied + */ +bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { + bool answer = false; + for (int i = 0; i < r->high_low_container.size; i++) { + uint8_t type_original, type_after; + container_t *c = ra_get_container_at_index(&r->high_low_container, + (uint16_t)i, &type_original); + if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) { + answer = true; + if (type_original == SHARED_CONTAINER_TYPE) { + run_container_t *truec = CAST_run(CAST_shared(c)->container); + int32_t card = run_container_cardinality(truec); + container_t *c1 = convert_to_bitset_or_array_container( + 
truec, card, &type_after); + shared_container_free(CAST_shared(c)); // frees run as needed + ra_set_container_at_index(&r->high_low_container, i, c1, + type_after); - uint16_t hb = val >> 16; - const int i = ra_get_index(ra, hb); - if (i >= 0) { - ra_unshare_container_at_index(ra, (uint16_t)i); - container_t *c = ra_get_container_at_index(ra, (uint16_t)i, type); - uint8_t new_type = *type; - container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type); - *index = i; - if (c2 != c) { - container_free(c, *type); - ra_set_container_at_index(ra, i, c2, new_type); - *type = new_type; - return c2; - } else { - return c; + } else { + int32_t card = run_container_cardinality(CAST_run(c)); + container_t *c1 = convert_to_bitset_or_array_container( + CAST_run(c), card, &type_after); + run_container_free(CAST_run(c)); + ra_set_container_at_index(&r->high_low_container, i, c1, + type_after); + } } + } + return answer; +} + +size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) { + size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); + uint64_t cardinality = roaring_bitmap_get_cardinality(r); + uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); + if (portablesize < sizeasarray) { + buf[0] = CROARING_SERIALIZATION_CONTAINER; + return roaring_bitmap_portable_serialize(r, buf + 1) + 1; } else { - array_container_t *new_ac = array_container_create(); - container_t *c = - container_add(new_ac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, type); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type); - *index = -i - 1; - return c; + buf[0] = CROARING_SERIALIZATION_ARRAY_UINT32; + memcpy(buf + 1, &cardinality, sizeof(uint32_t)); + roaring_bitmap_to_uint32_array( + r, (uint32_t *)(buf + 1 + sizeof(uint32_t))); + return 1 + (size_t)sizeasarray; } } -roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { +size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) { 
+ size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); + uint64_t sizeasarray = + roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) + sizeof(uint32_t); + return portablesize < sizeasarray ? portablesize + 1 + : (size_t)sizeasarray + 1; +} + +size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) { + return ra_portable_size_in_bytes(&r->high_low_container); +} + +roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, + size_t maxbytes) { roaring_bitmap_t *ans = (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); - if (!ans) { + if (ans == NULL) { return NULL; } - bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); + size_t bytesread; + bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, + maxbytes, &bytesread); + if (!is_ok) { + roaring_free(ans); + return NULL; + } + roaring_bitmap_set_copy_on_write(ans, false); if (!is_ok) { roaring_free(ans); return NULL; @@ -21105,1152 +21414,730 @@ roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { return ans; } -bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) { - return ra_init_with_capacity(&r->high_low_container, cap); +roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { + return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); } -static inline void add_bulk_impl(roaring_bitmap_t *r, - roaring_bulk_context_t *context, - uint32_t val) { - uint16_t key = val >> 16; - if (context->container == NULL || context->key != key) { - uint8_t typecode; - int idx; - context->container = - containerptr_roaring_bitmap_add(r, val, &typecode, &idx); - context->typecode = typecode; - context->idx = idx; - context->key = key; - } else { - // no need to seek the container, it is at hand - // because we already have the container at hand, we can do the - // insertion directly, bypassing the roaring_bitmap_add call - uint8_t new_typecode; - container_t *container2 = container_add( - 
context->container, val & 0xFFFF, context->typecode, &new_typecode); - if (container2 != context->container) { - // rare instance when we need to change the container type - container_free(context->container, context->typecode); - ra_set_container_at_index(&r->high_low_container, context->idx, - container2, new_typecode); - context->typecode = new_typecode; - context->container = container2; - } - } +size_t roaring_bitmap_portable_deserialize_size(const char *buf, + size_t maxbytes) { + return ra_portable_deserialize_size(buf, maxbytes); } -void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - uint32_t val; - const uint32_t *start = vals; - const uint32_t *end = vals + n_args; - const uint32_t *current_val = start; - - if (n_args == 0) { - return; - } +size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf) { + return ra_portable_serialize(&r->high_low_container, buf); +} - uint8_t typecode; - int idx; - container_t *container; - val = *current_val; - container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx); - roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), - typecode}; +roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { + const char *bufaschar = (const char *)buf; + if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { + /* This looks like a compressed set of uint32_t elements */ + uint32_t card; - for (; current_val != end; current_val++) { - memcpy(&val, current_val, sizeof(val)); - add_bulk_impl(r, &context, val); - } -} + memcpy(&card, bufaschar + 1, sizeof(uint32_t)); -void roaring_bitmap_add_bulk(roaring_bitmap_t *r, - roaring_bulk_context_t *context, uint32_t val) { - add_bulk_impl(r, context, val); -} + const uint32_t *elems = + (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); -bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, - roaring_bulk_context_t *context, - uint32_t val) { - uint16_t key = val >> 16; - if 
(context->container == NULL || context->key != key) { - int32_t start_idx = -1; - if (context->container != NULL && context->key < key) { - start_idx = context->idx; - } - int idx = ra_advance_until(&r->high_low_container, key, start_idx); - if (idx == ra_get_size(&r->high_low_container)) { - return false; + roaring_bitmap_t *bitmap = roaring_bitmap_create(); + if (bitmap == NULL) { + return NULL; } - uint8_t typecode; - context->container = ra_get_container_at_index( - &r->high_low_container, (uint16_t)idx, &typecode); - context->typecode = typecode; - context->idx = idx; - context->key = - ra_get_key_at_index(&r->high_low_container, (uint16_t)idx); - // ra_advance_until finds the next key >= the target, we found a later - // container. - if (context->key != key) { - return false; + roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; + for (uint32_t i = 0; i < card; i++) { + // elems may not be aligned, read with memcpy + uint32_t elem; + memcpy(&elem, elems + i, sizeof(elem)); + roaring_bitmap_add_bulk(bitmap, &context, elem); } - } - // context is now set up - return container_contains(context->container, val & 0xFFFF, - context->typecode); -} - -roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { - roaring_bitmap_t *answer = roaring_bitmap_create(); - roaring_bitmap_add_many(answer, n_args, vals); - return answer; -} - -roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { - // todo: could be greatly optimized but we do not expect this call to ever - // include long lists - roaring_bitmap_t *answer = roaring_bitmap_create(); - roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; - va_list ap; - va_start(ap, n_args); - for (size_t i = 0; i < n_args; i++) { - uint32_t val = va_arg(ap, uint32_t); - roaring_bitmap_add_bulk(answer, &context, val); - } - va_end(ap); - return answer; -} - -static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { - return (a < b) ? 
a : b; -} + return bitmap; -roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, - uint32_t step) { - if (max >= UINT64_C(0x100000000)) { - max = UINT64_C(0x100000000); - } - if (step == 0) return NULL; - if (max <= min) return NULL; - roaring_bitmap_t *answer = roaring_bitmap_create(); - if (step >= (1 << 16)) { - for (uint32_t value = (uint32_t)min; value < max; value += step) { - roaring_bitmap_add(answer, value); - } - return answer; - } - uint64_t min_tmp = min; - do { - uint32_t key = (uint32_t)min_tmp >> 16; - uint32_t container_min = min_tmp & 0xFFFF; - uint32_t container_max = - (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); - uint8_t type; - container_t *container = container_from_range( - &type, container_min, container_max, (uint16_t)step); - ra_append(&answer->high_low_container, (uint16_t)key, container, type); - uint32_t gap = container_max - container_min + step - 1; - min_tmp += gap - (gap % step); - } while (min_tmp < max); - // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step - return answer; + } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { + return roaring_bitmap_portable_deserialize(bufaschar + 1); + } else + return (NULL); } -void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, - uint32_t max) { - if (min > max) { - return; - } - - roaring_array_t *ra = &r->high_low_container; - - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; - - int32_t num_required_containers = max_key - min_key + 1; - int32_t suffix_length = - count_greater(ra->keys, ra->size, (uint16_t)max_key); - int32_t prefix_length = - count_less(ra->keys, ra->size - suffix_length, (uint16_t)min_key); - int32_t common_length = ra->size - prefix_length - suffix_length; - - if (num_required_containers > common_length) { - ra_shift_tail(ra, suffix_length, - num_required_containers - common_length); +roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, + size_t maxbytes) { + if 
(maxbytes < 1) { + return NULL; } - int32_t src = prefix_length + common_length - 1; - int32_t dst = ra->size - suffix_length - 1; - for (uint32_t key = max_key; key != min_key - 1; - key--) { // beware of min_key==0 - uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; - uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; - container_t *new_container; - uint8_t new_type; - - if (src >= 0 && ra->keys[src] == key) { - ra_unshare_container_at_index(ra, (uint16_t)src); - new_container = - container_add_range(ra->containers[src], ra->typecodes[src], - container_min, container_max, &new_type); - if (new_container != ra->containers[src]) { - container_free(ra->containers[src], ra->typecodes[src]); - } - src--; - } else { - new_container = container_from_range(&new_type, container_min, - container_max + 1, 1); + const char *bufaschar = (const char *)buf; + if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { + if (maxbytes < 1 + sizeof(uint32_t)) { + return NULL; } - ra_replace_key_and_container_at_index(ra, dst, (uint16_t)key, - new_container, new_type); - dst--; - } -} -void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, - uint32_t max) { - if (min > max) { - return; - } + /* This looks like a compressed set of uint32_t elements */ + uint32_t card; + memcpy(&card, bufaschar + 1, sizeof(uint32_t)); - roaring_array_t *ra = &r->high_low_container; + // Check the buffer is big enough to contain card uint32_t elements + if (maxbytes < 1 + sizeof(uint32_t) + card * sizeof(uint32_t)) { + return NULL; + } - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; + const uint32_t *elems = + (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); - int32_t src = count_less(ra->keys, ra->size, (uint16_t)min_key); - int32_t dst = src; - while (src < ra->size && ra->keys[src] <= max_key) { - uint32_t container_min = - (min_key == ra->keys[src]) ? 
(min & 0xffff) : 0; - uint32_t container_max = - (max_key == ra->keys[src]) ? (max & 0xffff) : 0xffff; - ra_unshare_container_at_index(ra, (uint16_t)src); - container_t *new_container; - uint8_t new_type; - new_container = - container_remove_range(ra->containers[src], ra->typecodes[src], - container_min, container_max, &new_type); - if (new_container != ra->containers[src]) { - container_free(ra->containers[src], ra->typecodes[src]); + roaring_bitmap_t *bitmap = roaring_bitmap_create(); + if (bitmap == NULL) { + return NULL; } - if (new_container) { - ra_replace_key_and_container_at_index(ra, dst, ra->keys[src], - new_container, new_type); - dst++; + roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; + for (uint32_t i = 0; i < card; i++) { + // elems may not be aligned, read with memcpy + uint32_t elem; + memcpy(&elem, elems + i, sizeof(elem)); + roaring_bitmap_add_bulk(bitmap, &context, elem); } - src++; - } - if (src > dst) { - ra_shift_tail(ra, ra->size - src, dst - src); - } + return bitmap; + + } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { + return roaring_bitmap_portable_deserialize_safe(bufaschar + 1, + maxbytes - 1); + } else + return (NULL); } -void roaring_bitmap_printf(const roaring_bitmap_t *r) { +bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, + void *ptr) { const roaring_array_t *ra = &r->high_low_container; - printf("{"); - for (int i = 0; i < ra->size; ++i) { - container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - - if (i + 1 < ra->size) { - printf(","); + for (int i = 0; i < ra->size; ++i) + if (!container_iterate(ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16, iterator, ptr)) { + return false; } - } - printf("}"); + return true; } -void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) { +bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, + uint64_t high_bits, void *ptr) { const 
roaring_array_t *ra = &r->high_low_container; - printf("{"); - for (int i = 0; i < ra->size; ++i) { - printf("%d: %s (%d)", ra->keys[i], - get_full_container_name(ra->containers[i], ra->typecodes[i]), - container_get_cardinality(ra->containers[i], ra->typecodes[i])); - if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) { - printf("(shared count = %" PRIu32 " )", - croaring_refcount_get( - &(CAST_shared(ra->containers[i])->counter))); + for (int i = 0; i < ra->size; ++i) + if (!container_iterate64(ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16, iterator, + high_bits, ptr)) { + return false; } + return true; +} - if (i + 1 < ra->size) { - printf(", "); - } +/**** + * begin roaring_uint32_iterator_t + *****/ + +/** + * Partially initializes the iterator. Leaves it in either state: + * 1. Invalid due to `has_value = false`, or + * 2. At a container, with the high bits set, `has_value = true`. + */ +CROARING_WARN_UNUSED static bool iter_new_container_partial_init( + roaring_uint32_iterator_t *newit) { + newit->current_value = 0; + if (newit->container_index >= newit->parent->high_low_container.size || + newit->container_index < 0) { + newit->current_value = UINT32_MAX; + return (newit->has_value = false); } - printf("}"); + newit->has_value = true; + // we precompute container, typecode and highbits so that successive + // iterators do not have to grab them from odd memory locations + // and have to worry about the (easily predicted) container_unwrap_shared + // call. + newit->container = + newit->parent->high_low_container.containers[newit->container_index]; + newit->typecode = + newit->parent->high_low_container.typecodes[newit->container_index]; + newit->highbits = + ((uint32_t) + newit->parent->high_low_container.keys[newit->container_index]) + << 16; + newit->container = + container_unwrap_shared(newit->container, &(newit->typecode)); + return true; } /** - * (For advanced users.) 
- * Collect statistics about the bitmap + * Positions the iterator at the first value of the current container that the + * iterator points at, if available. */ -void roaring_bitmap_statistics(const roaring_bitmap_t *r, - roaring_statistics_t *stat) { - const roaring_array_t *ra = &r->high_low_container; - - memset(stat, 0, sizeof(*stat)); - stat->n_containers = ra->size; - stat->min_value = roaring_bitmap_minimum(r); - stat->max_value = roaring_bitmap_maximum(r); - - for (int i = 0; i < ra->size; ++i) { - uint8_t truetype = - get_container_type(ra->containers[i], ra->typecodes[i]); - uint32_t card = - container_get_cardinality(ra->containers[i], ra->typecodes[i]); - uint32_t sbytes = - container_size_in_bytes(ra->containers[i], ra->typecodes[i]); - stat->cardinality += card; - switch (truetype) { - case BITSET_CONTAINER_TYPE: - stat->n_bitset_containers++; - stat->n_values_bitset_containers += card; - stat->n_bytes_bitset_containers += sbytes; - break; - case ARRAY_CONTAINER_TYPE: - stat->n_array_containers++; - stat->n_values_array_containers += card; - stat->n_bytes_array_containers += sbytes; - break; - case RUN_CONTAINER_TYPE: - stat->n_run_containers++; - stat->n_values_run_containers += card; - stat->n_bytes_run_containers += sbytes; - break; - default: - assert(false); - roaring_unreachable; - } +CROARING_WARN_UNUSED static bool loadfirstvalue( + roaring_uint32_iterator_t *newit) { + if (iter_new_container_partial_init(newit)) { + uint16_t value = 0; + newit->container_it = + container_init_iterator(newit->container, newit->typecode, &value); + newit->current_value = newit->highbits | value; } + return newit->has_value; } -/* - * Checks that: - * - Array containers are sorted and contain no duplicates - * - Range containers are sorted and contain no overlapping ranges - * - Roaring containers are sorted by key and there are no duplicate keys - * - The correct container type is use for each container (e.g. 
bitmaps aren't - * used for small containers) +/** + * Positions the iterator at the last value of the current container that the + * iterator points at, if available. */ -bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, - const char **reason) { - const char *reason_local; - if (reason == NULL) { - // Always allow assigning through *reason - reason = &reason_local; - } - *reason = NULL; - const roaring_array_t *ra = &r->high_low_container; - if (ra->size < 0) { - *reason = "negative size"; - return false; - } - if (ra->allocation_size < 0) { - *reason = "negative allocation size"; - return false; - } - if (ra->size > ra->allocation_size) { - *reason = "more containers than allocated space"; - return false; - } - if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) { - *reason = "invalid flags"; - return false; - } - if (ra->size == 0) { - return true; +CROARING_WARN_UNUSED static bool loadlastvalue( + roaring_uint32_iterator_t *newit) { + if (iter_new_container_partial_init(newit)) { + uint16_t value = 0; + newit->container_it = container_init_iterator_last( + newit->container, newit->typecode, &value); + newit->current_value = newit->highbits | value; } + return newit->has_value; +} - if (ra->keys == NULL) { - *reason = "keys is NULL"; - return false; - } - if (ra->typecodes == NULL) { - *reason = "typecodes is NULL"; - return false; - } - if (ra->containers == NULL) { - *reason = "containers is NULL"; +/** + * Positions the iterator at the smallest value that is larger than or equal to + * `val` within the current container that the iterator points at. Assumes such + * a value exists within the current container. 
+ */ +CROARING_WARN_UNUSED static bool loadfirstvalue_largeorequal( + roaring_uint32_iterator_t *newit, uint32_t val) { + bool partial_init = iter_new_container_partial_init(newit); + assert(partial_init); + if (!partial_init) { return false; } - - uint32_t prev_key = ra->keys[0]; - for (int32_t i = 1; i < ra->size; ++i) { - if (ra->keys[i] <= prev_key) { - *reason = "keys not strictly increasing"; - return false; - } - prev_key = ra->keys[i]; - } - - for (int32_t i = 0; i < ra->size; ++i) { - if (!container_internal_validate(ra->containers[i], ra->typecodes[i], - reason)) { - // reason should already be set - if (*reason == NULL) { - *reason = "container failed to validate but no reason given"; - } - return false; - } + uint16_t value = 0; + newit->container_it = + container_init_iterator(newit->container, newit->typecode, &value); + bool found = container_iterator_lower_bound( + newit->container, newit->typecode, &newit->container_it, &value, + val & 0xFFFF); + assert(found); + if (!found) { + return false; } - + newit->current_value = newit->highbits | value; return true; } -roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - if (!ra_init_with_capacity( // allocation of list of containers can fail - &ans->high_low_container, r->high_low_container.size)) { - roaring_free(ans); - return NULL; - } - if (!ra_overwrite( // memory allocation of individual containers may fail - &r->high_low_container, &ans->high_low_container, is_cow(r))) { - roaring_bitmap_free(ans); // overwrite should leave in freeable state - return NULL; - } - roaring_bitmap_set_copy_on_write(ans, is_cow(r)); - return ans; +void roaring_iterator_init(const roaring_bitmap_t *r, + roaring_uint32_iterator_t *newit) { + newit->parent = r; + newit->container_index = 0; + newit->has_value = loadfirstvalue(newit); } -bool roaring_bitmap_overwrite(roaring_bitmap_t 
*dest, - const roaring_bitmap_t *src) { - roaring_bitmap_set_copy_on_write(dest, is_cow(src)); - return ra_overwrite(&src->high_low_container, &dest->high_low_container, - is_cow(src)); +void roaring_iterator_init_last(const roaring_bitmap_t *r, + roaring_uint32_iterator_t *newit) { + newit->parent = r; + newit->container_index = newit->parent->high_low_container.size - 1; + newit->has_value = loadlastvalue(newit); } -void roaring_bitmap_free(const roaring_bitmap_t *r) { - if (r == NULL) { - return; - } - if (!is_frozen(r)) { - ra_clear((roaring_array_t *)&r->high_low_container); - } - roaring_free((roaring_bitmap_t *)r); +roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r) { + roaring_uint32_iterator_t *newit = + (roaring_uint32_iterator_t *)roaring_malloc( + sizeof(roaring_uint32_iterator_t)); + if (newit == NULL) return NULL; + roaring_iterator_init(r, newit); + return newit; } -void roaring_bitmap_clear(roaring_bitmap_t *r) { - ra_reset(&r->high_low_container); +roaring_uint32_iterator_t *roaring_uint32_iterator_copy( + const roaring_uint32_iterator_t *it) { + roaring_uint32_iterator_t *newit = + (roaring_uint32_iterator_t *)roaring_malloc( + sizeof(roaring_uint32_iterator_t)); + memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); + return newit; } -void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { - roaring_array_t *ra = &r->high_low_container; - - const uint16_t hb = val >> 16; - const int i = ra_get_index(ra, hb); - uint8_t typecode; +bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it, + uint32_t val) { + uint16_t hb = val >> 16; + const int i = ra_get_index(&it->parent->high_low_container, hb); if (i >= 0) { - ra_unshare_container_at_index(ra, (uint16_t)i); - container_t *container = - ra_get_container_at_index(ra, (uint16_t)i, &typecode); - uint8_t newtypecode = typecode; - container_t *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != 
container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); + uint32_t lowvalue = + container_maximum(it->parent->high_low_container.containers[i], + it->parent->high_low_container.typecodes[i]); + uint16_t lb = val & 0xFFFF; + if (lowvalue < lb) { + // will have to load first value of next container + it->container_index = i + 1; + } else { + // the value is necessarily within the range of the container + it->container_index = i; + it->has_value = loadfirstvalue_largeorequal(it, val); + return it->has_value; } } else { - array_container_t *newac = array_container_create(); - container_t *container = - container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); + // there is no matching, so we are going for the next container + it->container_index = -i - 1; } + it->has_value = loadfirstvalue(it); + return it->has_value; } -bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); - container_t *container = ra_get_container_at_index( - &r->high_low_container, (uint16_t)i, &typecode); - - const int oldCardinality = - container_get_cardinality(container, typecode); - - uint8_t newtypecode = typecode; - container_t *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - result = true; - } else { - const int newCardinality = - container_get_cardinality(container, newtypecode); - - result = oldCardinality != newCardinality; - } - } else { 
- array_container_t *newac = array_container_create(); - container_t *container = - container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); - result = true; +bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it) { + if (it->container_index >= it->parent->high_low_container.size) { + return (it->has_value = false); } - - return result; + if (it->container_index < 0) { + it->container_index = 0; + return (it->has_value = loadfirstvalue(it)); + } + uint16_t low16 = (uint16_t)it->current_value; + if (container_iterator_next(it->container, it->typecode, &it->container_it, + &low16)) { + it->current_value = it->highbits | low16; + return (it->has_value = true); + } + it->container_index++; + return (it->has_value = loadfirstvalue(it)); } -void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); - container_t *container = ra_get_container_at_index( - &r->high_low_container, (uint16_t)i, &typecode); - uint8_t newtypecode = typecode; - container_t *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - if (container_nonzero_cardinality(container2, newtypecode)) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } +bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it) { + if (it->container_index < 0) { + return (it->has_value = false); + } + if (it->container_index >= 
it->parent->high_low_container.size) { + it->container_index = it->parent->high_low_container.size - 1; + return (it->has_value = loadlastvalue(it)); + } + uint16_t low16 = (uint16_t)it->current_value; + if (container_iterator_prev(it->container, it->typecode, &it->container_it, + &low16)) { + it->current_value = it->highbits | low16; + return (it->has_value = true); } + it->container_index--; + return (it->has_value = loadlastvalue(it)); } -bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i); - container_t *container = ra_get_container_at_index( - &r->high_low_container, (uint16_t)i, &typecode); - - const int oldCardinality = - container_get_cardinality(container, typecode); - - uint8_t newtypecode = typecode; - container_t *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - - const int newCardinality = - container_get_cardinality(container2, newtypecode); - - if (newCardinality != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); +uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it, + uint32_t *buf, uint32_t count) { + uint32_t ret = 0; + while (it->has_value && ret < count) { + uint32_t consumed; + uint16_t low16 = (uint16_t)it->current_value; + bool has_value = container_iterator_read_into_uint32( + it->container, it->typecode, &it->container_it, it->highbits, buf, + count - ret, &consumed, &low16); + ret += consumed; + buf += consumed; + if (has_value) { + it->has_value = true; + it->current_value = it->highbits | 
low16; + assert(ret == count); + return ret; } - - result = oldCardinality != newCardinality; + it->container_index++; + it->has_value = loadfirstvalue(it); } - return result; + return ret; } -void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - if (n_args == 0 || r->high_low_container.size == 0) { - return; +void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it) { + roaring_free(it); +} + +/**** + * end of roaring_uint32_iterator_t + *****/ + +bool roaring_bitmap_equals(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2) { + const roaring_array_t *ra1 = &r1->high_low_container; + const roaring_array_t *ra2 = &r2->high_low_container; + + if (ra1->size != ra2->size) { + return false; } - int32_t pos = - -1; // position of the container used in the previous iteration - for (size_t i = 0; i < n_args; i++) { - uint16_t key = (uint16_t)(vals[i] >> 16); - if (pos < 0 || key != r->high_low_container.keys[pos]) { - pos = ra_get_index(&r->high_low_container, key); + for (int i = 0; i < ra1->size; ++i) { + if (ra1->keys[i] != ra2->keys[i]) { + return false; } - if (pos >= 0) { - uint8_t new_typecode; - container_t *new_container; - new_container = container_remove( - r->high_low_container.containers[pos], vals[i] & 0xffff, - r->high_low_container.typecodes[pos], &new_typecode); - if (new_container != r->high_low_container.containers[pos]) { - container_free(r->high_low_container.containers[pos], - r->high_low_container.typecodes[pos]); - ra_replace_key_and_container_at_index(&r->high_low_container, - pos, key, new_container, - new_typecode); - } - if (!container_nonzero_cardinality(new_container, new_typecode)) { - container_free(new_container, new_typecode); - ra_remove_at_index(&r->high_low_container, pos); - pos = -1; - } + } + for (int i = 0; i < ra1->size; ++i) { + bool areequal = container_equals(ra1->containers[i], ra1->typecodes[i], + ra2->containers[i], ra2->typecodes[i]); + if (!areequal) { + return false; 
} } + return true; } -// there should be some SIMD optimizations possible here -roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint32_t neededcap = length1 > length2 ? length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); +bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2) { + const roaring_array_t *ra1 = &r1->high_low_container; + const roaring_array_t *ra2 = &r2->high_low_container; + + const int length1 = ra1->size, length2 = ra2->size; int pos1 = 0, pos2 = 0; while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = - ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - const uint16_t s2 = - ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + const uint16_t s1 = ra_get_key_at_index(ra1, (uint16_t)pos1); + const uint16_t s2 = ra_get_key_at_index(ra2, (uint16_t)pos2); if (s1 == s2) { uint8_t type1, type2; - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c = container_and(c1, type1, c2, type2, &result_type); - - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); // otherwise: memory leak! 
- } + container_t *c1 = + ra_get_container_at_index(ra1, (uint16_t)pos1, &type1); + container_t *c2 = + ra_get_container_at_index(ra2, (uint16_t)pos2, &type2); + if (!container_is_subset(c1, type1, c2, type2)) return false; ++pos1; ++pos2; } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); + return false; } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + pos2 = ra_advance_until(ra2, s1, pos2); } } - return answer; + if (pos1 == length1) + return true; + else + return false; } -/** - * Compute the union of 'number' bitmaps. - */ -roaring_bitmap_t *roaring_bitmap_or_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_bitmap_t *answer = - roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); - for (size_t i = 2; i < number; i++) { - roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); - } - roaring_bitmap_repair_after_lazy(answer); - return answer; -} +static void insert_flipped_container(roaring_array_t *ans_arr, + const roaring_array_t *x1_arr, uint16_t hb, + uint16_t lb_start, uint16_t lb_end) { + const int i = ra_get_index(x1_arr, hb); + const int j = ra_get_index(ans_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); + flipped_container = + container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, + (uint32_t)(lb_end + 1), &ctype_out); -/** - * Compute the xor of 'number' bitmaps. 
- */ -roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); - for (size_t i = 2; i < number; i++) { - roaring_bitmap_lazy_xor_inplace(answer, x[i]); + if (container_get_cardinality(flipped_container, ctype_out)) + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); + else { + container_free(flipped_container, ctype_out); + } + } else { + flipped_container = container_range_of_ones( + (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); } - roaring_bitmap_repair_after_lazy(answer); - return answer; } -// inplace and (modifies its first argument). -void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - if (x1 == x2) return; - int pos1 = 0, pos2 = 0, intersection_size = 0; - const int length1 = ra_get_size(&x1->high_low_container); - const int length2 = ra_get_size(&x2->high_low_container); +static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, + uint16_t lb_start, uint16_t lb_end) { + const int i = ra_get_index(x1_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); + flipped_container = container_inot_range( + container_to_flip, ctype_in, (uint32_t)lb_start, + (uint32_t)(lb_end + 1), &ctype_out); + // if a new container was created, the old one was already freed + if (container_get_cardinality(flipped_container, ctype_out)) { + ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); + } else { + container_free(flipped_container, ctype_out); + ra_remove_at_index(x1_arr, i); + } - // any skipped-over or newly emptied containers in x1 - 
// have to be freed. - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = - ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - const uint16_t s2 = - ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + } else { + flipped_container = container_range_of_ones( + (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); + ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, + ctype_out); + } +} - if (s1 == s2) { - uint8_t type1, type2, result_type; - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); +static void insert_fully_flipped_container(roaring_array_t *ans_arr, + const roaring_array_t *x1_arr, + uint16_t hb) { + const int i = ra_get_index(x1_arr, hb); + const int j = ra_get_index(ans_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); + flipped_container = + container_not(container_to_flip, ctype_in, &ctype_out); + if (container_get_cardinality(flipped_container, ctype_out)) + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); + else { + container_free(flipped_container, ctype_out); + } + } else { + flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); + ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, + ctype_out); + } +} - // We do the computation "in place" only when c1 is not a shared - // container. Rationale: using a shared container safely with in - // place computation would require making a copy and then doing the - // computation in place which is likely less efficient than avoiding - // in place entirely and always generating a new container. - container_t *c = - (type1 == SHARED_CONTAINER_TYPE) - ? 
container_and(c1, type1, c2, type2, &result_type) - : container_iand(c1, type1, c2, type2, &result_type); +static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { + const int i = ra_get_index(x1_arr, hb); + uint8_t ctype_in, ctype_out; + container_t *flipped_container = NULL; + if (i >= 0) { + container_t *container_to_flip = + ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); + flipped_container = + container_inot(container_to_flip, ctype_in, &ctype_out); - if (c != c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, type1); - } - if (container_nonzero_cardinality(c, result_type)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size, s1, c, - result_type); - intersection_size++; - } else { - container_free(c, result_type); - } - ++pos1; - ++pos2; - } else if (s1 < s2) { - pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + if (container_get_cardinality(flipped_container, ctype_out)) { + ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); + } else { + container_free(flipped_container, ctype_out); + ra_remove_at_index(x1_arr, i); } - } - // if we ended early because x2 ran out, then all remaining in x1 should be - // freed - while (pos1 < length1) { - container_free(x1->high_low_container.containers[pos1], - x1->high_low_container.typecodes[pos1]); - ++pos1; + } else { + flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); + ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, + ctype_out); } - - // all containers after this have either been copied or freed - ra_downsize(&x1->high_low_container, intersection_size); } -roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - 
length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { +roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, + uint64_t range_start, + uint64_t range_end) { + if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { return roaring_bitmap_copy(x1); } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c = container_or(c1, type1, c2, type2, &result_type); - - // since we assume that the initial containers are non-empty, the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - // c1 = container_clone(c1, type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); - } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - 
(uint16_t)pos2, &type2); - // c2 = container_clone(c2, type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; + return roaring_bitmap_flip_closed(x1, (uint32_t)range_start, + (uint32_t)(range_end - 1)); } -// inplace or (modifies its first argument). -void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; +roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, + uint32_t range_start, + uint32_t range_end) { + if (range_start > range_end) { + return roaring_bitmap_copy(x1); + } - if (0 == length2) return; + roaring_bitmap_t *ans = roaring_bitmap_create(); + roaring_bitmap_set_copy_on_write(ans, is_cow(x1)); - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - if (!container_is_full(c1, type1)) { - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, (uint16_t)pos2, &type2); - container_t *c = - (type1 == SHARED_CONTAINER_TYPE) - ? 
container_or(c1, type1, c2, type2, &result_type) - : container_ior(c1, type1, c2, type2, &result_type); + uint16_t hb_start = (uint16_t)(range_start >> 16); + const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; + uint16_t hb_end = (uint16_t)(range_end >> 16); + const uint16_t lb_end = (uint16_t)range_end; // & 0xFFFF; - if (c != c1) { // in this instance a new container was created, - // and we need to free the old one - container_free(c1, type1); - } - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, + hb_start, is_cow(x1)); + if (hb_start == hb_end) { + insert_flipped_container(&ans->high_low_container, + &x1->high_low_container, hb_start, lb_start, + lb_end); + } else { + // start and end containers are distinct + if (lb_start > 0) { + // handle first (partial) container + insert_flipped_container(&ans->high_low_container, + &x1->high_low_container, hb_start, + lb_start, 0xFFFF); + ++hb_start; // for the full containers. Can't wrap. 
+ } - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } + for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { + insert_fully_flipped_container(&ans->high_low_container, + &x1->high_low_container, + (uint16_t)hb); + } - // container_t *c2_clone = container_clone(c2, type2); - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + // handle a partial final container + if (lb_end != 0xFFFF) { + insert_flipped_container(&ans->high_low_container, + &x1->high_low_container, hb_end + 1, 0, + lb_end); + ++hb_end; } } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } + ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, + hb_end, is_cow(x1)); + return ans; } -roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); +void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, + uint64_t range_end) { + if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { + return; } - if (0 == length2) { - return roaring_bitmap_copy(x1); + roaring_bitmap_flip_inplace_closed(x1, (uint32_t)range_start, + (uint32_t)(range_end - 1)); +} + +void 
roaring_bitmap_flip_inplace_closed(roaring_bitmap_t *x1, + uint32_t range_start, + uint32_t range_end) { + if (range_start > range_end) { + return; // empty range } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c = container_xor(c1, type1, c2, type2, &result_type); - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + uint16_t hb_start = (uint16_t)(range_start >> 16); + const uint16_t lb_start = (uint16_t)range_start; + uint16_t hb_end = (uint16_t)(range_end >> 16); + const uint16_t lb_end = (uint16_t)range_end; + + if (hb_start == hb_end) { + inplace_flip_container(&x1->high_low_container, hb_start, lb_start, + lb_end); + } else { + // start and end containers are distinct + if (lb_start > 0) { + // handle first (partial) container + inplace_flip_container(&x1->high_low_container, hb_start, lb_start, + 0xFFFF); + ++hb_start; // for the full containers. Can't wrap. 
+ } - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); - } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + if (lb_end != 0xFFFF) --hb_end; - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { + inplace_fully_flip_container(&x1->high_low_container, (uint16_t)hb); + } + // handle a partial final container + if (lb_end != 0xFFFF) { + inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, + lb_end); + ++hb_end; } } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; } -// inplace xor (modifies its first argument). +static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c, + uint8_t t) { + int size = ra_get_size(ra); + if (size == 0 || ra_get_key_at_index(ra, (uint16_t)(size - 1)) != k) { + // No merge. 
+ ra_append(ra, (uint16_t)k, c, t); + return; + } -void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; + uint8_t last_t, new_t; + container_t *last_c, *new_c; - if (0 == length2) return; + // NOTE: we don't need to unwrap here, since we added last_c ourselves + // we have the certainty it's not a shared container. + // The same applies to c, as it's the result of calling container_offset. + last_c = ra_get_container_at_index(ra, (uint16_t)(size - 1), &last_t); + new_c = container_ior(last_c, last_t, c, t, &new_t); - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; + ra_set_container_at_index(ra, size - 1, new_c, new_t); + + // Comparison of pointers of different origin is UB (or so claim some + // compiler makers), so we compare their bit representation only. + if ((uintptr_t)last_c != (uintptr_t)new_c) { + container_free(last_c, last_t); } + container_free(c, t); +} - // XOR can have new containers inserted from x2, but can also - // lose containers when x1 and x2 are nonempty and identical. +// roaring_bitmap_add_offset adds the value 'offset' to each and every value in +// a bitmap, generating a new bitmap in the process. If offset + element is +// outside of the range [0,2^32), that the element will be dropped. +// We need "offset" to be 64 bits because we want to support values +// between -0xFFFFFFFF up to +0xFFFFFFFF. 
+roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, + int64_t offset) { + roaring_bitmap_t *answer; + roaring_array_t *ans_ra; + int64_t container_offset; + uint16_t in_offset; - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); + const roaring_array_t *bm_ra = &bm->high_low_container; + int length = bm_ra->size; - // We do the computation "in place" only when c1 is not a shared - // container. Rationale: using a shared container safely with in - // place computation would require making a copy and then doing the - // computation in place which is likely less efficient than avoiding - // in place entirely and always generating a new container. 
+ if (offset == 0) { + return roaring_bitmap_copy(bm); + } - container_t *c; - if (type1 == SHARED_CONTAINER_TYPE) { - c = container_xor(c1, type1, c2, type2, &result_type); - shared_container_free(CAST_shared(c1)); // so release - } else { - c = container_ixor(c1, type1, c2, type2, &result_type); - } + container_offset = offset >> 16; + in_offset = (uint16_t)(offset - container_offset * (1 << 16)); - if (container_nonzero_cardinality(c, result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - ++pos1; - } else { - container_free(c, result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } + answer = roaring_bitmap_create(); + bool cow = is_cow(bm); + roaring_bitmap_set_copy_on_write(answer, cow); - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + ans_ra = &answer->high_low_container; - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + if (in_offset == 0) { + ans_ra = &answer->high_low_container; - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } + for (int i = 0, j = 0; i < length; ++i) { + int64_t key = ra_get_key_at_index(bm_ra, (uint16_t)i); + key += container_offset; - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + if (key < 0 || key >= (1 << 16)) { + continue; + } + ra_append_copy(ans_ra, bm_ra, (uint16_t)i, cow); + ans_ra->keys[j++] = (uint16_t)key; } + return answer; } - 
if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } -} -roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); - roaring_bitmap_set_copy_on_write(empty_bitmap, - is_cow(x1) || is_cow(x2)); - return empty_bitmap; - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + uint8_t t; + const container_t *c; + container_t *lo, *hi, **lo_ptr, **hi_ptr; + int64_t k; - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = 0; - uint16_t s2 = 0; - while (true) { - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + for (int i = 0; i < length; ++i) { + lo = hi = NULL; + lo_ptr = hi_ptr = NULL; - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c = - container_andnot(c1, type1, c2, type2, &result_type); + k = ra_get_key_at_index(bm_ra, (uint16_t)i) + container_offset; + if (k >= 0 && k < (1 << 16)) { + lo_ptr = &lo; + } + if (k + 1 >= 0 && k + 1 < (1 << 16)) { + hi_ptr = &hi; + } + if (lo_ptr == NULL && hi_ptr == NULL) { + continue; + } + c = ra_get_container_at_index(bm_ra, (uint16_t)i, &t); + c = container_unwrap_shared(c, &t); - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); - } else { - container_free(c, result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) 
break; - if (pos2 == length2) break; - } else if (s1 < s2) { // s1 < s2 - const int next_pos1 = - ra_advance_until(&x1->high_low_container, s2, pos1); - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, next_pos1, - is_cow(x1)); - // TODO : perhaps some of the copy_on_write should be based on - // answer rather than x1 (more stringent?). Many similar cases - pos1 = next_pos1; - if (pos1 == length1) break; - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; + container_add_offset(c, t, lo_ptr, hi_ptr, in_offset); + if (lo != NULL) { + offset_append_with_merge(ans_ra, (int)k, lo, t); } + if (hi != NULL) { + ra_append(ans_ra, (uint16_t)(k + 1), hi, t); + } + // the `lo` and `hi` container type always keep same as container `c`. + // in the case of `container_add_offset` on bitset container, `lo` and + // `hi` may has small cardinality, they must be repaired to array + // container. } - if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } + + roaring_bitmap_repair_after_lazy(answer); // do required type conversions. return answer; } -// inplace andnot (modifies its first argument). 
- -void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - +roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2, + const bool bitsetconversion) { uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - int intersection_size = 0; - - if (0 == length2) return; - + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; if (0 == length1) { - roaring_bitmap_clear(x1); - return; + return roaring_bitmap_copy(x2); } - + if (0 == length2) { + return roaring_bitmap_copy(x1); + } + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); int pos1 = 0, pos2 = 0; uint8_t type1, type2; uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); @@ -22261,29 +22148,26 @@ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, (uint16_t)pos1, &type1); container_t *c2 = ra_get_container_at_index(&x2->high_low_container, (uint16_t)pos2, &type2); - - // We do the computation "in place" only when c1 is not a shared - // container. Rationale: using a shared container safely with in - // place computation would require making a copy and then doing the - // computation in place which is likely less efficient than avoiding - // in place entirely and always generating a new container. 
- container_t *c; - if (type1 == SHARED_CONTAINER_TYPE) { - c = container_andnot(c1, type1, c2, type2, &result_type); - shared_container_free(CAST_shared(c1)); // release - } else { - c = container_iandnot(c1, type1, c2, type2, &result_type); - } - - if (container_nonzero_cardinality(c, result_type)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size++, s1, - c, result_type); + if (bitsetconversion && + (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) && + (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)) { + container_t *newc1 = + container_mutable_unwrap_shared(c1, &type1); + newc1 = container_to_bitset(newc1, type1); + type1 = BITSET_CONTAINER_TYPE; + c = container_lazy_ior(newc1, type1, c2, type2, &result_type); + if (c != newc1) { // should not happen + container_free(newc1, type1); + } } else { - container_free(c, result_type); + c = container_lazy_or(c1, type1, c2, type2, &result_type); } - + // since we assume that the initial containers are non-empty, + // the + // result here + // can only be non-empty + ra_append(&answer->high_low_container, s1, c, result_type); ++pos1; ++pos2; if (pos1 == length1) break; @@ -22292,4833 +22176,4959 @@ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } else if (s1 < s2) { // s1 < s2 - if (pos1 != intersection_size) { - container_t *c1 = ra_get_container_at_index( - &x1->high_low_container, (uint16_t)pos1, &type1); - - ra_replace_key_and_container_at_index( - &x1->high_low_container, intersection_size, s1, c1, type1); + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); } - intersection_size++; + ra_append(&answer->high_low_container, s1, c1, type1); pos1++; if (pos1 == length1) break; s1 = 
ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - } - } - - if (pos1 < length1) { - // all containers between intersection_size and - // pos1 are junk. However, they have either been moved - // (thus still referenced) or involved in an iandnot - // that will clean up all containers that could not be reused. - // Thus we should not free the junk containers between - // intersection_size and pos1. - if (pos1 > intersection_size) { - // left slide of remaining items - ra_copy_range(&x1->high_low_container, pos1, length1, - intersection_size); - } - // else current placement is fine - intersection_size += (length1 - pos1); - } - ra_downsize(&x1->high_low_container, intersection_size); -} - -uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) { - const roaring_array_t *ra = &r->high_low_container; - - uint64_t card = 0; - for (int i = 0; i < ra->size; ++i) - card += container_get_cardinality(ra->containers[i], ra->typecodes[i]); - return card; -} - -uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, - uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { - return 0; - } - return roaring_bitmap_range_cardinality_closed(r, (uint32_t)range_start, - (uint32_t)(range_end - 1)); -} - -uint64_t roaring_bitmap_range_cardinality_closed(const roaring_bitmap_t *r, - uint32_t range_start, - uint32_t range_end) { - const roaring_array_t *ra = &r->high_low_container; - - if (range_start > range_end) { - return 0; - } - - // now we have: 0 <= range_start <= range_end <= UINT32_MAX - - uint16_t minhb = (uint16_t)(range_start >> 16); - uint16_t maxhb = (uint16_t)(range_end >> 16); - - uint64_t card = 0; - - int i = ra_get_index(ra, minhb); - if (i >= 0) { - if (minhb == maxhb) { - card += 
container_rank(ra->containers[i], ra->typecodes[i], - range_end & 0xffff); - } else { - card += - container_get_cardinality(ra->containers[i], ra->typecodes[i]); - } - if ((range_start & 0xffff) != 0) { - card -= container_rank(ra->containers[i], ra->typecodes[i], - (range_start & 0xffff) - 1); - } - i++; - } else { - i = -i - 1; - } - - for (; i < ra->size; i++) { - uint16_t key = ra->keys[i]; - if (key < maxhb) { - card += - container_get_cardinality(ra->containers[i], ra->typecodes[i]); - } else if (key == maxhb) { - card += container_rank(ra->containers[i], ra->typecodes[i], - range_end & 0xffff); - break; - } else { - break; - } - } - - return card; -} - -bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) { - return r->high_low_container.size == 0; -} - -void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) { - ra_to_uint32_array(&r->high_low_container, ans); -} - -bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset, - size_t limit, uint32_t *ans) { - return ra_range_uint32_array(&r->high_low_container, offset, limit, ans); -} - -/** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. - */ -bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { - bool answer = false; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t type_original, type_after; - ra_unshare_container_at_index( - &r->high_low_container, - (uint16_t)i); // TODO: this introduces extra cloning! 
- container_t *c = ra_get_container_at_index(&r->high_low_container, - (uint16_t)i, &type_original); - container_t *c1 = convert_run_optimize(c, type_original, &type_after); - if (type_after == RUN_CONTAINER_TYPE) { - answer = true; - } - ra_set_container_at_index(&r->high_low_container, i, c1, type_after); - } - return answer; -} - -size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { - size_t answer = 0; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t type_original; - container_t *c = ra_get_container_at_index(&r->high_low_container, - (uint16_t)i, &type_original); - answer += container_shrink_to_fit(c, type_original); - } - answer += ra_shrink_to_fit(&r->high_low_container); - return answer; -} - -/** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ -bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { - bool answer = false; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t type_original, type_after; - container_t *c = ra_get_container_at_index(&r->high_low_container, - (uint16_t)i, &type_original); - if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) { - answer = true; - if (type_original == SHARED_CONTAINER_TYPE) { - run_container_t *truec = CAST_run(CAST_shared(c)->container); - int32_t card = run_container_cardinality(truec); - container_t *c1 = convert_to_bitset_or_array_container( - truec, card, &type_after); - shared_container_free(CAST_shared(c)); // frees run as needed - ra_set_container_at_index(&r->high_low_container, i, c1, - type_after); - - } else { - int32_t card = run_container_cardinality(CAST_run(c)); - container_t *c1 = convert_to_bitset_or_array_container( - CAST_run(c), card, &type_after); - run_container_free(CAST_run(c)); - ra_set_container_at_index(&r->high_low_container, i, c1, - type_after); + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, 
&type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); } + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } } + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); + } return answer; } -size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) { - size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); - uint64_t cardinality = roaring_bitmap_get_cardinality(r); - uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); - if (portablesize < sizeasarray) { - buf[0] = CROARING_SERIALIZATION_CONTAINER; - return roaring_bitmap_portable_serialize(r, buf + 1) + 1; - } else { - buf[0] = CROARING_SERIALIZATION_ARRAY_UINT32; - memcpy(buf + 1, &cardinality, sizeof(uint32_t)); - roaring_bitmap_to_uint32_array( - r, (uint32_t *)(buf + 1 + sizeof(uint32_t))); - return 1 + (size_t)sizeasarray; +void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2, + const bool bitsetconversion) { + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; + + if (0 == length2) return; + + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; } -} + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + if (!container_is_full(c1, type1)) { + if 
((bitsetconversion == false) || + (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)) { + c1 = get_writable_copy_if_shared(c1, &type1); + } else { + // convert to bitset + container_t *old_c1 = c1; + uint8_t old_type1 = type1; + c1 = container_mutable_unwrap_shared(c1, &type1); + c1 = container_to_bitset(c1, type1); + container_free(old_c1, old_type1); + type1 = BITSET_CONTAINER_TYPE; + } -size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) { - size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); - uint64_t sizeasarray = - roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) + sizeof(uint32_t); - return portablesize < sizeasarray ? portablesize + 1 - : (size_t)sizeasarray + 1; -} + container_t *c2 = ra_get_container_at_index( + &x2->high_low_container, (uint16_t)pos2, &type2); + container_t *c = + container_lazy_ior(c1, type1, c2, type2, &result_type); -size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) { - return ra_portable_size_in_bytes(&r->high_low_container); -} + if (c != c1) { // in this instance a new container was created, + // and we need to free the old one + container_free(c1, type1); + } -roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, - size_t maxbytes) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); - if (ans == NULL) { - return NULL; - } - size_t bytesread; - bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, - maxbytes, &bytesread); - if (!is_ok) { - roaring_free(ans); - return NULL; - } - roaring_bitmap_set_copy_on_write(ans, false); - if (!is_ok) { - roaring_free(ans); - return NULL; - } - return ans; -} + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); + } + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); 
-roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { - return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); -} + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); -size_t roaring_bitmap_portable_deserialize_size(const char *buf, - size_t maxbytes) { - return ra_portable_deserialize_size(buf, maxbytes); + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + // container_t *c2_clone = container_clone(c2, type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + } + } + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, is_cow(x2)); + } } -size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf) { - return ra_portable_serialize(&r->high_low_container, buf); -} +roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + if (0 == length1) { + return roaring_bitmap_copy(x2); + } + if (0 == length2) { + return roaring_bitmap_copy(x1); + } + roaring_bitmap_t *answer = + roaring_bitmap_create_with_capacity(length1 + length2); + roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + 
container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + container_t *c = + container_lazy_xor(c1, type1, c2, type2, &result_type); -roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { - const char *bufaschar = (const char *)buf; - if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { - /* This looks like a compressed set of uint32_t elements */ - uint32_t card; + if (container_nonzero_cardinality(c, result_type)) { + ra_append(&answer->high_low_container, s1, c, result_type); + } else { + container_free(c, result_type); + } - memcpy(&card, bufaschar + 1, sizeof(uint32_t)); + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - const uint32_t *elems = - (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + c1 = get_copy_of_container(c1, &type1, is_cow(x1)); + if (is_cow(x1)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c1, + type1); + } + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - roaring_bitmap_t *bitmap = roaring_bitmap_create(); - if (bitmap == NULL) { - return NULL; - } - roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; - for (uint32_t i = 0; i < card; i++) { - // elems may not be aligned, read with memcpy - uint32_t elem; - memcpy(&elem, elems + i, sizeof(elem)); - roaring_bitmap_add_bulk(bitmap, &context, elem); + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + c2 = get_copy_of_container(c2, 
&type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } - return bitmap; - - } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { - return roaring_bitmap_portable_deserialize(bufaschar + 1); - } else - return (NULL); -} - -roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, - size_t maxbytes) { - if (maxbytes < 1) { - return NULL; } + if (pos1 == length1) { + ra_append_copy_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2, + is_cow(x2)); + } else if (pos2 == length2) { + ra_append_copy_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1, + is_cow(x1)); + } + return answer; +} - const char *bufaschar = (const char *)buf; - if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { - if (maxbytes < 1 + sizeof(uint32_t)) { - return NULL; - } - - /* This looks like a compressed set of uint32_t elements */ - uint32_t card; - memcpy(&card, bufaschar + 1, sizeof(uint32_t)); - - // Check the buffer is big enough to contain card uint32_t elements - if (maxbytes < 1 + sizeof(uint32_t) + card * sizeof(uint32_t)) { - return NULL; - } +void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + assert(x1 != x2); + uint8_t result_type = 0; + int length1 = x1->high_low_container.size; + const int length2 = x2->high_low_container.size; - const uint32_t *elems = - (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); + if (0 == length2) return; - roaring_bitmap_t *bitmap = roaring_bitmap_create(); - if (bitmap == NULL) { - return NULL; - } - roaring_bulk_context_t context = CROARING_ZERO_INITIALIZER; - for (uint32_t i = 0; i < card; i++) { - // elems may not be aligned, read with memcpy - uint32_t elem; - memcpy(&elem, elems + i, sizeof(elem)); - 
roaring_bitmap_add_bulk(bitmap, &context, elem); - } - return bitmap; + if (0 == length1) { + roaring_bitmap_overwrite(x1, x2); + return; + } + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); - } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { - return roaring_bitmap_portable_deserialize_safe(bufaschar + 1, - maxbytes - 1); - } else - return (NULL); -} + // We do the computation "in place" only when c1 is not a shared + // container. Rationale: using a shared container safely with in + // place computation would require making a copy and then doing the + // computation in place which is likely less efficient than avoiding + // in place entirely and always generating a new container. 
-bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, - void *ptr) { - const roaring_array_t *ra = &r->high_low_container; + container_t *c; + if (type1 == SHARED_CONTAINER_TYPE) { + c = container_lazy_xor(c1, type1, c2, type2, &result_type); + shared_container_free(CAST_shared(c1)); // release + } else { + c = container_lazy_ixor(c1, type1, c2, type2, &result_type); + } - for (int i = 0; i < ra->size; ++i) - if (!container_iterate(ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16, iterator, ptr)) { - return false; - } - return true; -} + if (container_nonzero_cardinality(c, result_type)) { + ra_set_container_at_index(&x1->high_low_container, pos1, c, + result_type); + ++pos1; + } else { + container_free(c, result_type); + ra_remove_at_index(&x1->high_low_container, pos1); + --length1; + } + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); -bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, - uint64_t high_bits, void *ptr) { - const roaring_array_t *ra = &r->high_low_container; + } else if (s1 < s2) { // s1 < s2 + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - for (int i = 0; i < ra->size; ++i) - if (!container_iterate64(ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16, iterator, - high_bits, ptr)) { - return false; + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + // container_t *c2_clone = container_clone(c2, type2); + c2 = get_copy_of_container(c2, &type2, is_cow(x2)); + if (is_cow(x2)) { + ra_set_container_at_index(&x2->high_low_container, pos2, c2, + type2); + } + ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, + type2); + pos1++; + length1++; + pos2++; + if (pos2 == 
length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } - return true; -} - -/**** - * begin roaring_uint32_iterator_t - *****/ - -/** - * Partially initializes the iterator. Leaves it in either state: - * 1. Invalid due to `has_value = false`, or - * 2. At a container, with the high bits set, `has_value = true`. - */ -CROARING_WARN_UNUSED static bool iter_new_container_partial_init( - roaring_uint32_iterator_t *newit) { - newit->current_value = 0; - if (newit->container_index >= newit->parent->high_low_container.size || - newit->container_index < 0) { - newit->current_value = UINT32_MAX; - return (newit->has_value = false); } - newit->has_value = true; - // we precompute container, typecode and highbits so that successive - // iterators do not have to grab them from odd memory locations - // and have to worry about the (easily predicted) container_unwrap_shared - // call. - newit->container = - newit->parent->high_low_container.containers[newit->container_index]; - newit->typecode = - newit->parent->high_low_container.typecodes[newit->container_index]; - newit->highbits = - ((uint32_t) - newit->parent->high_low_container.keys[newit->container_index]) - << 16; - newit->container = - container_unwrap_shared(newit->container, &(newit->typecode)); - return true; -} - -/** - * Positions the iterator at the first value of the current container that the - * iterator points at, if available. 
- */ -CROARING_WARN_UNUSED static bool loadfirstvalue( - roaring_uint32_iterator_t *newit) { - if (iter_new_container_partial_init(newit)) { - uint16_t value = 0; - newit->container_it = - container_init_iterator(newit->container, newit->typecode, &value); - newit->current_value = newit->highbits | value; + if (pos1 == length1) { + ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, + pos2, length2, is_cow(x2)); } - return newit->has_value; } -/** - * Positions the iterator at the last value of the current container that the - * iterator points at, if available. - */ -CROARING_WARN_UNUSED static bool loadlastvalue( - roaring_uint32_iterator_t *newit) { - if (iter_new_container_partial_init(newit)) { - uint16_t value = 0; - newit->container_it = container_init_iterator_last( - newit->container, newit->typecode, &value); - newit->current_value = newit->highbits | value; +void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) { + roaring_array_t *ra = &r->high_low_container; + + for (int i = 0; i < ra->size; ++i) { + const uint8_t old_type = ra->typecodes[i]; + container_t *old_c = ra->containers[i]; + uint8_t new_type = old_type; + container_t *new_c = container_repair_after_lazy(old_c, &new_type); + ra->containers[i] = new_c; + ra->typecodes[i] = new_type; } - return newit->has_value; } /** - * Positions the iterator at the smallest value that is larger than or equal to - * `val` within the current container that the iterator points at. Assumes such - * a value exists within the current container. + * roaring_bitmap_rank returns the number of integers that are smaller or equal + * to x. 
*/ -CROARING_WARN_UNUSED static bool loadfirstvalue_largeorequal( - roaring_uint32_iterator_t *newit, uint32_t val) { - bool partial_init = iter_new_container_partial_init(newit); - assert(partial_init); - if (!partial_init) { - return false; - } - uint16_t value = 0; - newit->container_it = - container_init_iterator(newit->container, newit->typecode, &value); - bool found = container_iterator_lower_bound( - newit->container, newit->typecode, &newit->container_it, &value, - val & 0xFFFF); - assert(found); - if (!found) { - return false; - } - newit->current_value = newit->highbits | value; - return true; -} - -void roaring_iterator_init(const roaring_bitmap_t *r, - roaring_uint32_iterator_t *newit) { - newit->parent = r; - newit->container_index = 0; - newit->has_value = loadfirstvalue(newit); -} - -void roaring_iterator_init_last(const roaring_bitmap_t *r, - roaring_uint32_iterator_t *newit) { - newit->parent = r; - newit->container_index = newit->parent->high_low_container.size - 1; - newit->has_value = loadlastvalue(newit); -} - -roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)roaring_malloc( - sizeof(roaring_uint32_iterator_t)); - if (newit == NULL) return NULL; - roaring_iterator_init(r, newit); - return newit; +uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { + uint64_t size = 0; + uint32_t xhigh = x >> 16; + for (int i = 0; i < bm->high_low_container.size; i++) { + uint32_t key = bm->high_low_container.keys[i]; + if (xhigh > key) { + size += + container_get_cardinality(bm->high_low_container.containers[i], + bm->high_low_container.typecodes[i]); + } else if (xhigh == key) { + return size + container_rank(bm->high_low_container.containers[i], + bm->high_low_container.typecodes[i], + x & 0xFFFF); + } else { + return size; + } + } + return size; } +void roaring_bitmap_rank_many(const roaring_bitmap_t *bm, const uint32_t *begin, + const 
uint32_t *end, uint64_t *ans) { + uint64_t size = 0; -roaring_uint32_iterator_t *roaring_uint32_iterator_copy( - const roaring_uint32_iterator_t *it) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)roaring_malloc( - sizeof(roaring_uint32_iterator_t)); - memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); - return newit; + int i = 0; + const uint32_t *iter = begin; + while (i < bm->high_low_container.size && iter != end) { + uint32_t x = *iter; + uint32_t xhigh = x >> 16; + uint32_t key = bm->high_low_container.keys[i]; + if (xhigh > key) { + size += + container_get_cardinality(bm->high_low_container.containers[i], + bm->high_low_container.typecodes[i]); + i++; + } else if (xhigh == key) { + uint32_t consumed = container_rank_many( + bm->high_low_container.containers[i], + bm->high_low_container.typecodes[i], size, iter, end, ans); + iter += consumed; + ans += consumed; + } else { + *(ans++) = size; + iter++; + } + } } -bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it, - uint32_t val) { - uint16_t hb = val >> 16; - const int i = ra_get_index(&it->parent->high_low_container, hb); - if (i >= 0) { - uint32_t lowvalue = - container_maximum(it->parent->high_low_container.containers[i], - it->parent->high_low_container.typecodes[i]); - uint16_t lb = val & 0xFFFF; - if (lowvalue < lb) { - // will have to load first value of next container - it->container_index = i + 1; +/** + * roaring_bitmap_get_index returns the index of x, or -1 if x does not exist. 
+ */ +int64_t roaring_bitmap_get_index(const roaring_bitmap_t *bm, uint32_t x) { + int64_t index = 0; + const uint16_t xhigh = x >> 16; + int32_t high_idx = ra_get_index(&bm->high_low_container, xhigh); + if (high_idx < 0) return -1; + + for (int i = 0; i < bm->high_low_container.size; i++) { + uint32_t key = bm->high_low_container.keys[i]; + if (xhigh > key) { + index += + container_get_cardinality(bm->high_low_container.containers[i], + bm->high_low_container.typecodes[i]); + } else if (xhigh == key) { + int32_t low_idx = container_get_index( + bm->high_low_container.containers[high_idx], + bm->high_low_container.typecodes[high_idx], x & 0xFFFF); + if (low_idx < 0) return -1; + return index + low_idx; } else { - // the value is necessarily within the range of the container - it->container_index = i; - it->has_value = loadfirstvalue_largeorequal(it, val); - return it->has_value; + return -1; } - } else { - // there is no matching, so we are going for the next container - it->container_index = -i - 1; } - it->has_value = loadfirstvalue(it); - return it->has_value; + return index; } -bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it) { - if (it->container_index >= it->parent->high_low_container.size) { - return (it->has_value = false); - } - if (it->container_index < 0) { - it->container_index = 0; - return (it->has_value = loadfirstvalue(it)); - } - uint16_t low16 = (uint16_t)it->current_value; - if (container_iterator_next(it->container, it->typecode, &it->container_it, - &low16)) { - it->current_value = it->highbits | low16; - return (it->has_value = true); +/** + * roaring_bitmap_minimum returns the smallest value in the set. + * Returns UINT32_MAX if the set is empty. 
+ */ +uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { + if (bm->high_low_container.size > 0) { + container_t *c = bm->high_low_container.containers[0]; + uint8_t type = bm->high_low_container.typecodes[0]; + uint32_t key = bm->high_low_container.keys[0]; + uint32_t lowvalue = container_minimum(c, type); + return lowvalue | (key << 16); } - it->container_index++; - return (it->has_value = loadfirstvalue(it)); + return UINT32_MAX; } -bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it) { - if (it->container_index < 0) { - return (it->has_value = false); - } - if (it->container_index >= it->parent->high_low_container.size) { - it->container_index = it->parent->high_low_container.size - 1; - return (it->has_value = loadlastvalue(it)); - } - uint16_t low16 = (uint16_t)it->current_value; - if (container_iterator_prev(it->container, it->typecode, &it->container_it, - &low16)) { - it->current_value = it->highbits | low16; - return (it->has_value = true); +/** + * roaring_bitmap_maximum returns the greatest value in the set. + * Returns 0 if the set is empty. 
+ */ +uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { + if (bm->high_low_container.size > 0) { + container_t *container = + bm->high_low_container.containers[bm->high_low_container.size - 1]; + uint8_t typecode = + bm->high_low_container.typecodes[bm->high_low_container.size - 1]; + uint32_t key = + bm->high_low_container.keys[bm->high_low_container.size - 1]; + uint32_t lowvalue = container_maximum(container, typecode); + return lowvalue | (key << 16); } - it->container_index--; - return (it->has_value = loadlastvalue(it)); + return 0; } -uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it, - uint32_t *buf, uint32_t count) { - uint32_t ret = 0; - while (it->has_value && ret < count) { - uint32_t consumed; - uint16_t low16 = (uint16_t)it->current_value; - bool has_value = container_iterator_read_into_uint32( - it->container, it->typecode, &it->container_it, it->highbits, buf, - count - ret, &consumed, &low16); - ret += consumed; - buf += consumed; - if (has_value) { - it->has_value = true; - it->current_value = it->highbits | low16; - assert(ret == count); - return ret; - } - it->container_index++; - it->has_value = loadfirstvalue(it); +bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, + uint32_t *element) { + container_t *container; + uint8_t typecode; + uint16_t key; + uint32_t start_rank = 0; + int i = 0; + bool valid = false; + while (!valid && i < bm->high_low_container.size) { + container = bm->high_low_container.containers[i]; + typecode = bm->high_low_container.typecodes[i]; + valid = + container_select(container, typecode, &start_rank, rank, element); + i++; } - return ret; -} -void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it) { - roaring_free(it); + if (valid) { + key = bm->high_low_container.keys[i - 1]; + *element |= (((uint32_t)key) << 16); // w/o cast, key promotes signed + return true; + } else + return false; } -/**** - * end of roaring_uint32_iterator_t - *****/ +bool 
roaring_bitmap_intersect(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + uint64_t answer = 0; + int pos1 = 0, pos2 = 0; -bool roaring_bitmap_equals(const roaring_bitmap_t *r1, - const roaring_bitmap_t *r2) { - const roaring_array_t *ra1 = &r1->high_low_container; - const roaring_array_t *ra2 = &r2->high_low_container; + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = + ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + const uint16_t s2 = + ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - if (ra1->size != ra2->size) { + if (s1 == s2) { + uint8_t type1, type2; + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + if (container_intersect(c1, type1, c2, type2)) return true; + ++pos1; + ++pos2; + } else if (s1 < s2) { // s1 < s2 + pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + } + } + return answer != 0; +} + +bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x, + uint64_t y) { + if (x >= y) { + // Empty range. return false; } - for (int i = 0; i < ra1->size; ++i) { - if (ra1->keys[i] != ra2->keys[i]) { - return false; - } + roaring_uint32_iterator_t it; + roaring_iterator_init(bm, &it); + if (!roaring_uint32_iterator_move_equalorlarger(&it, (uint32_t)x)) { + // No values above x. + return false; } - for (int i = 0; i < ra1->size; ++i) { - bool areequal = container_equals(ra1->containers[i], ra1->typecodes[i], - ra2->containers[i], ra2->typecodes[i]); - if (!areequal) { - return false; - } + if (it.current_value >= y) { + // No values below y. 
+ return false; } return true; } -bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, - const roaring_bitmap_t *r2) { - const roaring_array_t *ra1 = &r1->high_low_container; - const roaring_array_t *ra2 = &r2->high_low_container; - - const int length1 = ra1->size, length2 = ra2->size; - +uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + uint64_t answer = 0; int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(ra1, (uint16_t)pos1); - const uint16_t s2 = ra_get_key_at_index(ra2, (uint16_t)pos2); + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = + ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + const uint16_t s2 = + ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); if (s1 == s2) { uint8_t type1, type2; - container_t *c1 = - ra_get_container_at_index(ra1, (uint16_t)pos1, &type1); - container_t *c2 = - ra_get_container_at_index(ra2, (uint16_t)pos2, &type2); - if (!container_is_subset(c1, type1, c2, type2)) return false; + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + answer += container_and_cardinality(c1, type1, c2, type2); ++pos1; ++pos2; } else if (s1 < s2) { // s1 < s2 - return false; + pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); } else { // s1 > s2 - pos2 = ra_advance_until(ra2, s1, pos2); + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); } } - if (pos1 == length1) - return true; - else - return false; + return answer; } -static void insert_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - const int j = 
ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; - if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); - flipped_container = - container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - container_free(flipped_container, ctype_out); - } - } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - } +double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const uint64_t c2 = roaring_bitmap_get_cardinality(x2); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return (double)inter / (double)(c1 + c2 - inter); } -static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; - if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); - flipped_container = container_inot_range( - container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - // if a new container was created, the old one was already freed - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); - } else { - container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } - - } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - 
ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); - } +uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const uint64_t c2 = roaring_bitmap_get_cardinality(x2); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return c1 + c2 - inter; } -static void insert_fully_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, - uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; - if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); - flipped_container = - container_not(container_to_flip, ctype_in, &ctype_out); - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - container_free(flipped_container, ctype_out); - } - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - } +uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return c1 - inter; } -static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - container_t *flipped_container = NULL; - if (i >= 0) { - container_t *container_to_flip = - ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in); - flipped_container = - container_inot(container_to_flip, ctype_in, &ctype_out); +uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 
= roaring_bitmap_get_cardinality(x1); + const uint64_t c2 = roaring_bitmap_get_cardinality(x2); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return c1 + c2 - 2 * inter; +} - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); - } else { - container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } +bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + /* + * the next function call involves a binary search and lots of branching. + */ + int32_t i = ra_get_index(&r->high_low_container, hb); + if (i < 0) return false; - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); - } + uint8_t typecode; + // next call ought to be cheap + container_t *container = ra_get_container_at_index(&r->high_low_container, + (uint16_t)i, &typecode); + // rest might be a tad expensive, possibly involving another round of binary + // search + return container_contains(container, val & 0xFFFF, typecode); } -roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, - uint64_t range_start, - uint64_t range_end) { +/** + * Check whether a range of values from range_start (included) to range_end + * (excluded) is present + */ +bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, + uint64_t range_start, uint64_t range_end) { if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { - return roaring_bitmap_copy(x1); + return true; } - return roaring_bitmap_flip_closed(x1, (uint32_t)range_start, - (uint32_t)(range_end - 1)); + return roaring_bitmap_contains_range_closed(r, (uint32_t)range_start, + (uint32_t)(range_end - 1)); } -roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, - uint32_t range_start, - uint32_t range_end) { +/** + * Check whether a range of 
values from range_start (included) to range_end + * (included) is present + */ +bool roaring_bitmap_contains_range_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end) { if (range_start > range_end) { - return roaring_bitmap_copy(x1); + return true; + } // empty range are always contained! + if (range_end == range_start) { + return roaring_bitmap_contains(r, (uint32_t)range_start); } - - roaring_bitmap_t *ans = roaring_bitmap_create(); - roaring_bitmap_set_copy_on_write(ans, is_cow(x1)); - - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; - uint16_t hb_end = (uint16_t)(range_end >> 16); - const uint16_t lb_end = (uint16_t)range_end; // & 0xFFFF; - - ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, - hb_start, is_cow(x1)); - if (hb_start == hb_end) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, - lb_start, 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. 
+ uint16_t hb_rs = (uint16_t)(range_start >> 16); + uint16_t hb_re = (uint16_t)(range_end >> 16); + const int32_t span = hb_re - hb_rs; + const int32_t hlc_sz = ra_get_size(&r->high_low_container); + if (hlc_sz < span + 1) { + return false; + } + int32_t is = ra_get_index(&r->high_low_container, hb_rs); + int32_t ie = ra_get_index(&r->high_low_container, hb_re); + if ((ie < 0) || (is < 0) || ((ie - is) != span) || ie >= hlc_sz) { + return false; + } + const uint32_t lb_rs = range_start & 0xFFFF; + const uint32_t lb_re = (range_end & 0xFFFF) + 1; + uint8_t type; + container_t *c = + ra_get_container_at_index(&r->high_low_container, (uint16_t)is, &type); + if (hb_rs == hb_re) { + return container_contains_range(c, lb_rs, lb_re, type); + } + if (!container_contains_range(c, lb_rs, 1 << 16, type)) { + return false; + } + c = ra_get_container_at_index(&r->high_low_container, (uint16_t)ie, &type); + if (!container_contains_range(c, 0, lb_re, type)) { + return false; + } + for (int32_t i = is + 1; i < ie; ++i) { + c = ra_get_container_at_index(&r->high_low_container, (uint16_t)i, + &type); + if (!container_is_full(c, type)) { + return false; } + } + return true; +} - if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block - - for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { - insert_fully_flipped_container(&ans->high_low_container, - &x1->high_low_container, - (uint16_t)hb); - } +bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2) { + return (roaring_bitmap_get_cardinality(r2) > + roaring_bitmap_get_cardinality(r1) && + roaring_bitmap_is_subset(r1, r2)); +} - // handle a partial final container - if (lb_end != 0xFFFF) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; +/* + * FROZEN SERIALIZATION FORMAT DESCRIPTION + * + * -- (beginning must be aligned by 32 bytes) -- + * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * + * num_bitset_containers]
<run_data> rle16_t[total number of rle elements in + * all run containers] <array_data> uint16_t[total number of array elements in + * all array containers] <keys> uint16_t[num_containers] <counts> + * uint16_t[num_containers] <typecodes> uint8_t[num_containers] <header>
+ * uint32_t + * + * <header>
is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits) + * and the number of containers (17 bits). + * + * <counts> stores number of elements for every container. + * Its meaning depends on container type. + * For array and bitset containers, this value is the container cardinality + * minus one. For run container, it is the number of rle_t elements (n_runs). + * + * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of + * all containers of respective type. + * + * <*_data> and <keys> are kept close together because they are not accessed + * during deserilization. This may reduce IO in case of large mmaped bitmaps. + * All members have their native alignments during deserilization except + * <header>
, which is not guaranteed to be aligned by 4 bytes. + */ + +size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) { + const roaring_array_t *ra = &rb->high_low_container; + size_t num_bytes = 0; + for (int32_t i = 0; i < ra->size; i++) { + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(ra->containers[i]); + num_bytes += rc->n_runs * sizeof(rle16_t); + break; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = + const_CAST_array(ra->containers[i]); + num_bytes += ac->cardinality * sizeof(uint16_t); + break; + } + default: + roaring_unreachable; } } - ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, - hb_end, is_cow(x1)); - return ans; + num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes + num_bytes += 4; // header + return num_bytes; } -void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { - return; - } - roaring_bitmap_flip_inplace_closed(x1, (uint32_t)range_start, - (uint32_t)(range_end - 1)); +inline static void *arena_alloc(char **arena, size_t num_bytes) { + char *res = *arena; + *arena += num_bytes; + return res; } -void roaring_bitmap_flip_inplace_closed(roaring_bitmap_t *x1, - uint32_t range_start, - uint32_t range_end) { - if (range_start > range_end) { - return; // empty range - } +void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) { + /* + * Note: we do not require user to supply a specifically aligned buffer. + * Thus we have to use memcpy() everywhere. 
+ */ - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; - uint16_t hb_end = (uint16_t)(range_end >> 16); - const uint16_t lb_end = (uint16_t)range_end; + const roaring_array_t *ra = &rb->high_low_container; - if (hb_start == hb_end) { - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. + size_t bitset_zone_size = 0; + size_t run_zone_size = 0; + size_t array_zone_size = 0; + for (int32_t i = 0; i < ra->size; i++) { + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + bitset_zone_size += + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(ra->containers[i]); + run_zone_size += rc->n_runs * sizeof(rle16_t); + break; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = + const_CAST_array(ra->containers[i]); + array_zone_size += ac->cardinality * sizeof(uint16_t); + break; + } + default: + roaring_unreachable; } + } - if (lb_end != 0xFFFF) --hb_end; + uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size); + rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size); + uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size); + uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size); + uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size); + uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size); + uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4); - for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { - inplace_fully_flip_container(&x1->high_low_container, (uint16_t)hb); - } - // handle a partial final container - if (lb_end != 0xFFFF) { - 
inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; + for (int32_t i = 0; i < ra->size; i++) { + uint16_t count; + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = + const_CAST_bitset(ra->containers[i]); + memcpy(bitset_zone, bc->words, + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); + bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; + if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) { + count = (uint16_t)(bc->cardinality - 1); + } else { + count = + (uint16_t)(bitset_container_compute_cardinality(bc) - + 1); + } + break; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(ra->containers[i]); + size_t num_bytes = rc->n_runs * sizeof(rle16_t); + memcpy(run_zone, rc->runs, num_bytes); + run_zone += rc->n_runs; + count = (uint16_t)rc->n_runs; + break; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = + const_CAST_array(ra->containers[i]); + size_t num_bytes = ac->cardinality * sizeof(uint16_t); + memcpy(array_zone, ac->array, num_bytes); + array_zone += ac->cardinality; + count = (uint16_t)(ac->cardinality - 1); + break; + } + default: + roaring_unreachable; } + memcpy(&count_zone[i], &count, 2); } + memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t)); + memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t)); + uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; + memcpy(header_zone, &header, 4); } -static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c, - uint8_t t) { - int size = ra_get_size(ra); - if (size == 0 || ra_get_key_at_index(ra, (uint16_t)(size - 1)) != k) { - // No merge. 
- ra_append(ra, (uint16_t)k, c, t); - return; +const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, + size_t length) { + if ((uintptr_t)buf % 32 != 0) { + return NULL; } - uint8_t last_t, new_t; - container_t *last_c, *new_c; - - // NOTE: we don't need to unwrap here, since we added last_c ourselves - // we have the certainty it's not a shared container. - // The same applies to c, as it's the result of calling container_offset. - last_c = ra_get_container_at_index(ra, (uint16_t)(size - 1), &last_t); - new_c = container_ior(last_c, last_t, c, t, &new_t); - - ra_set_container_at_index(ra, size - 1, new_c, new_t); - - // Comparison of pointers of different origin is UB (or so claim some - // compiler makers), so we compare their bit representation only. - if ((uintptr_t)last_c != (uintptr_t)new_c) { - container_free(last_c, last_t); + // cookie and num_containers + if (length < 4) { + return NULL; } - container_free(c, t); -} - -// roaring_bitmap_add_offset adds the value 'offset' to each and every value in -// a bitmap, generating a new bitmap in the process. If offset + element is -// outside of the range [0,2^32), that the element will be dropped. -// We need "offset" to be 64 bits because we want to support values -// between -0xFFFFFFFF up to +0xFFFFFFFF. 
-roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, - int64_t offset) { - roaring_bitmap_t *answer; - roaring_array_t *ans_ra; - int64_t container_offset; - uint16_t in_offset; + uint32_t header; + memcpy(&header, buf + length - 4, 4); // header may be misaligned + if ((header & 0x7FFF) != FROZEN_COOKIE) { + return NULL; + } + int32_t num_containers = (header >> 15); - const roaring_array_t *bm_ra = &bm->high_low_container; - int length = bm_ra->size; + // typecodes, counts and keys + if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { + return NULL; + } + uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); + uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); + uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); - if (offset == 0) { - return roaring_bitmap_copy(bm); + // {bitset,array,run}_zone + int32_t num_bitset_containers = 0; + int32_t num_run_containers = 0; + int32_t num_array_containers = 0; + size_t bitset_zone_size = 0; + size_t run_zone_size = 0; + size_t array_zone_size = 0; + for (int32_t i = 0; i < num_containers; i++) { + switch (typecodes[i]) { + case BITSET_CONTAINER_TYPE: + num_bitset_containers++; + bitset_zone_size += + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + case RUN_CONTAINER_TYPE: + num_run_containers++; + run_zone_size += counts[i] * sizeof(rle16_t); + break; + case ARRAY_CONTAINER_TYPE: + num_array_containers++; + array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); + break; + default: + return NULL; + } } + if (length != bitset_zone_size + run_zone_size + array_zone_size + + 5 * num_containers + 4) { + return NULL; + } + uint64_t *bitset_zone = (uint64_t *)(buf); + rle16_t *run_zone = (rle16_t *)(buf + bitset_zone_size); + uint16_t *array_zone = (uint16_t *)(buf + bitset_zone_size + run_zone_size); - container_offset = offset >> 16; - in_offset = (uint16_t)(offset - container_offset * (1 << 16)); - - answer = 
roaring_bitmap_create(); - bool cow = is_cow(bm); - roaring_bitmap_set_copy_on_write(answer, cow); - - ans_ra = &answer->high_low_container; - - if (in_offset == 0) { - ans_ra = &answer->high_low_container; + size_t alloc_size = 0; + alloc_size += sizeof(roaring_bitmap_t); + alloc_size += num_containers * sizeof(container_t *); + alloc_size += num_bitset_containers * sizeof(bitset_container_t); + alloc_size += num_run_containers * sizeof(run_container_t); + alloc_size += num_array_containers * sizeof(array_container_t); - for (int i = 0, j = 0; i < length; ++i) { - int64_t key = ra_get_key_at_index(bm_ra, (uint16_t)i); - key += container_offset; + char *arena = (char *)roaring_malloc(alloc_size); + if (arena == NULL) { + return NULL; + } - if (key < 0 || key >= (1 << 16)) { - continue; + roaring_bitmap_t *rb = + (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t)); + rb->high_low_container.flags = ROARING_FLAG_FROZEN; + rb->high_low_container.allocation_size = num_containers; + rb->high_low_container.size = num_containers; + rb->high_low_container.keys = (uint16_t *)keys; + rb->high_low_container.typecodes = (uint8_t *)typecodes; + rb->high_low_container.containers = (container_t **)arena_alloc( + &arena, sizeof(container_t *) * num_containers); + // Ensure offset of high_low_container.containers is known distance used in + // C++ wrapper. sizeof(roaring_bitmap_t) is used as it is the size of the + // only allocation that precedes high_low_container.containers. If this is + // changed (new allocation or changed order), this offset will also need to + // be changed in the C++ wrapper. 
+ assert(rb == + (roaring_bitmap_t *)((char *)rb->high_low_container.containers - + sizeof(roaring_bitmap_t))); + for (int32_t i = 0; i < num_containers; i++) { + switch (typecodes[i]) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *bitset = (bitset_container_t *)arena_alloc( + &arena, sizeof(bitset_container_t)); + bitset->words = bitset_zone; + bitset->cardinality = counts[i] + UINT32_C(1); + rb->high_low_container.containers[i] = bitset; + bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; + break; } - ra_append_copy(ans_ra, bm_ra, (uint16_t)i, cow); - ans_ra->keys[j++] = (uint16_t)key; + case RUN_CONTAINER_TYPE: { + run_container_t *run = (run_container_t *)arena_alloc( + &arena, sizeof(run_container_t)); + run->capacity = counts[i]; + run->n_runs = counts[i]; + run->runs = run_zone; + rb->high_low_container.containers[i] = run; + run_zone += run->n_runs; + break; + } + case ARRAY_CONTAINER_TYPE: { + array_container_t *array = (array_container_t *)arena_alloc( + &arena, sizeof(array_container_t)); + array->capacity = counts[i] + UINT32_C(1); + array->cardinality = counts[i] + UINT32_C(1); + array->array = array_zone; + rb->high_low_container.containers[i] = array; + array_zone += counts[i] + UINT32_C(1); + break; + } + default: + roaring_free(arena); + return NULL; } - return answer; } - uint8_t t; - const container_t *c; - container_t *lo, *hi, **lo_ptr, **hi_ptr; - int64_t k; - - for (int i = 0; i < length; ++i) { - lo = hi = NULL; - lo_ptr = hi_ptr = NULL; + return rb; +} - k = ra_get_key_at_index(bm_ra, (uint16_t)i) + container_offset; - if (k >= 0 && k < (1 << 16)) { - lo_ptr = &lo; - } - if (k + 1 >= 0 && k + 1 < (1 << 16)) { - hi_ptr = &hi; - } - if (lo_ptr == NULL && hi_ptr == NULL) { - continue; - } - c = ra_get_container_at_index(bm_ra, (uint16_t)i, &t); - c = container_unwrap_shared(c, &t); +ALLOW_UNALIGNED +roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) { + char *start_of_buf = (char *)buf; + uint32_t cookie; + 
int32_t num_containers; + uint16_t *descriptive_headers; + uint32_t *offset_headers = NULL; + const char *run_flag_bitset = NULL; + bool hasrun = false; - container_add_offset(c, t, lo_ptr, hi_ptr, in_offset); - if (lo != NULL) { - offset_append_with_merge(ans_ra, (int)k, lo, t); - } - if (hi != NULL) { - ra_append(ans_ra, (uint16_t)(k + 1), hi, t); + // deserialize cookie + memcpy(&cookie, buf, sizeof(uint32_t)); + buf += sizeof(uint32_t); + if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) { + memcpy(&num_containers, buf, sizeof(int32_t)); + buf += sizeof(int32_t); + descriptive_headers = (uint16_t *)buf; + buf += num_containers * 2 * sizeof(uint16_t); + offset_headers = (uint32_t *)buf; + buf += num_containers * sizeof(uint32_t); + } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) { + num_containers = (cookie >> 16) + 1; + hasrun = true; + int32_t run_flag_bitset_size = (num_containers + 7) / 8; + run_flag_bitset = buf; + buf += run_flag_bitset_size; + descriptive_headers = (uint16_t *)buf; + buf += num_containers * 2 * sizeof(uint16_t); + if (num_containers >= NO_OFFSET_THRESHOLD) { + offset_headers = (uint32_t *)buf; + buf += num_containers * sizeof(uint32_t); } - // the `lo` and `hi` container type always keep same as container `c`. - // in the case of `container_add_offset` on bitset container, `lo` and - // `hi` may has small cardinality, they must be repaired to array - // container. + } else { + return NULL; } - roaring_bitmap_repair_after_lazy(answer); // do required type conversions. 
- return answer; -} - -roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { - uint8_t result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c; - if (bitsetconversion && - (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) && - (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)) { - container_t *newc1 = - container_mutable_unwrap_shared(c1, &type1); - newc1 = container_to_bitset(newc1, type1); - type1 = BITSET_CONTAINER_TYPE; - c = container_lazy_ior(newc1, type1, c2, type2, &result_type); - if (c != newc1) { // should not happen - container_free(newc1, type1); - } - } else { - c = container_lazy_or(c1, type1, c2, type2, &result_type); - } - // since we assume that the initial containers are non-empty, - // the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + // calculate total size for allocation + int32_t num_bitset_containers = 0; + 
int32_t num_run_containers = 0; + int32_t num_array_containers = 0; - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); + for (int32_t i = 0; i < num_containers; i++) { + uint16_t tmp; + memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp)); + uint32_t cardinality = tmp + 1; + bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); + bool isrun = false; + if (hasrun) { + if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { + isbitmap = false; + isrun = true; } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + } - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + if (isbitmap) { + num_bitset_containers++; + } else if (isrun) { + num_run_containers++; + } else { + num_array_containers++; } } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; -} - -void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - if (0 == length2) 
return; + size_t alloc_size = 0; + alloc_size += sizeof(roaring_bitmap_t); + alloc_size += num_containers * sizeof(container_t *); + alloc_size += num_bitset_containers * sizeof(bitset_container_t); + alloc_size += num_run_containers * sizeof(run_container_t); + alloc_size += num_array_containers * sizeof(array_container_t); + alloc_size += num_containers * sizeof(uint16_t); // keys + alloc_size += num_containers * sizeof(uint8_t); // typecodes - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; + // allocate bitmap and construct containers + char *arena = (char *)roaring_malloc(alloc_size); + if (arena == NULL) { + return NULL; } - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - if (!container_is_full(c1, type1)) { - if ((bitsetconversion == false) || - (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)) { - c1 = get_writable_copy_if_shared(c1, &type1); - } else { - // convert to bitset - container_t *old_c1 = c1; - uint8_t old_type1 = type1; - c1 = container_mutable_unwrap_shared(c1, &type1); - c1 = container_to_bitset(c1, type1); - container_free(old_c1, old_type1); - type1 = BITSET_CONTAINER_TYPE; - } - - container_t *c2 = ra_get_container_at_index( - &x2->high_low_container, (uint16_t)pos2, &type2); - container_t *c = - container_lazy_ior(c1, type1, c2, type2, &result_type); - if (c != c1) { // in this instance a new container was created, - // and we need to free the old one - container_free(c1, type1); - } + roaring_bitmap_t *rb = + (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t)); + rb->high_low_container.flags = ROARING_FLAG_FROZEN; + rb->high_low_container.allocation_size = num_containers; + rb->high_low_container.size = 
num_containers; + rb->high_low_container.containers = (container_t **)arena_alloc( + &arena, sizeof(container_t *) * num_containers); - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + uint16_t *keys = + (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t)); + uint8_t *typecodes = + (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t)); - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + rb->high_low_container.keys = keys; + rb->high_low_container.typecodes = typecodes; - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - // container_t *c2_clone = container_clone(c2, type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); + for (int32_t i = 0; i < num_containers; i++) { + uint16_t tmp; + memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp)); + int32_t cardinality = tmp + 1; + bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); + bool isrun = false; + if (hasrun) { + if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { + isbitmap = false; + isrun = true; } - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } -} -roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t result_type = 0; - 
const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - container_t *c = - container_lazy_xor(c1, type1, c2, type2, &result_type); + keys[i] = descriptive_headers[2 * i]; - if (container_nonzero_cardinality(c, result_type)) { - ra_append(&answer->high_low_container, s1, c, result_type); + if (isbitmap) { + typecodes[i] = BITSET_CONTAINER_TYPE; + bitset_container_t *c = (bitset_container_t *)arena_alloc( + &arena, sizeof(bitset_container_t)); + c->cardinality = cardinality; + if (offset_headers != NULL) { + c->words = (uint64_t *)(start_of_buf + offset_headers[i]); } else { - container_free(c, result_type); + c->words = (uint64_t *)buf; + buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); } - - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - - } else if (s1 < s2) { // s1 < s2 - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - c1 = get_copy_of_container(c1, &type1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - type1); + rb->high_low_container.containers[i] = 
c; + } else if (isrun) { + typecodes[i] = RUN_CONTAINER_TYPE; + run_container_t *c = + (run_container_t *)arena_alloc(&arena, sizeof(run_container_t)); + c->capacity = cardinality; + uint16_t n_runs; + if (offset_headers != NULL) { + memcpy(&n_runs, start_of_buf + offset_headers[i], + sizeof(uint16_t)); + c->n_runs = n_runs; + c->runs = (rle16_t *)(start_of_buf + offset_headers[i] + + sizeof(uint16_t)); + } else { + memcpy(&n_runs, buf, sizeof(uint16_t)); + c->n_runs = n_runs; + buf += sizeof(uint16_t); + c->runs = (rle16_t *)buf; + buf += c->n_runs * sizeof(rle16_t); } - ra_append(&answer->high_low_container, s1, c1, type1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); + rb->high_low_container.containers[i] = c; + } else { + typecodes[i] = ARRAY_CONTAINER_TYPE; + array_container_t *c = (array_container_t *)arena_alloc( + &arena, sizeof(array_container_t)); + c->cardinality = cardinality; + c->capacity = cardinality; + if (offset_headers != NULL) { + c->array = (uint16_t *)(start_of_buf + offset_headers[i]); + } else { + c->array = (uint16_t *)buf; + buf += cardinality * sizeof(uint16_t); } - ra_append(&answer->high_low_container, s2, c2, type2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + rb->high_low_container.containers[i] = c; + } + } + + return rb; +} + +bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { + uint32_t max_value = roaring_bitmap_maximum(r); + size_t new_array_size = (size_t)(max_value / 64 + 1); + bool resize_ok = bitset_resize(bitset, new_array_size, true); + if (!resize_ok) { + return false; + } + const roaring_array_t *ra = 
&r->high_low_container; + for (int i = 0; i < ra->size; ++i) { + uint64_t *words = bitset->array + (ra->keys[i] << 10); + uint8_t type = ra->typecodes[i]; + const container_t *c = ra->containers[i]; + if (type == SHARED_CONTAINER_TYPE) { + c = container_unwrap_shared(c, &type); + } + switch (type) { + case BITSET_CONTAINER_TYPE: { + size_t max_word_index = new_array_size - (ra->keys[i] << 10); + if (max_word_index > 1024) { + max_word_index = 1024; + } + const bitset_container_t *src = const_CAST_bitset(c); + memcpy(words, src->words, max_word_index * sizeof(uint64_t)); + } break; + case ARRAY_CONTAINER_TYPE: { + const array_container_t *src = const_CAST_array(c); + bitset_set_list(words, src->array, src->cardinality); + } break; + case RUN_CONTAINER_TYPE: { + const run_container_t *src = const_CAST_run(c); + for (int32_t rlepos = 0; rlepos < src->n_runs; ++rlepos) { + rle16_t rle = src->runs[rlepos]; + bitset_set_lenrange(words, rle.value, rle.length); + } + } break; + default: + roaring_unreachable; } } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; + return true; } -void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; +#ifdef __cplusplus +} +} +} // extern "C" { namespace roaring { +#endif +/* end file src/roaring.c */ +/* begin file src/roaring64.c */ +#include +#include +#include +#include +#include - if (0 == length2) return; - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t type1, type2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - uint16_t s2 = 
ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - while (true) { - if (s1 == s2) { - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); +// For serialization / deserialization +// containers.h last to avoid conflict with ROARING_CONTAINER_T. - // We do the computation "in place" only when c1 is not a shared - // container. Rationale: using a shared container safely with in - // place computation would require making a copy and then doing the - // computation in place which is likely less efficient than avoiding - // in place entirely and always generating a new container. +#define CROARING_ALIGN_BUF(buf, alignment) \ + (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1))) - container_t *c; - if (type1 == SHARED_CONTAINER_TYPE) { - c = container_lazy_xor(c1, type1, c2, type2, &result_type); - shared_container_free(CAST_shared(c1)); // release - } else { - c = container_lazy_ixor(c1, type1, c2, type2, &result_type); - } +#define CROARING_BITSET_ALIGNMENT 64 - if (container_nonzero_cardinality(c, result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - result_type); - ++pos1; - } else { - container_free(c, result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); +#ifdef __cplusplus +using namespace ::roaring::internal; - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); +extern "C" { +namespace roaring { +namespace api { +#endif - } else { // s1 > s2 - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, 
&type2); - // container_t *c2_clone = container_clone(c2, type2); - c2 = get_copy_of_container(c2, &type2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - type2); - } - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - type2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } -} +// TODO: Copy on write. +// TODO: Error on failed allocation. -void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) { - roaring_array_t *ra = &r->high_low_container; +typedef struct roaring64_bitmap_s { + art_t art; + uint8_t flags; + uint64_t first_free; + uint64_t capacity; + container_t **containers; +} roaring64_bitmap_t; - for (int i = 0; i < ra->size; ++i) { - const uint8_t old_type = ra->typecodes[i]; - container_t *old_c = ra->containers[i]; - uint8_t new_type = old_type; - container_t *new_c = container_repair_after_lazy(old_c, &new_type); - ra->containers[i] = new_c; - ra->typecodes[i] = new_type; - } +// Leaf type of the ART used to keep the high 48 bits of each entry. +// Low 8 bits: typecode +// High 56 bits: container index +typedef roaring64_leaf_t leaf_t; + +// Iterator struct to hold iteration state. +typedef struct roaring64_iterator_s { + const roaring64_bitmap_t *r; + art_iterator_t art_it; + roaring_container_iterator_t container_it; + uint64_t high48; // Key that art_it points to. + + uint64_t value; + bool has_value; + + // If has_value is false, then the iterator is saturated. This field + // indicates the direction of saturation. If true, there are no more values + // in the forward direction. If false, there are no more values in the + // backward direction. 
+ bool saturated_forward; +} roaring64_iterator_t; + +static inline bool is_frozen64(const roaring64_bitmap_t *r) { + return r->flags & ROARING_FLAG_FROZEN; } -/** - * roaring_bitmap_rank returns the number of integers that are smaller or equal - * to x. - */ -uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { - uint64_t size = 0; - uint32_t xhigh = x >> 16; - for (int i = 0; i < bm->high_low_container.size; i++) { - uint32_t key = bm->high_low_container.keys[i]; - if (xhigh > key) { - size += - container_get_cardinality(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i]); - } else if (xhigh == key) { - return size + container_rank(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i], - x & 0xFFFF); - } else { - return size; - } - } - return size; +// Splits the given uint64 key into high 48 bit and low 16 bit components. +// Expects high48_out to be of length ART_KEY_BYTES. +static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { + uint64_t tmp = croaring_htobe64(key); + memcpy(high48_out, (uint8_t *)(&tmp), ART_KEY_BYTES); + return (uint16_t)key; } -void roaring_bitmap_rank_many(const roaring_bitmap_t *bm, const uint32_t *begin, - const uint32_t *end, uint64_t *ans) { - uint64_t size = 0; - int i = 0; - const uint32_t *iter = begin; - while (i < bm->high_low_container.size && iter != end) { - uint32_t x = *iter; - uint32_t xhigh = x >> 16; - uint32_t key = bm->high_low_container.keys[i]; - if (xhigh > key) { - size += - container_get_cardinality(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i]); - i++; - } else if (xhigh == key) { - uint32_t consumed = container_rank_many( - bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i], size, iter, end, ans); - iter += consumed; - ans += consumed; - } else { - *(ans++) = size; - iter++; - } - } +// Recombines the high 48 bit and low 16 bit components into a uint64 key. 
+// Expects high48_out to be of length ART_KEY_BYTES. +static inline uint64_t combine_key(const uint8_t high48[], uint16_t low16) { + uint64_t result = 0; + memcpy((uint8_t *)(&result), high48, ART_KEY_BYTES); + return croaring_be64toh(result) | low16; } -/** - * roaring_bitmap_get_index returns the index of x, if not exsist return -1. - */ -int64_t roaring_bitmap_get_index(const roaring_bitmap_t *bm, uint32_t x) { - int64_t index = 0; - const uint16_t xhigh = x >> 16; - int32_t high_idx = ra_get_index(&bm->high_low_container, xhigh); - if (high_idx < 0) return -1; +static inline uint64_t minimum(uint64_t a, uint64_t b) { + return (a < b) ? a : b; +} - for (int i = 0; i < bm->high_low_container.size; i++) { - uint32_t key = bm->high_low_container.keys[i]; - if (xhigh > key) { - index += - container_get_cardinality(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i]); - } else if (xhigh == key) { - int32_t low_idx = container_get_index( - bm->high_low_container.containers[high_idx], - bm->high_low_container.typecodes[high_idx], x & 0xFFFF); - if (low_idx < 0) return -1; - return index + low_idx; - } else { - return -1; - } - } - return index; +static inline leaf_t create_leaf(uint64_t container_index, uint8_t typecode) { + return (container_index << 8) | typecode; +} + +static inline uint8_t get_typecode(leaf_t leaf) { return (uint8_t)leaf; } + +static inline uint64_t get_index(leaf_t leaf) { return leaf >> 8; } + +static inline container_t *get_container(const roaring64_bitmap_t *r, + leaf_t leaf) { + return r->containers[get_index(leaf)]; } -/** - * roaring_bitmap_smallest returns the smallest value in the set. - * Returns UINT32_MAX if the set is empty. 
- */ -uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - container_t *c = bm->high_low_container.containers[0]; - uint8_t type = bm->high_low_container.typecodes[0]; - uint32_t key = bm->high_low_container.keys[0]; - uint32_t lowvalue = container_minimum(c, type); - return lowvalue | (key << 16); - } - return UINT32_MAX; +// Replaces the container of `leaf` with the given container. Returns the +// modified leaf for convenience. +static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, + container_t *container, + uint8_t typecode) { + uint64_t index = get_index(*leaf); + r->containers[index] = container; + *leaf = create_leaf(index, typecode); + return *leaf; } /** - * roaring_bitmap_smallest returns the greatest value in the set. - * Returns 0 if the set is empty. + * Extends the array of container pointers. */ -uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - container_t *container = - bm->high_low_container.containers[bm->high_low_container.size - 1]; - uint8_t typecode = - bm->high_low_container.typecodes[bm->high_low_container.size - 1]; - uint32_t key = - bm->high_low_container.keys[bm->high_low_container.size - 1]; - uint32_t lowvalue = container_maximum(container, typecode); - return lowvalue | (key << 16); +static void extend_containers(roaring64_bitmap_t *r) { + uint64_t size = r->first_free; + if (size < r->capacity) { + return; } - return 0; + uint64_t new_capacity; + if (r->capacity == 0) { + new_capacity = 2; + } else if (r->capacity < 1024) { + new_capacity = 2 * r->capacity; + } else { + new_capacity = 5 * r->capacity / 4; + } + uint64_t increase = new_capacity - r->capacity; + r->containers = + roaring_realloc(r->containers, new_capacity * sizeof(container_t *)); + memset(r->containers + r->capacity, 0, increase * sizeof(container_t *)); + r->capacity = new_capacity; } -bool roaring_bitmap_select(const roaring_bitmap_t 
*bm, uint32_t rank, - uint32_t *element) { - container_t *container; - uint8_t typecode; - uint16_t key; - uint32_t start_rank = 0; - int i = 0; - bool valid = false; - while (!valid && i < bm->high_low_container.size) { - container = bm->high_low_container.containers[i]; - typecode = bm->high_low_container.typecodes[i]; - valid = - container_select(container, typecode, &start_rank, rank, element); - i++; +static uint64_t next_free_container_idx(const roaring64_bitmap_t *r) { + for (uint64_t i = r->first_free + 1; i < r->capacity; ++i) { + if (r->containers[i] == NULL) { + return i; + } } - - if (valid) { - key = bm->high_low_container.keys[i - 1]; - *element |= (((uint32_t)key) << 16); // w/o cast, key promotes signed - return true; - } else - return false; + return r->capacity; } -bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; +static uint64_t allocate_index(roaring64_bitmap_t *r) { + uint64_t first_free = r->first_free; + if (first_free == r->capacity) { + extend_containers(r); + } + r->first_free = next_free_container_idx(r); + return first_free; +} - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = - ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - const uint16_t s2 = - ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); +static leaf_t add_container(roaring64_bitmap_t *r, container_t *container, + uint8_t typecode) { + uint64_t index = allocate_index(r); + r->containers[index] = container; + return create_leaf(index, typecode); +} - if (s1 == s2) { - uint8_t type1, type2; - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - if (container_intersect(c1, type1, c2, type2)) return true; - ++pos1; - 
++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } +static void remove_container(roaring64_bitmap_t *r, leaf_t leaf) { + uint64_t index = get_index(leaf); + r->containers[index] = NULL; + if (index < r->first_free) { + r->first_free = index; } - return answer != 0; } -bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x, - uint64_t y) { - if (x >= y) { - // Empty range. - return false; - } - roaring_uint32_iterator_t it; - roaring_iterator_init(bm, &it); - if (!roaring_uint32_iterator_move_equalorlarger(&it, (uint32_t)x)) { - // No values above x. - return false; - } - if (it.current_value >= y) { - // No values below y. - return false; - } - return true; +// Copies the container referenced by `leaf` from `r1` to `r2`. +static inline leaf_t copy_leaf_container(const roaring64_bitmap_t *r1, + roaring64_bitmap_t *r2, leaf_t leaf) { + uint8_t typecode = get_typecode(leaf); + // get_copy_of_container modifies the typecode passed in. 
+ container_t *container = get_copy_of_container( + get_container(r1, leaf), &typecode, /*copy_on_write=*/false); + return add_container(r2, container, typecode); } -uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = - ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); - const uint16_t s2 = - ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); +static inline int compare_high48(art_key_chunk_t key1[], + art_key_chunk_t key2[]) { + return art_compare_keys(key1, key2); +} - if (s1 == s2) { - uint8_t type1, type2; - container_t *c1 = ra_get_container_at_index(&x1->high_low_container, - (uint16_t)pos1, &type1); - container_t *c2 = ra_get_container_at_index(&x2->high_low_container, - (uint16_t)pos2, &type2); - answer += container_and_cardinality(c1, type1, c2, type2); - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - return answer; +static inline bool roaring64_iterator_init_at_leaf_first( + roaring64_iterator_t *it) { + it->high48 = combine_key(it->art_it.key, 0); + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = 0; + it->container_it = container_init_iterator(get_container(it->r, leaf), + get_typecode(leaf), &low16); + it->value = it->high48 | low16; + return (it->has_value = true); } -double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return (double)inter / (double)(c1 + c2 - inter); +static inline 
bool roaring64_iterator_init_at_leaf_last( + roaring64_iterator_t *it) { + it->high48 = combine_key(it->art_it.key, 0); + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = 0; + it->container_it = container_init_iterator_last(get_container(it->r, leaf), + get_typecode(leaf), &low16); + it->value = it->high48 | low16; + return (it->has_value = true); } -uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - inter; +static inline roaring64_iterator_t *roaring64_iterator_init_at( + const roaring64_bitmap_t *r, roaring64_iterator_t *it, bool first) { + it->r = r; + it->art_it = art_init_iterator((art_t *)&r->art, first); + it->has_value = it->art_it.value != NULL; + if (it->has_value) { + if (first) { + roaring64_iterator_init_at_leaf_first(it); + } else { + roaring64_iterator_init_at_leaf_last(it); + } + } else { + it->saturated_forward = first; + } + return it; } -uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 - inter; +roaring64_bitmap_t *roaring64_bitmap_create(void) { + roaring64_bitmap_t *r = + (roaring64_bitmap_t *)roaring_malloc(sizeof(roaring64_bitmap_t)); + art_init_cleared(&r->art); + r->flags = 0; + r->capacity = 0; + r->first_free = 0; + r->containers = NULL; + return r; } -uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - 2 * inter; +void 
roaring64_bitmap_free(roaring64_bitmap_t *r) { + if (!r) { + return; + } + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + if (is_frozen64(r)) { + // Only free the container itself, not the buffer-backed contents + // within. + roaring_free(get_container(r, leaf)); + } else { + container_free(get_container(r, leaf), get_typecode(leaf)); + } + art_iterator_next(&it); + } + if (!is_frozen64(r)) { + art_free(&r->art); + } + roaring_free(r->containers); + roaring_free(r); } -bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - /* - * the next function call involves a binary search and lots of branching. - */ - int32_t i = ra_get_index(&r->high_low_container, hb); - if (i < 0) return false; +roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); - uint8_t typecode; - // next call ought to be cheap - container_t *container = ra_get_container_at_index(&r->high_low_container, - (uint16_t)i, &typecode); - // rest might be a tad expensive, possibly involving another round of binary - // search - return container_contains(container, val & 0xFFFF, typecode); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t result_typecode = get_typecode(leaf); + container_t *result_container = get_copy_of_container( + get_container(r, leaf), &result_typecode, /*copy_on_write=*/false); + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it.key, (art_val_t)result_leaf); + art_iterator_next(&it); + } + return result; } /** - * Check whether a range of values from range_start (included) to range_end - * (excluded) is present + * Steal the containers from a 32-bit bitmap and insert them into a 64-bit + * bitmap (with an offset) + * + * 
After calling this function, the original bitmap will be empty, and the + * returned bitmap will contain all the values from the original bitmap. */ -bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, - uint64_t range_start, uint64_t range_end) { - if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { - return true; +static void move_from_roaring32_offset(roaring64_bitmap_t *dst, + roaring_bitmap_t *src, + uint32_t high_bits) { + uint64_t key_base = ((uint64_t)high_bits) << 32; + uint32_t r32_size = ra_get_size(&src->high_low_container); + for (uint32_t i = 0; i < r32_size; ++i) { + uint16_t key = ra_get_key_at_index(&src->high_low_container, i); + uint8_t typecode; + container_t *container = ra_get_container_at_index( + &src->high_low_container, (uint16_t)i, &typecode); + + uint8_t high48[ART_KEY_BYTES]; + uint64_t high48_bits = key_base | ((uint64_t)key << 16); + split_key(high48_bits, high48); + leaf_t leaf = add_container(dst, container, typecode); + art_insert(&dst->art, high48, (art_val_t)leaf); } - return roaring_bitmap_contains_range_closed(r, (uint32_t)range_start, - (uint32_t)(range_end - 1)); + // We stole all the containers, so leave behind a size of zero + src->high_low_container.size = 0; } -/** - * Check whether a range of values from range_start (included) to range_end - * (included) is present - */ -bool roaring_bitmap_contains_range_closed(const roaring_bitmap_t *r, - uint32_t range_start, - uint32_t range_end) { - if (range_start > range_end) { - return true; - } // empty range are always contained! 
- if (range_end == range_start) { - return roaring_bitmap_contains(r, (uint32_t)range_start); - } - uint16_t hb_rs = (uint16_t)(range_start >> 16); - uint16_t hb_re = (uint16_t)(range_end >> 16); - const int32_t span = hb_re - hb_rs; - const int32_t hlc_sz = ra_get_size(&r->high_low_container); - if (hlc_sz < span + 1) { - return false; - } - int32_t is = ra_get_index(&r->high_low_container, hb_rs); - int32_t ie = ra_get_index(&r->high_low_container, hb_re); - if ((ie < 0) || (is < 0) || ((ie - is) != span) || ie >= hlc_sz) { - return false; - } - const uint32_t lb_rs = range_start & 0xFFFF; - const uint32_t lb_re = (range_end & 0xFFFF) + 1; - uint8_t type; - container_t *c = - ra_get_container_at_index(&r->high_low_container, (uint16_t)is, &type); - if (hb_rs == hb_re) { - return container_contains_range(c, lb_rs, lb_re, type); - } - if (!container_contains_range(c, lb_rs, 1 << 16, type)) { - return false; - } - c = ra_get_container_at_index(&r->high_low_container, (uint16_t)ie, &type); - if (!container_contains_range(c, 0, lb_re, type)) { - return false; +roaring64_bitmap_t *roaring64_bitmap_move_from_roaring32( + roaring_bitmap_t *bitmap32) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); + + move_from_roaring32_offset(result, bitmap32, 0); + + return result; +} + +roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, + uint64_t step) { + if (step == 0 || max <= min) { + return NULL; } - for (int32_t i = is + 1; i < ie; ++i) { - c = ra_get_container_at_index(&r->high_low_container, (uint16_t)i, - &type); - if (!container_is_full(c, type)) { - return false; + roaring64_bitmap_t *r = roaring64_bitmap_create(); + if (step >= (1 << 16)) { + // Only one value per container. 
+ for (uint64_t value = min; value < max; value += step) { + roaring64_bitmap_add(r, value); + if (value > UINT64_MAX - step) { + break; + } } + return r; } - return true; -} + do { + uint64_t high_bits = min & 0xFFFFFFFFFFFF0000; + uint16_t container_min = min & 0xFFFF; + uint32_t container_max = (uint32_t)minimum(max - high_bits, 1 << 16); -bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, - const roaring_bitmap_t *r2) { - return (roaring_bitmap_get_cardinality(r2) > - roaring_bitmap_get_cardinality(r1) && - roaring_bitmap_is_subset(r1, r2)); + uint8_t typecode; + container_t *container = container_from_range( + &typecode, container_min, container_max, (uint16_t)step); + + uint8_t high48[ART_KEY_BYTES]; + split_key(min, high48); + leaf_t leaf = add_container(r, container, typecode); + art_insert(&r->art, high48, (art_val_t)leaf); + + uint64_t gap = container_max - container_min + step - 1; + uint64_t increment = gap - (gap % step); + if (min > UINT64_MAX - increment) { + break; + } + min += increment; + } while (min < max); + return r; } -/* - * FROZEN SERIALIZATION FORMAT DESCRIPTION - * - * -- (beginning must be aligned by 32 bytes) -- - * uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * - * num_bitset_containers] rle16_t[total number of rle elements in - * all run containers] uint16_t[total number of array elements in - * all array containers] uint16_t[num_containers] - * uint16_t[num_containers] uint8_t[num_containers]
- * uint32_t - * - *
is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits) - * and the number of containers (17 bits). - * - * stores number of elements for every container. - * Its meaning depends on container type. - * For array and bitset containers, this value is the container cardinality - * minus one. For run container, it is the number of rle_t elements (n_runs). - * - * ,, are flat arrays of elements of - * all containers of respective type. - * - * <*_data> and are kept close together because they are not accessed - * during deserilization. This may reduce IO in case of large mmaped bitmaps. - * All members have their native alignments during deserilization except - *
, which is not guaranteed to be aligned by 4 bytes. - */ +roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, + const uint64_t *vals) { + roaring64_bitmap_t *r = roaring64_bitmap_create(); + roaring64_bitmap_add_many(r, n_args, vals); + return r; +} -size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) { - const roaring_array_t *ra = &rb->high_low_container; - size_t num_bytes = 0; - for (int32_t i = 0; i < ra->size; i++) { - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(ra->containers[i]); - num_bytes += rc->n_runs * sizeof(rle16_t); - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = - const_CAST_array(ra->containers[i]); - num_bytes += ac->cardinality * sizeof(uint16_t); - break; - } - default: - roaring_unreachable; +static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, + uint8_t *high48, + uint16_t low16, + leaf_t *leaf) { + if (leaf != NULL) { + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); + uint8_t typecode2; + container_t *container2 = + container_add(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + replace_container(r, leaf, container2, typecode2); } + return leaf; + } else { + array_container_t *ac = array_container_create(); + uint8_t typecode; + container_t *container = + container_add(ac, low16, ARRAY_CONTAINER_TYPE, &typecode); + assert(ac == container); + leaf_t new_leaf = add_container(r, container, typecode); + return (leaf_t *)art_insert(&r->art, high48, (art_val_t)new_leaf); } - num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes - num_bytes += 4; // header - return num_bytes; } -inline static void *arena_alloc(char **arena, size_t num_bytes) { - char *res = *arena; - *arena += 
num_bytes; - return res; +void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + containerptr_roaring64_bitmap_add(r, high48, low16, leaf); } -void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) { - /* - * Note: we do not require user to supply a specifically aligned buffer. - * Thus we have to use memcpy() everywhere. - */ +bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - const roaring_array_t *ra = &rb->high_low_container; + int old_cardinality = 0; + if (leaf != NULL) { + old_cardinality = container_get_cardinality(get_container(r, *leaf), + get_typecode(*leaf)); + } + leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); + int new_cardinality = + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + return old_cardinality != new_cardinality; +} - size_t bitset_zone_size = 0; - size_t run_zone_size = 0; - size_t array_zone_size = 0; - for (int32_t i = 0; i < ra->size; i++) { - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - bitset_zone_size += - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(ra->containers[i]); - run_zone_size += rc->n_runs * sizeof(rle16_t); - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = - const_CAST_array(ra->containers[i]); - array_zone_size += ac->cardinality * sizeof(uint16_t); - break; - } - default: - roaring_unreachable; +void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = context->leaf; + if (leaf != NULL && 
compare_high48(context->high_bytes, high48) == 0) { + // We're at a container with the correct high bits. + uint8_t typecode1 = get_typecode(*leaf); + container_t *container1 = get_container(r, *leaf); + uint8_t typecode2; + container_t *container2 = + container_add(container1, low16, typecode1, &typecode2); + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r, leaf, container2, typecode2); } + } else { + // We're not positioned anywhere yet or the high bits of the key + // differ. + leaf = (leaf_t *)art_find(&r->art, high48); + context->leaf = + containerptr_roaring64_bitmap_add(r, high48, low16, leaf); + memcpy(context->high_bytes, high48, ART_KEY_BYTES); } +} - uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size); - rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size); - uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size); - uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size); - uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size); - uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size); - uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4); +void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, + const uint64_t *vals) { + if (n_args == 0) { + return; + } + const uint64_t *end = vals + n_args; + roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; + for (const uint64_t *current_val = vals; current_val != end; + current_val++) { + roaring64_bitmap_add_bulk(r, &context, *current_val); + } +} - for (int32_t i = 0; i < ra->size; i++) { - uint16_t count; - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - const bitset_container_t *bc = - const_CAST_bitset(ra->containers[i]); - memcpy(bitset_zone, bc->words, - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; - if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) { - count = (uint16_t)(bc->cardinality - 
1); - } else { - count = - (uint16_t)(bitset_container_compute_cardinality(bc) - - 1); - } - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(ra->containers[i]); - size_t num_bytes = rc->n_runs * sizeof(rle16_t); - memcpy(run_zone, rc->runs, num_bytes); - run_zone += rc->n_runs; - count = (uint16_t)rc->n_runs; - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = - const_CAST_array(ra->containers[i]); - size_t num_bytes = ac->cardinality * sizeof(uint16_t); - memcpy(array_zone, ac->array, num_bytes); - array_zone += ac->cardinality; - count = (uint16_t)(ac->cardinality - 1); - break; - } - default: - roaring_unreachable; +static inline void add_range_closed_at(roaring64_bitmap_t *r, art_t *art, + uint8_t *high48, uint16_t min, + uint16_t max) { + leaf_t *leaf = (leaf_t *)art_find(art, high48); + if (leaf != NULL) { + uint8_t typecode1 = get_typecode(*leaf); + container_t *container1 = get_container(r, *leaf); + uint8_t typecode2; + container_t *container2 = + container_add_range(container1, typecode1, min, max, &typecode2); + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r, leaf, container2, typecode2); } - memcpy(&count_zone[i], &count, 2); + return; } - memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t)); - memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t)); - uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; - memcpy(header_zone, &header, 4); + uint8_t typecode; + // container_add_range is inclusive, but `container_range_of_ones` is + // exclusive. 
+ container_t *container = container_range_of_ones(min, max + 1, &typecode); + leaf_t new_leaf = add_container(r, container, typecode); + art_insert(art, high48, (art_val_t)new_leaf); } -const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, - size_t length) { - if ((uintptr_t)buf % 32 != 0) { - return NULL; +void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min >= max) { + return; } + roaring64_bitmap_add_range_closed(r, min, max - 1); +} - // cookie and num_containers - if (length < 4) { - return NULL; +void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min > max) { + return; } - uint32_t header; - memcpy(&header, buf + length - 4, 4); // header may be misaligned - if ((header & 0x7FFF) != FROZEN_COOKIE) { - return NULL; + + art_t *art = &r->art; + uint8_t min_high48[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48); + uint8_t max_high48[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48); + if (compare_high48(min_high48, max_high48) == 0) { + // Only populate range within one container. + add_range_closed_at(r, art, min_high48, min_low16, max_low16); + return; } - int32_t num_containers = (header >> 15); - // typecodes, counts and keys - if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { - return NULL; + // Populate a range across containers. Fill intermediate containers + // entirely. 
+ add_range_closed_at(r, art, min_high48, min_low16, 0xffff); + uint64_t min_high_bits = min >> 16; + uint64_t max_high_bits = max >> 16; + for (uint64_t current = min_high_bits + 1; current < max_high_bits; + ++current) { + uint8_t current_high48[ART_KEY_BYTES]; + split_key(current << 16, current_high48); + add_range_closed_at(r, art, current_high48, 0, 0xffff); } - uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); - uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); - uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); + add_range_closed_at(r, art, max_high48, 0, max_low16); +} - // {bitset,array,run}_zone - int32_t num_bitset_containers = 0; - int32_t num_run_containers = 0; - int32_t num_array_containers = 0; - size_t bitset_zone_size = 0; - size_t run_zone_size = 0; - size_t array_zone_size = 0; - for (int32_t i = 0; i < num_containers; i++) { - switch (typecodes[i]) { - case BITSET_CONTAINER_TYPE: - num_bitset_containers++; - bitset_zone_size += - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - case RUN_CONTAINER_TYPE: - num_run_containers++; - run_zone_size += counts[i] * sizeof(rle16_t); - break; - case ARRAY_CONTAINER_TYPE: - num_array_containers++; - array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); - break; - default: - return NULL; - } +bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + if (leaf != NULL) { + return container_contains(get_container(r, *leaf), low16, + get_typecode(*leaf)); } - if (length != bitset_zone_size + run_zone_size + array_zone_size + - 5 * num_containers + 4) { - return NULL; + return false; +} + +bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min >= max) { + return true; } - uint64_t *bitset_zone = (uint64_t *)(buf); - rle16_t 
*run_zone = (rle16_t *)(buf + bitset_zone_size); - uint16_t *array_zone = (uint16_t *)(buf + bitset_zone_size + run_zone_size); - size_t alloc_size = 0; - alloc_size += sizeof(roaring_bitmap_t); - alloc_size += num_containers * sizeof(container_t *); - alloc_size += num_bitset_containers * sizeof(bitset_container_t); - alloc_size += num_run_containers * sizeof(run_container_t); - alloc_size += num_array_containers * sizeof(array_container_t); + uint8_t min_high48[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48); + uint8_t max_high48[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48); + uint64_t max_high48_bits = (max - 1) & 0xFFFFFFFFFFFF0000; // Inclusive - char *arena = (char *)roaring_malloc(alloc_size); - if (arena == NULL) { - return NULL; + art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); + if (it.value == NULL || combine_key(it.key, 0) > min) { + return false; } + uint64_t prev_high48_bits = min & 0xFFFFFFFFFFFF0000; + while (it.value != NULL) { + uint64_t current_high48_bits = combine_key(it.key, 0); + if (current_high48_bits > max_high48_bits) { + // We've passed the end of the range with all containers containing + // the range. + return true; + } + if (current_high48_bits - prev_high48_bits > 0x10000) { + // There is a gap in the iterator that falls in the range. + return false; + } - roaring_bitmap_t *rb = - (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t)); - rb->high_low_container.flags = ROARING_FLAG_FROZEN; - rb->high_low_container.allocation_size = num_containers; - rb->high_low_container.size = num_containers; - rb->high_low_container.keys = (uint16_t *)keys; - rb->high_low_container.typecodes = (uint8_t *)typecodes; - rb->high_low_container.containers = (container_t **)arena_alloc( - &arena, sizeof(container_t *) * num_containers); - // Ensure offset of high_low_container.containers is known distance used in - // C++ wrapper. 
sizeof(roaring_bitmap_t) is used as it is the size of the - // only allocation that precedes high_low_container.containers. If this is - // changed (new allocation or changed order), this offset will also need to - // be changed in the C++ wrapper. - assert(rb == - (roaring_bitmap_t *)((char *)rb->high_low_container.containers - - sizeof(roaring_bitmap_t))); - for (int32_t i = 0; i < num_containers; i++) { - switch (typecodes[i]) { - case BITSET_CONTAINER_TYPE: { - bitset_container_t *bitset = (bitset_container_t *)arena_alloc( - &arena, sizeof(bitset_container_t)); - bitset->words = bitset_zone; - bitset->cardinality = counts[i] + UINT32_C(1); - rb->high_low_container.containers[i] = bitset; - bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; - break; - } - case RUN_CONTAINER_TYPE: { - run_container_t *run = (run_container_t *)arena_alloc( - &arena, sizeof(run_container_t)); - run->capacity = counts[i]; - run->n_runs = counts[i]; - run->runs = run_zone; - rb->high_low_container.containers[i] = run; - run_zone += run->n_runs; - break; - } - case ARRAY_CONTAINER_TYPE: { - array_container_t *array = (array_container_t *)arena_alloc( - &arena, sizeof(array_container_t)); - array->capacity = counts[i] + UINT32_C(1); - array->cardinality = counts[i] + UINT32_C(1); - array->array = array_zone; - rb->high_low_container.containers[i] = array; - array_zone += counts[i] + UINT32_C(1); - break; + leaf_t leaf = (leaf_t)*it.value; + uint32_t container_min = 0; + if (compare_high48(it.key, min_high48) == 0) { + container_min = min_low16; + } + uint32_t container_max = 0xFFFF + 1; // Exclusive + if (compare_high48(it.key, max_high48) == 0) { + container_max = max_low16; + } + + // For the first and last containers we use container_contains_range, + // for the intermediate containers we can use container_is_full. 
+ if (container_min == 0 && container_max == 0xFFFF + 1) { + if (!container_is_full(get_container(r, leaf), + get_typecode(leaf))) { + return false; } - default: - roaring_free(arena); - return NULL; + } else if (!container_contains_range(get_container(r, leaf), + container_min, container_max, + get_typecode(leaf))) { + return false; } + prev_high48_bits = current_high48_bits; + art_iterator_next(&it); } - - return rb; + return prev_high48_bits == max_high48_bits; } -ALLOW_UNALIGNED -roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) { - char *start_of_buf = (char *)buf; - uint32_t cookie; - int32_t num_containers; - uint16_t *descriptive_headers; - uint32_t *offset_headers = NULL; - const char *run_flag_bitset = NULL; - bool hasrun = false; +bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); - // deserialize cookie - memcpy(&cookie, buf, sizeof(uint32_t)); - buf += sizeof(uint32_t); - if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) { - memcpy(&num_containers, buf, sizeof(int32_t)); - buf += sizeof(int32_t); - descriptive_headers = (uint16_t *)buf; - buf += num_containers * 2 * sizeof(uint16_t); - offset_headers = (uint32_t *)buf; - buf += num_containers * sizeof(uint32_t); - } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) { - num_containers = (cookie >> 16) + 1; - hasrun = true; - int32_t run_flag_bitset_size = (num_containers + 7) / 8; - run_flag_bitset = buf; - buf += run_flag_bitset_size; - descriptive_headers = (uint16_t *)buf; - buf += num_containers * 2 * sizeof(uint16_t); - if (num_containers >= NO_OFFSET_THRESHOLD) { - offset_headers = (uint32_t *)buf; - buf += num_containers * sizeof(uint32_t); + if (context->leaf == NULL || + art_compare_keys(context->high_bytes, high48) != 0) { + // We're not positioned anywhere yet or the high bits of the key + // differ. 
+ leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + if (leaf == NULL) { + return false; } - } else { - return NULL; + context->leaf = leaf; + memcpy(context->high_bytes, high48, ART_KEY_BYTES); } + return container_contains(get_container(r, *context->leaf), low16, + get_typecode(*context->leaf)); +} - // calculate total size for allocation - int32_t num_bitset_containers = 0; - int32_t num_run_containers = 0; - int32_t num_array_containers = 0; - - for (int32_t i = 0; i < num_containers; i++) { - uint16_t tmp; - memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp)); - uint32_t cardinality = tmp + 1; - bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); - bool isrun = false; - if (hasrun) { - if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { - isbitmap = false; - isrun = true; +bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, + uint64_t *element) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint64_t start_rank = 0; + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint64_t cardinality = container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); + if (start_rank + cardinality > rank) { + uint32_t uint32_start = 0; + uint32_t uint32_rank = rank - start_rank; + uint32_t uint32_element = 0; + if (container_select(get_container(r, leaf), get_typecode(leaf), + &uint32_start, uint32_rank, &uint32_element)) { + *element = combine_key(it.key, (uint16_t)uint32_element); + return true; } + return false; } - - if (isbitmap) { - num_bitset_containers++; - } else if (isrun) { - num_run_containers++; - } else { - num_array_containers++; - } + start_rank += cardinality; + art_iterator_next(&it); } + return false; +} - size_t alloc_size = 0; - alloc_size += sizeof(roaring_bitmap_t); - alloc_size += num_containers * sizeof(container_t *); - alloc_size += num_bitset_containers * sizeof(bitset_container_t); - alloc_size += num_run_containers * sizeof(run_container_t); - alloc_size += 
num_array_containers * sizeof(array_container_t); - alloc_size += num_containers * sizeof(uint16_t); // keys - alloc_size += num_containers * sizeof(uint8_t); // typecodes +uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); - // allocate bitmap and construct containers - char *arena = (char *)roaring_malloc(alloc_size); - if (arena == NULL) { - return NULL; + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint64_t rank = 0; + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + int compare_result = compare_high48(it.key, high48); + if (compare_result < 0) { + rank += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); + } else if (compare_result == 0) { + return rank + container_rank(get_container(r, leaf), + get_typecode(leaf), low16); + } else { + return rank; + } + art_iterator_next(&it); } + return rank; +} - roaring_bitmap_t *rb = - (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t)); - rb->high_low_container.flags = ROARING_FLAG_FROZEN; - rb->high_low_container.allocation_size = num_containers; - rb->high_low_container.size = num_containers; - rb->high_low_container.containers = (container_t **)arena_alloc( - &arena, sizeof(container_t *) * num_containers); - - uint16_t *keys = - (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t)); - uint8_t *typecodes = - (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t)); - - rb->high_low_container.keys = keys; - rb->high_low_container.typecodes = typecodes; +bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, + uint64_t *out_index) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); - for (int32_t i = 0; i < num_containers; i++) { - uint16_t tmp; - memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp)); - int32_t cardinality = tmp + 1; - bool isbitmap = (cardinality > 
DEFAULT_MAX_SIZE); - bool isrun = false; - if (hasrun) { - if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { - isbitmap = false; - isrun = true; + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint64_t index = 0; + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + int compare_result = compare_high48(it.key, high48); + if (compare_result < 0) { + index += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); + } else if (compare_result == 0) { + int index16 = container_get_index(get_container(r, leaf), + get_typecode(leaf), low16); + if (index16 < 0) { + return false; } + *out_index = index + index16; + return true; + } else { + return false; } + art_iterator_next(&it); + } + return false; +} - keys[i] = descriptive_headers[2 * i]; +// Returns true if a container was removed. +static inline bool containerptr_roaring64_bitmap_remove(roaring64_bitmap_t *r, + uint8_t *high48, + uint16_t low16, + leaf_t *leaf) { + if (leaf == NULL) { + return false; + } - if (isbitmap) { - typecodes[i] = BITSET_CONTAINER_TYPE; - bitset_container_t *c = (bitset_container_t *)arena_alloc( - &arena, sizeof(bitset_container_t)); - c->cardinality = cardinality; - if (offset_headers != NULL) { - c->words = (uint64_t *)(start_of_buf + offset_headers[i]); - } else { - c->words = (uint64_t *)buf; - buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - } - rb->high_low_container.containers[i] = c; - } else if (isrun) { - typecodes[i] = RUN_CONTAINER_TYPE; - run_container_t *c = - (run_container_t *)arena_alloc(&arena, sizeof(run_container_t)); - c->capacity = cardinality; - uint16_t n_runs; - if (offset_headers != NULL) { - memcpy(&n_runs, start_of_buf + offset_headers[i], - sizeof(uint16_t)); - c->n_runs = n_runs; - c->runs = (rle16_t *)(start_of_buf + offset_headers[i] + - sizeof(uint16_t)); - } else { - memcpy(&n_runs, buf, sizeof(uint16_t)); - c->n_runs = n_runs; - buf += sizeof(uint16_t); - c->runs = (rle16_t *)buf; - 
buf += c->n_runs * sizeof(rle16_t); - } - rb->high_low_container.containers[i] = c; - } else { - typecodes[i] = ARRAY_CONTAINER_TYPE; - array_container_t *c = (array_container_t *)arena_alloc( - &arena, sizeof(array_container_t)); - c->cardinality = cardinality; - c->capacity = cardinality; - if (offset_headers != NULL) { - c->array = (uint16_t *)(start_of_buf + offset_headers[i]); - } else { - c->array = (uint16_t *)buf; - buf += cardinality * sizeof(uint16_t); - } - rb->high_low_container.containers[i] = c; - } + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); + uint8_t typecode2; + container_t *container2 = + container_remove(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + replace_container(r, leaf, container2, typecode2); + } + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + bool erased = art_erase(&r->art, high48, (art_val_t *)leaf); + assert(erased); + return true; } + return false; +} - return rb; +void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) { + art_t *art = &r->art; + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + + leaf_t *leaf = (leaf_t *)art_find(art, high48); + containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); } -bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { - uint32_t max_value = roaring_bitmap_maximum(r); - size_t new_array_size = (size_t)(max_value / 64 + 1); - bool resize_ok = bitset_resize(bitset, new_array_size, true); - if (!resize_ok) { +bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) { + art_t *art = &r->art; + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(art, high48); + + if (leaf == NULL) { return false; } - const roaring_array_t *ra = &r->high_low_container; - for (int i = 0; i < ra->size; ++i) { 
- uint64_t *words = bitset->array + (ra->keys[i] << 10); - uint8_t type = ra->typecodes[i]; - const container_t *c = ra->containers[i]; - if (type == SHARED_CONTAINER_TYPE) { - c = container_unwrap_shared(c, &type); + int old_cardinality = + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + if (containerptr_roaring64_bitmap_remove(r, high48, low16, leaf)) { + return true; + } + int new_cardinality = + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + return new_cardinality != old_cardinality; +} + +void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val) { + art_t *art = &r->art; + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + if (context->leaf != NULL && + compare_high48(context->high_bytes, high48) == 0) { + // We're at a container with the correct high bits. + uint8_t typecode = get_typecode(*context->leaf); + container_t *container = get_container(r, *context->leaf); + uint8_t typecode2; + container_t *container2 = + container_remove(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + replace_container(r, context->leaf, container2, typecode2); } - switch (type) { - case BITSET_CONTAINER_TYPE: { - size_t max_word_index = new_array_size - (ra->keys[i] << 10); - if (max_word_index > 1024) { - max_word_index = 1024; - } - const bitset_container_t *src = const_CAST_bitset(c); - memcpy(words, src->words, max_word_index * sizeof(uint64_t)); - } break; - case ARRAY_CONTAINER_TYPE: { - const array_container_t *src = const_CAST_array(c); - bitset_set_list(words, src->array, src->cardinality); - } break; - case RUN_CONTAINER_TYPE: { - const run_container_t *src = const_CAST_run(c); - for (int32_t rlepos = 0; rlepos < src->n_runs; ++rlepos) { - rle16_t rle = src->runs[rlepos]; - bitset_set_lenrange(words, rle.value, rle.length); - } - } break; - default: - roaring_unreachable; + 
if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + leaf_t leaf; + bool erased = art_erase(art, high48, (art_val_t *)&leaf); + assert(erased); + remove_container(r, leaf); } + } else { + // We're not positioned anywhere yet or the high bits of the key + // differ. + leaf_t *leaf = (leaf_t *)art_find(art, high48); + containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); + context->leaf = leaf; + memcpy(context->high_bytes, high48, ART_KEY_BYTES); } - return true; } -#ifdef __cplusplus -} +void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, + const uint64_t *vals) { + if (n_args == 0) { + return; + } + const uint64_t *end = vals + n_args; + roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; + for (const uint64_t *current_val = vals; current_val != end; + current_val++) { + roaring64_bitmap_remove_bulk(r, &context, *current_val); + } } -} // extern "C" { namespace roaring { -#endif -/* end file src/roaring.c */ -/* begin file src/roaring64.c */ -#include -#include -#include -#include -#include - -// For serialization / deserialization -// containers.h last to avoid conflict with ROARING_CONTAINER_T. 
+static inline void remove_range_closed_at(roaring64_bitmap_t *r, art_t *art, + uint8_t *high48, uint16_t min, + uint16_t max) { + leaf_t *leaf = (leaf_t *)art_find(art, high48); + if (leaf == NULL) { + return; + } + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); + uint8_t typecode2; + container_t *container2 = + container_remove_range(container, typecode, min, max, &typecode2); + if (container2 != container) { + container_free(container, typecode); + if (container2 != NULL) { + replace_container(r, leaf, container2, typecode2); + } else { + bool erased = art_erase(art, high48, NULL); + assert(erased); + remove_container(r, *leaf); + } + } +} -#define CROARING_ALIGN_BUF(buf, alignment) \ - (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ - (ptrdiff_t)(~((alignment)-1))) +void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min >= max) { + return; + } + roaring64_bitmap_remove_range_closed(r, min, max - 1); +} -#define CROARING_BITSET_ALIGNMENT 64 +void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min > max) { + return; + } -#ifdef __cplusplus -using namespace ::roaring::internal; + art_t *art = &r->art; + uint8_t min_high48[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48); + uint8_t max_high48[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48); + if (compare_high48(min_high48, max_high48) == 0) { + // Only remove a range within one container. + remove_range_closed_at(r, art, min_high48, min_low16, max_low16); + return; + } -extern "C" { -namespace roaring { -namespace api { -#endif + // Remove a range across containers. Remove intermediate containers + // entirely. + remove_range_closed_at(r, art, min_high48, min_low16, 0xffff); -// TODO: Copy on write. -// TODO: Error on failed allocation. 
+ art_iterator_t it = art_upper_bound(art, min_high48); + while (it.value != NULL && art_compare_keys(it.key, max_high48) < 0) { + leaf_t leaf; + bool erased = art_iterator_erase(&it, (art_val_t *)&leaf); + assert(erased); + container_free(get_container(r, leaf), get_typecode(leaf)); + remove_container(r, leaf); + } + remove_range_closed_at(r, art, max_high48, 0, max_low16); +} -typedef struct roaring64_bitmap_s { - art_t art; - uint8_t flags; - uint64_t first_free; - uint64_t capacity; - container_t **containers; -} roaring64_bitmap_t; +void roaring64_bitmap_clear(roaring64_bitmap_t *r) { + roaring64_bitmap_remove_range_closed(r, 0, UINT64_MAX); +} -// Leaf type of the ART used to keep the high 48 bits of each entry. -// Low 8 bits: typecode -// High 56 bits: container index -typedef roaring64_leaf_t leaf_t; +uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint64_t cardinality = 0; + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + cardinality += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); + art_iterator_next(&it); + } + return cardinality; +} -// Iterator struct to hold iteration state. -typedef struct roaring64_iterator_s { - const roaring64_bitmap_t *r; - art_iterator_t art_it; - roaring_container_iterator_t container_it; - uint64_t high48; // Key that art_it points to. 
+uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max) { + if (min >= max) { + return 0; + } + // Convert to a closed range + // No underflow here: passing the above condition implies min < max, so + // there is a number less than max + return roaring64_bitmap_range_closed_cardinality(r, min, max - 1); +} - uint64_t value; - bool has_value; +uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max) { + if (min > max) { + return 0; + } - // If has_value is false, then the iterator is saturated. This field - // indicates the direction of saturation. If true, there are no more values - // in the forward direction. If false, there are no more values in the - // backward direction. - bool saturated_forward; -} roaring64_iterator_t; + uint64_t cardinality = 0; + uint8_t min_high48[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48); + uint8_t max_high48[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48); -static inline bool is_frozen64(const roaring64_bitmap_t *r) { - return r->flags & ROARING_FLAG_FROZEN; -} + art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); + while (it.value != NULL) { + int max_compare_result = compare_high48(it.key, max_high48); + if (max_compare_result > 0) { + // We're outside the range. + break; + } -// Splits the given uint64 key into high 48 bit and low 16 bit components. -// Expects high48_out to be of length ART_KEY_BYTES. -static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { - uint64_t tmp = croaring_htobe64(key); - memcpy(high48_out, (uint8_t *)(&tmp), ART_KEY_BYTES); - return (uint16_t)key; + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); + if (max_compare_result == 0) { + // We're at the max high key, add only the range up to the low + // 16 bits of max. 
+ cardinality += container_rank(container, typecode, max_low16); + } else { + // We're not yet at the max high key, add the full container + // range. + cardinality += container_get_cardinality(container, typecode); + } + if (compare_high48(it.key, min_high48) == 0 && min_low16 > 0) { + // We're at the min high key, remove the range up to the low 16 + // bits of min. + cardinality -= container_rank(container, typecode, min_low16 - 1); + } + art_iterator_next(&it); + } + return cardinality; } -// Recombines the high 48 bit and low 16 bit components into a uint64 key. -// Expects high48_out to be of length ART_KEY_BYTES. -static inline uint64_t combine_key(const uint8_t high48[], uint16_t low16) { - uint64_t result = 0; - memcpy((uint8_t *)(&result), high48, ART_KEY_BYTES); - return croaring_be64toh(result) | low16; +bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) { + return art_is_empty(&r->art); } -static inline uint64_t minimum(uint64_t a, uint64_t b) { - return (a < b) ? a : b; +uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + if (it.value == NULL) { + return UINT64_MAX; + } + leaf_t leaf = (leaf_t)*it.value; + return combine_key( + it.key, container_minimum(get_container(r, leaf), get_typecode(leaf))); } -static inline leaf_t create_leaf(uint64_t container_index, uint8_t typecode) { - return (container_index << 8) | typecode; +uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/false); + if (it.value == NULL) { + return 0; + } + leaf_t leaf = (leaf_t)*it.value; + return combine_key( + it.key, container_maximum(get_container(r, leaf), get_typecode(leaf))); } -static inline uint8_t get_typecode(leaf_t leaf) { return (uint8_t)leaf; } - -static inline uint64_t get_index(leaf_t leaf) { return leaf >> 8; } +bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { + art_iterator_t it 
= art_init_iterator(&r->art, /*first=*/true); + bool has_run_container = false; + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + uint8_t new_typecode; + // We don't need to free the existing container if a new one was + // created, convert_run_optimize does that internally. + container_t *new_container = convert_run_optimize( + get_container(r, *leaf), get_typecode(*leaf), &new_typecode); + replace_container(r, leaf, new_container, new_typecode); + has_run_container |= new_typecode == RUN_CONTAINER_TYPE; + art_iterator_next(&it); + } + return has_run_container; +} -static inline container_t *get_container(const roaring64_bitmap_t *r, - leaf_t leaf) { - return r->containers[get_index(leaf)]; +static void move_to_shrink(roaring64_bitmap_t *r, leaf_t *leaf) { + uint64_t idx = get_index(*leaf); + if (idx < r->first_free) { + return; + } + r->containers[r->first_free] = get_container(r, *leaf); + r->containers[idx] = NULL; + *leaf = create_leaf(r->first_free, get_typecode(*leaf)); + r->first_free = next_free_container_idx(r); } -// Replaces the container of `leaf` with the given container. Returns the -// modified leaf for convenience. -static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, - container_t *container, - uint8_t typecode) { - uint64_t index = get_index(*leaf); - r->containers[index] = container; - *leaf = create_leaf(index, typecode); - return *leaf; +static inline bool is_shrunken(const roaring64_bitmap_t *r) { + return art_is_shrunken(&r->art) && r->first_free == r->capacity; } -/** - * Extends the array of container pointers. 
- */ -static void extend_containers(roaring64_bitmap_t *r) { - uint64_t size = r->first_free; - if (size < r->capacity) { - return; +size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { + size_t freed = art_shrink_to_fit(&r->art); + art_iterator_t it = art_init_iterator(&r->art, true); + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + freed += container_shrink_to_fit(get_container(r, *leaf), + get_typecode(*leaf)); + move_to_shrink(r, leaf); + art_iterator_next(&it); } - uint64_t new_capacity; - if (r->capacity == 0) { - new_capacity = 2; - } else if (r->capacity < 1024) { - new_capacity = 2 * r->capacity; - } else { - new_capacity = 5 * r->capacity / 4; + if (is_shrunken(r)) { + return freed; } - uint64_t increase = new_capacity - r->capacity; - r->containers = (container_t **)roaring_realloc( - r->containers, new_capacity * sizeof(container_t *)); - memset(r->containers + r->capacity, 0, increase * sizeof(container_t *)); - r->capacity = new_capacity; + uint64_t new_capacity = r->first_free; + if (new_capacity < r->capacity) { + r->containers = roaring_realloc(r->containers, + new_capacity * sizeof(container_t *)); + freed += (r->capacity - new_capacity) * sizeof(container_t *); + r->capacity = new_capacity; + } + return freed; } -static uint64_t next_free_container_idx(const roaring64_bitmap_t *r) { - for (uint64_t i = r->first_free + 1; i < r->capacity; ++i) { - if (r->containers[i] == NULL) { - return i; +/** + * (For advanced users.) 
+ * Collect statistics about the bitmap + */ +void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, + roaring64_statistics_t *stat) { + memset(stat, 0, sizeof(*stat)); + stat->min_value = roaring64_bitmap_minimum(r); + stat->max_value = roaring64_bitmap_maximum(r); + + art_iterator_t it = art_init_iterator((art_t *)&r->art, true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + stat->n_containers++; + uint8_t truetype = + get_container_type(get_container(r, leaf), get_typecode(leaf)); + uint32_t card = container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); + uint32_t sbytes = + container_size_in_bytes(get_container(r, leaf), get_typecode(leaf)); + stat->cardinality += card; + switch (truetype) { + case BITSET_CONTAINER_TYPE: + stat->n_bitset_containers++; + stat->n_values_bitset_containers += card; + stat->n_bytes_bitset_containers += sbytes; + break; + case ARRAY_CONTAINER_TYPE: + stat->n_array_containers++; + stat->n_values_array_containers += card; + stat->n_bytes_array_containers += sbytes; + break; + case RUN_CONTAINER_TYPE: + stat->n_run_containers++; + stat->n_values_run_containers += card; + stat->n_bytes_run_containers += sbytes; + break; + default: + assert(false); + roaring_unreachable; } + art_iterator_next(&it); } - return r->capacity; } -static uint64_t allocate_index(roaring64_bitmap_t *r) { - uint64_t first_free = r->first_free; - if (first_free == r->capacity) { - extend_containers(r); - } - r->first_free = next_free_container_idx(r); - return first_free; +static bool roaring64_leaf_internal_validate(const art_val_t val, + const char **reason, + void *context) { + leaf_t leaf = (leaf_t)val; + roaring64_bitmap_t *r = (roaring64_bitmap_t *)context; + return container_internal_validate(get_container(r, leaf), + get_typecode(leaf), reason); } -static leaf_t add_container(roaring64_bitmap_t *r, container_t *container, - uint8_t typecode) { - uint64_t index = allocate_index(r); - r->containers[index] = 
container; - return create_leaf(index, typecode); +bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, + const char **reason) { + return art_internal_validate(&r->art, reason, + roaring64_leaf_internal_validate, (void *)r); } -static void remove_container(roaring64_bitmap_t *r, leaf_t leaf) { - uint64_t index = get_index(leaf); - r->containers[index] = NULL; - if (index < r->first_free) { - r->first_free = index; +bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + + while (it1.value != NULL && it2.value != NULL) { + if (compare_high48(it1.key, it2.key) != 0) { + return false; + } + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + if (!container_equals(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2))) { + return false; + } + art_iterator_next(&it1); + art_iterator_next(&it2); } + return it1.value == NULL && it2.value == NULL; } -// Copies the container referenced by `leaf` from `r1` to `r2`. -static inline leaf_t copy_leaf_container(const roaring64_bitmap_t *r1, - roaring64_bitmap_t *r2, leaf_t leaf) { - uint8_t typecode = get_typecode(leaf); - // get_copy_of_container modifies the typecode passed in. 
- container_t *container = get_copy_of_container( - get_container(r1, leaf), &typecode, /*copy_on_write=*/false); - return add_container(r2, container, typecode); -} +bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); -static inline int compare_high48(art_key_chunk_t key1[], - art_key_chunk_t key2[]) { - return art_compare_keys(key1, key2); + while (it1.value != NULL) { + bool it2_present = it2.value != NULL; + + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + if (!container_is_subset( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2))) { + return false; + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } + } + if (!it2_present || compare_result < 0) { + return false; + } else if (compare_result > 0) { + art_iterator_lower_bound(&it2, it1.key); + } + } + return true; } -static inline bool roaring64_iterator_init_at_leaf_first( - roaring64_iterator_t *it) { - it->high48 = combine_key(it->art_it.key, 0); - leaf_t leaf = (leaf_t)*it->art_it.value; - uint16_t low16 = 0; - it->container_it = container_init_iterator(get_container(it->r, leaf), - get_typecode(leaf), &low16); - it->value = it->high48 | low16; - return (it->has_value = true); +bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + return roaring64_bitmap_get_cardinality(r1) < + roaring64_bitmap_get_cardinality(r2) && + roaring64_bitmap_is_subset(r1, r2); } -static inline bool roaring64_iterator_init_at_leaf_last( - roaring64_iterator_t *it) { - it->high48 = combine_key(it->art_it.key, 0); - leaf_t leaf = (leaf_t)*it->art_it.value; - uint16_t low16 = 0; - 
it->container_it = container_init_iterator_last(get_container(it->r, leaf), - get_typecode(leaf), &low16); - it->value = it->high48 | low16; - return (it->has_value = true); +roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); + + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + + while (it1.value != NULL && it2.value != NULL) { + // Cases: + // 1. it1 < it2 -> it1++ + // 2. it1 == it2 -> output it1 & it2, it1++, it2++ + // 3. it1 > it2 -> it2++ + int compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2: iterators at the same high key position. + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_and(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + } else { + container_free(result_container, result_typecode); + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } else if (compare_result < 0) { + // Case 1: it1 is before it2. + art_iterator_lower_bound(&it1, it2.key); + } else { + // Case 3: it2 is before it1. 
+ art_iterator_lower_bound(&it2, it1.key); + } + } + return result; } -static inline roaring64_iterator_t *roaring64_iterator_init_at( - const roaring64_bitmap_t *r, roaring64_iterator_t *it, bool first) { - it->r = r; - it->art_it = art_init_iterator((art_t *)&r->art, first); - it->has_value = it->art_it.value != NULL; - if (it->has_value) { - if (first) { - roaring64_iterator_init_at_leaf_first(it); +uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t result = 0; + + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + + while (it1.value != NULL && it2.value != NULL) { + // Cases: + // 1. it1 < it2 -> it1++ + // 2. it1 == it2 -> output cardinality it1 & it2, it1++, it2++ + // 3. it1 > it2 -> it2++ + int compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2: iterators at the same high key position. + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + result += container_and_cardinality( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2)); + art_iterator_next(&it1); + art_iterator_next(&it2); + } else if (compare_result < 0) { + // Case 1: it1 is before it2. + art_iterator_lower_bound(&it1, it2.key); } else { - roaring64_iterator_init_at_leaf_last(it); + // Case 3: it2 is before it1. + art_iterator_lower_bound(&it2, it1.key); + } + } + return result; +} + +// Inplace and (modifies its first argument). +void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + if (r1 == r2) { + return; + } + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + + while (it1.value != NULL) { + // Cases: + // 1. !it2_present -> erase it1 + // 2. it2_present + // a. 
it1 < it2 -> erase it1 + // b. it1 == it2 -> output it1 & it2, it1++, it2++ + // c. it1 > it2 -> it2++ + bool it2_present = it2.value != NULL; + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + + // We do the computation "in place" only when c1 is not a + // shared container. Rationale: using a shared container + // safely with in place computation would require making a + // copy and then doing the computation in place which is + // likely less efficient than avoiding in place entirely and + // always generating a new container. + uint8_t typecode = get_typecode(*leaf1); + container_t *container = get_container(r1, *leaf1); + uint8_t typecode2; + container_t *container2; + if (typecode == SHARED_CONTAINER_TYPE) { + container2 = container_and(container, typecode, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } else { + container2 = container_iand( + container, typecode, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } + + if (container2 != container) { + container_free(container, typecode); + } + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + art_iterator_erase(&it1, NULL); + remove_container(r1, *leaf1); + } else { + if (container2 != container) { + replace_container(r1, leaf1, container2, typecode2); + } + // Only advance the iterator if we didn't delete the + // leaf, as erasing advances by itself. + art_iterator_next(&it1); + } + art_iterator_next(&it2); + } + } + + if (!it2_present || compare_result < 0) { + // Cases 1 and 2a: it1 is the only iterator or is before it2. 
+ leaf_t leaf; + bool erased = art_iterator_erase(&it1, (art_val_t *)&leaf); + assert(erased); + container_free(get_container(r1, leaf), get_typecode(leaf)); + remove_container(r1, leaf); + } else if (compare_result > 0) { + // Case 2c: it1 is after it2. + art_iterator_lower_bound(&it2, it1.key); } - } else { - it->saturated_forward = first; } - return it; } -roaring64_bitmap_t *roaring64_bitmap_create(void) { - roaring64_bitmap_t *r = - (roaring64_bitmap_t *)roaring_malloc(sizeof(roaring64_bitmap_t)); - art_init_cleared(&r->art); - r->flags = 0; - r->capacity = 0; - r->first_free = 0; - r->containers = NULL; - return r; -} +bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + bool intersect = false; + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); -void roaring64_bitmap_free(roaring64_bitmap_t *r) { - if (!r) { - return; - } - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - if (is_frozen64(r)) { - // Only free the container itself, not the buffer-backed contents - // within. - roaring_free(get_container(r, leaf)); + while (it1.value != NULL && it2.value != NULL) { + // Cases: + // 1. it1 < it2 -> it1++ + // 2. it1 == it2 -> intersect |= it1 & it2, it1++, it2++ + // 3. it1 > it2 -> it2++ + int compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2: iterators at the same high key position. + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + intersect |= container_intersect( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2)); + art_iterator_next(&it1); + art_iterator_next(&it2); + } else if (compare_result < 0) { + // Case 1: it1 is before it2. 
+ art_iterator_lower_bound(&it1, it2.key); } else { - container_free(get_container(r, leaf), get_typecode(leaf)); + // Case 3: it2 is before it1. + art_iterator_lower_bound(&it2, it1.key); } - art_iterator_next(&it); } - if (!is_frozen64(r)) { - art_free(&r->art); - } - roaring_free(r->containers); - roaring_free(r); + return intersect; } -roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) { - roaring64_bitmap_t *result = roaring64_bitmap_create(); - - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t result_typecode = get_typecode(leaf); - container_t *result_container = get_copy_of_container( - get_container(r, leaf), &result_typecode, /*copy_on_write=*/false); - leaf_t result_leaf = - add_container(result, result_container, result_typecode); - art_insert(&result->art, it.key, (art_val_t)result_leaf); - art_iterator_next(&it); +bool roaring64_bitmap_intersect_with_range(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max) { + if (min >= max) { + return false; } - return result; + roaring64_iterator_t it; + roaring64_iterator_init_at(r, &it, /*first=*/true); + if (!roaring64_iterator_move_equalorlarger(&it, min)) { + return false; + } + return roaring64_iterator_has_value(&it) && + roaring64_iterator_value(&it) < max; } -/** - * Steal the containers from a 32-bit bitmap and insert them into a 64-bit - * bitmap (with an offset) - * - * After calling this function, the original bitmap will be empty, and the - * returned bitmap will contain all the values from the original bitmap. 
- */ -static void move_from_roaring32_offset(roaring64_bitmap_t *dst, - roaring_bitmap_t *src, - uint32_t high_bits) { - uint64_t key_base = ((uint64_t)high_bits) << 32; - uint32_t r32_size = ra_get_size(&src->high_low_container); - for (uint32_t i = 0; i < r32_size; ++i) { - uint16_t key = ra_get_key_at_index(&src->high_low_container, i); - uint8_t typecode; - container_t *container = ra_get_container_at_index( - &src->high_low_container, (uint16_t)i, &typecode); - - uint8_t high48[ART_KEY_BYTES]; - uint64_t high48_bits = key_base | ((uint64_t)key << 16); - split_key(high48_bits, high48); - leaf_t leaf = add_container(dst, container, typecode); - art_insert(&dst->art, high48, (art_val_t)leaf); - } - // We stole all the containers, so leave behind a size of zero - src->high_low_container.size = 0; +double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t c2 = roaring64_bitmap_get_cardinality(r2); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return (double)inter / (double)(c1 + c2 - inter); } -roaring64_bitmap_t *roaring64_bitmap_move_from_roaring32( - roaring_bitmap_t *bitmap32) { +roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - move_from_roaring32_offset(result, bitmap32, 0); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - return result; -} + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; -roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, - uint64_t step) { - if (step == 0 || max <= min) { - return NULL; - } - roaring64_bitmap_t *r = roaring64_bitmap_create(); - if (step >= (1 << 16)) { - // Only one value per 
container. - for (uint64_t value = min; value < max; value += step) { - roaring64_bitmap_add(r, value); - if (value > UINT64_MAX - step) { - break; + // Cases: + // 1. it1_present && !it2_present -> output it1, it1++ + // 2. !it1_present && it2_present -> output it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> output it1, it1++ + // b. it1 == it2 -> output it1 | it2, it1++, it2++ + // c. it1 > it2 -> output it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_or(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + art_iterator_next(&it1); + art_iterator_next(&it2); } } - return r; - } - do { - uint64_t high_bits = min & 0xFFFFFFFFFFFF0000; - uint16_t container_min = min & 0xFFFF; - uint32_t container_max = (uint32_t)minimum(max - high_bits, 1 << 16); - - uint8_t typecode; - container_t *container = container_from_range( - &typecode, container_min, container_max, (uint16_t)step); - - uint8_t high48[ART_KEY_BYTES]; - split_key(min, high48); - leaf_t leaf = add_container(r, container, typecode); - art_insert(&r->art, high48, (art_val_t)leaf); - - uint64_t gap = container_max - container_min + step - 1; - uint64_t increment = gap - (gap % step); - if (min > UINT64_MAX - increment) { - break; - } - min += increment; - } while (min < max); - return r; -} - -roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, - const uint64_t *vals) { - roaring64_bitmap_t *r = roaring64_bitmap_create(); - roaring64_bitmap_add_many(r, n_args, vals); - return r; 
-} - -static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, - uint8_t *high48, - uint16_t low16, - leaf_t *leaf) { - if (leaf != NULL) { - uint8_t typecode = get_typecode(*leaf); - container_t *container = get_container(r, *leaf); - uint8_t typecode2; - container_t *container2 = - container_add(container, low16, typecode, &typecode2); - if (container2 != container) { - container_free(container, typecode); - replace_container(r, leaf, container2, typecode2); - } - return leaf; - } else { - array_container_t *ac = array_container_create(); - uint8_t typecode; - container_t *container = - container_add(ac, low16, ARRAY_CONTAINER_TYPE, &typecode); - assert(ac == container); - leaf_t new_leaf = add_container(r, container, typecode); - return (leaf_t *)art_insert(&r->art, high48, (art_val_t)new_leaf); - } -} - -void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - containerptr_roaring64_bitmap_add(r, high48, low16, leaf); -} - -bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - - int old_cardinality = 0; - if (leaf != NULL) { - old_cardinality = container_get_cardinality(get_container(r, *leaf), - get_typecode(*leaf)); + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. 
+ leaf_t result_leaf = + copy_leaf_container(r2, result, (leaf_t)*it2.value); + art_insert(&result->art, it2.key, (art_val_t)result_leaf); + art_iterator_next(&it2); + } } - leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); - int new_cardinality = - container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); - return old_cardinality != new_cardinality; + return result; } -void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, - roaring64_bulk_context_t *context, - uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - leaf_t *leaf = context->leaf; - if (leaf != NULL && compare_high48(context->high_bytes, high48) == 0) { - // We're at a container with the correct high bits. - uint8_t typecode1 = get_typecode(*leaf); - container_t *container1 = get_container(r, *leaf); - uint8_t typecode2; - container_t *container2 = - container_add(container1, low16, typecode1, &typecode2); - if (container2 != container1) { - container_free(container1, typecode1); - replace_container(r, leaf, container2, typecode2); - } - } else { - // We're not positioned anywhere yet or the high bits of the key - // differ. 
- leaf = (leaf_t *)art_find(&r->art, high48); - context->leaf = - containerptr_roaring64_bitmap_add(r, high48, low16, leaf); - memcpy(context->high_bytes, high48, ART_KEY_BYTES); - } +uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t c2 = roaring64_bitmap_get_cardinality(r2); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return c1 + c2 - inter; } -void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, - const uint64_t *vals) { - if (n_args == 0) { +void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + if (r1 == r2) { return; } - const uint64_t *end = vals + n_args; - roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; - for (const uint64_t *current_val = vals; current_val != end; - current_val++) { - roaring64_bitmap_add_bulk(r, &context, *current_val); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; + + // Cases: + // 1. it1_present && !it2_present -> it1++ + // 2. !it1_present && it2_present -> add it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> it1++ + // b. it1 == it2 -> it1 | it2, it1++, it2++ + // c. it1 > it2 -> add it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. 
+ leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); + uint8_t typecode2; + container_t *container2; + if (get_typecode(*leaf1) == SHARED_CONTAINER_TYPE) { + container2 = container_or(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } else { + container2 = container_ior(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r1, leaf1, container2, typecode2); + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } + } + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. 
+ leaf_t result_leaf = + copy_leaf_container(r2, r1, (leaf_t)*it2.value); + art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); + art_iterator_next(&it2); + } } } -static inline void add_range_closed_at(roaring64_bitmap_t *r, art_t *art, - uint8_t *high48, uint16_t min, - uint16_t max) { - leaf_t *leaf = (leaf_t *)art_find(art, high48); - if (leaf != NULL) { - uint8_t typecode1 = get_typecode(*leaf); - container_t *container1 = get_container(r, *leaf); - uint8_t typecode2; - container_t *container2 = - container_add_range(container1, typecode1, min, max, &typecode2); - if (container2 != container1) { - container_free(container1, typecode1); - replace_container(r, leaf, container2, typecode2); +roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); + + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; + + // Cases: + // 1. it1_present && !it2_present -> output it1, it1++ + // 2. !it1_present && it2_present -> output it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> output it1, it1++ + // b. it1 == it2 -> output it1 ^ it2, it1++, it2++ + // c. it1 > it2 -> output it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. 
+ leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_xor(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = add_container(result, result_container, + result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + } else { + container_free(result_container, result_typecode); + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } + } + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. + leaf_t result_leaf = + copy_leaf_container(r2, result, (leaf_t)*it2.value); + art_insert(&result->art, it2.key, (art_val_t)result_leaf); + art_iterator_next(&it2); } - return; } - uint8_t typecode; - // container_add_range is inclusive, but `container_range_of_ones` is - // exclusive. 
- container_t *container = container_range_of_ones(min, max + 1, &typecode); - leaf_t new_leaf = add_container(r, container, typecode); - art_insert(art, high48, (art_val_t)new_leaf); + return result; } -void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, - uint64_t max) { - if (min >= max) { - return; - } - roaring64_bitmap_add_range_closed(r, min, max - 1); +uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t c2 = roaring64_bitmap_get_cardinality(r2); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return c1 + c2 - 2 * inter; } -void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, - uint64_t max) { - if (min > max) { - return; - } +void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + assert(r1 != r2); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - art_t *art = &r->art; - uint8_t min_high48[ART_KEY_BYTES]; - uint16_t min_low16 = split_key(min, min_high48); - uint8_t max_high48[ART_KEY_BYTES]; - uint16_t max_low16 = split_key(max, max_high48); - if (compare_high48(min_high48, max_high48) == 0) { - // Only populate range within one container. - add_range_closed_at(r, art, min_high48, min_low16, max_low16); - return; - } + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; - // Populate a range across containers. Fill intermediate containers - // entirely. 
- add_range_closed_at(r, art, min_high48, min_low16, 0xffff); - uint64_t min_high_bits = min >> 16; - uint64_t max_high_bits = max >> 16; - for (uint64_t current = min_high_bits + 1; current < max_high_bits; - ++current) { - uint8_t current_high48[ART_KEY_BYTES]; - split_key(current << 16, current_high48); - add_range_closed_at(r, art, current_high48, 0, 0xffff); - } - add_range_closed_at(r, art, max_high48, 0, max_low16); -} + // Cases: + // 1. it1_present && !it2_present -> it1++ + // 2. !it1_present && it2_present -> add it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> it1++ + // b. it1 == it2 -> it1 ^ it2, it1++, it2++ + // c. it1 > it2 -> add it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); + uint8_t typecode2; + container_t *container2; + if (typecode1 == SHARED_CONTAINER_TYPE) { + container2 = container_xor(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + if (container2 != container1) { + // We only free when doing container_xor, not + // container_ixor, as ixor frees the original + // internally. 
+ container_free(container1, typecode1); + } + } else { + container2 = container_ixor( + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } -bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - if (leaf != NULL) { - return container_contains(get_container(r, *leaf), low16, - get_typecode(*leaf)); + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + bool erased = art_iterator_erase(&it1, NULL); + assert(erased); + remove_container(r1, *leaf1); + } else { + if (container2 != container1) { + replace_container(r1, leaf1, container2, typecode2); + } + // Only advance the iterator if we didn't delete the + // leaf, as erasing advances by itself. + art_iterator_next(&it1); + } + art_iterator_next(&it2); + } + } + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. 
+ leaf_t result_leaf = + copy_leaf_container(r2, r1, (leaf_t)*it2.value); + if (it1_present) { + art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); + art_iterator_next(&it1); + } else { + art_insert(&r1->art, it2.key, (art_val_t)result_leaf); + } + art_iterator_next(&it2); + } } - return false; } -bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, - uint64_t max) { - if (min >= max) { - return true; - } - - uint8_t min_high48[ART_KEY_BYTES]; - uint16_t min_low16 = split_key(min, min_high48); - uint8_t max_high48[ART_KEY_BYTES]; - uint16_t max_low16 = split_key(max, max_high48); - uint64_t max_high48_bits = (max - 1) & 0xFFFFFFFFFFFF0000; // Inclusive +roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); - if (it.value == NULL || combine_key(it.key, 0) > min) { - return false; - } - uint64_t prev_high48_bits = min & 0xFFFFFFFFFFFF0000; - while (it.value != NULL) { - uint64_t current_high48_bits = combine_key(it.key, 0); - if (current_high48_bits > max_high48_bits) { - // We've passed the end of the range with all containers containing - // the range. - return true; - } - if (current_high48_bits - prev_high48_bits > 0x10000) { - // There is a gap in the iterator that falls in the range. - return false; - } + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - leaf_t leaf = (leaf_t)*it.value; - uint32_t container_min = 0; - if (compare_high48(it.key, min_high48) == 0) { - container_min = min_low16; - } - uint32_t container_max = 0xFFFF + 1; // Exclusive - if (compare_high48(it.key, max_high48) == 0) { - container_max = max_low16; - } + while (it1.value != NULL) { + // Cases: + // 1. it1_present && !it2_present -> output it1, it1++ + // 2. 
it1_present && it2_present + // a. it1 < it2 -> output it1, it1++ + // b. it1 == it2 -> output it1 - it2, it1++, it2++ + // c. it1 > it2 -> it2++ + bool it2_present = it2.value != NULL; + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = container_andnot( + get_container(r1, *leaf1), get_typecode(*leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); - // For the first and last containers we use container_contains_range, - // for the intermediate containers we can use container_is_full. - if (container_min == 0 && container_max == 0xFFFF + 1) { - if (!container_is_full(get_container(r, leaf), - get_typecode(leaf))) { - return false; + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = add_container(result, result_container, + result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + } else { + container_free(result_container, result_typecode); + } + art_iterator_next(&it1); + art_iterator_next(&it2); } - } else if (!container_contains_range(get_container(r, leaf), - container_min, container_max, - get_typecode(leaf))) { - return false; } - prev_high48_bits = current_high48_bits; - art_iterator_next(&it); - } - return prev_high48_bits == max_high48_bits; -} - -bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, - roaring64_bulk_context_t *context, - uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - - if (context->leaf == NULL || - art_compare_keys(context->high_bytes, high48) != 0) { - // We're not positioned anywhere yet or the high bits of the key - // differ. 
- leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - if (leaf == NULL) { - return false; + if (!it2_present || compare_result < 0) { + // Cases 1 and 2a: it1 is the only iterator or is before it2. + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); + art_iterator_next(&it1); + } else if (compare_result > 0) { + // Case 2c: it1 is after it2. + art_iterator_next(&it2); } - context->leaf = leaf; - memcpy(context->high_bytes, high48, ART_KEY_BYTES); } - return container_contains(get_container(r, *context->leaf), low16, - get_typecode(*context->leaf)); + return result; } -bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, - uint64_t *element) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - uint64_t start_rank = 0; - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint64_t cardinality = container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); - if (start_rank + cardinality > rank) { - uint32_t uint32_start = 0; - uint32_t uint32_rank = rank - start_rank; - uint32_t uint32_element = 0; - if (container_select(get_container(r, leaf), get_typecode(leaf), - &uint32_start, uint32_rank, &uint32_element)) { - *element = combine_key(it.key, (uint16_t)uint32_element); - return true; - } - return false; - } - start_rank += cardinality; - art_iterator_next(&it); - } - return false; +uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return c1 - inter; } -uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); +void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + art_iterator_t it1 = 
art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - uint64_t rank = 0; - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - int compare_result = compare_high48(it.key, high48); - if (compare_result < 0) { - rank += container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); - } else if (compare_result == 0) { - return rank + container_rank(get_container(r, leaf), - get_typecode(leaf), low16); - } else { - return rank; + while (it1.value != NULL) { + // Cases: + // 1. it1_present && !it2_present -> it1++ + // 2. it1_present && it2_present + // a. it1 < it2 -> it1++ + // b. it1 == it2 -> it1 - it2, it1++, it2++ + // c. it1 > it2 -> it2++ + bool it2_present = it2.value != NULL; + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); + uint8_t typecode2; + container_t *container2; + if (typecode1 == SHARED_CONTAINER_TYPE) { + container2 = container_andnot( + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + if (container2 != container1) { + // We only free when doing container_andnot, not + // container_iandnot, as iandnot frees the original + // internally. 
+ container_free(container1, typecode1); + } + } else { + container2 = container_iandnot( + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); + } + + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + bool erased = art_iterator_erase(&it1, NULL); + assert(erased); + remove_container(r1, *leaf1); + } else { + if (container2 != container1) { + replace_container(r1, leaf1, container2, typecode2); + } + // Only advance the iterator if we didn't delete the + // leaf, as erasing advances by itself. + art_iterator_next(&it1); + } + art_iterator_next(&it2); + } + } + if (!it2_present || compare_result < 0) { + // Cases 1 and 2a: it1 is the only iterator or is before it2. + art_iterator_next(&it1); + } else if (compare_result > 0) { + // Case 2c: it1 is after it2. + art_iterator_next(&it2); } - art_iterator_next(&it); } - return rank; } -bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, - uint64_t *out_index) { - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - uint64_t index = 0; - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - int compare_result = compare_high48(it.key, high48); - if (compare_result < 0) { - index += container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); - } else if (compare_result == 0) { - int index16 = container_get_index(get_container(r, leaf), - get_typecode(leaf), low16); - if (index16 < 0) { - return false; - } - *out_index = index + index16; - return true; - } else { - return false; - } - art_iterator_next(&it); +/** + * Flips the leaf at high48 in the range [min, max), adding the result to + * `r2`. If the high48 key is not found in `r1`, a new container is created. 
+ */ +static void roaring64_flip_leaf(const roaring64_bitmap_t *r1, + roaring64_bitmap_t *r2, uint8_t high48[], + uint32_t min, uint32_t max) { + leaf_t *leaf1 = (leaf_t *)art_find(&r1->art, high48); + uint8_t typecode2; + container_t *container2; + if (leaf1 == NULL) { + // No container at this key, create a full container. + container2 = container_range_of_ones(min, max, &typecode2); + } else if (min == 0 && max > 0xFFFF) { + // Flip whole container. + container2 = container_not(get_container(r1, *leaf1), + get_typecode(*leaf1), &typecode2); + } else { + // Partially flip a container. + container2 = + container_not_range(get_container(r1, *leaf1), get_typecode(*leaf1), + min, max, &typecode2); + } + if (container_nonzero_cardinality(container2, typecode2)) { + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, high48, (art_val_t)leaf2); + } else { + container_free(container2, typecode2); } - return false; } -// Returns true if a container was removed. -static inline bool containerptr_roaring64_bitmap_remove(roaring64_bitmap_t *r, - uint8_t *high48, - uint16_t low16, - leaf_t *leaf) { +/** + * Flips the leaf at high48 in the range [min, max). If the high48 key is + * not found in the bitmap, a new container is created. Deletes the leaf and + * associated container if the negation results in an empty range. + */ +static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], + uint32_t min, uint32_t max) { + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + container_t *container2; + uint8_t typecode2; if (leaf == NULL) { - return false; + // No container at this key, insert a full container. 
+ container2 = container_range_of_ones(min, max, &typecode2); + leaf_t new_leaf = add_container(r, container2, typecode2); + art_insert(&r->art, high48, (art_val_t)new_leaf); + return; } - uint8_t typecode = get_typecode(*leaf); - container_t *container = get_container(r, *leaf); - uint8_t typecode2; - container_t *container2 = - container_remove(container, low16, typecode, &typecode2); - if (container2 != container) { - container_free(container, typecode); - replace_container(r, leaf, container2, typecode2); + if (min == 0 && max > 0xFFFF) { + // Flip whole container. + container2 = container_inot(get_container(r, *leaf), + get_typecode(*leaf), &typecode2); + } else { + // Partially flip a container. + container2 = container_inot_range( + get_container(r, *leaf), get_typecode(*leaf), min, max, &typecode2); } - if (!container_nonzero_cardinality(container2, typecode2)) { - container_free(container2, typecode2); - bool erased = art_erase(&r->art, high48, (art_val_t *)leaf); + + if (container_nonzero_cardinality(container2, typecode2)) { + replace_container(r, leaf, container2, typecode2); + } else { + bool erased = art_erase(&r->art, high48, NULL); assert(erased); - (void)erased; + container_free(container2, typecode2); remove_container(r, *leaf); - return true; } - return false; } -void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) { - art_t *art = &r->art; - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - - leaf_t *leaf = (leaf_t *)art_find(art, high48); - containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); +roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max) { + if (min >= max) { + return roaring64_bitmap_copy(r); + } + return roaring64_bitmap_flip_closed(r, min, max - 1); } -bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) { - art_t *art = &r->art; - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - leaf_t *leaf 
= (leaf_t *)art_find(art, high48); - - if (leaf == NULL) { - return false; +roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, + uint64_t min, uint64_t max) { + if (min > max) { + return roaring64_bitmap_copy(r1); } - int old_cardinality = - container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); - if (containerptr_roaring64_bitmap_remove(r, high48, low16, leaf)) { - return true; + uint8_t min_high48_key[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48_key); + uint8_t max_high48_key[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48_key); + uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; + uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; + + roaring64_bitmap_t *r2 = roaring64_bitmap_create(); + art_iterator_t it = art_init_iterator((art_t *)&r1->art, /*first=*/true); + + // Copy the containers before min unchanged. + while (it.value != NULL && compare_high48(it.key, min_high48_key) < 0) { + leaf_t leaf1 = (leaf_t)*it.value; + uint8_t typecode2 = get_typecode(leaf1); + container_t *container2 = get_copy_of_container( + get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, it.key, (art_val_t)leaf2); + art_iterator_next(&it); } - int new_cardinality = - container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); - return new_cardinality != old_cardinality; -} -void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, - roaring64_bulk_context_t *context, - uint64_t val) { - art_t *art = &r->art; - uint8_t high48[ART_KEY_BYTES]; - uint16_t low16 = split_key(val, high48); - if (context->leaf != NULL && - compare_high48(context->high_bytes, high48) == 0) { - // We're at a container with the correct high bits. 
- uint8_t typecode = get_typecode(*context->leaf); - container_t *container = get_container(r, *context->leaf); - uint8_t typecode2; - container_t *container2 = - container_remove(container, low16, typecode, &typecode2); - if (container2 != container) { - container_free(container, typecode); - replace_container(r, context->leaf, container2, typecode2); + // Flip the range (including non-existent containers!) between min and + // max. + for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; + high48_bits++) { + uint8_t current_high48_key[ART_KEY_BYTES]; + split_key(high48_bits << 16, current_high48_key); + + uint32_t min_container = 0; + if (high48_bits == min_high48_bits) { + min_container = min_low16; } - if (!container_nonzero_cardinality(container2, typecode2)) { - container_free(container2, typecode2); - leaf_t leaf; - bool erased = art_erase(art, high48, (art_val_t *)&leaf); - assert(erased); - (void)erased; - remove_container(r, leaf); + uint32_t max_container = 0xFFFF + 1; // Exclusive range. + if (high48_bits == max_high48_bits) { + max_container = max_low16 + 1; // Exclusive. } - } else { - // We're not positioned anywhere yet or the high bits of the key - // differ. 
- leaf_t *leaf = (leaf_t *)art_find(art, high48); - containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); - context->leaf = leaf; - memcpy(context->high_bytes, high48, ART_KEY_BYTES); - } -} -void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, - const uint64_t *vals) { - if (n_args == 0) { - return; - } - const uint64_t *end = vals + n_args; - roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; - for (const uint64_t *current_val = vals; current_val != end; - current_val++) { - roaring64_bitmap_remove_bulk(r, &context, *current_val); + roaring64_flip_leaf(r1, r2, current_high48_key, min_container, + max_container); } -} -static inline void remove_range_closed_at(roaring64_bitmap_t *r, art_t *art, - uint8_t *high48, uint16_t min, - uint16_t max) { - leaf_t *leaf = (leaf_t *)art_find(art, high48); - if (leaf == NULL) { - return; - } - uint8_t typecode = get_typecode(*leaf); - container_t *container = get_container(r, *leaf); - uint8_t typecode2; - container_t *container2 = - container_remove_range(container, typecode, min, max, &typecode2); - if (container2 != container) { - container_free(container, typecode); - if (container2 != NULL) { - replace_container(r, leaf, container2, typecode2); - } else { - bool erased = art_erase(art, high48, NULL); - assert(erased); - (void)erased; - remove_container(r, *leaf); - } + // Copy the containers after max unchanged. 
+ it = art_upper_bound((art_t *)&r1->art, max_high48_key); + while (it.value != NULL) { + leaf_t leaf1 = (leaf_t)*it.value; + uint8_t typecode2 = get_typecode(leaf1); + container_t *container2 = get_copy_of_container( + get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, it.key, (art_val_t)leaf2); + art_iterator_next(&it); } + + return r2; } -void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min, +void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min, uint64_t max) { if (min >= max) { return; } - roaring64_bitmap_remove_range_closed(r, min, max - 1); + roaring64_bitmap_flip_closed_inplace(r, min, max - 1); } -void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, +void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, uint64_t max) { if (min > max) { return; } + uint16_t min_low16 = (uint16_t)min; + uint16_t max_low16 = (uint16_t)max; + uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; + uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; - art_t *art = &r->art; - uint8_t min_high48[ART_KEY_BYTES]; - uint16_t min_low16 = split_key(min, min_high48); - uint8_t max_high48[ART_KEY_BYTES]; - uint16_t max_low16 = split_key(max, max_high48); - if (compare_high48(min_high48, max_high48) == 0) { - // Only remove a range within one container. - remove_range_closed_at(r, art, min_high48, min_low16, max_low16); - return; - } + // Flip the range (including non-existent containers!) between min and + // max. + for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; + high48_bits++) { + uint8_t current_high48_key[ART_KEY_BYTES]; + split_key(high48_bits << 16, current_high48_key); - // Remove a range across containers. Remove intermediate containers - // entirely. 
- remove_range_closed_at(r, art, min_high48, min_low16, 0xffff); + uint32_t min_container = 0; + if (high48_bits == min_high48_bits) { + min_container = min_low16; + } + uint32_t max_container = 0xFFFF + 1; // Exclusive range. + if (high48_bits == max_high48_bits) { + max_container = max_low16 + 1; // Exclusive. + } - art_iterator_t it = art_upper_bound(art, min_high48); - while (it.value != NULL && art_compare_keys(it.key, max_high48) < 0) { - leaf_t leaf; - bool erased = art_iterator_erase(&it, (art_val_t *)&leaf); - assert(erased); - (void)erased; - container_free(get_container(r, leaf), get_typecode(leaf)); - remove_container(r, leaf); + roaring64_flip_leaf_inplace(r, current_high48_key, min_container, + max_container); } - remove_range_closed_at(r, art, max_high48, 0, max_low16); -} - -void roaring64_bitmap_clear(roaring64_bitmap_t *r) { - roaring64_bitmap_remove_range_closed(r, 0, UINT64_MAX); } -uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { +// Returns the number of distinct high 32-bit entries in the bitmap. 
+static inline uint64_t count_high32(const roaring64_bitmap_t *r) { art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - uint64_t cardinality = 0; + uint64_t high32_count = 0; + uint32_t prev_high32 = 0; while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - cardinality += container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); + if (high32_count == 0 || prev_high32 != current_high32) { + high32_count++; + prev_high32 = current_high32; + } art_iterator_next(&it); } - return cardinality; + return high32_count; } -uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, - uint64_t min, uint64_t max) { - if (min >= max) { - return 0; - } - // Convert to a closed range - // No underflow here: passing the above condition implies min < max, so - // there is a number less than max - return roaring64_bitmap_range_closed_cardinality(r, min, max - 1); +// Frees the (32-bit!) bitmap without freeing the containers. +static inline void roaring_bitmap_free_without_containers(roaring_bitmap_t *r) { + ra_clear_without_containers(&r->high_low_container); + roaring_free(r); } -uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, - uint64_t min, uint64_t max) { - if (min > max) { - return 0; - } +size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { + // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + size_t size = 0; - uint64_t cardinality = 0; - uint8_t min_high48[ART_KEY_BYTES]; - uint16_t min_low16 = split_key(min, min_high48); - uint8_t max_high48[ART_KEY_BYTES]; - uint16_t max_low16 = split_key(max, max_high48); + // Write as uint64 the distinct number of "buckets", where a bucket is + // defined as the most significant 32 bits of an element. 
+ uint64_t high32_count; + size += sizeof(high32_count); - art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint32_t prev_high32 = 0; + roaring_bitmap_t *bitmap32 = NULL; + + // Iterate through buckets ordered by increasing keys. while (it.value != NULL) { - int max_compare_result = compare_high48(it.key, max_high48); - if (max_compare_result > 0) { - // We're outside the range. - break; - } + uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); + if (bitmap32 == NULL || prev_high32 != current_high32) { + if (bitmap32 != NULL) { + // Write as uint32 the most significant 32 bits of the + // bucket. + size += sizeof(prev_high32); - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - container_t *container = get_container(r, leaf); - if (max_compare_result == 0) { - // We're at the max high key, add only the range up to the low - // 16 bits of max. - cardinality += container_rank(container, typecode, max_low16); - } else { - // We're not yet at the max high key, add the full container - // range. - cardinality += container_get_cardinality(container, typecode); - } - if (compare_high48(it.key, min_high48) == 0 && min_low16 > 0) { - // We're at the min high key, remove the range up to the low 16 - // bits of min. - cardinality -= container_rank(container, typecode, min_low16 - 1); + // Write the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. + size += roaring_bitmap_portable_size_in_bytes(bitmap32); + roaring_bitmap_free_without_containers(bitmap32); + } + + // Start a new 32-bit bitmap with the current high 32 bits. 
+ art_iterator_t it2 = it; + uint32_t containers_with_high32 = 0; + while (it2.value != NULL && (uint32_t)(combine_key(it2.key, 0) >> + 32) == current_high32) { + containers_with_high32++; + art_iterator_next(&it2); + } + bitmap32 = + roaring_bitmap_create_with_capacity(containers_with_high32); + + prev_high32 = current_high32; } + leaf_t leaf = (leaf_t)*it.value; + ra_append(&bitmap32->high_low_container, + (uint16_t)(current_high32 >> 16), get_container(r, leaf), + get_typecode(leaf)); art_iterator_next(&it); } - return cardinality; -} -bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) { - return art_is_empty(&r->art); -} + if (bitmap32 != NULL) { + // Write as uint32 the most significant 32 bits of the bucket. + size += sizeof(prev_high32); -uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - if (it.value == NULL) { - return UINT64_MAX; + // Write the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. 
+ size += roaring_bitmap_portable_size_in_bytes(bitmap32); + roaring_bitmap_free_without_containers(bitmap32); } - leaf_t leaf = (leaf_t)*it.value; - return combine_key( - it.key, container_minimum(get_container(r, leaf), get_typecode(leaf))); + + return size; } -uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/false); - if (it.value == NULL) { +size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, + char *buf) { + // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + if (buf == NULL) { return 0; } - leaf_t leaf = (leaf_t)*it.value; - return combine_key( - it.key, container_maximum(get_container(r, leaf), get_typecode(leaf))); -} + const char *initial_buf = buf; -bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - bool has_run_container = false; + // Write as uint64 the distinct number of "buckets", where a bucket is + // defined as the most significant 32 bits of an element. + uint64_t high32_count = count_high32(r); + memcpy(buf, &high32_count, sizeof(high32_count)); + buf += sizeof(high32_count); + + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + uint32_t prev_high32 = 0; + roaring_bitmap_t *bitmap32 = NULL; + + // Iterate through buckets ordered by increasing keys. while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - uint8_t new_typecode; - // We don't need to free the existing container if a new one was - // created, convert_run_optimize does that internally. 
- container_t *new_container = convert_run_optimize( - get_container(r, *leaf), get_typecode(*leaf), &new_typecode); - replace_container(r, leaf, new_container, new_typecode); - has_run_container |= new_typecode == RUN_CONTAINER_TYPE; - art_iterator_next(&it); - } - return has_run_container; -} + uint64_t current_high48 = combine_key(it.key, 0); + uint32_t current_high32 = (uint32_t)(current_high48 >> 32); + if (bitmap32 == NULL || prev_high32 != current_high32) { + if (bitmap32 != NULL) { + // Write as uint32 the most significant 32 bits of the + // bucket. + memcpy(buf, &prev_high32, sizeof(prev_high32)); + buf += sizeof(prev_high32); -static void move_to_shrink(roaring64_bitmap_t *r, leaf_t *leaf) { - uint64_t idx = get_index(*leaf); - if (idx < r->first_free) { - return; - } - r->containers[r->first_free] = get_container(r, *leaf); - r->containers[idx] = NULL; - *leaf = create_leaf(r->first_free, get_typecode(*leaf)); - r->first_free = next_free_container_idx(r); -} + // Write the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. + buf += roaring_bitmap_portable_serialize(bitmap32, buf); + roaring_bitmap_free_without_containers(bitmap32); + } -static inline bool is_shrunken(const roaring64_bitmap_t *r) { - return art_is_shrunken(&r->art) && r->first_free == r->capacity; -} + // Start a new 32-bit bitmap with the current high 32 bits. 
+ art_iterator_t it2 = it; + uint32_t containers_with_high32 = 0; + while (it2.value != NULL && + (uint32_t)combine_key(it2.key, 0) == current_high32) { + containers_with_high32++; + art_iterator_next(&it2); + } + bitmap32 = + roaring_bitmap_create_with_capacity(containers_with_high32); -size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { - size_t freed = art_shrink_to_fit(&r->art); - art_iterator_t it = art_init_iterator(&r->art, true); - while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - freed += container_shrink_to_fit(get_container(r, *leaf), - get_typecode(*leaf)); - move_to_shrink(r, leaf); + prev_high32 = current_high32; + } + leaf_t leaf = (leaf_t)*it.value; + ra_append(&bitmap32->high_low_container, + (uint16_t)(current_high48 >> 16), get_container(r, leaf), + get_typecode(leaf)); art_iterator_next(&it); } - if (is_shrunken(r)) { - return freed; - } - uint64_t new_capacity = r->first_free; - if (new_capacity < r->capacity) { - r->containers = (container_t **)roaring_realloc( - r->containers, new_capacity * sizeof(container_t *)); - freed += (r->capacity - new_capacity) * sizeof(container_t *); - r->capacity = new_capacity; - } - return freed; -} -/** - * (For advanced users.) - * Collect statistics about the bitmap - */ -void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, - roaring64_statistics_t *stat) { - memset(stat, 0, sizeof(*stat)); - stat->min_value = roaring64_bitmap_minimum(r); - stat->max_value = roaring64_bitmap_maximum(r); + if (bitmap32 != NULL) { + // Write as uint32 the most significant 32 bits of the bucket. 
+ memcpy(buf, &prev_high32, sizeof(prev_high32)); + buf += sizeof(prev_high32); - art_iterator_t it = art_init_iterator((art_t *)&r->art, true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - stat->n_containers++; - uint8_t truetype = - get_container_type(get_container(r, leaf), get_typecode(leaf)); - uint32_t card = container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); - uint32_t sbytes = - container_size_in_bytes(get_container(r, leaf), get_typecode(leaf)); - stat->cardinality += card; - switch (truetype) { - case BITSET_CONTAINER_TYPE: - stat->n_bitset_containers++; - stat->n_values_bitset_containers += card; - stat->n_bytes_bitset_containers += sbytes; - break; - case ARRAY_CONTAINER_TYPE: - stat->n_array_containers++; - stat->n_values_array_containers += card; - stat->n_bytes_array_containers += sbytes; - break; - case RUN_CONTAINER_TYPE: - stat->n_run_containers++; - stat->n_values_run_containers += card; - stat->n_bytes_run_containers += sbytes; - break; - default: - assert(false); - roaring_unreachable; - } - art_iterator_next(&it); + // Write the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. 
+ buf += roaring_bitmap_portable_serialize(bitmap32, buf); + roaring_bitmap_free_without_containers(bitmap32); } -} -static bool roaring64_leaf_internal_validate(const art_val_t val, - const char **reason, - void *context) { - leaf_t leaf = (leaf_t)val; - roaring64_bitmap_t *r = (roaring64_bitmap_t *)context; - return container_internal_validate(get_container(r, leaf), - get_typecode(leaf), reason); + return buf - initial_buf; } -bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, - const char **reason) { - return art_internal_validate(&r->art, reason, - roaring64_leaf_internal_validate, (void *)r); -} +size_t roaring64_bitmap_portable_deserialize_size(const char *buf, + size_t maxbytes) { + // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + if (buf == NULL) { + return 0; + } + size_t read_bytes = 0; -bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + // Read as uint64 the distinct number of "buckets", where a bucket is + // defined as the most significant 32 bits of an element. + uint64_t buckets; + if (read_bytes + sizeof(buckets) > maxbytes) { + return 0; + } + memcpy(&buckets, buf, sizeof(buckets)); + buf += sizeof(buckets); + read_bytes += sizeof(buckets); - while (it1.value != NULL && it2.value != NULL) { - if (compare_high48(it1.key, it2.key) != 0) { - return false; + // Buckets should be 32 bits with 4 bits of zero padding. + if (buckets > UINT32_MAX) { + return 0; + } + + // Iterate through buckets ordered by increasing keys. + for (uint64_t bucket = 0; bucket < buckets; ++bucket) { + // Read as uint32 the most significant 32 bits of the bucket. 
+ uint32_t high32; + if (read_bytes + sizeof(high32) > maxbytes) { + return 0; } - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - if (!container_equals(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2))) { - return false; + buf += sizeof(high32); + read_bytes += sizeof(high32); + + // Read the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. + size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( + buf, maxbytes - read_bytes); + if (bitmap32_size == 0) { + return 0; } - art_iterator_next(&it1); - art_iterator_next(&it2); + buf += bitmap32_size; + read_bytes += bitmap32_size; } - return it1.value == NULL && it2.value == NULL; + return read_bytes; } -bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); +roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( + const char *buf, size_t maxbytes) { + // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations + if (buf == NULL) { + return NULL; + } + size_t read_bytes = 0; - while (it1.value != NULL) { - bool it2_present = it2.value != NULL; + // Read as uint64 the distinct number of "buckets", where a bucket is + // defined as the most significant 32 bits of an element. 
+ uint64_t buckets; + if (read_bytes + sizeof(buckets) > maxbytes) { + return NULL; + } + memcpy(&buckets, buf, sizeof(buckets)); + buf += sizeof(buckets); + read_bytes += sizeof(buckets); - int compare_result = 0; - if (it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - if (!container_is_subset( - get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2))) { - return false; - } - art_iterator_next(&it1); - art_iterator_next(&it2); - } - } - if (!it2_present || compare_result < 0) { - return false; - } else if (compare_result > 0) { - art_iterator_lower_bound(&it2, it1.key); - } + // Buckets should be 32 bits with 4 bits of zero padding. + if (buckets > UINT32_MAX) { + return NULL; } - return true; -} -bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - return roaring64_bitmap_get_cardinality(r1) < - roaring64_bitmap_get_cardinality(r2) && - roaring64_bitmap_is_subset(r1, r2); -} + roaring64_bitmap_t *r = roaring64_bitmap_create(); + // Iterate through buckets ordered by increasing keys. + int64_t previous_high32 = -1; + for (uint64_t bucket = 0; bucket < buckets; ++bucket) { + // Read as uint32 the most significant 32 bits of the bucket. + uint32_t high32; + if (read_bytes + sizeof(high32) > maxbytes) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&high32, buf, sizeof(high32)); + buf += sizeof(high32); + read_bytes += sizeof(high32); + // High 32 bits must be strictly increasing. 
+ if (high32 <= previous_high32) { + roaring64_bitmap_free(r); + return NULL; + } + previous_high32 = high32; -roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - roaring64_bitmap_t *result = roaring64_bitmap_create(); + // Read the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. + size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( + buf, maxbytes - read_bytes); + if (bitmap32_size == 0) { + roaring64_bitmap_free(r); + return NULL; + } - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + roaring_bitmap_t *bitmap32 = roaring_bitmap_portable_deserialize_safe( + buf, maxbytes - read_bytes); + if (bitmap32 == NULL) { + roaring64_bitmap_free(r); + return NULL; + } + buf += bitmap32_size; + read_bytes += bitmap32_size; - while (it1.value != NULL && it2.value != NULL) { - // Cases: - // 1. it1 < it2 -> it1++ - // 2. it1 == it1 -> output it1 & it2, it1++, it2++ - // 3. it1 > it2 -> it2++ - int compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = - container_and(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - if (container_nonzero_cardinality(result_container, - result_typecode)) { - leaf_t result_leaf = - add_container(result, result_container, result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); - } else { - container_free(result_container, result_typecode); + // While we don't attempt to validate much, we must ensure that there + // is no duplication in the high 48 bits - inserting into the ART + // assumes (or UB) no duplicate keys. 
The top 32 bits must be unique + // because we check for strict increasing values of high32, but we + // must also ensure the top 16 bits within each 32-bit bitmap are also + // at least unique (we ensure they're strictly increasing as well, + // which they must be for a _valid_ bitmap, since it's cheaper to check) + int32_t last_bitmap_key = -1; + for (int i = 0; i < bitmap32->high_low_container.size; i++) { + uint16_t key = bitmap32->high_low_container.keys[i]; + if (key <= last_bitmap_key) { + roaring_bitmap_free(bitmap32); + roaring64_bitmap_free(r); + return NULL; } - art_iterator_next(&it1); - art_iterator_next(&it2); - } else if (compare_result < 0) { - // Case 1: it1 is before it2. - art_iterator_lower_bound(&it1, it2.key); - } else { - // Case 3: it2 is before it1. - art_iterator_lower_bound(&it2, it1.key); + last_bitmap_key = key; } + + // Insert all containers of the 32-bit bitmap into the 64-bit bitmap. + move_from_roaring32_offset(r, bitmap32, high32); + roaring_bitmap_free(bitmap32); } - return result; + return r; } -uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - uint64_t result = 0; - - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - - while (it1.value != NULL && it2.value != NULL) { - // Cases: - // 1. it1 < it2 -> it1++ - // 2. it1 == it1 -> output cardinaltiy it1 & it2, it1++, it2++ - // 3. it1 > it2 -> it2++ - int compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - result += container_and_cardinality( - get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2)); - art_iterator_next(&it1); - art_iterator_next(&it2); - } else if (compare_result < 0) { - // Case 1: it1 is before it2. 
- art_iterator_lower_bound(&it1, it2.key); - } else { - // Case 3: it2 is before it1. - art_iterator_lower_bound(&it2, it1.key); +// Returns an "element count" for the given container. This has a different +// meaning for each container type, but the purpose is the minimal information +// required to serialize the container metadata. +static inline uint32_t container_get_element_count(const container_t *c, + uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + return ((bitset_container_t *)c)->cardinality; + } + case ARRAY_CONTAINER_TYPE: { + return ((array_container_t *)c)->cardinality; + } + case RUN_CONTAINER_TYPE: { + return ((run_container_t *)c)->n_runs; + } + default: { + assert(false); + roaring_unreachable; + return 0; } } - return result; } -// Inplace and (modifies its first argument). -void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - if (r1 == r2) { - return; +static inline size_t container_get_frozen_size(const container_t *c, + uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + } + case ARRAY_CONTAINER_TYPE: { + return container_get_element_count(c, typecode) * sizeof(uint16_t); + } + case RUN_CONTAINER_TYPE: { + return container_get_element_count(c, typecode) * sizeof(rle16_t); + } + default: { + assert(false); + roaring_unreachable; + return 0; + } } - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - - while (it1.value != NULL) { - // Cases: - // 1. !it2_present -> erase it1 - // 2. it2_present - // a. it1 < it2 -> erase it1 - // b. it1 == it2 -> output it1 & it2, it1++, it2++ - // c. 
it1 > it2 -> it2++ - bool it2_present = it2.value != NULL; - int compare_result = 0; - if (it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2a: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; +} - // We do the computation "in place" only when c1 is not a - // shared container. Rationale: using a shared container - // safely with in place computation would require making a - // copy and then doing the computation in place which is - // likely less efficient than avoiding in place entirely and - // always generating a new container. - uint8_t typecode = get_typecode(*leaf1); - container_t *container = get_container(r1, *leaf1); - uint8_t typecode2; - container_t *container2; - if (typecode == SHARED_CONTAINER_TYPE) { - container2 = container_and(container, typecode, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); - } else { - container2 = container_iand( - container, typecode, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); - } +uint64_t align_size(uint64_t size, uint64_t alignment) { + return (size + alignment - 1) & ~(alignment - 1); +} - if (container2 != container) { - container_free(container, typecode); - } - if (!container_nonzero_cardinality(container2, typecode2)) { - container_free(container2, typecode2); - art_iterator_erase(&it1, NULL); - remove_container(r1, *leaf1); - } else { - if (container2 != container) { - replace_container(r1, leaf1, container2, typecode2); - } - // Only advance the iterator if we didn't delete the - // leaf, as erasing advances by itself. - art_iterator_next(&it1); - } - art_iterator_next(&it2); - } - } +size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r) { + if (!is_shrunken(r)) { + return 0; + } + // Flags. + uint64_t size = sizeof(r->flags); + // Container count. + size += sizeof(r->capacity); + // Container element counts. 
+ size += r->capacity * sizeof(uint16_t); + // Total container sizes. + size += 3 * sizeof(uint64_t); + // ART (8 byte aligned). + size = align_size(size, 8); + size += art_size_in_bytes(&r->art); - if (!it2_present || compare_result < 0) { - // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t leaf; - bool erased = art_iterator_erase(&it1, (art_val_t *)&leaf); - assert(erased); - (void)erased; - container_free(get_container(r1, leaf), get_typecode(leaf)); - remove_container(r1, leaf); - } else if (compare_result > 0) { - // Case 2c: it1 is after it2. - art_iterator_lower_bound(&it2, it1.key); - } + uint64_t total_sizes[4] = + CROARING_ZERO_INITIALIZER; // Indexed by typecode. + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + total_sizes[typecode] += + container_get_frozen_size(get_container(r, leaf), typecode); + art_iterator_next(&it); } + // Containers (aligned). + size = align_size(size, CROARING_BITSET_ALIGNMENT); + size += total_sizes[BITSET_CONTAINER_TYPE]; + size = align_size(size, alignof(rle16_t)); + size += total_sizes[ARRAY_CONTAINER_TYPE]; + size = align_size(size, alignof(uint16_t)); + size += total_sizes[RUN_CONTAINER_TYPE]; + // Padding to make overall size a multiple of required alignment. + size = align_size(size, CROARING_BITSET_ALIGNMENT); + return size; } -bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - bool intersect = false; - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); - - while (it1.value != NULL && it2.value != NULL) { - // Cases: - // 1. it1 < it2 -> it1++ - // 2. it1 == it1 -> intersect |= it1 & it2, it1++, it2++ - // 3. 
it1 > it2 -> it2++ - int compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - intersect |= container_intersect( - get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2)); - art_iterator_next(&it1); - art_iterator_next(&it2); - } else if (compare_result < 0) { - // Case 1: it1 is before it2. - art_iterator_lower_bound(&it1, it2.key); - } else { - // Case 3: it2 is before it1. - art_iterator_lower_bound(&it2, it1.key); +static inline void container_frozen_serialize(const container_t *container, + uint8_t typecode, + uint64_t **bitsets, + uint16_t **arrays, + rle16_t **runs) { + size_t size = container_get_frozen_size(container, typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *bitset = (bitset_container_t *)container; + memcpy(*bitsets, bitset->words, size); + *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; + break; + } + case ARRAY_CONTAINER_TYPE: { + array_container_t *array = (array_container_t *)container; + memcpy(*arrays, array->array, size); + *arrays += container_get_element_count(container, typecode); + break; + } + case RUN_CONTAINER_TYPE: { + run_container_t *run = (run_container_t *)container; + memcpy(*runs, run->runs, size); + *runs += container_get_element_count(container, typecode); + break; + } + default: { + assert(false); + roaring_unreachable; } } - return intersect; } -bool roaring64_bitmap_intersect_with_range(const roaring64_bitmap_t *r, - uint64_t min, uint64_t max) { - if (min >= max) { - return false; +static inline char *pad_align(char *buf, const char *initial_buf, + size_t alignment) { + uint64_t buf_size = buf - initial_buf; + uint64_t pad = align_size(buf_size, alignment) - buf_size; + memset(buf, 0, pad); + return buf + pad; +} + +size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, + char 
*buf) { + if (buf == NULL) { + return 0; } - roaring64_iterator_t it; - roaring64_iterator_init_at(r, &it, /*first=*/true); - if (!roaring64_iterator_move_equalorlarger(&it, min)) { - return false; + if (!is_shrunken(r)) { + return 0; } - return roaring64_iterator_has_value(&it) && - roaring64_iterator_value(&it) < max; -} + const char *initial_buf = buf; -double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - uint64_t c1 = roaring64_bitmap_get_cardinality(r1); - uint64_t c2 = roaring64_bitmap_get_cardinality(r2); - uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); - return (double)inter / (double)(c1 + c2 - inter); -} + // Flags. + memcpy(buf, &r->flags, sizeof(r->flags)); + buf += sizeof(r->flags); -roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - roaring64_bitmap_t *result = roaring64_bitmap_create(); + // Container count. + memcpy(buf, &r->capacity, sizeof(r->capacity)); + buf += sizeof(r->capacity); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + // Container element counts. + uint64_t total_sizes[4] = + CROARING_ZERO_INITIALIZER; // Indexed by typecode. + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); - while (it1.value != NULL || it2.value != NULL) { - bool it1_present = it1.value != NULL; - bool it2_present = it2.value != NULL; + uint32_t elem_count = container_get_element_count(container, typecode); + uint16_t compressed_elem_count = (uint16_t)(elem_count - 1); + memcpy(buf, &compressed_elem_count, sizeof(compressed_elem_count)); + buf += sizeof(compressed_elem_count); - // Cases: - // 1. it1_present && !it2_present -> output it1, it1++ - // 2. 
!it1_present && it2_present -> output it2, it2++ - // 3. it1_present && it2_present - // a. it1 < it2 -> output it1, it1++ - // b. it1 == it2 -> output it1 | it2, it1++, it2++ - // c. it1 > it2 -> output it2, it2++ - int compare_result = 0; - if (it1_present && it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 3b: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = - container_or(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - leaf_t result_leaf = - add_container(result, result_container, result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); - art_iterator_next(&it1); - art_iterator_next(&it2); - } - } - if ((it1_present && !it2_present) || compare_result < 0) { - // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t result_leaf = - copy_leaf_container(r1, result, (leaf_t)*it1.value); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); - art_iterator_next(&it1); - } else if ((!it1_present && it2_present) || compare_result > 0) { - // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t result_leaf = - copy_leaf_container(r2, result, (leaf_t)*it2.value); - art_insert(&result->art, it2.key, (art_val_t)result_leaf); - art_iterator_next(&it2); - } + total_sizes[typecode] += container_get_frozen_size(container, typecode); + art_iterator_next(&it); } - return result; -} -uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - uint64_t c1 = roaring64_bitmap_get_cardinality(r1); - uint64_t c2 = roaring64_bitmap_get_cardinality(r2); - uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); - return c1 + c2 - inter; -} + // Total container sizes. 
+ memcpy(buf, &(total_sizes[BITSET_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); + memcpy(buf, &(total_sizes[RUN_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); + memcpy(buf, &(total_sizes[ARRAY_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); -void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - if (r1 == r2) { - return; - } - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + // ART. + buf = pad_align(buf, initial_buf, 8); + buf += art_serialize(&r->art, buf); - while (it1.value != NULL || it2.value != NULL) { - bool it1_present = it1.value != NULL; - bool it2_present = it2.value != NULL; + // Containers (aligned). + // Runs before arrays as run elements are larger than array elements and + // smaller than bitset elements. + buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); + uint64_t *bitsets = (uint64_t *)buf; + buf += total_sizes[BITSET_CONTAINER_TYPE]; + buf = pad_align(buf, initial_buf, alignof(rle16_t)); + rle16_t *runs = (rle16_t *)buf; + buf += total_sizes[RUN_CONTAINER_TYPE]; + buf = pad_align(buf, initial_buf, alignof(uint16_t)); + uint16_t *arrays = (uint16_t *)buf; + buf += total_sizes[ARRAY_CONTAINER_TYPE]; - // Cases: - // 1. it1_present && !it2_present -> it1++ - // 2. !it1_present && it2_present -> add it2, it2++ - // 3. it1_present && it2_present - // a. it1 < it2 -> it1++ - // b. it1 == it2 -> it1 | it2, it1++, it2++ - // c. it1 > it2 -> add it2, it2++ - int compare_result = 0; - if (it1_present && it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 3b: iterators at the same high key position. 
- leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t typecode1 = get_typecode(*leaf1); - container_t *container1 = get_container(r1, *leaf1); - uint8_t typecode2; - container_t *container2; - if (get_typecode(*leaf1) == SHARED_CONTAINER_TYPE) { - container2 = container_or(container1, typecode1, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); - } else { - container2 = container_ior(container1, typecode1, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); - } - if (container2 != container1) { - container_free(container1, typecode1); - replace_container(r1, leaf1, container2, typecode2); - } - art_iterator_next(&it1); - art_iterator_next(&it2); - } - } - if ((it1_present && !it2_present) || compare_result < 0) { - // Cases 1 and 3a: it1 is the only iterator or is before it2. - art_iterator_next(&it1); - } else if ((!it1_present && it2_present) || compare_result > 0) { - // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t result_leaf = - copy_leaf_container(r2, r1, (leaf_t)*it2.value); - art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); - art_iterator_next(&it2); - } + it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); + container_frozen_serialize(container, typecode, &bitsets, &arrays, + &runs); + art_iterator_next(&it); } -} - -roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + // Padding to make overall size a multiple of required alignment. 
+ buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); - while (it1.value != NULL || it2.value != NULL) { - bool it1_present = it1.value != NULL; - bool it2_present = it2.value != NULL; + return buf - initial_buf; +} - // Cases: - // 1. it1_present && !it2_present -> output it1, it1++ - // 2. !it1_present && it2_present -> output it2, it2++ - // 3. it1_present && it2_present - // a. it1 < it2 -> output it1, it1++ - // b. it1 == it2 -> output it1 ^ it2, it1++, it2++ - // c. it1 > it2 -> output it2, it2++ - int compare_result = 0; - if (it1_present && it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 3b: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = - container_xor(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - if (container_nonzero_cardinality(result_container, - result_typecode)) { - leaf_t result_leaf = add_container(result, result_container, - result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); - } else { - container_free(result_container, result_typecode); - } - art_iterator_next(&it1); - art_iterator_next(&it2); - } +static container_t *container_frozen_view(uint8_t typecode, uint32_t elem_count, + const uint64_t **bitsets, + const uint16_t **arrays, + const rle16_t **runs) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *c = (bitset_container_t *)roaring_malloc( + sizeof(bitset_container_t)); + c->cardinality = elem_count; + c->words = (uint64_t *)*bitsets; + *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; + return (container_t *)c; + } + case ARRAY_CONTAINER_TYPE: { + array_container_t *c = + (array_container_t *)roaring_malloc(sizeof(array_container_t)); + c->cardinality = elem_count; + c->capacity = elem_count; + c->array = (uint16_t 
*)*arrays; + *arrays += elem_count; + return (container_t *)c; + } + case RUN_CONTAINER_TYPE: { + run_container_t *c = + (run_container_t *)roaring_malloc(sizeof(run_container_t)); + c->n_runs = elem_count; + c->capacity = elem_count; + c->runs = (rle16_t *)*runs; + *runs += elem_count; + return (container_t *)c; } - if ((it1_present && !it2_present) || compare_result < 0) { - // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t result_leaf = - copy_leaf_container(r1, result, (leaf_t)*it1.value); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); - art_iterator_next(&it1); - } else if ((!it1_present && it2_present) || compare_result > 0) { - // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t result_leaf = - copy_leaf_container(r2, result, (leaf_t)*it2.value); - art_insert(&result->art, it2.key, (art_val_t)result_leaf); - art_iterator_next(&it2); + default: { + assert(false); + roaring_unreachable; + return NULL; } } - return result; } -uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - uint64_t c1 = roaring64_bitmap_get_cardinality(r1); - uint64_t c2 = roaring64_bitmap_get_cardinality(r2); - uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); - return c1 + c2 - 2 * inter; -} +roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, + size_t maxbytes) { + if (buf == NULL) { + return NULL; + } -void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - assert(r1 != r2); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + roaring64_bitmap_t *r = roaring64_bitmap_create(); - while (it1.value != NULL || it2.value != NULL) { - bool it1_present = it1.value != NULL; - bool it2_present = it2.value != NULL; + // Flags. 
+ if (maxbytes < sizeof(r->flags)) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&r->flags, buf, sizeof(r->flags)); + buf += sizeof(r->flags); + maxbytes -= sizeof(r->flags); + r->flags |= ROARING_FLAG_FROZEN; - // Cases: - // 1. it1_present && !it2_present -> it1++ - // 2. !it1_present && it2_present -> add it2, it2++ - // 3. it1_present && it2_present - // a. it1 < it2 -> it1++ - // b. it1 == it2 -> it1 ^ it2, it1++, it2++ - // c. it1 > it2 -> add it2, it2++ - int compare_result = 0; - if (it1_present && it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 3b: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t typecode1 = get_typecode(*leaf1); - container_t *container1 = get_container(r1, *leaf1); - uint8_t typecode2; - container_t *container2; - if (typecode1 == SHARED_CONTAINER_TYPE) { - container2 = container_xor(container1, typecode1, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); - if (container2 != container1) { - // We only free when doing container_xor, not - // container_ixor, as ixor frees the original - // internally. - container_free(container1, typecode1); - } - } else { - container2 = container_ixor( - container1, typecode1, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); - } + // Container count. 
+ if (maxbytes < sizeof(r->capacity)) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&r->capacity, buf, sizeof(r->capacity)); + buf += sizeof(r->capacity); + maxbytes -= sizeof(r->capacity); - if (!container_nonzero_cardinality(container2, typecode2)) { - container_free(container2, typecode2); - bool erased = art_iterator_erase(&it1, NULL); - assert(erased); - (void)erased; - remove_container(r1, *leaf1); - } else { - if (container2 != container1) { - replace_container(r1, leaf1, container2, typecode2); - } - // Only advance the iterator if we didn't delete the - // leaf, as erasing advances by itself. - art_iterator_next(&it1); - } - art_iterator_next(&it2); - } - } - if ((it1_present && !it2_present) || compare_result < 0) { - // Cases 1 and 3a: it1 is the only iterator or is before it2. - art_iterator_next(&it1); - } else if ((!it1_present && it2_present) || compare_result > 0) { - // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t result_leaf = - copy_leaf_container(r2, r1, (leaf_t)*it2.value); - if (it1_present) { - art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); - art_iterator_next(&it1); - } else { - art_insert(&r1->art, it2.key, (art_val_t)result_leaf); - } - art_iterator_next(&it2); - } + r->containers = + (container_t *)roaring_malloc(r->capacity * sizeof(container_t *)); + + // Container element counts. + if (maxbytes < r->capacity * sizeof(uint16_t)) { + roaring64_bitmap_free(r); + return NULL; } -} + const char *elem_counts = buf; + buf += r->capacity * sizeof(uint16_t); + maxbytes -= r->capacity * sizeof(uint16_t); -roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - roaring64_bitmap_t *result = roaring64_bitmap_create(); + // Total container sizes. 
+ uint64_t total_sizes[4]; + if (maxbytes < sizeof(uint64_t) * 3) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&(total_sizes[BITSET_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); + memcpy(&(total_sizes[RUN_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); + memcpy(&(total_sizes[ARRAY_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + // ART (8 byte aligned). + buf = CROARING_ALIGN_BUF(buf, 8); + size_t art_size = art_frozen_view(buf, maxbytes, &r->art); + if (art_size == 0) { + roaring64_bitmap_free(r); + return NULL; + } + buf += art_size; + maxbytes -= art_size; - while (it1.value != NULL) { - // Cases: - // 1. it1_present && !it2_present -> output it1, it1++ - // 2. it1_present && it2_present - // a. it1 < it2 -> output it1, it1++ - // b. it1 == it2 -> output it1 - it2, it1++, it2++ - // c. it1 > it2 -> it2++ - bool it2_present = it2.value != NULL; - int compare_result = 0; - if (it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2b: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = container_andnot( - get_container(r1, *leaf1), get_typecode(*leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); + // Containers (aligned). 
+ const char *before_containers = buf; + buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); + const uint64_t *bitsets = (const uint64_t *)buf; + buf += total_sizes[BITSET_CONTAINER_TYPE]; + buf = CROARING_ALIGN_BUF(buf, alignof(rle16_t)); + const rle16_t *runs = (const rle16_t *)buf; + buf += total_sizes[RUN_CONTAINER_TYPE]; + buf = CROARING_ALIGN_BUF(buf, alignof(uint16_t)); + const uint16_t *arrays = (const uint16_t *)buf; + buf += total_sizes[ARRAY_CONTAINER_TYPE]; + if (maxbytes < (uint64_t)(buf - before_containers)) { + roaring64_bitmap_free(r); + return NULL; + } + maxbytes -= buf - before_containers; - if (container_nonzero_cardinality(result_container, - result_typecode)) { - leaf_t result_leaf = add_container(result, result_container, - result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); - } else { - container_free(result_container, result_typecode); - } - art_iterator_next(&it1); - art_iterator_next(&it2); - } - } - if (!it2_present || compare_result < 0) { - // Cases 1 and 2a: it1 is the only iterator or is before it2. - leaf_t result_leaf = - copy_leaf_container(r1, result, (leaf_t)*it1.value); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); - art_iterator_next(&it1); - } else if (compare_result > 0) { - // Case 2c: it1 is after it2. - art_iterator_next(&it2); + // Deserialize in ART iteration order. + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + for (size_t i = 0; it.value != NULL; ++i) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + + uint16_t compressed_elem_count; + memcpy(&compressed_elem_count, elem_counts + (i * sizeof(uint16_t)), + sizeof(compressed_elem_count)); + uint32_t elem_count = (uint32_t)(compressed_elem_count) + 1; + + // The container index is unrelated to the iteration order. 
+ uint64_t index = get_index(leaf); + r->containers[index] = container_frozen_view(typecode, elem_count, + &bitsets, &arrays, &runs); + + art_iterator_next(&it); + } + + // Padding to make overall size a multiple of required alignment. + buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); + + return r; +} + +bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, + roaring_iterator64 iterator, void *ptr) { + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + uint64_t high48 = combine_key(it.key, 0); + uint64_t high32 = high48 & 0xFFFFFFFF00000000ULL; + uint32_t low32 = high48; + leaf_t leaf = (leaf_t)*it.value; + if (!container_iterate64(get_container(r, leaf), get_typecode(leaf), + low32, iterator, high32, ptr)) { + return false; } + art_iterator_next(&it); } - return result; + return true; } -uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - uint64_t c1 = roaring64_bitmap_get_cardinality(r1); - uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); - return c1 - inter; +void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r, + uint64_t *out) { + roaring64_iterator_t it; // gets initialized in the next line + roaring64_iterator_init_at(r, &it, /*first=*/true); + roaring64_iterator_read(&it, out, UINT64_MAX); } -void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, - const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); +roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r) { + roaring64_iterator_t *it = + (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); + return roaring64_iterator_init_at(r, it, /*first=*/true); +} - while (it1.value != NULL) { - // Cases: - // 1. it1_present && !it2_present -> it1++ - // 2. it1_present && it2_present - // a. 
it1 < it2 -> it1++ - // b. it1 == it2 -> it1 - it2, it1++, it2++ - // c. it1 > it2 -> it2++ - bool it2_present = it2.value != NULL; - int compare_result = 0; - if (it2_present) { - compare_result = compare_high48(it1.key, it2.key); - if (compare_result == 0) { - // Case 2b: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t typecode1 = get_typecode(*leaf1); - container_t *container1 = get_container(r1, *leaf1); - uint8_t typecode2; - container_t *container2; - if (typecode1 == SHARED_CONTAINER_TYPE) { - container2 = container_andnot( - container1, typecode1, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); - if (container2 != container1) { - // We only free when doing container_andnot, not - // container_iandnot, as iandnot frees the original - // internally. - container_free(container1, typecode1); - } - } else { - container2 = container_iandnot( - container1, typecode1, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); - } +roaring64_iterator_t *roaring64_iterator_create_last( + const roaring64_bitmap_t *r) { + roaring64_iterator_t *it = + (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); + return roaring64_iterator_init_at(r, it, /*first=*/false); +} - if (!container_nonzero_cardinality(container2, typecode2)) { - container_free(container2, typecode2); - bool erased = art_iterator_erase(&it1, NULL); - assert(erased); - (void)erased; - remove_container(r1, *leaf1); - } else { - if (container2 != container1) { - replace_container(r1, leaf1, container2, typecode2); - } - // Only advance the iterator if we didn't delete the - // leaf, as erasing advances by itself. - art_iterator_next(&it1); - } - art_iterator_next(&it2); - } - } - if (!it2_present || compare_result < 0) { - // Cases 1 and 2a: it1 is the only iterator or is before it2. - art_iterator_next(&it1); - } else if (compare_result > 0) { - // Case 2c: it1 is after it2. 
- art_iterator_next(&it2); +void roaring64_iterator_reinit(const roaring64_bitmap_t *r, + roaring64_iterator_t *it) { + roaring64_iterator_init_at(r, it, /*first=*/true); +} + +void roaring64_iterator_reinit_last(const roaring64_bitmap_t *r, + roaring64_iterator_t *it) { + roaring64_iterator_init_at(r, it, /*first=*/false); +} + +roaring64_iterator_t *roaring64_iterator_copy(const roaring64_iterator_t *it) { + roaring64_iterator_t *new_it = + (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); + memcpy(new_it, it, sizeof(*it)); + return new_it; +} + +void roaring64_iterator_free(roaring64_iterator_t *it) { roaring_free(it); } + +bool roaring64_iterator_has_value(const roaring64_iterator_t *it) { + return it->has_value; +} + +uint64_t roaring64_iterator_value(const roaring64_iterator_t *it) { + return it->value; +} + +bool roaring64_iterator_advance(roaring64_iterator_t *it) { + if (it->art_it.value == NULL) { + if (it->saturated_forward) { + return (it->has_value = false); } + roaring64_iterator_init_at(it->r, it, /*first=*/true); + return it->has_value; + } + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = (uint16_t)it->value; + if (container_iterator_next(get_container(it->r, leaf), get_typecode(leaf), + &it->container_it, &low16)) { + it->value = it->high48 | low16; + return (it->has_value = true); } + if (art_iterator_next(&it->art_it)) { + return roaring64_iterator_init_at_leaf_first(it); + } + it->saturated_forward = true; + return (it->has_value = false); } -/** - * Flips the leaf at high48 in the range [min, max), adding the result to - * `r2`. If the high48 key is not found in `r1`, a new container is created. 
- */ -static void roaring64_flip_leaf(const roaring64_bitmap_t *r1, - roaring64_bitmap_t *r2, uint8_t high48[], - uint32_t min, uint32_t max) { - leaf_t *leaf1 = (leaf_t *)art_find(&r1->art, high48); - uint8_t typecode2; - container_t *container2; - if (leaf1 == NULL) { - // No container at this key, create a full container. - container2 = container_range_of_ones(min, max, &typecode2); - } else if (min == 0 && max > 0xFFFF) { - // Flip whole container. - container2 = container_not(get_container(r1, *leaf1), - get_typecode(*leaf1), &typecode2); - } else { - // Partially flip a container. - container2 = - container_not_range(get_container(r1, *leaf1), get_typecode(*leaf1), - min, max, &typecode2); +bool roaring64_iterator_previous(roaring64_iterator_t *it) { + if (it->art_it.value == NULL) { + if (!it->saturated_forward) { + // Saturated backward. + return (it->has_value = false); + } + roaring64_iterator_init_at(it->r, it, /*first=*/false); + return it->has_value; } - if (container_nonzero_cardinality(container2, typecode2)) { - leaf_t leaf2 = add_container(r2, container2, typecode2); - art_insert(&r2->art, high48, (art_val_t)leaf2); - } else { - container_free(container2, typecode2); + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = (uint16_t)it->value; + if (container_iterator_prev(get_container(it->r, leaf), get_typecode(leaf), + &it->container_it, &low16)) { + it->value = it->high48 | low16; + return (it->has_value = true); + } + if (art_iterator_prev(&it->art_it)) { + return roaring64_iterator_init_at_leaf_last(it); } + it->saturated_forward = false; // Saturated backward. + return (it->has_value = false); } -/** - * Flips the leaf at high48 in the range [min, max). If the high48 key is - * not found in the bitmap, a new container is created. Deletes the leaf and - * associated container if the negation results in an empty range. 
- */ -static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], - uint32_t min, uint32_t max) { - leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); - container_t *container2; - uint8_t typecode2; - if (leaf == NULL) { - // No container at this key, insert a full container. - container2 = container_range_of_ones(min, max, &typecode2); - leaf_t new_leaf = add_container(r, container2, typecode2); - art_insert(&r->art, high48, (art_val_t)new_leaf); - return; +bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, + uint64_t val) { + uint8_t val_high48[ART_KEY_BYTES]; + uint16_t val_low16 = split_key(val, val_high48); + if (!it->has_value || it->high48 != (val & 0xFFFFFFFFFFFF0000)) { + // The ART iterator is before or after the high48 bits of `val` (or + // beyond the ART altogether), so we need to move to a leaf with a + // key equal or greater. + if (!art_iterator_lower_bound(&it->art_it, val_high48)) { + // Only smaller keys found. + it->saturated_forward = true; + return (it->has_value = false); + } + it->high48 = combine_key(it->art_it.key, 0); + // Fall through to the next if statement. } - if (min == 0 && max > 0xFFFF) { - // Flip whole container. - container2 = container_inot(get_container(r, *leaf), - get_typecode(*leaf), &typecode2); - } else { - // Partially flip a container. - container2 = container_inot_range( - get_container(r, *leaf), get_typecode(*leaf), min, max, &typecode2); + if (it->high48 == (val & 0xFFFFFFFFFFFF0000)) { + // We're at equal high bits, check if a suitable value can be found + // in this container. + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = (uint16_t)it->value; + if (container_iterator_lower_bound( + get_container(it->r, leaf), get_typecode(leaf), + &it->container_it, &low16, val_low16)) { + it->value = it->high48 | low16; + return (it->has_value = true); + } + // Only smaller entries in this container, move to the next. 
+ if (!art_iterator_next(&it->art_it)) { + it->saturated_forward = true; + return (it->has_value = false); + } } - if (container_nonzero_cardinality(container2, typecode2)) { - replace_container(r, leaf, container2, typecode2); - } else { - bool erased = art_erase(&r->art, high48, NULL); - assert(erased); - (void)erased; - container_free(container2, typecode2); - remove_container(r, *leaf); + // We're at a leaf with high bits greater than `val`, so the first entry + // in this container is our result. + return roaring64_iterator_init_at_leaf_first(it); +} + +uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, + uint64_t count) { + uint64_t consumed = 0; + while (it->has_value && consumed < count) { + uint32_t container_consumed; + leaf_t leaf = (leaf_t)*it->art_it.value; + uint16_t low16 = (uint16_t)it->value; + uint32_t container_count = UINT32_MAX; + if (count - consumed < (uint64_t)UINT32_MAX) { + container_count = count - consumed; + } + bool has_value = container_iterator_read_into_uint64( + get_container(it->r, leaf), get_typecode(leaf), &it->container_it, + it->high48, buf, container_count, &container_consumed, &low16); + consumed += container_consumed; + buf += container_consumed; + if (has_value) { + it->has_value = true; + it->value = it->high48 | low16; + assert(consumed == count); + return consumed; + } + it->has_value = art_iterator_next(&it->art_it); + if (it->has_value) { + roaring64_iterator_init_at_leaf_first(it); + } } + return consumed; } -roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, - uint64_t min, uint64_t max) { - if (min >= max) { - return roaring64_bitmap_copy(r); - } - return roaring64_bitmap_flip_closed(r, min, max - 1); -} +#ifdef __cplusplus +} // extern "C" +} // namespace roaring +} // namespace api +#endif +/* end file src/roaring64.c */ +/* begin file src/roaring_array.c */ +#include +#include +#include +#include +#include +#include -roaring64_bitmap_t 
*roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, - uint64_t min, uint64_t max) { - if (min > max) { - return roaring64_bitmap_copy(r1); - } - uint8_t min_high48_key[ART_KEY_BYTES]; - uint16_t min_low16 = split_key(min, min_high48_key); - uint8_t max_high48_key[ART_KEY_BYTES]; - uint16_t max_low16 = split_key(max, max_high48_key); - uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; - uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; - roaring64_bitmap_t *r2 = roaring64_bitmap_create(); - art_iterator_t it = art_init_iterator((art_t *)&r1->art, /*first=*/true); +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace internal { +#endif - // Copy the containers before min unchanged. - while (it.value != NULL && compare_high48(it.key, min_high48_key) < 0) { - leaf_t leaf1 = (leaf_t)*it.value; - uint8_t typecode2 = get_typecode(leaf1); - container_t *container2 = get_copy_of_container( - get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); - leaf_t leaf2 = add_container(r2, container2, typecode2); - art_insert(&r2->art, it.key, (art_val_t)leaf2); - art_iterator_next(&it); - } +// Convention: [0,ra->size) all elements are initialized +// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing - // Flip the range (including non-existent containers!) between min and - // max. - for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; - high48_bits++) { - uint8_t current_high48_key[ART_KEY_BYTES]; - split_key(high48_bits << 16, current_high48_key); +extern inline int32_t ra_get_size(const roaring_array_t *ra); +extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); - uint32_t min_container = 0; - if (high48_bits == min_high48_bits) { - min_container = min_low16; - } - uint32_t max_container = 0xFFFF + 1; // Exclusive range. - if (high48_bits == max_high48_bits) { - max_container = max_low16 + 1; // Exclusive. 
- } +extern inline container_t *ra_get_container_at_index(const roaring_array_t *ra, + uint16_t i, + uint8_t *typecode); - roaring64_flip_leaf(r1, r2, current_high48_key, min_container, - max_container); +extern inline void ra_unshare_container_at_index(roaring_array_t *ra, + uint16_t i); + +extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, + int32_t i, + uint16_t key, + container_t *c, + uint8_t typecode); + +extern inline void ra_set_container_at_index(const roaring_array_t *ra, + int32_t i, container_t *c, + uint8_t typecode); + +static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { + // + // Note: not implemented using C's realloc(), because the memory layout is + // Struct-of-Arrays vs. Array-of-Structs: + // https://github.com/RoaringBitmap/CRoaring/issues/256 + + if (new_capacity == 0) { + roaring_free(ra->containers); + ra->containers = NULL; + ra->keys = NULL; + ra->typecodes = NULL; + ra->allocation_size = 0; + return true; + } + const size_t memoryneeded = + new_capacity * + (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); + void *bigalloc = roaring_malloc(memoryneeded); + if (!bigalloc) return false; + void *oldbigalloc = ra->containers; + container_t **newcontainers = (container_t **)bigalloc; + uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); + uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); + assert((char *)(newtypecodes + new_capacity) == + (char *)bigalloc + memoryneeded); + if (ra->size > 0) { + memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size); + memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); + memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); } + ra->containers = newcontainers; + ra->keys = newkeys; + ra->typecodes = newtypecodes; + ra->allocation_size = new_capacity; + roaring_free(oldbigalloc); + return true; +} - // Copy the containers after max unchanged. 
- it = art_upper_bound((art_t *)&r1->art, max_high48_key); - while (it.value != NULL) { - leaf_t leaf1 = (leaf_t)*it.value; - uint8_t typecode2 = get_typecode(leaf1); - container_t *container2 = get_copy_of_container( - get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); - leaf_t leaf2 = add_container(r2, container2, typecode2); - art_insert(&r2->art, it.key, (art_val_t)leaf2); - art_iterator_next(&it); +bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { + if (!new_ra) return false; + ra_init(new_ra); + + // Containers hold 64Ki elements, so 64Ki containers is enough to hold + // `0x10000 * 0x10000` (all 2^32) elements + if (cap > 0x10000) { + cap = 0x10000; } - return r2; + if (cap > 0) { + void *bigalloc = roaring_malloc( + cap * (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t))); + if (bigalloc == NULL) return false; + new_ra->containers = (container_t **)bigalloc; + new_ra->keys = (uint16_t *)(new_ra->containers + cap); + new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); + // Narrowing is safe because of above check + new_ra->allocation_size = (int32_t)cap; + } + return true; } -void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min, - uint64_t max) { - if (min >= max) { - return; +int ra_shrink_to_fit(roaring_array_t *ra) { + int savings = (ra->allocation_size - ra->size) * + (sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); + if (!realloc_array(ra, ra->size)) { + return 0; } - roaring64_bitmap_flip_closed_inplace(r, min, max - 1); + ra->allocation_size = ra->size; + return savings; } -void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, - uint64_t max) { - if (min > max) { +void ra_init(roaring_array_t *new_ra) { + if (!new_ra) { return; } - uint16_t min_low16 = (uint16_t)min; - uint16_t max_low16 = (uint16_t)max; - uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; - uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; + new_ra->keys = 
NULL; + new_ra->containers = NULL; + new_ra->typecodes = NULL; - // Flip the range (including non-existent containers!) between min and - // max. - for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; - high48_bits++) { - uint8_t current_high48_key[ART_KEY_BYTES]; - split_key(high48_bits << 16, current_high48_key); + new_ra->allocation_size = 0; + new_ra->size = 0; + new_ra->flags = 0; +} - uint32_t min_container = 0; - if (high48_bits == min_high48_bits) { - min_container = min_low16; - } - uint32_t max_container = 0xFFFF + 1; // Exclusive range. - if (high48_bits == max_high48_bits) { - max_container = max_low16 + 1; // Exclusive. +bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, + bool copy_on_write) { + ra_clear_containers(dest); // we are going to overwrite them + if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size + dest->size = 0; // <--- This is important. + return true; // output was just cleared, so they match + } + if (dest->allocation_size < source->size) { + if (!realloc_array(dest, source->size)) { + return false; } - - roaring64_flip_leaf_inplace(r, current_high48_key, min_container, - max_container); } -} - -// Returns the number of distinct high 32-bit entries in the bitmap. -static inline uint64_t count_high32(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - uint64_t high32_count = 0; - uint32_t prev_high32 = 0; - while (it.value != NULL) { - uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); - if (high32_count == 0 || prev_high32 != current_high32) { - high32_count++; - prev_high32 = current_high32; + dest->size = source->size; + memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); + // we go through the containers, turning them into shared containers... 
+ if (copy_on_write) { + for (int32_t i = 0; i < dest->size; ++i) { + source->containers[i] = get_copy_of_container( + source->containers[i], &source->typecodes[i], copy_on_write); + } + // we do a shallow copy to the other bitmap + memcpy(dest->containers, source->containers, + dest->size * sizeof(container_t *)); + memcpy(dest->typecodes, source->typecodes, + dest->size * sizeof(uint8_t)); + } else { + memcpy(dest->typecodes, source->typecodes, + dest->size * sizeof(uint8_t)); + for (int32_t i = 0; i < dest->size; i++) { + dest->containers[i] = + container_clone(source->containers[i], source->typecodes[i]); + if (dest->containers[i] == NULL) { + for (int32_t j = 0; j < i; j++) { + container_free(dest->containers[j], dest->typecodes[j]); + } + ra_clear_without_containers(dest); + return false; + } } - art_iterator_next(&it); } - return high32_count; + return true; } -// Frees the (32-bit!) bitmap without freeing the containers. -static inline void roaring_bitmap_free_without_containers(roaring_bitmap_t *r) { - ra_clear_without_containers(&r->high_low_container); - roaring_free(r); +void ra_clear_containers(roaring_array_t *ra) { + for (int32_t i = 0; i < ra->size; ++i) { + container_free(ra->containers[i], ra->typecodes[i]); + } } -size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { - // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations - size_t size = 0; - - // Write as uint64 the distinct number of "buckets", where a bucket is - // defined as the most significant 32 bits of an element. - uint64_t high32_count; - size += sizeof(high32_count); - - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - uint32_t prev_high32 = 0; - roaring_bitmap_t *bitmap32 = NULL; - - // Iterate through buckets ordered by increasing keys. 
- while (it.value != NULL) { - uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); - if (bitmap32 == NULL || prev_high32 != current_high32) { - if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the - // bucket. - size += sizeof(prev_high32); +void ra_reset(roaring_array_t *ra) { + ra_clear_containers(ra); + ra->size = 0; + ra_shrink_to_fit(ra); +} - // Write the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. - size += roaring_bitmap_portable_size_in_bytes(bitmap32); - roaring_bitmap_free_without_containers(bitmap32); - } +void ra_clear_without_containers(roaring_array_t *ra) { + roaring_free( + ra->containers); // keys and typecodes are allocated with containers + ra->size = 0; + ra->allocation_size = 0; + ra->containers = NULL; + ra->keys = NULL; + ra->typecodes = NULL; +} - // Start a new 32-bit bitmap with the current high 32 bits. - art_iterator_t it2 = it; - uint32_t containers_with_high32 = 0; - while (it2.value != NULL && (uint32_t)(combine_key(it2.key, 0) >> - 32) == current_high32) { - containers_with_high32++; - art_iterator_next(&it2); - } - bitmap32 = - roaring_bitmap_create_with_capacity(containers_with_high32); +void ra_clear(roaring_array_t *ra) { + ra_clear_containers(ra); + ra_clear_without_containers(ra); +} - prev_high32 = current_high32; +bool extend_array(roaring_array_t *ra, int32_t k) { + int32_t desired_size = ra->size + k; + const int32_t max_containers = 65536; + assert(desired_size <= max_containers); + if (desired_size > ra->allocation_size) { + int32_t new_capacity = + (ra->size < 1024) ? 
2 * desired_size : 5 * desired_size / 4; + if (new_capacity > max_containers) { + new_capacity = max_containers; } - leaf_t leaf = (leaf_t)*it.value; - ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high32 >> 16), get_container(r, leaf), - get_typecode(leaf)); - art_iterator_next(&it); - } - if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the bucket. - size += sizeof(prev_high32); - - // Write the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. - size += roaring_bitmap_portable_size_in_bytes(bitmap32); - roaring_bitmap_free_without_containers(bitmap32); + return realloc_array(ra, new_capacity); } - - return size; + return true; } -size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, - char *buf) { - // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations - if (buf == NULL) { - return 0; - } - const char *initial_buf = buf; - - // Write as uint64 the distinct number of "buckets", where a bucket is - // defined as the most significant 32 bits of an element. - uint64_t high32_count = count_high32(r); - memcpy(buf, &high32_count, sizeof(high32_count)); - buf += sizeof(high32_count); - - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - uint32_t prev_high32 = 0; - roaring_bitmap_t *bitmap32 = NULL; - - // Iterate through buckets ordered by increasing keys. - while (it.value != NULL) { - uint64_t current_high48 = combine_key(it.key, 0); - uint32_t current_high32 = (uint32_t)(current_high48 >> 32); - if (bitmap32 == NULL || prev_high32 != current_high32) { - if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the - // bucket. 
- memcpy(buf, &prev_high32, sizeof(prev_high32)); - buf += sizeof(prev_high32); +void ra_append(roaring_array_t *ra, uint16_t key, container_t *c, + uint8_t typecode) { + extend_array(ra, 1); + const int32_t pos = ra->size; - // Write the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. - buf += roaring_bitmap_portable_serialize(bitmap32, buf); - roaring_bitmap_free_without_containers(bitmap32); - } + ra->keys[pos] = key; + ra->containers[pos] = c; + ra->typecodes[pos] = typecode; + ra->size++; +} - // Start a new 32-bit bitmap with the current high 32 bits. - art_iterator_t it2 = it; - uint32_t containers_with_high32 = 0; - while (it2.value != NULL && - (uint32_t)combine_key(it2.key, 0) == current_high32) { - containers_with_high32++; - art_iterator_next(&it2); - } - bitmap32 = - roaring_bitmap_create_with_capacity(containers_with_high32); +void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t index, bool copy_on_write) { + extend_array(ra, 1); + const int32_t pos = ra->size; - prev_high32 = current_high32; - } - leaf_t leaf = (leaf_t)*it.value; - ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high48 >> 16), get_container(r, leaf), - get_typecode(leaf)); - art_iterator_next(&it); + // old contents is junk that does not need freeing + ra->keys[pos] = sa->keys[index]; + // the shared container will be in two bitmaps + if (copy_on_write) { + sa->containers[index] = get_copy_of_container( + sa->containers[index], &sa->typecodes[index], copy_on_write); + ra->containers[pos] = sa->containers[index]; + ra->typecodes[pos] = sa->typecodes[index]; + } else { + ra->containers[pos] = + container_clone(sa->containers[index], sa->typecodes[index]); + ra->typecodes[pos] = sa->typecodes[index]; } + ra->size++; +} - if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the bucket. 
- memcpy(buf, &prev_high32, sizeof(prev_high32)); - buf += sizeof(prev_high32); - - // Write the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. - buf += roaring_bitmap_portable_serialize(bitmap32, buf); - roaring_bitmap_free_without_containers(bitmap32); +void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t stopping_key, bool copy_on_write) { + for (int32_t i = 0; i < sa->size; ++i) { + if (sa->keys[i] >= stopping_key) break; + ra_append_copy(ra, sa, (uint16_t)i, copy_on_write); } - - return buf - initial_buf; } -size_t roaring64_bitmap_portable_deserialize_size(const char *buf, - size_t maxbytes) { - // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations - if (buf == NULL) { - return 0; +void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, + int32_t start_index, int32_t end_index, + bool copy_on_write) { + extend_array(ra, end_index - start_index); + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + ra->keys[pos] = sa->keys[i]; + if (copy_on_write) { + sa->containers[i] = get_copy_of_container( + sa->containers[i], &sa->typecodes[i], copy_on_write); + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + } else { + ra->containers[pos] = + container_clone(sa->containers[i], sa->typecodes[i]); + ra->typecodes[pos] = sa->typecodes[i]; + } + ra->size++; } - size_t read_bytes = 0; +} - // Read as uint64 the distinct number of "buckets", where a bucket is - // defined as the most significant 32 bits of an element. 
- uint64_t buckets; - if (read_bytes + sizeof(buckets) > maxbytes) { - return 0; - } - memcpy(&buckets, buf, sizeof(buckets)); - buf += sizeof(buckets); - read_bytes += sizeof(buckets); +void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t before_start, bool copy_on_write) { + int start_location = ra_get_index(sa, before_start); + if (start_location >= 0) + ++start_location; + else + start_location = -start_location - 1; + ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); +} - // Buckets should be 32 bits with 4 bits of zero padding. - if (buckets > UINT32_MAX) { - return 0; - } +void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, + int32_t start_index, int32_t end_index) { + extend_array(ra, end_index - start_index); - // Iterate through buckets ordered by increasing keys. - for (uint64_t bucket = 0; bucket < buckets; ++bucket) { - // Read as uint32 the most significant 32 bits of the bucket. - uint32_t high32; - if (read_bytes + sizeof(high32) > maxbytes) { - return 0; - } - buf += sizeof(high32); - read_bytes += sizeof(high32); + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; - // Read the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. 
- size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( - buf, maxbytes - read_bytes); - if (bitmap32_size == 0) { - return 0; - } - buf += bitmap32_size; - read_bytes += bitmap32_size; + ra->keys[pos] = sa->keys[i]; + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + ra->size++; } - return read_bytes; } -roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( - const char *buf, size_t maxbytes) { - // https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations - if (buf == NULL) { - return NULL; - } - size_t read_bytes = 0; +void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, + int32_t start_index, int32_t end_index, + bool copy_on_write) { + extend_array(ra, end_index - start_index); - // Read as uint64 the distinct number of "buckets", where a bucket is - // defined as the most significant 32 bits of an element. - uint64_t buckets; - if (read_bytes + sizeof(buckets) > maxbytes) { - return NULL; + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + ra->keys[pos] = sa->keys[i]; + if (copy_on_write) { + sa->containers[i] = get_copy_of_container( + sa->containers[i], &sa->typecodes[i], copy_on_write); + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + } else { + ra->containers[pos] = + container_clone(sa->containers[i], sa->typecodes[i]); + ra->typecodes[pos] = sa->typecodes[i]; + } + ra->size++; } - memcpy(&buckets, buf, sizeof(buckets)); - buf += sizeof(buckets); - read_bytes += sizeof(buckets); +} - // Buckets should be 32 bits with 4 bits of zero padding. 
- if (buckets > UINT32_MAX) { - return NULL; - } +container_t *ra_get_container(roaring_array_t *ra, uint16_t x, + uint8_t *typecode) { + int i = binarySearch(ra->keys, (int32_t)ra->size, x); + if (i < 0) return NULL; + *typecode = ra->typecodes[i]; + return ra->containers[i]; +} - roaring64_bitmap_t *r = roaring64_bitmap_create(); - // Iterate through buckets ordered by increasing keys. - int64_t previous_high32 = -1; - for (uint64_t bucket = 0; bucket < buckets; ++bucket) { - // Read as uint32 the most significant 32 bits of the bucket. - uint32_t high32; - if (read_bytes + sizeof(high32) > maxbytes) { - roaring64_bitmap_free(r); - return NULL; - } - memcpy(&high32, buf, sizeof(high32)); - buf += sizeof(high32); - read_bytes += sizeof(high32); - // High 32 bits must be strictly increasing. - if (high32 <= previous_high32) { - roaring64_bitmap_free(r); - return NULL; - } - previous_high32 = high32; +extern inline container_t *ra_get_container_at_index(const roaring_array_t *ra, + uint16_t i, + uint8_t *typecode); - // Read the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. - size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( - buf, maxbytes - read_bytes); - if (bitmap32_size == 0) { - roaring64_bitmap_free(r); - return NULL; - } +extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, + uint16_t i); - roaring_bitmap_t *bitmap32 = roaring_bitmap_portable_deserialize_safe( - buf, maxbytes - read_bytes); - if (bitmap32 == NULL) { - roaring64_bitmap_free(r); - return NULL; - } - buf += bitmap32_size; - read_bytes += bitmap32_size; +extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); - // While we don't attempt to validate much, we must ensure that there - // is no duplication in the high 48 bits - inserting into the ART - // assumes (or UB) no duplicate keys. 
The top 32 bits must be unique - // because we check for strict increasing values of high32, but we - // must also ensure the top 16 bits within each 32-bit bitmap are also - // at least unique (we ensure they're strictly increasing as well, - // which they must be for a _valid_ bitmap, since it's cheaper to check) - int32_t last_bitmap_key = -1; - for (int i = 0; i < bitmap32->high_low_container.size; i++) { - uint16_t key = bitmap32->high_low_container.keys[i]; - if (key <= last_bitmap_key) { - roaring_bitmap_free(bitmap32); - roaring64_bitmap_free(r); - return NULL; - } - last_bitmap_key = key; - } +extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, + int32_t pos); - // Insert all containers of the 32-bit bitmap into the 64-bit bitmap. - move_from_roaring32_offset(r, bitmap32, high32); - roaring_bitmap_free(bitmap32); +// everything skipped over is freed +int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { + while (pos < ra->size && ra->keys[pos] < x) { + container_free(ra->containers[pos], ra->typecodes[pos]); + ++pos; } - return r; + return pos; } -// Returns an "element count" for the given container. This has a different -// meaning for each container type, but the purpose is the minimal information -// required to serialize the container metadata. 
-static inline uint32_t container_get_element_count(const container_t *c, - uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - return ((bitset_container_t *)c)->cardinality; - } - case ARRAY_CONTAINER_TYPE: { - return ((array_container_t *)c)->cardinality; - } - case RUN_CONTAINER_TYPE: { - return ((run_container_t *)c)->n_runs; - } - default: { - assert(false); - roaring_unreachable; - return 0; - } - } +void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, + container_t *c, uint8_t typecode) { + extend_array(ra, 1); + // May be an optimization opportunity with DIY memmove + memmove(&(ra->keys[i + 1]), &(ra->keys[i]), + sizeof(uint16_t) * (ra->size - i)); + memmove(&(ra->containers[i + 1]), &(ra->containers[i]), + sizeof(container_t *) * (ra->size - i)); + memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), + sizeof(uint8_t) * (ra->size - i)); + ra->keys[i] = key; + ra->containers[i] = c; + ra->typecodes[i] = typecode; + ra->size++; } -static inline size_t container_get_frozen_size(const container_t *c, - uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - } - case ARRAY_CONTAINER_TYPE: { - return container_get_element_count(c, typecode) * sizeof(uint16_t); - } - case RUN_CONTAINER_TYPE: { - return container_get_element_count(c, typecode) * sizeof(rle16_t); - } - default: { - assert(false); - roaring_unreachable; - return 0; - } - } -} +// note: Java routine set things to 0, enabling GC. +// Java called it "resize" but it was always used to downsize. +// Allowing upsize would break the conventions about +// valid containers below ra->size. 
-uint64_t align_size(uint64_t size, uint64_t alignment) { - return (size + alignment - 1) & ~(alignment - 1); +void ra_downsize(roaring_array_t *ra, int32_t new_length) { + assert(new_length <= ra->size); + ra->size = new_length; } -size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r) { - if (!is_shrunken(r)) { - return 0; - } - // Flags. - uint64_t size = sizeof(r->flags); - // Container count. - size += sizeof(r->capacity); - // Container element counts. - size += r->capacity * sizeof(uint16_t); - // Total container sizes. - size += 3 * sizeof(uint64_t); - // ART (8 byte aligned). - size = align_size(size, 8); - size += art_size_in_bytes(&r->art); - - uint64_t total_sizes[4] = - CROARING_ZERO_INITIALIZER; // Indexed by typecode. - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - total_sizes[typecode] += - container_get_frozen_size(get_container(r, leaf), typecode); - art_iterator_next(&it); - } - // Containers (aligned). - size = align_size(size, CROARING_BITSET_ALIGNMENT); - size += total_sizes[BITSET_CONTAINER_TYPE]; - size = align_size(size, alignof(rle16_t)); - size += total_sizes[ARRAY_CONTAINER_TYPE]; - size = align_size(size, alignof(uint16_t)); - size += total_sizes[RUN_CONTAINER_TYPE]; - // Padding to make overall size a multiple of required alignment. 
- size = align_size(size, CROARING_BITSET_ALIGNMENT); - return size; +void ra_remove_at_index(roaring_array_t *ra, int32_t i) { + memmove(&(ra->containers[i]), &(ra->containers[i + 1]), + sizeof(container_t *) * (ra->size - i - 1)); + memmove(&(ra->keys[i]), &(ra->keys[i + 1]), + sizeof(uint16_t) * (ra->size - i - 1)); + memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), + sizeof(uint8_t) * (ra->size - i - 1)); + ra->size--; } -static inline void container_frozen_serialize(const container_t *container, - uint8_t typecode, - uint64_t **bitsets, - uint16_t **arrays, - rle16_t **runs) { - size_t size = container_get_frozen_size(container, typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - bitset_container_t *bitset = (bitset_container_t *)container; - memcpy(*bitsets, bitset->words, size); - *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; - break; - } - case ARRAY_CONTAINER_TYPE: { - array_container_t *array = (array_container_t *)container; - memcpy(*arrays, array->array, size); - *arrays += container_get_element_count(container, typecode); - break; - } - case RUN_CONTAINER_TYPE: { - run_container_t *run = (run_container_t *)container; - memcpy(*runs, run->runs, size); - *runs += container_get_element_count(container, typecode); - break; - } - default: { - assert(false); - roaring_unreachable; - } - } +void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { + container_free(ra->containers[i], ra->typecodes[i]); + ra_remove_at_index(ra, i); } -static inline char *pad_align(char *buf, const char *initial_buf, - size_t alignment) { - uint64_t buf_size = buf - initial_buf; - uint64_t pad = align_size(buf_size, alignment) - buf_size; - memset(buf, 0, pad); - return buf + pad; +// used in inplace andNot only, to slide left the containers from +// the mutated RoaringBitmap that are after the largest container of +// the argument RoaringBitmap. In use it should be followed by a call to +// downsize. 
+// +void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, + uint32_t new_begin) { + assert(begin <= end); + assert(new_begin < begin); + + const int range = end - begin; + + // We ensure to previously have freed overwritten containers + // that are not copied elsewhere + + memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), + sizeof(container_t *) * range); + memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), + sizeof(uint16_t) * range); + memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), + sizeof(uint8_t) * range); } -size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, - char *buf) { - if (buf == NULL) { - return 0; +void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { + if (distance > 0) { + extend_array(ra, distance); } - if (!is_shrunken(r)) { - return 0; + int32_t srcpos = ra->size - count; + int32_t dstpos = srcpos + distance; + memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), sizeof(uint16_t) * count); + memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), + sizeof(container_t *) * count); + memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), + sizeof(uint8_t) * count); + ra->size += distance; +} + +void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { + size_t ctr = 0; + for (int32_t i = 0; i < ra->size; ++i) { + int num_added = container_to_uint32_array( + ans + ctr, ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + ctr += num_added; } - const char *initial_buf = buf; +} - // Flags. - memcpy(buf, &r->flags, sizeof(r->flags)); - buf += sizeof(r->flags); +bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, + size_t limit, uint32_t *ans) { + size_t ctr = 0; + size_t dtr = 0; - // Container count. - memcpy(buf, &r->capacity, sizeof(r->capacity)); - buf += sizeof(r->capacity); + size_t t_limit = 0; - // Container element counts. - uint64_t total_sizes[4] = - CROARING_ZERO_INITIALIZER; // Indexed by typecode. 
- art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - container_t *container = get_container(r, leaf); + bool first = false; + size_t first_skip = 0; - uint32_t elem_count = container_get_element_count(container, typecode); - uint16_t compressed_elem_count = (uint16_t)(elem_count - 1); - memcpy(buf, &compressed_elem_count, sizeof(compressed_elem_count)); - buf += sizeof(compressed_elem_count); + uint32_t *t_ans = NULL; + size_t cur_len = 0; - total_sizes[typecode] += container_get_frozen_size(container, typecode); - art_iterator_next(&it); + for (int i = 0; i < ra->size; ++i) { + const container_t *c = + container_unwrap_shared(ra->containers[i], &ra->typecodes[i]); + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: + t_limit = (const_CAST_bitset(c))->cardinality; + break; + case ARRAY_CONTAINER_TYPE: + t_limit = (const_CAST_array(c))->cardinality; + break; + case RUN_CONTAINER_TYPE: + t_limit = run_container_cardinality(const_CAST_run(c)); + break; + } + if (ctr + t_limit - 1 >= offset && ctr < offset + limit) { + if (!first) { + // first_skip = t_limit - (ctr + t_limit - offset); + first_skip = offset - ctr; + first = true; + t_ans = (uint32_t *)roaring_malloc(sizeof(*t_ans) * + (first_skip + limit)); + if (t_ans == NULL) { + return false; + } + memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)); + cur_len = first_skip + limit; + } + if (dtr + t_limit > cur_len) { + uint32_t *append_ans = (uint32_t *)roaring_malloc( + sizeof(*append_ans) * (cur_len + t_limit)); + if (append_ans == NULL) { + if (t_ans != NULL) roaring_free(t_ans); + return false; + } + memset(append_ans, 0, + sizeof(*append_ans) * (cur_len + t_limit)); + cur_len = cur_len + t_limit; + memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); + roaring_free(t_ans); + t_ans = append_ans; + } + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE: + 
container_to_uint32_array(t_ans + dtr, const_CAST_bitset(c), + ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + case ARRAY_CONTAINER_TYPE: + container_to_uint32_array(t_ans + dtr, const_CAST_array(c), + ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + case RUN_CONTAINER_TYPE: + container_to_uint32_array(t_ans + dtr, const_CAST_run(c), + ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + } + dtr += t_limit; + } + ctr += t_limit; + if (dtr - first_skip >= limit) break; } + if (t_ans != NULL) { + memcpy(ans, t_ans + first_skip, limit * sizeof(uint32_t)); + free(t_ans); + } + return true; +} - // Total container sizes. - memcpy(buf, &(total_sizes[BITSET_CONTAINER_TYPE]), sizeof(uint64_t)); - buf += sizeof(uint64_t); - memcpy(buf, &(total_sizes[RUN_CONTAINER_TYPE]), sizeof(uint64_t)); - buf += sizeof(uint64_t); - memcpy(buf, &(total_sizes[ARRAY_CONTAINER_TYPE]), sizeof(uint64_t)); - buf += sizeof(uint64_t); - - // ART. - buf = pad_align(buf, initial_buf, 8); - buf += art_serialize(&r->art, buf); - - // Containers (aligned). - // Runs before arrays as run elements are larger than array elements and - // smaller than bitset elements. 
- buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); - uint64_t *bitsets = (uint64_t *)buf; - buf += total_sizes[BITSET_CONTAINER_TYPE]; - buf = pad_align(buf, initial_buf, alignof(rle16_t)); - rle16_t *runs = (rle16_t *)buf; - buf += total_sizes[RUN_CONTAINER_TYPE]; - buf = pad_align(buf, initial_buf, alignof(uint16_t)); - uint16_t *arrays = (uint16_t *)buf; - buf += total_sizes[ARRAY_CONTAINER_TYPE]; +bool ra_has_run_container(const roaring_array_t *ra) { + for (int32_t k = 0; k < ra->size; ++k) { + if (get_container_type(ra->containers[k], ra->typecodes[k]) == + RUN_CONTAINER_TYPE) + return true; + } + return false; +} - it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - container_t *container = get_container(r, leaf); - container_frozen_serialize(container, typecode, &bitsets, &arrays, - &runs); - art_iterator_next(&it); +uint32_t ra_portable_header_size(const roaring_array_t *ra) { + if (ra_has_run_container(ra)) { + if (ra->size < + NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets + return 4 + (ra->size + 7) / 8 + 4 * ra->size; + } + return 4 + (ra->size + 7) / 8 + + 8 * ra->size; // - 4 because we pack the size with the cookie + } else { + return 4 + 4 + 8 * ra->size; } +} - // Padding to make overall size a multiple of required alignment. 
- buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); +size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { + size_t count = ra_portable_header_size(ra); - return buf - initial_buf; + for (int32_t k = 0; k < ra->size; ++k) { + count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); + } + return count; } -static container_t *container_frozen_view(uint8_t typecode, uint32_t elem_count, - const uint64_t **bitsets, - const uint16_t **arrays, - const rle16_t **runs) { - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - bitset_container_t *c = (bitset_container_t *)roaring_malloc( - sizeof(bitset_container_t)); - c->cardinality = elem_count; - c->words = (uint64_t *)*bitsets; - *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; - return (container_t *)c; - } - case ARRAY_CONTAINER_TYPE: { - array_container_t *c = - (array_container_t *)roaring_malloc(sizeof(array_container_t)); - c->cardinality = elem_count; - c->capacity = elem_count; - c->array = (uint16_t *)*arrays; - *arrays += elem_count; - return (container_t *)c; - } - case RUN_CONTAINER_TYPE: { - run_container_t *c = - (run_container_t *)roaring_malloc(sizeof(run_container_t)); - c->n_runs = elem_count; - c->capacity = elem_count; - c->runs = (rle16_t *)*runs; - *runs += elem_count; - return (container_t *)c; +// This function is endian-sensitive. 
+size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { + char *initbuf = buf; + uint32_t startOffset = 0; + bool hasrun = ra_has_run_container(ra); + if (hasrun) { + uint32_t cookie = SERIAL_COOKIE | ((uint32_t)(ra->size - 1) << 16); + memcpy(buf, &cookie, sizeof(cookie)); + buf += sizeof(cookie); + uint32_t s = (ra->size + 7) / 8; + uint8_t *bitmapOfRunContainers = (uint8_t *)roaring_calloc(s, 1); + assert(bitmapOfRunContainers != NULL); // todo: handle + for (int32_t i = 0; i < ra->size; ++i) { + if (get_container_type(ra->containers[i], ra->typecodes[i]) == + RUN_CONTAINER_TYPE) { + bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); + } } - default: { - assert(false); - roaring_unreachable; - return NULL; + memcpy(buf, bitmapOfRunContainers, s); + buf += s; + roaring_free(bitmapOfRunContainers); + if (ra->size < NO_OFFSET_THRESHOLD) { + startOffset = 4 + 4 * ra->size + s; + } else { + startOffset = 4 + 8 * ra->size + s; } - } -} + } else { // backwards compatibility + uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; -roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, - size_t maxbytes) { - if (buf == NULL) { - return NULL; + memcpy(buf, &cookie, sizeof(cookie)); + buf += sizeof(cookie); + memcpy(buf, &ra->size, sizeof(ra->size)); + buf += sizeof(ra->size); + + startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; } - if ((uintptr_t)buf % CROARING_BITSET_ALIGNMENT != 0) { - return NULL; + for (int32_t k = 0; k < ra->size; ++k) { + memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); + buf += sizeof(ra->keys[k]); + // get_cardinality returns a value in [1,1<<16], subtracting one + // we get [0,1<<16 - 1] which fits in 16 bits + uint16_t card = (uint16_t)(container_get_cardinality(ra->containers[k], + ra->typecodes[k]) - + 1); + memcpy(buf, &card, sizeof(card)); + buf += sizeof(card); } - - roaring64_bitmap_t *r = roaring64_bitmap_create(); - - // Flags. 
- if (maxbytes < sizeof(r->flags)) { - roaring64_bitmap_free(r); - return NULL; + if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { + // writing the containers offsets + for (int32_t k = 0; k < ra->size; k++) { + memcpy(buf, &startOffset, sizeof(startOffset)); + buf += sizeof(startOffset); + startOffset = + startOffset + + container_size_in_bytes(ra->containers[k], ra->typecodes[k]); + } } - memcpy(&r->flags, buf, sizeof(r->flags)); - buf += sizeof(r->flags); - maxbytes -= sizeof(r->flags); - r->flags |= ROARING_FLAG_FROZEN; - - // Container count. - if (maxbytes < sizeof(r->capacity)) { - roaring64_bitmap_free(r); - return NULL; + for (int32_t k = 0; k < ra->size; ++k) { + buf += container_write(ra->containers[k], ra->typecodes[k], buf); } - memcpy(&r->capacity, buf, sizeof(r->capacity)); - buf += sizeof(r->capacity); - maxbytes -= sizeof(r->capacity); - - r->containers = - (container_t **)roaring_malloc(r->capacity * sizeof(container_t *)); + return buf - initbuf; +} - // Container element counts. - if (maxbytes < r->capacity * sizeof(uint16_t)) { - roaring64_bitmap_free(r); - return NULL; +// Quickly checks whether there is a serialized bitmap at the pointer, +// not exceeding size "maxbytes" in bytes. This function does not allocate +// memory dynamically. +// +// This function returns 0 if and only if no valid bitmap is found. +// Otherwise, it returns how many bytes are occupied. +// +size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { + size_t bytestotal = sizeof(int32_t); // for cookie + if (bytestotal > maxbytes) return 0; + uint32_t cookie; + memcpy(&cookie, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + if ((cookie & 0xFFFF) != SERIAL_COOKIE && + cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { + return 0; } - const char *elem_counts = buf; - buf += r->capacity * sizeof(uint16_t); - maxbytes -= r->capacity * sizeof(uint16_t); + int32_t size; - // Total container sizes. 
- uint64_t total_sizes[4]; - if (maxbytes < sizeof(uint64_t) * 3) { - roaring64_bitmap_free(r); - return NULL; + if ((cookie & 0xFFFF) == SERIAL_COOKIE) + size = (cookie >> 16) + 1; + else { + bytestotal += sizeof(int32_t); + if (bytestotal > maxbytes) return 0; + memcpy(&size, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); } - memcpy(&(total_sizes[BITSET_CONTAINER_TYPE]), buf, sizeof(uint64_t)); - buf += sizeof(uint64_t); - maxbytes -= sizeof(uint64_t); - memcpy(&(total_sizes[RUN_CONTAINER_TYPE]), buf, sizeof(uint64_t)); - buf += sizeof(uint64_t); - maxbytes -= sizeof(uint64_t); - memcpy(&(total_sizes[ARRAY_CONTAINER_TYPE]), buf, sizeof(uint64_t)); - buf += sizeof(uint64_t); - maxbytes -= sizeof(uint64_t); - - // ART (8 byte aligned). - buf = CROARING_ALIGN_BUF(buf, 8); - size_t art_size = art_frozen_view(buf, maxbytes, &r->art); - if (art_size == 0) { - roaring64_bitmap_free(r); - return NULL; + if (size > (1 << 16) || size < 0) { + return 0; } - buf += art_size; - maxbytes -= art_size; - - // Containers (aligned). 
- const char *before_containers = buf; - buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); - const uint64_t *bitsets = (const uint64_t *)buf; - buf += total_sizes[BITSET_CONTAINER_TYPE]; - buf = CROARING_ALIGN_BUF(buf, alignof(rle16_t)); - const rle16_t *runs = (const rle16_t *)buf; - buf += total_sizes[RUN_CONTAINER_TYPE]; - buf = CROARING_ALIGN_BUF(buf, alignof(uint16_t)); - const uint16_t *arrays = (const uint16_t *)buf; - buf += total_sizes[ARRAY_CONTAINER_TYPE]; - if (maxbytes < (uint64_t)(buf - before_containers)) { - roaring64_bitmap_free(r); - return NULL; + char *bitmapOfRunContainers = NULL; + bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; + if (hasrun) { + int32_t s = (size + 7) / 8; + bytestotal += s; + if (bytestotal > maxbytes) return 0; + bitmapOfRunContainers = (char *)buf; + buf += s; } - maxbytes -= buf - before_containers; + bytestotal += size * 2 * sizeof(uint16_t); + if (bytestotal > maxbytes) return 0; + uint16_t *keyscards = (uint16_t *)buf; + buf += size * 2 * sizeof(uint16_t); + if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { + // skipping the offsets + bytestotal += size * 4; + if (bytestotal > maxbytes) return 0; + buf += size * 4; + } + // Reading the containers + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2 * k + 1, sizeof(tmp)); + uint32_t thiscard = tmp + 1; + bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); + bool isrun = false; + if (hasrun) { + if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { + isbitmap = false; + isrun = true; + } + } + if (isbitmap) { + size_t containersize = + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + bytestotal += containersize; + if (bytestotal > maxbytes) return 0; + buf += containersize; + } else if (isrun) { + bytestotal += sizeof(uint16_t); + if (bytestotal > maxbytes) return 0; + uint16_t n_runs; + memcpy(&n_runs, buf, sizeof(uint16_t)); + buf += sizeof(uint16_t); + size_t containersize = n_runs * sizeof(rle16_t); + bytestotal += 
containersize; + if (bytestotal > maxbytes) return 0; + buf += containersize; + } else { + size_t containersize = thiscard * sizeof(uint16_t); + bytestotal += containersize; + if (bytestotal > maxbytes) return 0; + buf += containersize; + } + } + return bytestotal; +} - // Deserialize in ART iteration order. - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - for (size_t i = 0; it.value != NULL; ++i) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); +// This function populates answer from the content of buf (reading up to +// maxbytes bytes). The function returns false if a properly serialized bitmap +// cannot be found. If it returns true, readbytes is populated by how many bytes +// were read, we have that *readbytes <= maxbytes. +// +// This function is endian-sensitive. +bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, + const size_t maxbytes, size_t *readbytes) { + *readbytes = sizeof(int32_t); // for cookie + if (*readbytes > maxbytes) { + // Ran out of bytes while reading first 4 bytes. + return false; + } + uint32_t cookie; + memcpy(&cookie, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + if ((cookie & 0xFFFF) != SERIAL_COOKIE && + cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { + // "I failed to find one of the right cookies. + return false; + } + int32_t size; - uint16_t compressed_elem_count; - memcpy(&compressed_elem_count, elem_counts + (i * sizeof(uint16_t)), - sizeof(compressed_elem_count)); - uint32_t elem_count = (uint32_t)(compressed_elem_count) + 1; + if ((cookie & 0xFFFF) == SERIAL_COOKIE) + size = (cookie >> 16) + 1; + else { + *readbytes += sizeof(int32_t); + if (*readbytes > maxbytes) { + // Ran out of bytes while reading second part of the cookie. + return false; + } + memcpy(&size, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + } + if (size < 0) { + // You cannot have a negative number of containers, the data must be + // corrupted. 
+ return false; + } + if (size > (1 << 16)) { + // You cannot have so many containers, the data must be corrupted. + return false; + } + const char *bitmapOfRunContainers = NULL; + bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; + if (hasrun) { + int32_t s = (size + 7) / 8; + *readbytes += s; + if (*readbytes > maxbytes) { // data is corrupted? + // Ran out of bytes while reading run bitmap. + return false; + } + bitmapOfRunContainers = buf; + buf += s; + } + uint16_t *keyscards = (uint16_t *)buf; - // The container index is unrelated to the iteration order. - uint64_t index = get_index(leaf); - r->containers[index] = container_frozen_view(typecode, elem_count, - &bitsets, &arrays, &runs); + *readbytes += size * 2 * sizeof(uint16_t); + if (*readbytes > maxbytes) { + // Ran out of bytes while reading key-cardinality array. + return false; + } + buf += size * 2 * sizeof(uint16_t); - art_iterator_next(&it); + bool is_ok = ra_init_with_capacity(answer, size); + if (!is_ok) { + // Failed to allocate memory for roaring array. Bailing out. + return false; } - // Padding to make overall size a multiple of required alignment. - buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2 * k, sizeof(tmp)); + answer->keys[k] = tmp; + } + if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { + *readbytes += size * 4; + if (*readbytes > maxbytes) { // data is corrupted? + // Ran out of bytes while reading offsets. 
+ ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } - return r; -} + // skipping the offsets + buf += size * 4; + } + // Reading the containers + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2 * k + 1, sizeof(tmp)); + uint32_t thiscard = tmp + 1; + bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); + bool isrun = false; + if (hasrun) { + if ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { + isbitmap = false; + isrun = true; + } + } + if (isbitmap) { + // we check that the read is allowed + size_t containersize = + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + *readbytes += containersize; + if (*readbytes > maxbytes) { + // Running out of bytes while reading a bitset container. + ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + // it is now safe to read + bitset_container_t *c = bitset_container_create(); + if (c == NULL) { // memory allocation failure + // Failed to allocate memory for a bitset container. + ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + answer->size++; + buf += bitset_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = BITSET_CONTAINER_TYPE; + } else if (isrun) { + // we check that the read is allowed + *readbytes += sizeof(uint16_t); + if (*readbytes > maxbytes) { + // Running out of bytes while reading a run container (header). + ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + uint16_t n_runs; + memcpy(&n_runs, buf, sizeof(uint16_t)); + size_t containersize = n_runs * sizeof(rle16_t); + *readbytes += containersize; + if (*readbytes > maxbytes) { // data is corrupted? + // Running out of bytes while reading a run container. 
+ ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + // it is now safe to read -bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, - roaring_iterator64 iterator, void *ptr) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - uint64_t high48 = combine_key(it.key, 0); - uint64_t high32 = high48 & 0xFFFFFFFF00000000ULL; - uint32_t low32 = high48; - leaf_t leaf = (leaf_t)*it.value; - if (!container_iterate64(get_container(r, leaf), get_typecode(leaf), - low32, iterator, high32, ptr)) { - return false; + run_container_t *c = run_container_create(); + if (c == NULL) { // memory allocation failure + // Failed to allocate memory for a run container. + ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + answer->size++; + buf += run_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = RUN_CONTAINER_TYPE; + } else { + // we check that the read is allowed + size_t containersize = thiscard * sizeof(uint16_t); + *readbytes += containersize; + if (*readbytes > maxbytes) { // data is corrupted? + // Running out of bytes while reading an array container. + ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + // it is now safe to read + array_container_t *c = + array_container_create_given_capacity(thiscard); + if (c == NULL) { // memory allocation failure + // Failed to allocate memory for an array container. 
+ ra_clear(answer); // we need to clear the containers already + // allocated, and the roaring array + return false; + } + answer->size++; + buf += array_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = ARRAY_CONTAINER_TYPE; } - art_iterator_next(&it); } return true; } -void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r, - uint64_t *out) { - roaring64_iterator_t it; // gets initialized in the next line - roaring64_iterator_init_at(r, &it, /*first=*/true); - roaring64_iterator_read(&it, out, UINT64_MAX); +#ifdef __cplusplus } - -roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r) { - roaring64_iterator_t *it = - (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); - return roaring64_iterator_init_at(r, it, /*first=*/true); } +} // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/roaring_array.c */ +/* begin file src/roaring_priority_queue.c */ -roaring64_iterator_t *roaring64_iterator_create_last( - const roaring64_bitmap_t *r) { - roaring64_iterator_t *it = - (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); - return roaring64_iterator_init_at(r, it, /*first=*/false); -} +#ifdef __cplusplus +using namespace ::roaring::internal; -void roaring64_iterator_reinit(const roaring64_bitmap_t *r, - roaring64_iterator_t *it) { - roaring64_iterator_init_at(r, it, /*first=*/true); -} +extern "C" { +namespace roaring { +namespace api { +#endif -void roaring64_iterator_reinit_last(const roaring64_bitmap_t *r, - roaring64_iterator_t *it) { - roaring64_iterator_init_at(r, it, /*first=*/false); +struct roaring_pq_element_s { + uint64_t size; + bool is_temporary; + roaring_bitmap_t *bitmap; +}; + +typedef struct roaring_pq_element_s roaring_pq_element_t; + +struct roaring_pq_s { + roaring_pq_element_t *elements; + uint64_t size; +}; + +typedef struct roaring_pq_s roaring_pq_t; + +static inline bool compare(roaring_pq_element_t *t1, 
roaring_pq_element_t *t2) { + return t1->size < t2->size; } -roaring64_iterator_t *roaring64_iterator_copy(const roaring64_iterator_t *it) { - roaring64_iterator_t *new_it = - (roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t)); - memcpy(new_it, it, sizeof(*it)); - return new_it; +static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { + uint64_t i = pq->size; + pq->elements[pq->size++] = *t; + while (i > 0) { + uint64_t p = (i - 1) >> 1; + roaring_pq_element_t ap = pq->elements[p]; + if (!compare(t, &ap)) break; + pq->elements[i] = ap; + i = p; + } + pq->elements[i] = *t; } -void roaring64_iterator_free(roaring64_iterator_t *it) { roaring_free(it); } +static void pq_free(roaring_pq_t *pq) { roaring_free(pq); } -bool roaring64_iterator_has_value(const roaring64_iterator_t *it) { - return it->has_value; +static void percolate_down(roaring_pq_t *pq, uint32_t i) { + uint32_t size = (uint32_t)pq->size; + uint32_t hsize = size >> 1; + roaring_pq_element_t ai = pq->elements[i]; + while (i < hsize) { + uint32_t l = (i << 1) + 1; + uint32_t r = l + 1; + roaring_pq_element_t bestc = pq->elements[l]; + if (r < size) { + if (compare(pq->elements + r, &bestc)) { + l = r; + bestc = pq->elements[r]; + } + } + if (!compare(&bestc, &ai)) { + break; + } + pq->elements[i] = bestc; + i = l; + } + pq->elements[i] = ai; +} + +static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { + size_t alloc_size = + sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length; + roaring_pq_t *answer = (roaring_pq_t *)roaring_malloc(alloc_size); + answer->elements = (roaring_pq_element_t *)(answer + 1); + answer->size = length; + for (uint32_t i = 0; i < length; i++) { + answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; + answer->elements[i].is_temporary = false; + answer->elements[i].size = + roaring_bitmap_portable_size_in_bytes(arr[i]); + } + for (int32_t i = (length >> 1); i >= 0; i--) { + percolate_down(answer, i); + } + return answer; } 
-uint64_t roaring64_iterator_value(const roaring64_iterator_t *it) { - return it->value; +static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { + roaring_pq_element_t ans = *pq->elements; + if (pq->size > 1) { + pq->elements[0] = pq->elements[--pq->size]; + percolate_down(pq, 0); + } else + --pq->size; + // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; + return ans; } -bool roaring64_iterator_advance(roaring64_iterator_t *it) { - if (it->art_it.value == NULL) { - if (it->saturated_forward) { - return (it->has_value = false); - } - roaring64_iterator_init_at(it->r, it, /*first=*/true); - return it->has_value; - } - leaf_t leaf = (leaf_t)*it->art_it.value; - uint16_t low16 = (uint16_t)it->value; - if (container_iterator_next(get_container(it->r, leaf), get_typecode(leaf), - &it->container_it, &low16)) { - it->value = it->high48 | low16; - return (it->has_value = true); +// this function consumes and frees the inputs +static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, + roaring_bitmap_t *x2) { + uint8_t result_type = 0; + const int length1 = ra_get_size(&x1->high_low_container), + length2 = ra_get_size(&x2->high_low_container); + if (0 == length1) { + roaring_bitmap_free(x1); + return x2; } - if (art_iterator_next(&it->art_it)) { - return roaring64_iterator_init_at_leaf_first(it); + if (0 == length2) { + roaring_bitmap_free(x2); + return x1; } - it->saturated_forward = true; - return (it->has_value = false); -} + uint32_t neededcap = length1 > length2 ? 
length2 : length1; + roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); + int pos1 = 0, pos2 = 0; + uint8_t type1, type2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + while (true) { + if (s1 == s2) { + // todo: unsharing can be inefficient as it may create a clone where + // none + // is needed, but it has the benefit of being easy to reason about. -bool roaring64_iterator_previous(roaring64_iterator_t *it) { - if (it->art_it.value == NULL) { - if (!it->saturated_forward) { - // Saturated backward. - return (it->has_value = false); + ra_unshare_container_at_index(&x1->high_low_container, + (uint16_t)pos1); + container_t *c1 = ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + assert(type1 != SHARED_CONTAINER_TYPE); + + ra_unshare_container_at_index(&x2->high_low_container, + (uint16_t)pos2); + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + assert(type2 != SHARED_CONTAINER_TYPE); + + container_t *c; + + if ((type2 == BITSET_CONTAINER_TYPE) && + (type1 != BITSET_CONTAINER_TYPE)) { + c = container_lazy_ior(c2, type2, c1, type1, &result_type); + container_free(c1, type1); + if (c != c2) { + container_free(c2, type2); + } + } else { + c = container_lazy_ior(c1, type1, c2, type2, &result_type); + container_free(c2, type2); + if (c != c1) { + container_free(c1, type1); + } + } + // since we assume that the initial containers are non-empty, the + // result here + // can only be non-empty + ra_append(&answer->high_low_container, s1, c, result_type); + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); + + } else if (s1 < s2) { // s1 < s2 + container_t *c1 = 
ra_get_container_at_index(&x1->high_low_container, + (uint16_t)pos1, &type1); + ra_append(&answer->high_low_container, s1, c1, type1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1); + + } else { // s1 > s2 + container_t *c2 = ra_get_container_at_index(&x2->high_low_container, + (uint16_t)pos2, &type2); + ra_append(&answer->high_low_container, s2, c2, type2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2); } - roaring64_iterator_init_at(it->r, it, /*first=*/false); - return it->has_value; - } - leaf_t leaf = (leaf_t)*it->art_it.value; - uint16_t low16 = (uint16_t)it->value; - if (container_iterator_prev(get_container(it->r, leaf), get_typecode(leaf), - &it->container_it, &low16)) { - it->value = it->high48 | low16; - return (it->has_value = true); } - if (art_iterator_prev(&it->art_it)) { - return roaring64_iterator_init_at_leaf_last(it); + if (pos1 == length1) { + ra_append_move_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2); + } else if (pos2 == length2) { + ra_append_move_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1); } - it->saturated_forward = false; // Saturated backward. - return (it->has_value = false); + ra_clear_without_containers(&x1->high_low_container); + ra_clear_without_containers(&x2->high_low_container); + roaring_free(x1); + roaring_free(x2); + return answer; } -bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, - uint64_t val) { - uint8_t val_high48[ART_KEY_BYTES]; - uint16_t val_low16 = split_key(val, val_high48); - if (!it->has_value || it->high48 != (val & 0xFFFFFFFFFFFF0000)) { - // The ART iterator is before or after the high48 bits of `val` (or - // beyond the ART altogether), so we need to move to a leaf with a - // key equal or greater. - if (!art_iterator_lower_bound(&it->art_it, val_high48)) { - // Only smaller keys found. 
- it->saturated_forward = true; - return (it->has_value = false); - } - it->high48 = combine_key(it->art_it.key, 0); - // Fall through to the next if statement. +/** + * Compute the union of 'number' bitmaps using a heap. This can + * sometimes be faster than roaring_bitmap_or_many which uses + * a naive algorithm. Caller is responsible for freeing the + * result. + */ +roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, + const roaring_bitmap_t **x) { + if (number == 0) { + return roaring_bitmap_create(); } - - if (it->high48 == (val & 0xFFFFFFFFFFFF0000)) { - // We're at equal high bits, check if a suitable value can be found - // in this container. - leaf_t leaf = (leaf_t)*it->art_it.value; - uint16_t low16 = (uint16_t)it->value; - if (container_iterator_lower_bound( - get_container(it->r, leaf), get_typecode(leaf), - &it->container_it, &low16, val_low16)) { - it->value = it->high48 | low16; - return (it->has_value = true); - } - // Only smaller entries in this container, move to the next. - if (!art_iterator_next(&it->art_it)) { - it->saturated_forward = true; - return (it->has_value = false); - } + if (number == 1) { + return roaring_bitmap_copy(x[0]); } + roaring_pq_t *pq = create_pq(x, number); + while (pq->size > 1) { + roaring_pq_element_t x1 = pq_poll(pq); + roaring_pq_element_t x2 = pq_poll(pq); - // We're at a leaf with high bits greater than `val`, so the first entry - // in this container is our result. 
- return roaring64_iterator_init_at_leaf_first(it); -} + if (x1.is_temporary && x2.is_temporary) { + roaring_bitmap_t *newb = + lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); + // should normally return a fresh new bitmap *except* that + // it can return x1.bitmap or x2.bitmap in degenerate cases + bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); + uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); + roaring_pq_element_t newelement = { + .size = bsize, .is_temporary = temporary, .bitmap = newb}; + pq_add(pq, &newelement); + } else if (x2.is_temporary) { + roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); + x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); + pq_add(pq, &x2); + } else if (x1.is_temporary) { + roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); + x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); -uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, - uint64_t count) { - uint64_t consumed = 0; - while (it->has_value && consumed < count) { - uint32_t container_consumed; - leaf_t leaf = (leaf_t)*it->art_it.value; - uint16_t low16 = (uint16_t)it->value; - uint32_t container_count = UINT32_MAX; - if (count - consumed < (uint64_t)UINT32_MAX) { - container_count = count - consumed; - } - bool has_value = container_iterator_read_into_uint64( - get_container(it->r, leaf), get_typecode(leaf), &it->container_it, - it->high48, buf, container_count, &container_consumed, &low16); - consumed += container_consumed; - buf += container_consumed; - if (has_value) { - it->has_value = true; - it->value = it->high48 | low16; - assert(consumed == count); - return consumed; - } - it->has_value = art_iterator_next(&it->art_it); - if (it->has_value) { - roaring64_iterator_init_at_leaf_first(it); + pq_add(pq, &x1); + } else { + roaring_bitmap_t *newb = + roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); + uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); + roaring_pq_element_t 
newelement = { + .size = bsize, .is_temporary = true, .bitmap = newb}; + + pq_add(pq, &newelement); } } - return consumed; + roaring_pq_element_t X = pq_poll(pq); + roaring_bitmap_t *answer = X.bitmap; + roaring_bitmap_repair_after_lazy(answer); + pq_free(pq); + return answer; } #ifdef __cplusplus -} // extern "C" -} // namespace roaring -} // namespace api +} +} +} // extern "C" { namespace roaring { namespace api { #endif -/* end file src/roaring64.c */ +/* end file src/roaring_priority_queue.c */ diff --git a/pyroaring/roaring.h b/pyroaring/roaring.h index 02aa6db..9965870 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2025-06-05T04:01:50Z +// Created by amalgamation.sh on 2025-02-28T15:35:21Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -59,11 +59,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.3.5" +#define ROARING_VERSION "4.3.0" enum { ROARING_VERSION_MAJOR = 4, ROARING_VERSION_MINOR = 3, - ROARING_VERSION_REVISION = 5 + ROARING_VERSION_REVISION = 0 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ @@ -657,12 +657,6 @@ static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { { 0 } #endif -#if defined(__cplusplus) -#define CROARING_STATIC_ASSERT(x, y) static_assert(x, y) -#else -#define CROARING_STATIC_ASSERT(x, y) _Static_assert(x, y) -#endif - // We need portability.h to be included first, // but we also always want isadetection.h to be // included (right after). @@ -1186,14 +1180,12 @@ inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) { /** * Add all the values between min (included) and max (excluded) that are at a * distance k*step from min. 
- * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, uint32_t step); /** * Creates a new bitmap from a pointer of uint32_t integers - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); @@ -1217,11 +1209,6 @@ inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, bool cow) { } } -/** - * Return a copy of the bitmap with all values shifted by offset. - * The returned pointer may be NULL in case of errors. The caller is responsible - * for freeing the return bitmap. - */ roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, int64_t offset); /** @@ -1280,7 +1267,6 @@ CROARING_DEPRECATED roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); /** * Copies a bitmap (this does memory allocation). * The caller is responsible for memory management. - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); @@ -1312,7 +1298,6 @@ void roaring_bitmap_printf(const roaring_bitmap_t *r); * bitmaps, two-by-two, it is best to start with the smallest bitmap. * You may also rely on roaring_bitmap_and_inplace to avoid creating * many temporary bitmaps. - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -1375,7 +1360,6 @@ void roaring_bitmap_and_inplace(roaring_bitmap_t *r1, /** * Computes the union between two bitmaps and returns new bitmap. The caller is * responsible for memory management. - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -1391,7 +1375,6 @@ void roaring_bitmap_or_inplace(roaring_bitmap_t *r1, * Compute the union of 'number' bitmaps. * Caller is responsible for freeing the result. 
* See also `roaring_bitmap_or_many_heap()` - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_or_many(size_t number, const roaring_bitmap_t **rs); @@ -1407,7 +1390,6 @@ roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, /** * Computes the symmetric difference (xor) between two bitmaps * and returns new bitmap. The caller is responsible for memory management. - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -1421,7 +1403,6 @@ void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1, /** * Compute the xor of 'number' bitmaps. * Caller is responsible for freeing the result. - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, const roaring_bitmap_t **rs); @@ -1429,7 +1410,6 @@ roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, /** * Computes the difference (andnot) between two bitmaps and returns new bitmap. * Caller is responsible for freeing the result. - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -1723,8 +1703,6 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. - * - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); @@ -1741,8 +1719,6 @@ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); * The difference with `roaring_bitmap_deserialize()` is that this function * checks that the input buffer is a valid bitmap. If the buffer is too small, * NULL is returned. - * - * The returned pointer may be NULL in case of errors. 
*/ roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, size_t maxbytes); @@ -1767,8 +1743,6 @@ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. - * - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); @@ -1804,8 +1778,6 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. - * - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); @@ -1829,8 +1801,6 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. - * - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); @@ -1980,8 +1950,6 @@ bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, * * `bitsetconversion` is a flag which determines whether container-container * operations force a bitset conversion. - * - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2, @@ -2017,8 +1985,6 @@ void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1); * * It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on * the result. - * - * The returned pointer may be NULL in case of errors. 
*/ roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -2035,7 +2001,6 @@ void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, * Compute the negation of the bitmap in the interval [range_start, range_end). * The number of negated values is range_end - range_start. * Areas outside the range are passed through unchanged. - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); @@ -2044,7 +2009,6 @@ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, * Compute the negation of the bitmap in the interval [range_start, range_end]. * The number of negated values is range_end - range_start + 1. * Areas outside the range are passed through unchanged. - * The returned pointer may be NULL in case of errors. */ roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, uint32_t range_start, @@ -2423,14 +2387,12 @@ typedef struct roaring64_bulk_context_s { /** * Dynamically allocates a new bitmap (initially empty). * Client is responsible for calling `roaring64_bitmap_free()`. - * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_create(void); void roaring64_bitmap_free(roaring64_bitmap_t *r); /** * Returns a copy of a bitmap. - * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r); @@ -2490,7 +2452,6 @@ roaring64_bitmap_t *roaring64_bitmap_move_from_roaring32(roaring_bitmap_t *r); /** * Create a new bitmap containing all the values in [min, max) that are at a * distance k*step from min. - * The returned pointer may be NULL in case of errors. 
*/ roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, uint64_t step); @@ -2753,8 +2714,6 @@ bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, * bitmaps, two-by-two, it is best to start with the smallest bitmaps. You may * also rely on roaring64_bitmap_and_inplace to avoid creating many temporary * bitmaps. - * - * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); @@ -2799,7 +2758,6 @@ double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, /** * Computes the union between two bitmaps and returns new bitmap. The caller is * responsible for free-ing the result. - * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); @@ -2819,7 +2777,6 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, /** * Computes the symmetric difference (xor) between two bitmaps and returns a new * bitmap. The caller is responsible for free-ing the result. - * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); @@ -2840,7 +2797,6 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, /** * Computes the difference (andnot) between two bitmaps and returns a new * bitmap. The caller is responsible for free-ing the result. - * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2); @@ -2862,7 +2818,6 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, * Compute the negation of the bitmap in the interval [min, max). * The number of negated values is `max - min`. Areas outside the range are * passed through unchanged. - * The returned pointer may be NULL in case of errors. 
*/ roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, uint64_t min, uint64_t max); @@ -2871,7 +2826,6 @@ roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, * Compute the negation of the bitmap in the interval [min, max]. * The number of negated values is `max - min + 1`. Areas outside the range are * passed through unchanged. - * The returned pointer may be NULL in case of errors. */ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r, uint64_t min, uint64_t max); From 8dd63e5e5686dee43502c21985ba194441d6c993 Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Sat, 28 Jun 2025 22:04:38 +0200 Subject: [PATCH 03/11] Version v4.2.0 --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 3161 +++++++++++--------------------- pyroaring/roaring.h | 112 +- 3 files changed, 1076 insertions(+), 2199 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index 007b74b..084686f 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.3.0" \ No newline at end of file +__croaring_version__ = "v4.2.0" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index 2e76a6d..324bfa5 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2025-02-28T15:35:21Z +// Created by amalgamation.sh on 2024-09-30T21:45:33Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -6758,8 +6758,8 @@ void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); * chunks _differ_. This means that if there are two entries with different * high 48 bits, then there is only one inner node containing the common key * prefix, and two leaves. 
- * * Mostly pointer-free: nodes are referred to by index rather than pointer, - * so that the structure can be deserialized with a backing buffer. + * * Intrusive leaves: the leaf struct is included in user values. This removes + * a layer of indirection. */ // Fixed length of keys in the ART. All keys are assumed to be of this length. @@ -6772,33 +6772,25 @@ namespace internal { #endif typedef uint8_t art_key_chunk_t; - -// Internal node reference type. Contains the node typecode in the low 8 bits, -// and the index in the relevant node array in the high 48 bits. Has a value of -// CROARING_ART_NULL_REF when pointing to a non-existent node. -typedef uint64_t art_ref_t; - -typedef void art_node_t; +typedef struct art_node_s art_node_t; /** - * The ART is empty when root is a null ref. - * - * Each node type has its own dynamic array of node structs, indexed by - * art_ref_t. The arrays are expanded as needed, and shrink only when - * `shrink_to_fit` is called. + * Wrapper to allow an empty tree. */ typedef struct art_s { - art_ref_t root; - - // Indexed by node typecode, thus 1 larger than they need to be for - // convenience. `first_free` indicates the index where the first free node - // lives, which may be equal to the capacity. - uint64_t first_free[6]; - uint64_t capacities[6]; - art_node_t *nodes[6]; + art_node_t *root; } art_t; -typedef uint64_t art_val_t; +/** + * Values inserted into the tree have to be cast-able to art_val_t. This + * improves performance by reducing indirection. + * + * NOTE: Value pointers must be unique! This is because each value struct + * contains the key corresponding to the value. + */ +typedef struct art_val_s { + art_key_chunk_t key[ART_KEY_BYTES]; +} art_val_t; /** * Compares two keys, returns their relative order: @@ -6810,21 +6802,14 @@ int art_compare_keys(const art_key_chunk_t key1[], const art_key_chunk_t key2[]); /** - * Initializes the ART. 
- */ -void art_init_cleared(art_t *art); - -/** - * Inserts the given key and value. Returns a pointer to the value inserted, - * valid as long as the ART is not modified. + * Inserts the given key and value. */ -art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val); +void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val); /** - * Returns true if a value was erased. Sets `*erased_val` to the value erased, - * if any. + * Returns the value erased, NULL if not found. */ -bool art_erase(art_t *art, const art_key_chunk_t *key, art_val_t *erased_val); +art_val_t *art_erase(art_t *art, const art_key_chunk_t *key); /** * Returns the value associated with the given key, NULL if not found. @@ -6837,39 +6822,42 @@ art_val_t *art_find(const art_t *art, const art_key_chunk_t *key); bool art_is_empty(const art_t *art); /** - * Frees the contents of the ART. Should not be called when using - * `art_deserialize_frozen_safe`. + * Frees the nodes of the ART except the values, which the user is expected to + * free. */ void art_free(art_t *art); +/** + * Returns the size in bytes of the ART. Includes size of pointers to values, + * but not the values themselves. + */ +size_t art_size_in_bytes(const art_t *art); + /** * Prints the ART using printf, useful for debugging. */ void art_printf(const art_t *art); /** - * Callback for validating the value stored in a leaf. `context` is a - * user-provided value passed to the callback without modification. + * Callback for validating the value stored in a leaf. * * Should return true if the value is valid, false otherwise * If false is returned, `*reason` should be set to a static string describing * the reason for the failure. */ -typedef bool (*art_validate_cb_t)(const art_val_t val, const char **reason, - void *context); +typedef bool (*art_validate_cb_t)(const art_val_t *val, const char **reason); /** - * Validate the ART tree, ensuring it is internally consistent. 
`context` is a - * user-provided value passed to the callback without modification. + * Validate the ART tree, ensuring it is internally consistent. */ bool art_internal_validate(const art_t *art, const char **reason, - art_validate_cb_t validate_cb, void *context); + art_validate_cb_t validate_cb); /** * ART-internal iterator bookkeeping. Users should treat this as an opaque type. */ typedef struct art_iterator_frame_s { - art_ref_t ref; + art_node_t *node; uint8_t index_in_node; } art_iterator_frame_t; @@ -6881,8 +6869,6 @@ typedef struct art_iterator_s { art_key_chunk_t key[ART_KEY_BYTES]; art_val_t *value; - art_t *art; - uint8_t depth; // Key depth uint8_t frame; // Node depth @@ -6896,19 +6882,19 @@ typedef struct art_iterator_s { * depending on `first`. The iterator is not valid if there are no entries in * the ART. */ -art_iterator_t art_init_iterator(art_t *art, bool first); +art_iterator_t art_init_iterator(const art_t *art, bool first); /** * Returns an initialized iterator positioned at a key equal to or greater than * the given key, if it exists. */ -art_iterator_t art_lower_bound(art_t *art, const art_key_chunk_t *key); +art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key); /** * Returns an initialized iterator positioned at a key greater than the given * key, if it exists. */ -art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key); +art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key); /** * The following iterator movement functions return true if a new entry was @@ -6927,49 +6913,14 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, /** * Insert the value and positions the iterator at the key. */ -void art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, - art_val_t val); +void art_iterator_insert(art_t *art, art_iterator_t *iterator, + const art_key_chunk_t *key, art_val_t *val); /** * Erase the value pointed at by the iterator. 
Moves the iterator to the next - * leaf. - * Returns true if a value was erased. Sets `*erased_val` to the value erased, - * if any. - */ -bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val); - -/** - * Shrinks the internal arrays in the ART to remove any unused elements. Returns - * the number of bytes freed. - */ -size_t art_shrink_to_fit(art_t *art); - -/** - * Returns true if the ART has no unused elements. - */ -bool art_is_shrunken(const art_t *art); - -/** - * Returns the serialized size in bytes. - * Requires `art_shrink_to_fit` to be called first. - */ -size_t art_size_in_bytes(const art_t *art); - -/** - * Serializes the ART and returns the number of bytes written. Returns 0 on - * error. Requires `art_shrink_to_fit` to be called first. - */ -size_t art_serialize(const art_t *art, char *buf); - -/** - * Deserializes the ART from a serialized buffer, reading up to `maxbytes` - * bytes. Returns 0 on error. Requires `buf` to be 8 byte aligned. - * - * An ART deserialized in this way should only be used in a readonly context.The - * underlying buffer must not be freed before the ART. `art_free` should not be - * called on the ART deserialized in this way. + * leaf. Returns the value erased or NULL if nothing was erased. 
*/ -size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art); +art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator); #ifdef __cplusplus } // extern "C" @@ -9151,36 +9102,37 @@ CROARING_UNTARGET_AVX512 #endif/* end file src/array_util.c */ /* begin file src/art/art.c */ #include -#include #include #include -#define CROARING_ART_NULL_REF 0 - -#define CROARING_ART_LEAF_TYPE 1 -#define CROARING_ART_NODE4_TYPE 2 -#define CROARING_ART_NODE16_TYPE 3 -#define CROARING_ART_NODE48_TYPE 4 -#define CROARING_ART_NODE256_TYPE 5 - -#define CROARING_ART_MIN_TYPE CROARING_ART_LEAF_TYPE -#define CROARING_ART_MAX_TYPE CROARING_ART_NODE256_TYPE +#define CROARING_ART_NODE4_TYPE 0 +#define CROARING_ART_NODE16_TYPE 1 +#define CROARING_ART_NODE48_TYPE 2 +#define CROARING_ART_NODE256_TYPE 3 +#define CROARING_ART_NUM_TYPES 4 // Node48 placeholder value to indicate no child is present at this key index. #define CROARING_ART_NODE48_EMPTY_VAL 48 -#define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) -#define CROARING_ART_ALIGN_BUF(buf, alignment) \ - (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ - (ptrdiff_t)(~((alignment)-1))) +// We use the least significant bit of node pointers to indicate whether a node +// is a leaf or an inner node. This is never surfaced to the user. +// +// Using pointer tagging to indicate leaves not only saves a bit of memory by +// sparing the typecode, but also allows us to use an intrusive leaf struct. +// Using an intrusive leaf struct leaves leaf allocation up to the user. Upon +// deallocation of the ART, we know not to free the leaves without having to +// dereference the leaf pointers. +// +// All internal operations on leaves should use CROARING_CAST_LEAF before using +// the leaf. The only places that use CROARING_SET_LEAF are locations where a +// field is directly assigned to a leaf pointer. After using CROARING_SET_LEAF, +// the leaf should be treated as a node of unknown type. 
+#define CROARING_IS_LEAF(p) (((uintptr_t)(p) & 1)) +#define CROARING_SET_LEAF(p) ((art_node_t *)((uintptr_t)(p) | 1)) +#define CROARING_CAST_LEAF(p) ((art_leaf_t *)((void *)((uintptr_t)(p) & ~1))) -// Gives the byte difference needed to align the current buffer to the -// alignment, relative to the start of the buffer. -#define CROARING_ART_ALIGN_SIZE_RELATIVE(buf_cur, buf_start, alignment) \ - ((((ptrdiff_t)((buf_cur) - (buf_start)) + ((alignment)-1)) & \ - (ptrdiff_t)(~((alignment)-1))) - \ - (ptrdiff_t)((buf_cur) - (buf_start))) +#define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) #ifdef __cplusplus extern "C" { @@ -9190,20 +9142,30 @@ namespace internal { typedef uint8_t art_typecode_t; -typedef struct art_leaf_s { - union { - struct { - art_key_chunk_t key[ART_KEY_BYTES]; - art_val_t val; - }; - uint64_t next_free; - }; -} art_leaf_t; +// Aliasing with a "leaf" naming so that its purpose is clearer in the context +// of the trie internals. +typedef art_val_t art_leaf_t; + +typedef struct art_internal_validate_s { + const char **reason; + art_validate_cb_t validate_cb; + + int depth; + art_key_chunk_t current_key[ART_KEY_BYTES]; +} art_internal_validate_t; + +// Set the reason message, and return false for convenience. +static inline bool art_validate_fail(const art_internal_validate_t *validate, + const char *msg) { + *validate->reason = msg; + return false; +} // Inner node, with prefix. // // We use a fixed-length array as a pointer would be larger than the array. typedef struct art_inner_node_s { + art_typecode_t typecode; uint8_t prefix_size; uint8_t prefix[ART_KEY_BYTES - 1]; } art_inner_node_t; @@ -9212,232 +9174,119 @@ typedef struct art_inner_node_s { // Node4: key[i] corresponds with children[i]. Keys are sorted. 
typedef struct art_node4_s { - union { - struct { - art_inner_node_t base; - uint8_t count; - uint8_t keys[4]; - art_ref_t children[4]; - }; - uint64_t next_free; - }; + art_inner_node_t base; + uint8_t count; + uint8_t keys[4]; + art_node_t *children[4]; } art_node4_t; // Node16: key[i] corresponds with children[i]. Keys are sorted. typedef struct art_node16_s { - union { - struct { - art_inner_node_t base; - uint8_t count; - uint8_t keys[16]; - art_ref_t children[16]; - }; - uint64_t next_free; - }; + art_inner_node_t base; + uint8_t count; + uint8_t keys[16]; + art_node_t *children[16]; } art_node16_t; // Node48: key[i] corresponds with children[key[i]] if key[i] != // CROARING_ART_NODE48_EMPTY_VAL. Keys are naturally sorted due to direct // indexing. typedef struct art_node48_s { - union { - struct { - art_inner_node_t base; - uint8_t count; - // Bitset where the ith bit is set if children[i] is available - // Because there are at most 48 children, only the bottom 48 bits - // are used. - uint64_t available_children; - uint8_t keys[256]; - art_ref_t children[48]; - }; - uint64_t next_free; - }; + art_inner_node_t base; + uint8_t count; + // Bitset where the ith bit is set if children[i] is available + // Because there are at most 48 children, only the bottom 48 bits are used. + uint64_t available_children; + uint8_t keys[256]; + art_node_t *children[48]; } art_node48_t; // Node256: children[i] is directly indexed by key chunk. A child is present if // children[i] != NULL. typedef struct art_node256_s { - union { - struct { - art_inner_node_t base; - uint16_t count; - art_ref_t children[256]; - }; - uint64_t next_free; - }; + art_inner_node_t base; + uint16_t count; + art_node_t *children[256]; } art_node256_t; -// Size of each node type, indexed by typecode for convenience. 
-static const size_t ART_NODE_SIZES[] = { - 0, - sizeof(art_leaf_t), - sizeof(art_node4_t), - sizeof(art_node16_t), - sizeof(art_node48_t), - sizeof(art_node256_t), -}; - // Helper struct to refer to a child within a node at a specific index. typedef struct art_indexed_child_s { - art_ref_t child; + art_node_t *child; uint8_t index; art_key_chunk_t key_chunk; } art_indexed_child_t; -typedef struct art_internal_validate_s { - const char **reason; - art_validate_cb_t validate_cb; - void *context; - - int depth; - art_key_chunk_t current_key[ART_KEY_BYTES]; -} art_internal_validate_t; - -// Set the reason message, and return false for convenience. -static inline bool art_validate_fail(const art_internal_validate_t *validate, - const char *msg) { - *validate->reason = msg; - return false; -} - -static inline art_ref_t art_to_ref(uint64_t index, art_typecode_t typecode) { - return ((art_ref_t)index) << 16 | typecode; -} - -static inline uint64_t art_ref_index(art_ref_t ref) { - return ((uint64_t)ref) >> 16; -} - -static inline art_typecode_t art_ref_typecode(art_ref_t ref) { - return (art_typecode_t)ref; -} - -/** - * Gets a pointer to a node from its reference. The pointer only remains valid - * under non-mutating operations. If any mutating operations occur, this - * function should be called again to get a valid pointer to the node. 
- */ -static art_node_t *art_deref(const art_t *art, art_ref_t ref) { - assert(ref != CROARING_ART_NULL_REF); - art_typecode_t typecode = art_ref_typecode(ref); - return (art_node_t *)((char *)art->nodes[typecode] + - art_ref_index(ref) * ART_NODE_SIZES[typecode]); -} - -static inline art_node_t *art_get_node(const art_t *art, uint64_t index, - art_typecode_t typecode) { - return art_deref(art, art_to_ref(index, typecode)); -} - -static inline uint64_t art_get_index(const art_t *art, const art_node_t *node, - art_typecode_t typecode) { - art_node_t *nodes = art->nodes[typecode]; - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return (art_leaf_t *)node - (art_leaf_t *)nodes; - case CROARING_ART_NODE4_TYPE: - return (art_node4_t *)node - (art_node4_t *)nodes; - case CROARING_ART_NODE16_TYPE: - return (art_node16_t *)node - (art_node16_t *)nodes; - case CROARING_ART_NODE48_TYPE: - return (art_node48_t *)node - (art_node48_t *)nodes; - case CROARING_ART_NODE256_TYPE: - return (art_node256_t *)node - (art_node256_t *)nodes; - default: - assert(false); - return 0; - } +static inline bool art_is_leaf(const art_node_t *node) { + return CROARING_IS_LEAF(node); } -/** - * Creates a reference from a pointer. 
- */ -static inline art_ref_t art_get_ref(const art_t *art, const art_node_t *node, - art_typecode_t typecode) { - return art_to_ref(art_get_index(art, node, typecode), typecode); +static void art_leaf_populate(art_leaf_t *leaf, const art_key_chunk_t key[]) { + memcpy(leaf->key, key, ART_KEY_BYTES); } -static inline bool art_is_leaf(art_ref_t ref) { - return art_ref_typecode(ref) == CROARING_ART_LEAF_TYPE; +static inline uint8_t art_get_type(const art_inner_node_t *node) { + return node->typecode; } static inline void art_init_inner_node(art_inner_node_t *node, + art_typecode_t typecode, const art_key_chunk_t prefix[], uint8_t prefix_size) { + node->typecode = typecode; node->prefix_size = prefix_size; memcpy(node->prefix, prefix, prefix_size * sizeof(art_key_chunk_t)); } -static void art_node_free(art_t *art, art_node_t *node, - art_typecode_t typecode); - -static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode); +static void art_free_node(art_node_t *node); // ===================== Start of node-specific functions ====================== -static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], - art_val_t val) { - uint64_t index = art_allocate_index(art, CROARING_ART_LEAF_TYPE); - art_leaf_t *leaf = - ((art_leaf_t *)art->nodes[CROARING_ART_LEAF_TYPE]) + index; - memcpy(leaf->key, key, ART_KEY_BYTES); - leaf->val = val; - return art_to_ref(index, CROARING_ART_LEAF_TYPE); -} - -static inline void art_leaf_clear(art_leaf_t *leaf, art_ref_t next_free) { - leaf->next_free = next_free; -} - -static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], +static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node16_t *art_node16_create(art_t *art, - const art_key_chunk_t prefix[], +static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node48_t *art_node48_create(art_t *art, - const art_key_chunk_t prefix[], +static 
art_node48_t *art_node48_create(const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node256_t *art_node256_create(art_t *art, - const art_key_chunk_t prefix[], +static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, - art_ref_t child, uint8_t key); -static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, - art_ref_t child, uint8_t key); -static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, - art_ref_t child, uint8_t key); -static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, - art_ref_t child, uint8_t key); +static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, + uint8_t key); +static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, + uint8_t key); +static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, + uint8_t key); +static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, + uint8_t key); -static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], +static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { - uint64_t index = art_allocate_index(art, CROARING_ART_NODE4_TYPE); - art_node4_t *node = - ((art_node4_t *)art->nodes[CROARING_ART_NODE4_TYPE]) + index; - art_init_inner_node(&node->base, prefix, prefix_size); + art_node4_t *node = (art_node4_t *)roaring_malloc(sizeof(art_node4_t)); + art_init_inner_node(&node->base, CROARING_ART_NODE4_TYPE, prefix, + prefix_size); node->count = 0; return node; } -static inline void art_node4_clear(art_node4_t *node, art_ref_t next_free) { - node->count = 0; - node->next_free = next_free; +static void art_free_node4(art_node4_t *node) { + for (size_t i = 0; i < node->count; ++i) { + art_free_node(node->children[i]); + } + roaring_free(node); } -static inline art_ref_t art_node4_find_child(const art_node4_t *node, - art_key_chunk_t 
key) { +static inline art_node_t *art_node4_find_child(const art_node4_t *node, + art_key_chunk_t key) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key) { return node->children[i]; } } - return CROARING_ART_NULL_REF; + return NULL; } -static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, - art_ref_t child, uint8_t key) { +static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, + uint8_t key) { if (node->count < 4) { size_t idx = 0; for (; idx < node->count; ++idx) { @@ -9450,26 +9299,26 @@ static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, memmove(node->keys + idx + 1, node->keys + idx, after * sizeof(art_key_chunk_t)); memmove(node->children + idx + 1, node->children + idx, - after * sizeof(art_ref_t)); + after * sizeof(art_node_t *)); node->children[idx] = child; node->keys[idx] = key; node->count++; - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + return (art_node_t *)node; } art_node16_t *new_node = - art_node16_create(art, node->base.prefix, node->base.prefix_size); + art_node16_create(node->base.prefix, node->base.prefix_size); // Instead of calling insert, this could be specialized to 2x memcpy and // setting the count. 
for (size_t i = 0; i < 4; ++i) { - art_node16_insert(art, new_node, node->children[i], node->keys[i]); + art_node16_insert(new_node, node->children[i], node->keys[i]); } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); - return art_node16_insert(art, new_node, child, key); + roaring_free(node); + return art_node16_insert(new_node, child, key); } -static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, - art_key_chunk_t key_chunk) { +static inline art_node_t *art_node4_erase(art_node4_t *node, + art_key_chunk_t key_chunk) { int idx = -1; for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { @@ -9477,18 +9326,17 @@ static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, } } if (idx == -1) { - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + return (art_node_t *)node; } if (node->count == 2) { // Only one child remains after erasing, so compress the path by // removing this node. uint8_t other_idx = idx ^ 1; - art_ref_t remaining_child = node->children[other_idx]; + art_node_t *remaining_child = node->children[other_idx]; art_key_chunk_t remaining_child_key = node->keys[other_idx]; if (!art_is_leaf(remaining_child)) { // Correct the prefix of the child node. - art_inner_node_t *inner_node = - (art_inner_node_t *)art_deref(art, remaining_child); + art_inner_node_t *inner_node = (art_inner_node_t *)remaining_child; memmove(inner_node->prefix + node->base.prefix_size + 1, inner_node->prefix, inner_node->prefix_size); memcpy(inner_node->prefix, node->base.prefix, @@ -9496,7 +9344,7 @@ static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, inner_node->prefix[node->base.prefix_size] = remaining_child_key; inner_node->prefix_size += node->base.prefix_size + 1; } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + roaring_free(node); return remaining_child; } // Shift other keys to maintain sorted order. 
@@ -9504,14 +9352,14 @@ static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, memmove(node->keys + idx, node->keys + idx + 1, after_next * sizeof(art_key_chunk_t)); memmove(node->children + idx, node->children + idx + 1, - after_next * sizeof(art_ref_t)); + after_next * sizeof(art_node_t *)); node->count--; - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + return (art_node_t *)node; } static inline void art_node4_replace(art_node4_t *node, art_key_chunk_t key_chunk, - art_ref_t new_child) { + art_node_t *new_child) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { node->children[i] = new_child; @@ -9525,7 +9373,7 @@ static inline art_indexed_child_t art_node4_next_child(const art_node4_t *node, art_indexed_child_t indexed_child; index++; if (index >= node->count) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9542,7 +9390,7 @@ static inline art_indexed_child_t art_node4_prev_child(const art_node4_t *node, index--; art_indexed_child_t indexed_child; if (index < 0) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9555,7 +9403,7 @@ static inline art_indexed_child_t art_node4_child_at(const art_node4_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= node->count) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9575,15 +9423,14 @@ static inline art_indexed_child_t art_node4_lower_bound( return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } -static bool art_internal_validate_at(const art_t *art, art_ref_t ref, +static bool art_internal_validate_at(const art_node_t *node, art_internal_validate_t validator); -static bool 
art_node4_internal_validate(const art_t *art, - const art_node4_t *node, +static bool art_node4_internal_validate(const art_node4_t *node, art_internal_validate_t validator) { if (node->count == 0) { return art_validate_fail(&validator, "Node4 has no children"); @@ -9610,41 +9457,41 @@ static bool art_node4_internal_validate(const art_t *art, } } validator.current_key[validator.depth - 1] = node->keys[i]; - if (!art_internal_validate_at(art, node->children[i], validator)) { + if (!art_internal_validate_at(node->children[i], validator)) { return false; } } return true; } -static art_node16_t *art_node16_create(art_t *art, - const art_key_chunk_t prefix[], +static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { - uint64_t index = art_allocate_index(art, CROARING_ART_NODE16_TYPE); - art_node16_t *node = - ((art_node16_t *)art->nodes[CROARING_ART_NODE16_TYPE]) + index; - art_init_inner_node(&node->base, prefix, prefix_size); + art_node16_t *node = (art_node16_t *)roaring_malloc(sizeof(art_node16_t)); + art_init_inner_node(&node->base, CROARING_ART_NODE16_TYPE, prefix, + prefix_size); node->count = 0; return node; } -static inline void art_node16_clear(art_node16_t *node, art_ref_t next_free) { - node->count = 0; - node->next_free = next_free; +static void art_free_node16(art_node16_t *node) { + for (size_t i = 0; i < node->count; ++i) { + art_free_node(node->children[i]); + } + roaring_free(node); } -static inline art_ref_t art_node16_find_child(const art_node16_t *node, - art_key_chunk_t key) { +static inline art_node_t *art_node16_find_child(const art_node16_t *node, + art_key_chunk_t key) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key) { return node->children[i]; } } - return CROARING_ART_NULL_REF; + return NULL; } -static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, - art_ref_t child, uint8_t key) { +static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, + uint8_t key) 
{ if (node->count < 16) { size_t idx = 0; for (; idx < node->count; ++idx) { @@ -9657,24 +9504,24 @@ static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, memmove(node->keys + idx + 1, node->keys + idx, after * sizeof(art_key_chunk_t)); memmove(node->children + idx + 1, node->children + idx, - after * sizeof(art_ref_t)); + after * sizeof(art_node_t *)); node->children[idx] = child; node->keys[idx] = key; node->count++; - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); + return (art_node_t *)node; } art_node48_t *new_node = - art_node48_create(art, node->base.prefix, node->base.prefix_size); + art_node48_create(node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 16; ++i) { - art_node48_insert(art, new_node, node->children[i], node->keys[i]); + art_node48_insert(new_node, node->children[i], node->keys[i]); } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); - return art_node48_insert(art, new_node, child, key); + roaring_free(node); + return art_node48_insert(new_node, child, key); } -static inline art_ref_t art_node16_erase(art_t *art, art_node16_t *node, - uint8_t key_chunk) { +static inline art_node_t *art_node16_erase(art_node16_t *node, + uint8_t key_chunk) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { // Shift other keys to maintain sorted order. 
@@ -9682,28 +9529,28 @@ static inline art_ref_t art_node16_erase(art_t *art, art_node16_t *node, memmove(node->keys + i, node->keys + i + 1, after_next * sizeof(key_chunk)); memmove(node->children + i, node->children + i + 1, - after_next * sizeof(art_ref_t)); + after_next * sizeof(art_node_t *)); node->count--; break; } } if (node->count > 4) { - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); + return (art_node_t *)node; } art_node4_t *new_node = - art_node4_create(art, node->base.prefix, node->base.prefix_size); + art_node4_create(node->base.prefix, node->base.prefix_size); // Instead of calling insert, this could be specialized to 2x memcpy and // setting the count. for (size_t i = 0; i < 4; ++i) { - art_node4_insert(art, new_node, node->children[i], node->keys[i]); + art_node4_insert(new_node, node->children[i], node->keys[i]); } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); - return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE4_TYPE); + roaring_free(node); + return (art_node_t *)new_node; } static inline void art_node16_replace(art_node16_t *node, art_key_chunk_t key_chunk, - art_ref_t new_child) { + art_node_t *new_child) { for (uint8_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { node->children[i] = new_child; @@ -9717,7 +9564,7 @@ static inline art_indexed_child_t art_node16_next_child( art_indexed_child_t indexed_child; index++; if (index >= node->count) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9734,7 +9581,7 @@ static inline art_indexed_child_t art_node16_prev_child( index--; art_indexed_child_t indexed_child; if (index < 0) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9747,7 +9594,7 @@ static inline art_indexed_child_t art_node16_child_at(const art_node16_t *node, int index) { 
art_indexed_child_t indexed_child; if (index < 0 || index >= node->count) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9767,12 +9614,11 @@ static inline art_indexed_child_t art_node16_lower_bound( return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } -static bool art_node16_internal_validate(const art_t *art, - const art_node16_t *node, +static bool art_node16_internal_validate(const art_node16_t *node, art_internal_validate_t validator) { if (node->count <= 4) { return art_validate_fail(&validator, "Node16 has too few children"); @@ -9795,20 +9641,18 @@ static bool art_node16_internal_validate(const art_t *art, } } validator.current_key[validator.depth - 1] = node->keys[i]; - if (!art_internal_validate_at(art, node->children[i], validator)) { + if (!art_internal_validate_at(node->children[i], validator)) { return false; } } return true; } -static art_node48_t *art_node48_create(art_t *art, - const art_key_chunk_t prefix[], +static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { - uint64_t index = art_allocate_index(art, CROARING_ART_NODE48_TYPE); - art_node48_t *node = - ((art_node48_t *)art->nodes[CROARING_ART_NODE48_TYPE]) + index; - art_init_inner_node(&node->base, prefix, prefix_size); + art_node48_t *node = (art_node48_t *)roaring_malloc(sizeof(art_node48_t)); + art_init_inner_node(&node->base, CROARING_ART_NODE48_TYPE, prefix, + prefix_size); node->count = 0; node->available_children = CROARING_NODE48_AVAILABLE_CHILDREN_MASK; for (size_t i = 0; i < 256; ++i) { @@ -9817,22 +9661,29 @@ static art_node48_t *art_node48_create(art_t *art, return node; } -static inline void art_node48_clear(art_node48_t *node, art_ref_t next_free) { - node->count = 0; - node->next_free = next_free; +static void art_free_node48(art_node48_t *node) { + uint64_t used_children = + 
(node->available_children) ^ CROARING_NODE48_AVAILABLE_CHILDREN_MASK; + while (used_children != 0) { + // We checked above that used_children is not zero + uint8_t child_idx = roaring_trailing_zeroes(used_children); + art_free_node(node->children[child_idx]); + used_children &= ~(UINT64_C(1) << child_idx); + } + roaring_free(node); } -static inline art_ref_t art_node48_find_child(const art_node48_t *node, - art_key_chunk_t key) { +static inline art_node_t *art_node48_find_child(const art_node48_t *node, + art_key_chunk_t key) { uint8_t val_idx = node->keys[key]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { return node->children[val_idx]; } - return CROARING_ART_NULL_REF; + return NULL; } -static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, - art_ref_t child, uint8_t key) { +static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, + uint8_t key) { if (node->count < 48) { // node->available_children is only zero when the node is full (count == // 48), we just checked count < 48 @@ -9841,48 +9692,48 @@ static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, node->children[val_idx] = child; node->count++; node->available_children &= ~(UINT64_C(1) << val_idx); - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return (art_node_t *)node; } art_node256_t *new_node = - art_node256_create(art, node->base.prefix, node->base.prefix_size); + art_node256_create(node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { uint8_t val_idx = node->keys[i]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { - art_node256_insert(art, new_node, node->children[val_idx], i); + art_node256_insert(new_node, node->children[val_idx], i); } } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); - return art_node256_insert(art, new_node, child, key); + roaring_free(node); + return art_node256_insert(new_node, child, key); } -static inline art_ref_t art_node48_erase(art_t *art, art_node48_t 
*node, - uint8_t key_chunk) { +static inline art_node_t *art_node48_erase(art_node48_t *node, + uint8_t key_chunk) { uint8_t val_idx = node->keys[key_chunk]; if (val_idx == CROARING_ART_NODE48_EMPTY_VAL) { - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return (art_node_t *)node; } node->keys[key_chunk] = CROARING_ART_NODE48_EMPTY_VAL; node->available_children |= UINT64_C(1) << val_idx; node->count--; if (node->count > 16) { - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return (art_node_t *)node; } art_node16_t *new_node = - art_node16_create(art, node->base.prefix, node->base.prefix_size); + art_node16_create(node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { val_idx = node->keys[i]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { - art_node16_insert(art, new_node, node->children[val_idx], i); + art_node16_insert(new_node, node->children[val_idx], i); } } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); - return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE16_TYPE); + roaring_free(node); + return (art_node_t *)new_node; } static inline void art_node48_replace(art_node48_t *node, art_key_chunk_t key_chunk, - art_ref_t new_child) { + art_node_t *new_child) { uint8_t val_idx = node->keys[key_chunk]; assert(val_idx != CROARING_ART_NODE48_EMPTY_VAL); node->children[val_idx] = new_child; @@ -9900,7 +9751,7 @@ static inline art_indexed_child_t art_node48_next_child( return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } @@ -9919,7 +9770,7 @@ static inline art_indexed_child_t art_node48_prev_child( return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } @@ -9927,7 +9778,7 @@ static inline art_indexed_child_t art_node48_child_at(const art_node48_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || 
index >= 256) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9947,12 +9798,11 @@ static inline art_indexed_child_t art_node48_lower_bound( return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } -static bool art_node48_internal_validate(const art_t *art, - const art_node48_t *node, +static bool art_node48_internal_validate(const art_node48_t *node, art_internal_validate_t validator) { if (node->count <= 16) { return art_validate_fail(&validator, "Node48 has too few children"); @@ -9969,8 +9819,8 @@ static bool art_node48_internal_validate(const art_t *art, &validator, "Node48 keys point to the same child index"); } - art_ref_t child = node->children[child_idx]; - if (child == CROARING_ART_NULL_REF) { + art_node_t *child = node->children[child_idx]; + if (child == NULL) { return art_validate_fail(&validator, "Node48 has a NULL child"); } used_children |= UINT64_C(1) << child_idx; @@ -10002,7 +9852,7 @@ static bool art_node48_internal_validate(const art_t *art, for (int i = 0; i < 256; ++i) { if (node->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { validator.current_key[validator.depth - 1] = i; - if (!art_internal_validate_at(art, node->children[node->keys[i]], + if (!art_internal_validate_at(node->children[node->keys[i]], validator)) { return false; } @@ -10011,59 +9861,62 @@ static bool art_node48_internal_validate(const art_t *art, return true; } -static art_node256_t *art_node256_create(art_t *art, - const art_key_chunk_t prefix[], +static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { - uint64_t index = art_allocate_index(art, CROARING_ART_NODE256_TYPE); art_node256_t *node = - ((art_node256_t *)art->nodes[CROARING_ART_NODE256_TYPE]) + index; - art_init_inner_node(&node->base, prefix, prefix_size); + (art_node256_t *)roaring_malloc(sizeof(art_node256_t)); + 
art_init_inner_node(&node->base, CROARING_ART_NODE256_TYPE, prefix, + prefix_size); node->count = 0; for (size_t i = 0; i < 256; ++i) { - node->children[i] = CROARING_ART_NULL_REF; + node->children[i] = NULL; } return node; } -static inline void art_node256_clear(art_node256_t *node, art_ref_t next_free) { - node->count = 0; - node->next_free = next_free; +static void art_free_node256(art_node256_t *node) { + for (size_t i = 0; i < 256; ++i) { + if (node->children[i] != NULL) { + art_free_node(node->children[i]); + } + } + roaring_free(node); } -static inline art_ref_t art_node256_find_child(const art_node256_t *node, - art_key_chunk_t key) { +static inline art_node_t *art_node256_find_child(const art_node256_t *node, + art_key_chunk_t key) { return node->children[key]; } -static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, - art_ref_t child, uint8_t key) { +static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, + uint8_t key) { node->children[key] = child; node->count++; - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); + return (art_node_t *)node; } -static inline art_ref_t art_node256_erase(art_t *art, art_node256_t *node, - uint8_t key_chunk) { - node->children[key_chunk] = CROARING_ART_NULL_REF; +static inline art_node_t *art_node256_erase(art_node256_t *node, + uint8_t key_chunk) { + node->children[key_chunk] = NULL; node->count--; if (node->count > 48) { - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); + return (art_node_t *)node; } art_node48_t *new_node = - art_node48_create(art, node->base.prefix, node->base.prefix_size); + art_node48_create(node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { - if (node->children[i] != CROARING_ART_NULL_REF) { - art_node48_insert(art, new_node, node->children[i], i); + if (node->children[i] != NULL) { + art_node48_insert(new_node, node->children[i], i); } } - art_node_free(art, (art_node_t *)node, 
CROARING_ART_NODE256_TYPE); - return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE48_TYPE); + roaring_free(node); + return (art_node_t *)new_node; } static inline void art_node256_replace(art_node256_t *node, art_key_chunk_t key_chunk, - art_ref_t new_child) { + art_node_t *new_child) { node->children[key_chunk] = new_child; } @@ -10072,14 +9925,14 @@ static inline art_indexed_child_t art_node256_next_child( art_indexed_child_t indexed_child; index++; for (size_t i = index; i < 256; ++i) { - if (node->children[i] != CROARING_ART_NULL_REF) { + if (node->children[i] != NULL) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } @@ -10091,14 +9944,14 @@ static inline art_indexed_child_t art_node256_prev_child( index--; art_indexed_child_t indexed_child; for (int i = index; i >= 0; --i) { - if (node->children[i] != CROARING_ART_NULL_REF) { + if (node->children[i] != NULL) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } @@ -10106,7 +9959,7 @@ static inline art_indexed_child_t art_node256_child_at( const art_node256_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= 256) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -10119,19 +9972,18 @@ static inline art_indexed_child_t art_node256_lower_bound( art_node256_t *node, art_key_chunk_t key_chunk) { art_indexed_child_t indexed_child; for (size_t i = key_chunk; i < 256; ++i) { - if (node->children[i] != CROARING_ART_NULL_REF) { + if (node->children[i] != NULL) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; 
return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } -static bool art_node256_internal_validate(const art_t *art, - const art_node256_t *node, +static bool art_node256_internal_validate(const art_node256_t *node, art_internal_validate_t validator) { if (node->count <= 48) { return art_validate_fail(&validator, "Node256 has too few children"); @@ -10142,7 +9994,7 @@ static bool art_node256_internal_validate(const art_t *art, validator.depth++; int actual_count = 0; for (int i = 0; i < 256; ++i) { - if (node->children[i] != CROARING_ART_NULL_REF) { + if (node->children[i] != NULL) { actual_count++; for (int j = i + 1; j < 256; ++j) { @@ -10153,7 +10005,7 @@ static bool art_node256_internal_validate(const art_t *art, } validator.current_key[validator.depth - 1] = i; - if (!art_internal_validate_at(art, node->children[i], validator)) { + if (!art_internal_validate_at(node->children[i], validator)) { return false; } } @@ -10167,10 +10019,9 @@ static bool art_node256_internal_validate(const art_t *art, // Finds the child with the given key chunk in the inner node, returns NULL if // no such child is found. -static art_ref_t art_find_child(const art_inner_node_t *node, - art_typecode_t typecode, - art_key_chunk_t key_chunk) { - switch (typecode) { +static art_node_t *art_find_child(const art_inner_node_t *node, + art_key_chunk_t key_chunk) { + switch (art_get_type(node)) { case CROARING_ART_NODE4_TYPE: return art_node4_find_child((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: @@ -10181,14 +10032,14 @@ static art_ref_t art_find_child(const art_inner_node_t *node, return art_node256_find_child((art_node256_t *)node, key_chunk); default: assert(false); - return CROARING_ART_NULL_REF; + return NULL; } } // Replaces the child with the given key chunk in the inner node. 
-static void art_replace(art_inner_node_t *node, art_typecode_t typecode, - art_key_chunk_t key_chunk, art_ref_t new_child) { - switch (typecode) { +static void art_replace(art_inner_node_t *node, art_key_chunk_t key_chunk, + art_node_t *new_child) { + switch (art_get_type(node)) { case CROARING_ART_NODE4_TYPE: art_node4_replace((art_node4_t *)node, key_chunk, new_child); break; @@ -10208,112 +10059,78 @@ static void art_replace(art_inner_node_t *node, art_typecode_t typecode, // Erases the child with the given key chunk from the inner node, returns the // updated node (the same as the initial node if it was not shrunk). -static art_ref_t art_node_erase(art_t *art, art_inner_node_t *node, - art_typecode_t typecode, - art_key_chunk_t key_chunk) { - switch (typecode) { +static art_node_t *art_node_erase(art_inner_node_t *node, + art_key_chunk_t key_chunk) { + switch (art_get_type(node)) { case CROARING_ART_NODE4_TYPE: - return art_node4_erase(art, (art_node4_t *)node, key_chunk); + return art_node4_erase((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: - return art_node16_erase(art, (art_node16_t *)node, key_chunk); + return art_node16_erase((art_node16_t *)node, key_chunk); case CROARING_ART_NODE48_TYPE: - return art_node48_erase(art, (art_node48_t *)node, key_chunk); + return art_node48_erase((art_node48_t *)node, key_chunk); case CROARING_ART_NODE256_TYPE: - return art_node256_erase(art, (art_node256_t *)node, key_chunk); + return art_node256_erase((art_node256_t *)node, key_chunk); default: assert(false); - return CROARING_ART_NULL_REF; + return NULL; } } // Inserts the leaf with the given key chunk in the inner node, returns a // pointer to the (possibly expanded) node. 
-static art_ref_t art_node_insert_leaf(art_t *art, art_inner_node_t *node, - art_typecode_t typecode, - art_key_chunk_t key_chunk, - art_ref_t leaf) { - switch (typecode) { +static art_node_t *art_node_insert_leaf(art_inner_node_t *node, + art_key_chunk_t key_chunk, + art_leaf_t *leaf) { + art_node_t *child = (art_node_t *)(CROARING_SET_LEAF(leaf)); + switch (art_get_type(node)) { case CROARING_ART_NODE4_TYPE: - return art_node4_insert(art, (art_node4_t *)node, leaf, key_chunk); + return art_node4_insert((art_node4_t *)node, child, key_chunk); case CROARING_ART_NODE16_TYPE: - return art_node16_insert(art, (art_node16_t *)node, leaf, - key_chunk); + return art_node16_insert((art_node16_t *)node, child, key_chunk); case CROARING_ART_NODE48_TYPE: - return art_node48_insert(art, (art_node48_t *)node, leaf, - key_chunk); + return art_node48_insert((art_node48_t *)node, child, key_chunk); case CROARING_ART_NODE256_TYPE: - return art_node256_insert(art, (art_node256_t *)node, leaf, - key_chunk); + return art_node256_insert((art_node256_t *)node, child, key_chunk); default: assert(false); - return CROARING_ART_NULL_REF; + return NULL; } } -static uint64_t art_node_get_next_free(const art_t *art, art_ref_t ref) { - art_node_t *node = art_deref(art, ref); - art_typecode_t typecode = art_ref_typecode(ref); - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return ((art_leaf_t *)node)->next_free; - case CROARING_ART_NODE4_TYPE: - return ((art_node4_t *)node)->next_free; - case CROARING_ART_NODE16_TYPE: - return ((art_node16_t *)node)->next_free; - case CROARING_ART_NODE48_TYPE: - return ((art_node48_t *)node)->next_free; - case CROARING_ART_NODE256_TYPE: - return ((art_node256_t *)node)->next_free; - default: - assert(false); - return 0; +// Frees the node and its children. Leaves are freed by the user. +static void art_free_node(art_node_t *node) { + if (art_is_leaf(node)) { + // We leave it up to the user to free leaves. 
+ return; } -} - -static void art_node_set_next_free(art_node_t *node, art_typecode_t typecode, - uint64_t next_free) { - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - ((art_leaf_t *)node)->next_free = next_free; - break; + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: - ((art_node4_t *)node)->next_free = next_free; + art_free_node4((art_node4_t *)node); break; case CROARING_ART_NODE16_TYPE: - ((art_node16_t *)node)->next_free = next_free; + art_free_node16((art_node16_t *)node); break; case CROARING_ART_NODE48_TYPE: - ((art_node48_t *)node)->next_free = next_free; + art_free_node48((art_node48_t *)node); break; case CROARING_ART_NODE256_TYPE: - ((art_node256_t *)node)->next_free = next_free; + art_free_node256((art_node256_t *)node); break; default: assert(false); } } -// Marks the node as unoccopied and frees its index. -static void art_node_free(art_t *art, art_node_t *node, - art_typecode_t typecode) { - uint64_t index = art_get_index(art, node, typecode); - uint64_t next_free = art->first_free[typecode]; - art_node_set_next_free(node, typecode, next_free); - art->first_free[typecode] = index; -} - // Returns the next child in key order, or NULL if called on a leaf. // Provided index may be in the range [-1, 255]. 
static art_indexed_child_t art_node_next_child(const art_node_t *node, - art_typecode_t typecode, int index) { - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return (art_indexed_child_t){ - .child = CROARING_ART_NULL_REF, - .index = 0, - .key_chunk = 0, - }; + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: return art_node4_next_child((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10331,15 +10148,13 @@ static art_indexed_child_t art_node_next_child(const art_node_t *node, // Returns the previous child in key order, or NULL if called on a leaf. // Provided index may be in the range [0, 256]. static art_indexed_child_t art_node_prev_child(const art_node_t *node, - art_typecode_t typecode, int index) { - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return (art_indexed_child_t){ - .child = CROARING_ART_NULL_REF, - .index = 0, - .key_chunk = 0, - }; + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: return art_node4_prev_child((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10354,19 +10169,16 @@ static art_indexed_child_t art_node_prev_child(const art_node_t *node, } } -// Returns the child found at the provided index, or NULL if called on a -// leaf. Provided index is only valid if returned by -// art_node_(next|prev)_child. +// Returns the child found at the provided index, or NULL if called on a leaf. +// Provided index is only valid if returned by art_node_(next|prev)_child. 
static art_indexed_child_t art_node_child_at(const art_node_t *node, - art_typecode_t typecode, int index) { - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return (art_indexed_child_t){ - .child = CROARING_ART_NULL_REF, - .index = 0, - .key_chunk = 0, - }; + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: return art_node4_child_at((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10381,18 +10193,16 @@ static art_indexed_child_t art_node_child_at(const art_node_t *node, } } -// Returns the child with the smallest key equal to or greater than the -// given key chunk, NULL if called on a leaf or no such child was found. +// Returns the child with the smallest key equal to or greater than the given +// key chunk, NULL if called on a leaf or no such child was found. static art_indexed_child_t art_node_lower_bound(const art_node_t *node, - art_typecode_t typecode, art_key_chunk_t key_chunk) { - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return (art_indexed_child_t){ - .child = CROARING_ART_NULL_REF, - .index = 0, - .key_chunk = 0, - }; + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: return art_node4_lower_bound((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: @@ -10407,7 +10217,7 @@ static art_indexed_child_t art_node_lower_bound(const art_node_t *node, } } -// ====================== End of node-specific functions ====================== +// ====================== End of node-specific functions ======================= // Compares the given ranges of two keys, returns their relative order: // * Key range 1 < key range 2: a negative value @@ -10445,112 +10255,45 @@ static uint8_t art_common_prefix(const art_key_chunk_t 
key1[], return offset; } -/** - * Extends the array of nodes of the given typecode. Invalidates pointers into - * the array obtained by `art_deref`. - */ -static void art_extend(art_t *art, art_typecode_t typecode) { - uint64_t size = art->first_free[typecode]; - uint64_t capacity = art->capacities[typecode]; - if (size < capacity) { - return; - } - uint64_t new_capacity; - if (capacity == 0) { - new_capacity = 2; - } else if (capacity < 1024) { - new_capacity = 2 * capacity; - } else { - new_capacity = 5 * capacity / 4; - } - art->capacities[typecode] = new_capacity; - art->nodes[typecode] = roaring_realloc( - art->nodes[typecode], new_capacity * ART_NODE_SIZES[typecode]); - uint64_t increase = new_capacity - capacity; - memset(art_get_node(art, capacity, typecode), 0, - increase * ART_NODE_SIZES[typecode]); - for (uint64_t i = capacity; i < new_capacity; ++i) { - art_node_set_next_free(art_get_node(art, i, typecode), typecode, i + 1); - } -} - -/** - * Returns the next free index for the given typecode, may be equal to the - * capacity of the array. - */ -static uint64_t art_next_free(const art_t *art, art_typecode_t typecode) { - uint64_t index = art->first_free[typecode]; - return art_node_get_next_free(art, art_to_ref(index, typecode)); -} - -/** - * Marks an index for the given typecode as used, expanding the relevant node - * array if necessary. - */ -static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode) { - uint64_t first_free = art->first_free[typecode]; - if (first_free == art->capacities[typecode]) { - art_extend(art, typecode); - art->first_free[typecode]++; - return first_free; - } - art->first_free[typecode] = art_next_free(art, typecode); - return first_free; -} - -// Returns a pointer to the rootmost node where the value was inserted, may -// not be equal to `node`. 
-static art_ref_t art_insert_at(art_t *art, art_ref_t ref, - const art_key_chunk_t key[], uint8_t depth, - art_ref_t new_leaf) { - if (art_is_leaf(ref)) { - art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); +// Returns a pointer to the rootmost node where the value was inserted, may not +// be equal to `node`. +static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], + uint8_t depth, art_leaf_t *new_leaf) { + if (art_is_leaf(node)) { + art_leaf_t *leaf = CROARING_CAST_LEAF(node); uint8_t common_prefix = art_common_prefix( leaf->key, depth, ART_KEY_BYTES, key, depth, ART_KEY_BYTES); - // Previously this was a leaf, create an inner node instead and add - // both the existing and new leaf to it. + // Previously this was a leaf, create an inner node instead and add both + // the existing and new leaf to it. art_node_t *new_node = - (art_node_t *)art_node4_create(art, key + depth, common_prefix); + (art_node_t *)art_node4_create(key + depth, common_prefix); - art_ref_t new_ref = art_node_insert_leaf( - art, (art_inner_node_t *)new_node, CROARING_ART_NODE4_TYPE, - leaf->key[depth + common_prefix], ref); - new_ref = art_node_insert_leaf(art, (art_inner_node_t *)new_node, - CROARING_ART_NODE4_TYPE, - key[depth + common_prefix], new_leaf); + new_node = art_node_insert_leaf((art_inner_node_t *)new_node, + leaf->key[depth + common_prefix], leaf); + new_node = art_node_insert_leaf((art_inner_node_t *)new_node, + key[depth + common_prefix], new_leaf); // The new inner node is now the rootmost node. - return new_ref; + return new_node; } - art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); + art_inner_node_t *inner_node = (art_inner_node_t *)node; // Not a leaf: inner node uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); if (common_prefix != inner_node->prefix_size) { - // Partial prefix match. 
Create a new internal node to hold the common + // Partial prefix match. Create a new internal node to hold the common // prefix. - // We create a copy of the node's prefix as the creation of a new - // node may invalidate the prefix pointer. - art_key_chunk_t *prefix_copy = (art_key_chunk_t *)roaring_malloc( - common_prefix * sizeof(art_key_chunk_t)); - memcpy(prefix_copy, inner_node->prefix, - common_prefix * sizeof(art_key_chunk_t)); - art_node4_t *node4 = art_node4_create(art, prefix_copy, common_prefix); - roaring_free(prefix_copy); - - // Deref as a new node was created. - inner_node = (art_inner_node_t *)art_deref(art, ref); + art_node4_t *node4 = + art_node4_create(inner_node->prefix, common_prefix); // Make the existing internal node a child of the new internal node. - art_node4_insert(art, node4, ref, inner_node->prefix[common_prefix]); + node4 = (art_node4_t *)art_node4_insert( + node4, node, inner_node->prefix[common_prefix]); - // Deref again as a new node was created. - inner_node = (art_inner_node_t *)art_deref(art, ref); - - // Correct the prefix of the moved internal node, trimming off the - // chunk inserted into the new internal node. + // Correct the prefix of the moved internal node, trimming off the chunk + // inserted into the new internal node. inner_node->prefix_size = inner_node->prefix_size - common_prefix - 1; if (inner_node->prefix_size > 0) { // Move the remaining prefix to the correct position. @@ -10559,67 +10302,55 @@ static art_ref_t art_insert_at(art_t *art, art_ref_t ref, } // Insert the value in the new internal node. - return art_node_insert_leaf(art, (art_inner_node_t *)node4, - CROARING_ART_NODE4_TYPE, - key[common_prefix + depth], new_leaf); + return art_node_insert_leaf(&node4->base, key[common_prefix + depth], + new_leaf); } // Prefix matches entirely or node has no prefix. Look for an existing // child. 
art_key_chunk_t key_chunk = key[depth + common_prefix]; - art_ref_t child = - art_find_child(inner_node, art_ref_typecode(ref), key_chunk); - if (child != CROARING_ART_NULL_REF) { - art_ref_t new_child = - art_insert_at(art, child, key, depth + common_prefix + 1, new_leaf); + art_node_t *child = art_find_child(inner_node, key_chunk); + if (child != NULL) { + art_node_t *new_child = + art_insert_at(child, key, depth + common_prefix + 1, new_leaf); if (new_child != child) { - // Deref again as a new node may have been created. - inner_node = (art_inner_node_t *)art_deref(art, ref); // Node type changed. - art_replace(inner_node, art_ref_typecode(ref), key_chunk, - new_child); + art_replace(inner_node, key_chunk, new_child); } - return ref; + return node; } - return art_node_insert_leaf(art, inner_node, art_ref_typecode(ref), - key_chunk, new_leaf); + return art_node_insert_leaf(inner_node, key_chunk, new_leaf); } // Erase helper struct. typedef struct art_erase_result_s { - // The rootmost node where the value was erased, may not be equal to - // the original node. If no value was removed, this is - // CROARING_ART_NULL_REF. - art_ref_t rootmost_node; - - // True if a value was erased. - bool erased; + // The rootmost node where the value was erased, may not be equal to `node`. + // If no value was removed, this is null. + art_node_t *rootmost_node; - // Value removed, if any. - art_val_t value_erased; + // Value removed, null if not removed. + art_val_t *value_erased; } art_erase_result_t; // Searches for the given key starting at `node`, erases it if found. 
-static art_erase_result_t art_erase_at(art_t *art, art_ref_t ref, +static art_erase_result_t art_erase_at(art_node_t *node, const art_key_chunk_t *key, uint8_t depth) { art_erase_result_t result; - result.rootmost_node = CROARING_ART_NULL_REF; - result.erased = false; + result.rootmost_node = NULL; + result.value_erased = NULL; - if (art_is_leaf(ref)) { - art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); + if (art_is_leaf(node)) { + art_leaf_t *leaf = CROARING_CAST_LEAF(node); uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); if (common_prefix != ART_KEY_BYTES) { // Leaf key mismatch. return result; } - result.erased = true; - result.value_erased = leaf->val; - art_node_free(art, (art_node_t *)leaf, CROARING_ART_LEAF_TYPE); + result.value_erased = (art_val_t *)leaf; return result; } - art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); + art_inner_node_t *inner_node = (art_inner_node_t *)node; uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); @@ -10628,76 +10359,101 @@ static art_erase_result_t art_erase_at(art_t *art, art_ref_t ref, return result; } art_key_chunk_t key_chunk = key[depth + common_prefix]; - art_ref_t child = - art_find_child(inner_node, art_ref_typecode(ref), key_chunk); - if (child == CROARING_ART_NULL_REF) { + art_node_t *child = art_find_child(inner_node, key_chunk); + if (child == NULL) { // No child with key chunk. return result; } - // Try to erase the key further down. Skip the key chunk associated with - // the child in the node. + // Try to erase the key further down. Skip the key chunk associated with the + // child in the node. 
art_erase_result_t child_result = - art_erase_at(art, child, key, depth + common_prefix + 1); - if (!child_result.erased) { + art_erase_at(child, key, depth + common_prefix + 1); + if (child_result.value_erased == NULL) { return result; } - result.erased = true; result.value_erased = child_result.value_erased; - result.rootmost_node = ref; - - // Deref again as nodes may have changed location. - inner_node = (art_inner_node_t *)art_deref(art, ref); - if (child_result.rootmost_node == CROARING_ART_NULL_REF) { + result.rootmost_node = node; + if (child_result.rootmost_node == NULL) { // Child node was fully erased, erase it from this node's children. - result.rootmost_node = - art_node_erase(art, inner_node, art_ref_typecode(ref), key_chunk); + result.rootmost_node = art_node_erase(inner_node, key_chunk); } else if (child_result.rootmost_node != child) { // Child node was not fully erased, update the pointer to it in this // node. - art_replace(inner_node, art_ref_typecode(ref), key_chunk, - child_result.rootmost_node); + art_replace(inner_node, key_chunk, child_result.rootmost_node); } return result; } -// Searches for the given key starting at `node`, returns NULL if the key -// was not found. -static art_val_t *art_find_at(const art_t *art, art_ref_t ref, +// Searches for the given key starting at `node`, returns NULL if the key was +// not found. 
+static art_val_t *art_find_at(const art_node_t *node, const art_key_chunk_t *key, uint8_t depth) { - while (!art_is_leaf(ref)) { - art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); + while (!art_is_leaf(node)) { + art_inner_node_t *inner_node = (art_inner_node_t *)node; uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); if (common_prefix != inner_node->prefix_size) { return NULL; } - art_ref_t child = art_find_child(inner_node, art_ref_typecode(ref), - key[depth + inner_node->prefix_size]); - if (child == CROARING_ART_NULL_REF) { + art_node_t *child = + art_find_child(inner_node, key[depth + inner_node->prefix_size]); + if (child == NULL) { return NULL; } - ref = child; + node = child; // Include both the prefix and the child key chunk in the depth. depth += inner_node->prefix_size + 1; } - art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); + art_leaf_t *leaf = CROARING_CAST_LEAF(node); if (depth >= ART_KEY_BYTES) { - return &leaf->val; + return (art_val_t *)leaf; } uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); if (common_prefix == ART_KEY_BYTES) { - return &leaf->val; + return (art_val_t *)leaf; } return NULL; } -static void art_node_print_type(art_ref_t ref) { - switch (art_ref_typecode(ref)) { - case CROARING_ART_LEAF_TYPE: - printf("Leaf"); - return; +// Returns the size in bytes of the subtrie. 
+size_t art_size_in_bytes_at(const art_node_t *node) { + if (art_is_leaf(node)) { + return 0; + } + size_t size = 0; + switch (art_get_type((art_inner_node_t *)node)) { + case CROARING_ART_NODE4_TYPE: { + size += sizeof(art_node4_t); + } break; + case CROARING_ART_NODE16_TYPE: { + size += sizeof(art_node16_t); + } break; + case CROARING_ART_NODE48_TYPE: { + size += sizeof(art_node48_t); + } break; + case CROARING_ART_NODE256_TYPE: { + size += sizeof(art_node256_t); + } break; + default: + assert(false); + break; + } + art_indexed_child_t indexed_child = art_node_next_child(node, -1); + while (indexed_child.child != NULL) { + size += art_size_in_bytes_at(indexed_child.child); + indexed_child = art_node_next_child(node, indexed_child.index); + } + return size; +} + +static void art_node_print_type(const art_node_t *node) { + if (art_is_leaf(node)) { + printf("Leaf"); + return; + } + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: printf("Node4"); return; @@ -10716,10 +10472,10 @@ static void art_node_print_type(art_ref_t ref) { } } -void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { - if (art_is_leaf(ref)) { +void art_node_printf(const art_node_t *node, uint8_t depth) { + if (art_is_leaf(node)) { printf("{ type: Leaf, key: "); - art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); + art_leaf_t *leaf = CROARING_CAST_LEAF(node); for (size_t i = 0; i < ART_KEY_BYTES; ++i) { printf("%02x", leaf->key[i]); } @@ -10731,10 +10487,10 @@ void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { printf("%*s", depth, ""); printf("type: "); - art_node_print_type(ref); + art_node_print_type(node); printf("\n"); - art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); + art_inner_node_t *inner_node = (art_inner_node_t *)node; printf("%*s", depth, ""); printf("prefix_size: %d\n", inner_node->prefix_size); @@ -10745,42 +10501,41 @@ void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { 
} printf("\n"); - switch (art_ref_typecode(ref)) { + switch (art_get_type(inner_node)) { case CROARING_ART_NODE4_TYPE: { - art_node4_t *node4 = (art_node4_t *)inner_node; + art_node4_t *node4 = (art_node4_t *)node; for (uint8_t i = 0; i < node4->count; ++i) { printf("%*s", depth, ""); printf("key: %02x ", node4->keys[i]); - art_node_printf(art, node4->children[i], depth); + art_node_printf(node4->children[i], depth); } } break; case CROARING_ART_NODE16_TYPE: { - art_node16_t *node16 = (art_node16_t *)inner_node; + art_node16_t *node16 = (art_node16_t *)node; for (uint8_t i = 0; i < node16->count; ++i) { printf("%*s", depth, ""); printf("key: %02x ", node16->keys[i]); - art_node_printf(art, node16->children[i], depth); + art_node_printf(node16->children[i], depth); } } break; case CROARING_ART_NODE48_TYPE: { - art_node48_t *node48 = (art_node48_t *)inner_node; + art_node48_t *node48 = (art_node48_t *)node; for (int i = 0; i < 256; ++i) { if (node48->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { printf("%*s", depth, ""); printf("key: %02x ", i); printf("child: %02x ", node48->keys[i]); - art_node_printf(art, node48->children[node48->keys[i]], - depth); + art_node_printf(node48->children[node48->keys[i]], depth); } } } break; case CROARING_ART_NODE256_TYPE: { - art_node256_t *node256 = (art_node256_t *)inner_node; + art_node256_t *node256 = (art_node256_t *)node; for (int i = 0; i < 256; ++i) { - if (node256->children[i] != CROARING_ART_NULL_REF) { + if (node256->children[i] != NULL) { printf("%*s", depth, ""); printf("key: %02x ", i); - art_node_printf(art, node256->children[i], depth); + art_node_printf(node256->children[i], depth); } } } break; @@ -10793,310 +10548,118 @@ void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { printf("}\n"); } -/** - * Moves the node at `ref` to the earliest free index before it (if any), - * returns the new ref. Assumes `art->first_free[typecode]` points to the - * smallest free index. 
- */ -static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { - uint64_t idx = art_ref_index(ref); - art_typecode_t typecode = art_ref_typecode(ref); - uint64_t first_free = art->first_free[typecode]; - assert(idx != first_free); - if (idx < first_free) { - return ref; - } - uint64_t from = idx; - uint64_t to = first_free; - uint64_t next_free = art_node_get_next_free(art, art_to_ref(to, typecode)); - memcpy(art_get_node(art, to, typecode), art_get_node(art, from, typecode), - ART_NODE_SIZES[typecode]); - - // With an integer representing the next free index, and an `x` representing - // an occupied index, assume the following scenario at the start of this - // function: - // nodes = [1,2,5,x,x] - // first_free = 0 - // - // We just moved a node from index 3 to 0: - // nodes = [x,2,5,?,x] - // - // We need to modify the free list so that the free indices are ascending. - // This can be done by traversing the list until we find a node with a - // `next_free` greater than the index we copied the node from, and inserting - // the new index in between. This leads to the following: - // nodes = [x,2,3,5,x] - // first_free = 1 - uint64_t initial_next_free = next_free; - uint64_t current = next_free; - while (next_free < from) { - current = next_free; - next_free = - art_node_get_next_free(art, art_to_ref(next_free, typecode)); - } - art_node_set_next_free(art_deref(art, ref), typecode, next_free); - if (current < from) { - art_node_set_next_free(art_get_node(art, current, typecode), typecode, - from); - } - art->first_free[typecode] = - from < initial_next_free ? 
from : initial_next_free; - return art_to_ref(to, typecode); +void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val) { + art_leaf_t *leaf = (art_leaf_t *)val; + art_leaf_populate(leaf, key); + if (art->root == NULL) { + art->root = (art_node_t *)CROARING_SET_LEAF(leaf); + return; + } + art->root = art_insert_at(art->root, key, 0, leaf); } -/** - * Sorts the free lists pointed to by art->first_free in ascending index order. - */ -static void art_sort_free_lists(art_t *art) { - for (art_typecode_t type = CROARING_ART_LEAF_TYPE; - type <= CROARING_ART_NODE256_TYPE; ++type) { - bool *free_indices = - (bool *)roaring_calloc(art->capacities[type], sizeof(bool)); - - for (uint64_t i = art->first_free[type]; i < art->capacities[type]; - i = art_node_get_next_free(art, art_to_ref(i, type))) { - free_indices[i] = true; - } - - uint64_t first_free = art->capacities[type]; - for (uint64_t i = art->capacities[type]; i > 0; --i) { - uint64_t index = i - 1; - if (free_indices[index]) { - art_node_set_next_free(art_get_node(art, index, type), type, - first_free); - first_free = index; - } - } - art->first_free[type] = first_free; - roaring_free(free_indices); +art_val_t *art_erase(art_t *art, const art_key_chunk_t *key) { + if (art->root == NULL) { + return NULL; } + art_erase_result_t result = art_erase_at(art->root, key, 0); + if (result.value_erased == NULL) { + return NULL; + } + art->root = result.rootmost_node; + return result.value_erased; } -/** - * Shrinks all node arrays to `first_free`. Assumes all indices after - * `first_free` are unused. 
- */ -static size_t art_shrink_node_arrays(art_t *art) { - size_t freed = 0; - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - if (art->first_free[t] < art->capacities[t]) { - uint64_t new_capacity = art->first_free[t]; - art->nodes[t] = roaring_realloc(art->nodes[t], - new_capacity * ART_NODE_SIZES[t]); - freed += (art->capacities[t] - new_capacity) * ART_NODE_SIZES[t]; - art->capacities[t] = new_capacity; - } +art_val_t *art_find(const art_t *art, const art_key_chunk_t *key) { + if (art->root == NULL) { + return NULL; } - return freed; + return art_find_at(art->root, key, 0); } -/** - * Traverses the ART, moving nodes to earlier free indices and modifying their - * references along the way. - */ -static void art_shrink_at(art_t *art, art_ref_t ref) { - if (art_is_leaf(ref)) { +bool art_is_empty(const art_t *art) { return art->root == NULL; } + +void art_free(art_t *art) { + if (art->root == NULL) { return; } - switch (art_ref_typecode(ref)) { - case CROARING_ART_NODE4_TYPE: { - art_node4_t *node4 = (art_node4_t *)art_deref(art, ref); - for (uint8_t i = 0; i < node4->count; ++i) { - node4->children[i] = - art_move_node_to_shrink(art, node4->children[i]); - art_shrink_at(art, node4->children[i]); - } - } break; - case CROARING_ART_NODE16_TYPE: { - art_node16_t *node16 = (art_node16_t *)art_deref(art, ref); - for (uint8_t i = 0; i < node16->count; ++i) { - node16->children[i] = - art_move_node_to_shrink(art, node16->children[i]); - art_shrink_at(art, node16->children[i]); - } - } break; - case CROARING_ART_NODE48_TYPE: { - art_node48_t *node48 = (art_node48_t *)art_deref(art, ref); - for (int i = 0; i < 256; ++i) { - if (node48->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { - uint8_t idx = node48->keys[i]; - node48->children[idx] = - art_move_node_to_shrink(art, node48->children[idx]); - art_shrink_at(art, node48->children[idx]); - } - } - } break; - case CROARING_ART_NODE256_TYPE: { - art_node256_t *node256 = (art_node256_t 
*)art_deref(art, ref); - for (int i = 0; i < 256; ++i) { - if (node256->children[i] != CROARING_ART_NULL_REF) { - node256->children[i] = - art_move_node_to_shrink(art, node256->children[i]); - art_shrink_at(art, node256->children[i]); - } - } - } break; - default: - assert(false); - break; - } -} - -void art_init_cleared(art_t *art) { - art->root = CROARING_ART_NULL_REF; - memset(art->first_free, 0, sizeof(art->first_free)); - memset(art->capacities, 0, sizeof(art->capacities)); - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - art->nodes[t] = NULL; - } -} - -size_t art_shrink_to_fit(art_t *art) { - if (art_is_shrunken(art)) { - return 0; - } - if (art->root != CROARING_ART_NULL_REF) { - art_sort_free_lists(art); - art->root = art_move_node_to_shrink(art, art->root); - art_shrink_at(art, art->root); - } - return art_shrink_node_arrays(art); -} - -bool art_is_shrunken(const art_t *art) { - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - if (art->first_free[t] != art->capacities[t]) { - return false; - } - } - return true; -} - -art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val) { - art_ref_t leaf = art_leaf_create(art, key, val); - if (art->root == CROARING_ART_NULL_REF) { - art->root = leaf; - return &((art_leaf_t *)art_deref(art, leaf))->val; - } - art->root = art_insert_at(art, art->root, key, 0, leaf); - return &((art_leaf_t *)art_deref(art, leaf))->val; + art_free_node(art->root); } -bool art_erase(art_t *art, const art_key_chunk_t *key, art_val_t *erased_val) { - art_val_t erased_val_local; - if (erased_val == NULL) { - erased_val = &erased_val_local; - } - if (art->root == CROARING_ART_NULL_REF) { - return false; - } - art_erase_result_t result = art_erase_at(art, art->root, key, 0); - if (!result.erased) { - return false; - } - art->root = result.rootmost_node; - *erased_val = result.value_erased; - return true; -} - -art_val_t *art_find(const art_t *art, 
const art_key_chunk_t *key) { - if (art->root == CROARING_ART_NULL_REF) { - return NULL; - } - return art_find_at(art, art->root, key, 0); -} - -bool art_is_empty(const art_t *art) { - return art->root == CROARING_ART_NULL_REF; -} - -void art_free(art_t *art) { - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - roaring_free(art->nodes[t]); +size_t art_size_in_bytes(const art_t *art) { + size_t size = sizeof(art_t); + if (art->root != NULL) { + size += art_size_in_bytes_at(art->root); } + return size; } void art_printf(const art_t *art) { - if (art->root == CROARING_ART_NULL_REF) { + if (art->root == NULL) { return; } - art_node_printf(art, art->root, 0); -} - -// Returns a reference to the current node that the iterator is positioned -// at. -static inline art_ref_t art_iterator_ref(art_iterator_t *iterator) { - return iterator->frames[iterator->frame].ref; + art_node_printf(art->root, 0); } // Returns the current node that the iterator is positioned at. static inline art_node_t *art_iterator_node(art_iterator_t *iterator) { - return art_deref(iterator->art, art_iterator_ref(iterator)); + return iterator->frames[iterator->frame].node; } -// Sets the iterator key and value to the leaf's key and value. Always -// returns true for convenience. +// Sets the iterator key and value to the leaf's key and value. Always returns +// true for convenience. static inline bool art_iterator_valid_loc(art_iterator_t *iterator, - art_ref_t leaf_ref) { - iterator->frames[iterator->frame].ref = leaf_ref; + art_leaf_t *leaf) { + iterator->frames[iterator->frame].node = CROARING_SET_LEAF(leaf); iterator->frames[iterator->frame].index_in_node = 0; - art_leaf_t *leaf = (art_leaf_t *)art_deref(iterator->art, leaf_ref); memcpy(iterator->key, leaf->key, ART_KEY_BYTES); - iterator->value = &leaf->val; + iterator->value = (art_val_t *)leaf; return true; } -// Invalidates the iterator key and value. Always returns false for -// convenience. 
+// Invalidates the iterator key and value. Always returns false for convenience. static inline bool art_iterator_invalid_loc(art_iterator_t *iterator) { memset(iterator->key, 0, ART_KEY_BYTES); iterator->value = NULL; return false; } -// Moves the iterator one level down in the tree, given a node at the -// current level and the index of the child that we're going down to. +// Moves the iterator one level down in the tree, given a node at the current +// level and the index of the child that we're going down to. // // Note: does not set the index at the new level. -static void art_iterator_down(art_iterator_t *iterator, art_ref_t ref, +static void art_iterator_down(art_iterator_t *iterator, + const art_inner_node_t *node, uint8_t index_in_node) { - iterator->frames[iterator->frame].ref = ref; + iterator->frames[iterator->frame].node = (art_node_t *)node; iterator->frames[iterator->frame].index_in_node = index_in_node; iterator->frame++; - art_inner_node_t *node = (art_inner_node_t *)art_deref(iterator->art, ref); - art_indexed_child_t indexed_child = art_node_child_at( - (art_node_t *)node, art_ref_typecode(ref), index_in_node); - assert(indexed_child.child != CROARING_ART_NULL_REF); - iterator->frames[iterator->frame].ref = indexed_child.child; + art_indexed_child_t indexed_child = + art_node_child_at((art_node_t *)node, index_in_node); + assert(indexed_child.child != NULL); + iterator->frames[iterator->frame].node = indexed_child.child; iterator->depth += node->prefix_size + 1; } -// Moves the iterator to the next/previous child of the current node. -// Returns the child moved to, or NULL if there is no neighboring child. -static art_ref_t art_iterator_neighbor_child(art_iterator_t *iterator, - bool forward) { +// Moves the iterator to the next/previous child of the current node. Returns +// the child moved to, or NULL if there is no neighboring child. 
+static art_node_t *art_iterator_neighbor_child( + art_iterator_t *iterator, const art_inner_node_t *inner_node, + bool forward) { art_iterator_frame_t frame = iterator->frames[iterator->frame]; - art_node_t *node = art_deref(iterator->art, frame.ref); art_indexed_child_t indexed_child; if (forward) { - indexed_child = art_node_next_child(node, art_ref_typecode(frame.ref), - frame.index_in_node); + indexed_child = art_node_next_child(frame.node, frame.index_in_node); } else { - indexed_child = art_node_prev_child(node, art_ref_typecode(frame.ref), - frame.index_in_node); + indexed_child = art_node_prev_child(frame.node, frame.index_in_node); } - if (indexed_child.child != CROARING_ART_NULL_REF) { - art_iterator_down(iterator, frame.ref, indexed_child.index); + if (indexed_child.child != NULL) { + art_iterator_down(iterator, inner_node, indexed_child.index); } return indexed_child.child; } -// Moves the iterator one level up in the tree, returns false if not -// possible. +// Moves the iterator one level up in the tree, returns false if not possible. static bool art_iterator_up(art_iterator_t *iterator) { if (iterator->frame == 0) { return false; @@ -11108,8 +10671,8 @@ static bool art_iterator_up(art_iterator_t *iterator) { return true; } -// Moves the iterator one level, followed by a move to the next / previous -// leaf. Sets the status of the iterator. +// Moves the iterator one level, followed by a move to the next / previous leaf. +// Sets the status of the iterator. static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) { if (!art_iterator_up(iterator)) { // We're at the root. @@ -11120,29 +10683,27 @@ static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) { // Initializes the iterator at the first / last leaf of the given node. // Returns true for convenience. 
-static bool art_node_init_iterator(art_ref_t ref, art_iterator_t *iterator, - bool first) { - while (!art_is_leaf(ref)) { - art_node_t *node = art_deref(iterator->art, ref); +static bool art_node_init_iterator(const art_node_t *node, + art_iterator_t *iterator, bool first) { + while (!art_is_leaf(node)) { art_indexed_child_t indexed_child; if (first) { - indexed_child = - art_node_next_child(node, art_ref_typecode(ref), -1); + indexed_child = art_node_next_child(node, -1); } else { - indexed_child = - art_node_prev_child(node, art_ref_typecode(ref), 256); + indexed_child = art_node_prev_child(node, 256); } - art_iterator_down(iterator, ref, indexed_child.index); - ref = indexed_child.child; + art_iterator_down(iterator, (art_inner_node_t *)node, + indexed_child.index); + node = indexed_child.child; } // We're at a leaf. - iterator->frames[iterator->frame].ref = ref; + iterator->frames[iterator->frame].node = (art_node_t *)node; iterator->frames[iterator->frame].index_in_node = 0; // Should not matter. - return art_iterator_valid_loc(iterator, ref); + return art_iterator_valid_loc(iterator, CROARING_CAST_LEAF(node)); } bool art_iterator_move(art_iterator_t *iterator, bool forward) { - if (art_is_leaf(art_iterator_ref(iterator))) { + if (art_is_leaf(art_iterator_node(iterator))) { bool went_up = art_iterator_up(iterator); if (!went_up) { // This leaf is the root, we're done. @@ -11150,69 +10711,67 @@ bool art_iterator_move(art_iterator_t *iterator, bool forward) { } } // Advance within inner node. - art_ref_t neighbor_child = art_iterator_neighbor_child(iterator, forward); - if (neighbor_child != CROARING_ART_NULL_REF) { - // There is another child at this level, go down to the first or - // last leaf. + art_node_t *neighbor_child = art_iterator_neighbor_child( + iterator, (art_inner_node_t *)art_iterator_node(iterator), forward); + if (neighbor_child != NULL) { + // There is another child at this level, go down to the first or last + // leaf. 
return art_node_init_iterator(neighbor_child, iterator, forward); } // No more children at this level, go up. return art_iterator_up_and_move(iterator, forward); } -// Assumes the iterator is positioned at a node with an equal prefix path up -// to the depth of the iterator. -static bool art_node_iterator_lower_bound(art_ref_t ref, +// Assumes the iterator is positioned at a node with an equal prefix path up to +// the depth of the iterator. +static bool art_node_iterator_lower_bound(const art_node_t *node, art_iterator_t *iterator, const art_key_chunk_t key[]) { - while (!art_is_leaf(ref)) { - art_inner_node_t *inner_node = - (art_inner_node_t *)art_deref(iterator->art, ref); + while (!art_is_leaf(node)) { + art_inner_node_t *inner_node = (art_inner_node_t *)node; int prefix_comparison = art_compare_prefix(inner_node->prefix, 0, key, iterator->depth, inner_node->prefix_size); if (prefix_comparison < 0) { // Prefix so far has been equal, but we've found a smaller key. - // Since we take the lower bound within each node, we can return - // the next leaf. + // Since we take the lower bound within each node, we can return the + // next leaf. return art_iterator_up_and_move(iterator, true); } else if (prefix_comparison > 0) { - // No key equal to the key we're looking for, return the first - // leaf. - return art_node_init_iterator(ref, iterator, true); + // No key equal to the key we're looking for, return the first leaf. + return art_node_init_iterator(node, iterator, true); } // Prefix is equal, move to lower bound child. art_key_chunk_t key_chunk = key[iterator->depth + inner_node->prefix_size]; - art_indexed_child_t indexed_child = art_node_lower_bound( - (art_node_t *)inner_node, art_ref_typecode(ref), key_chunk); - if (indexed_child.child == CROARING_ART_NULL_REF) { + art_indexed_child_t indexed_child = + art_node_lower_bound(node, key_chunk); + if (indexed_child.child == NULL) { // Only smaller keys among children. 
return art_iterator_up_and_move(iterator, true); } if (indexed_child.key_chunk > key_chunk) { // Only larger children, return the first larger child. - art_iterator_down(iterator, ref, indexed_child.index); + art_iterator_down(iterator, inner_node, indexed_child.index); return art_node_init_iterator(indexed_child.child, iterator, true); } // We found a child with an equal prefix. - art_iterator_down(iterator, ref, indexed_child.index); - ref = indexed_child.child; + art_iterator_down(iterator, inner_node, indexed_child.index); + node = indexed_child.child; } - art_leaf_t *leaf = (art_leaf_t *)art_deref(iterator->art, ref); + art_leaf_t *leaf = CROARING_CAST_LEAF(node); if (art_compare_keys(leaf->key, key) >= 0) { // Leaf has an equal or larger key. - return art_iterator_valid_loc(iterator, ref); + return art_iterator_valid_loc(iterator, leaf); } - // Leaf has an equal prefix, but the full key is smaller. Move to the - // next leaf. + // Leaf has an equal prefix, but the full key is smaller. Move to the next + // leaf. return art_iterator_up_and_move(iterator, true); } -art_iterator_t art_init_iterator(art_t *art, bool first) { +art_iterator_t art_init_iterator(const art_t *art, bool first) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - iterator.art = art; - if (art->root == CROARING_ART_NULL_REF) { + if (art->root == NULL) { return iterator; } art_node_init_iterator(art->root, &iterator, first); @@ -11230,12 +10789,12 @@ bool art_iterator_prev(art_iterator_t *iterator) { bool art_iterator_lower_bound(art_iterator_t *iterator, const art_key_chunk_t *key) { if (iterator->value == NULL) { - // We're beyond the end / start of the ART so the iterator does not - // have a valid key. Start from the root. + // We're beyond the end / start of the ART so the iterator does not have + // a valid key. Start from the root. 
iterator->frame = 0; iterator->depth = 0; - art_ref_t root = art_iterator_ref(iterator); - if (root == CROARING_ART_NULL_REF) { + art_node_t *root = art_iterator_node(iterator); + if (root == NULL) { return false; } return art_node_iterator_lower_bound(root, iterator, key); @@ -11250,7 +10809,7 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, // Only smaller keys found. return art_iterator_invalid_loc(iterator); } else { - return art_node_init_iterator(art_iterator_ref(iterator), + return art_node_init_iterator(art_iterator_node(iterator), iterator, true); } } @@ -11263,26 +10822,24 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, iterator->depth + inner_node->prefix_size); } if (compare_result > 0) { - return art_node_init_iterator(art_iterator_ref(iterator), iterator, + return art_node_init_iterator(art_iterator_node(iterator), iterator, true); } - return art_node_iterator_lower_bound(art_iterator_ref(iterator), iterator, + return art_node_iterator_lower_bound(art_iterator_node(iterator), iterator, key); } -art_iterator_t art_lower_bound(art_t *art, const art_key_chunk_t *key) { +art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - iterator.art = art; - if (art->root != CROARING_ART_NULL_REF) { + if (art->root != NULL) { art_node_iterator_lower_bound(art->root, &iterator, key); } return iterator; } -art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key) { +art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - iterator.art = art; - if (art->root != CROARING_ART_NULL_REF) { + if (art->root != NULL) { if (art_node_iterator_lower_bound(art->root, &iterator, key) && art_compare_keys(iterator.key, key) == 0) { art_iterator_next(&iterator); @@ -11291,100 +10848,90 @@ art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key) { return iterator; } -void 
art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, - art_val_t val) { +void art_iterator_insert(art_t *art, art_iterator_t *iterator, + const art_key_chunk_t *key, art_val_t *val) { // TODO: This can likely be faster. - art_insert(iterator->art, key, val); - assert(iterator->art->root != CROARING_ART_NULL_REF); + art_insert(art, key, val); + assert(art->root != NULL); iterator->frame = 0; iterator->depth = 0; - art_node_iterator_lower_bound(iterator->art->root, iterator, key); + art_node_iterator_lower_bound(art->root, iterator, key); } -bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val) { - art_val_t erased_val_local; - if (erased_val == NULL) { - erased_val = &erased_val_local; - } +// TODO: consider keeping `art_t *art` in the iterator. +art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator) { if (iterator->value == NULL) { - return false; + return NULL; } art_key_chunk_t initial_key[ART_KEY_BYTES]; memcpy(initial_key, iterator->key, ART_KEY_BYTES); - *erased_val = *iterator->value; - // Erase the leaf. - art_node_free(iterator->art, art_iterator_node(iterator), - art_ref_typecode(art_iterator_ref(iterator))); + art_val_t *value_erased = iterator->value; bool went_up = art_iterator_up(iterator); if (!went_up) { // We're erasing the root. - iterator->art->root = CROARING_ART_NULL_REF; + art->root = NULL; art_iterator_invalid_loc(iterator); - return true; + return value_erased; } - // Erase the leaf in its parent. - art_ref_t parent_ref = art_iterator_ref(iterator); + // Erase the leaf. 
art_inner_node_t *parent_node = (art_inner_node_t *)art_iterator_node(iterator); art_key_chunk_t key_chunk_in_parent = iterator->key[iterator->depth + parent_node->prefix_size]; - art_ref_t new_parent_ref = - art_node_erase(iterator->art, parent_node, art_ref_typecode(parent_ref), - key_chunk_in_parent); + art_node_t *new_parent_node = + art_node_erase(parent_node, key_chunk_in_parent); - if (new_parent_ref != parent_ref) { + if (new_parent_node != ((art_node_t *)parent_node)) { // Replace the pointer to the inner node we erased from in its // parent (it may be a leaf now). - iterator->frames[iterator->frame].ref = new_parent_ref; + iterator->frames[iterator->frame].node = new_parent_node; went_up = art_iterator_up(iterator); if (went_up) { - art_ref_t grandparent_ref = art_iterator_ref(iterator); art_inner_node_t *grandparent_node = (art_inner_node_t *)art_iterator_node(iterator); art_key_chunk_t key_chunk_in_grandparent = iterator->key[iterator->depth + grandparent_node->prefix_size]; - art_replace(grandparent_node, art_ref_typecode(grandparent_ref), - key_chunk_in_grandparent, new_parent_ref); + art_replace(grandparent_node, key_chunk_in_grandparent, + new_parent_node); } else { // We were already at the rootmost node. - iterator->art->root = new_parent_ref; + art->root = new_parent_node; } } iterator->frame = 0; iterator->depth = 0; - // Do a lower bound search for the initial key, which will find the - // first greater key if it exists. This can likely be mildly faster if - // we instead start from the current position. - art_node_iterator_lower_bound(iterator->art->root, iterator, initial_key); - return true; + // Do a lower bound search for the initial key, which will find the first + // greater key if it exists. This can likely be mildly faster if we instead + // start from the current position. 
+ art_node_iterator_lower_bound(art->root, iterator, initial_key); + return value_erased; } -static bool art_internal_validate_at(const art_t *art, art_ref_t ref, +static bool art_internal_validate_at(const art_node_t *node, art_internal_validate_t validator) { - if (ref == CROARING_ART_NULL_REF) { + if (node == NULL) { return art_validate_fail(&validator, "node is null"); } - if (art_is_leaf(ref)) { - art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); + if (art_is_leaf(node)) { + art_leaf_t *leaf = CROARING_CAST_LEAF(node); if (art_compare_prefix(leaf->key, 0, validator.current_key, 0, validator.depth) != 0) { - return art_validate_fail(&validator, - "leaf key does not match its " - "position's prefix in the tree"); + return art_validate_fail( + &validator, + "leaf key does not match its position's prefix in the tree"); } if (validator.validate_cb != NULL && - !validator.validate_cb(leaf->val, validator.reason, - validator.context)) { + !validator.validate_cb(leaf, validator.reason)) { if (*validator.reason == NULL) { *validator.reason = "leaf validation failed"; } return false; } } else { - art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); + art_inner_node_t *inner_node = (art_inner_node_t *)node; if (validator.depth + inner_node->prefix_size + 1 > ART_KEY_BYTES) { return art_validate_fail(&validator, @@ -11394,28 +10941,28 @@ static bool art_internal_validate_at(const art_t *art, art_ref_t ref, inner_node->prefix_size); validator.depth += inner_node->prefix_size; - switch (art_ref_typecode(ref)) { + switch (inner_node->typecode) { case CROARING_ART_NODE4_TYPE: - if (!art_node4_internal_validate(art, (art_node4_t *)inner_node, + if (!art_node4_internal_validate((art_node4_t *)inner_node, validator)) { return false; } break; case CROARING_ART_NODE16_TYPE: - if (!art_node16_internal_validate( - art, (art_node16_t *)inner_node, validator)) { + if (!art_node16_internal_validate((art_node16_t *)inner_node, + validator)) { return false; } 
break; case CROARING_ART_NODE48_TYPE: - if (!art_node48_internal_validate( - art, (art_node48_t *)inner_node, validator)) { + if (!art_node48_internal_validate((art_node48_t *)inner_node, + validator)) { return false; } break; case CROARING_ART_NODE256_TYPE: - if (!art_node256_internal_validate( - art, (art_node256_t *)inner_node, validator)) { + if (!art_node256_internal_validate((art_node256_t *)inner_node, + validator)) { return false; } break; @@ -11427,143 +10974,23 @@ static bool art_internal_validate_at(const art_t *art, art_ref_t ref, } bool art_internal_validate(const art_t *art, const char **reason, - art_validate_cb_t validate_cb, void *context) { + art_validate_cb_t validate_cb) { const char *reason_local; if (reason == NULL) { // Always allow assigning through *reason reason = &reason_local; } *reason = NULL; - if (art->root == CROARING_ART_NULL_REF) { + if (art->root == NULL) { return true; } art_internal_validate_t validator = { .reason = reason, .validate_cb = validate_cb, - .context = context, .depth = 0, - .current_key = CROARING_ZERO_INITIALIZER, + .current_key = {0}, }; - for (art_typecode_t type = CROARING_ART_LEAF_TYPE; - type <= CROARING_ART_NODE256_TYPE; ++type) { - uint64_t capacity = art->capacities[type]; - for (uint64_t i = 0; i < capacity; ++i) { - uint64_t first_free = art->first_free[type]; - if (first_free > capacity) { - return art_validate_fail(&validator, "first_free > capacity"); - } - } - } - return art_internal_validate_at(art, art->root, validator); -} - -_Static_assert(alignof(art_leaf_t) == alignof(art_node4_t), - "Serialization assumes node type alignment is equal"); -_Static_assert(alignof(art_leaf_t) == alignof(art_node16_t), - "Serialization assumes node type alignment is equal"); -_Static_assert(alignof(art_leaf_t) == alignof(art_node48_t), - "Serialization assumes node type alignment is equal"); -_Static_assert(alignof(art_leaf_t) == alignof(art_node256_t), - "Serialization assumes node type alignment is equal"); - 
-size_t art_size_in_bytes(const art_t *art) { - if (!art_is_shrunken(art)) { - return 0; - } - // Root. - size_t size = sizeof(art->root); - // Node counts. - size += sizeof(art->capacities); - // Alignment for leaves. The rest of the nodes are aligned the same way. - size += - ((size + alignof(art_leaf_t) - 1) & ~(alignof(art_leaf_t) - 1)) - size; - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - size += art->capacities[t] * ART_NODE_SIZES[t]; - } - return size; -} - -size_t art_serialize(const art_t *art, char *buf) { - if (buf == NULL) { - return 0; - } - if (!art_is_shrunken(art)) { - return 0; - } - const char *initial_buf = buf; - - // Root. - memcpy(buf, &art->root, sizeof(art->root)); - buf += sizeof(art->root); - - // Node counts. - memcpy(buf, art->capacities, sizeof(art->capacities)); - buf += sizeof(art->capacities); - - // Alignment for leaves. The rest of the nodes are aligned the same way. - size_t align_bytes = - CROARING_ART_ALIGN_SIZE_RELATIVE(buf, initial_buf, alignof(art_leaf_t)); - memset(buf, 0, align_bytes); - buf += align_bytes; - - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - if (art->capacities[t] > 0) { - size_t size = art->capacities[t] * ART_NODE_SIZES[t]; - memcpy(buf, art->nodes[t], size); - buf += size; - } - } - - return buf - initial_buf; -} - -size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art) { - if (buf == NULL || art == NULL) { - return 0; - } - const char *initial_buf = buf; - art_init_cleared(art); - - if (maxbytes < sizeof(art->root)) { - return 0; - } - memcpy(&art->root, buf, sizeof(art->root)); - buf += sizeof(art->root); - maxbytes -= sizeof(art->root); - - if (maxbytes < sizeof(art->capacities)) { - return 0; - } - _Static_assert(sizeof(art->first_free) == sizeof(art->capacities), - "first_free is read from capacities"); - memcpy(art->first_free, buf, sizeof(art->capacities)); - memcpy(art->capacities, buf, 
sizeof(art->capacities)); - buf += sizeof(art->capacities); - maxbytes -= sizeof(art->capacities); - - // Alignment for leaves. The rest of the nodes are aligned the same way. - const char *before_align = buf; - buf = CROARING_ART_ALIGN_BUF(buf, alignof(art_leaf_t)); - if (maxbytes < (size_t)(buf - before_align)) { - return 0; - } - maxbytes -= buf - before_align; - - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - if (art->capacities[t] > 0) { - size_t size = art->capacities[t] * ART_NODE_SIZES[t]; - if (maxbytes < size) { - return 0; - } - art->nodes[t] = (char *)buf; - buf += size; - maxbytes -= size; - } - } - return buf - initial_buf; + return art_internal_validate_at(art->root, validator); } #ifdef __cplusplus @@ -13783,6 +13210,9 @@ bool array_container_iterate64(const array_container_t *cont, uint32_t base, * bitset.c * */ +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif #include #include #include @@ -14676,7 +14106,7 @@ int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ } \ int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ const bitset_container_t *src_2) { \ - const uint64_t * __restrict__ words_1 = src_1->words; \ + printf("A1\n"); const uint64_t * __restrict__ words_1 = src_1->words; \ const uint64_t * __restrict__ words_2 = src_2->words; \ int32_t sum = 0; \ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ @@ -23295,7 +22725,6 @@ bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { /* end file src/roaring.c */ /* begin file src/roaring64.c */ #include -#include #include #include #include @@ -23304,12 +22733,6 @@ bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { // For serialization / deserialization // containers.h last to avoid conflict with ROARING_CONTAINER_T. 
-#define CROARING_ALIGN_BUF(buf, alignment) \ - (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ - (ptrdiff_t)(~((alignment)-1))) - -#define CROARING_BITSET_ALIGNMENT 64 - #ifdef __cplusplus using namespace ::roaring::internal; @@ -23324,19 +22747,22 @@ namespace api { typedef struct roaring64_bitmap_s { art_t art; uint8_t flags; - uint64_t first_free; - uint64_t capacity; - container_t **containers; } roaring64_bitmap_t; // Leaf type of the ART used to keep the high 48 bits of each entry. -// Low 8 bits: typecode -// High 56 bits: container index -typedef roaring64_leaf_t leaf_t; +typedef struct roaring64_leaf_s { + art_val_t _pad; + uint8_t typecode; + container_t *container; +} roaring64_leaf_t; + +// Alias to make it easier to work with, since it's an internal-only type +// anyway. +typedef struct roaring64_leaf_s leaf_t; // Iterator struct to hold iteration state. typedef struct roaring64_iterator_s { - const roaring64_bitmap_t *r; + const roaring64_bitmap_t *parent; art_iterator_t art_it; roaring_container_iterator_t container_it; uint64_t high48; // Key that art_it points to. @@ -23351,10 +22777,6 @@ typedef struct roaring64_iterator_s { bool saturated_forward; } roaring64_iterator_t; -static inline bool is_frozen64(const roaring64_bitmap_t *r) { - return r->flags & ROARING_FLAG_FROZEN; -} - // Splits the given uint64 key into high 48 bit and low 16 bit components. // Expects high48_out to be of length ART_KEY_BYTES. static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { @@ -23375,96 +22797,24 @@ static inline uint64_t minimum(uint64_t a, uint64_t b) { return (a < b) ? 
a : b; } -static inline leaf_t create_leaf(uint64_t container_index, uint8_t typecode) { - return (container_index << 8) | typecode; -} - -static inline uint8_t get_typecode(leaf_t leaf) { return (uint8_t)leaf; } - -static inline uint64_t get_index(leaf_t leaf) { return leaf >> 8; } - -static inline container_t *get_container(const roaring64_bitmap_t *r, - leaf_t leaf) { - return r->containers[get_index(leaf)]; -} - -// Replaces the container of `leaf` with the given container. Returns the -// modified leaf for convenience. -static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, - container_t *container, - uint8_t typecode) { - uint64_t index = get_index(*leaf); - r->containers[index] = container; - *leaf = create_leaf(index, typecode); - return *leaf; +static inline leaf_t *create_leaf(container_t *container, uint8_t typecode) { + leaf_t *leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); + leaf->container = container; + leaf->typecode = typecode; + return leaf; } -/** - * Extends the array of container pointers. 
- */ -static void extend_containers(roaring64_bitmap_t *r) { - uint64_t size = r->first_free; - if (size < r->capacity) { - return; - } - uint64_t new_capacity; - if (r->capacity == 0) { - new_capacity = 2; - } else if (r->capacity < 1024) { - new_capacity = 2 * r->capacity; - } else { - new_capacity = 5 * r->capacity / 4; - } - uint64_t increase = new_capacity - r->capacity; - r->containers = - roaring_realloc(r->containers, new_capacity * sizeof(container_t *)); - memset(r->containers + r->capacity, 0, increase * sizeof(container_t *)); - r->capacity = new_capacity; -} - -static uint64_t next_free_container_idx(const roaring64_bitmap_t *r) { - for (uint64_t i = r->first_free + 1; i < r->capacity; ++i) { - if (r->containers[i] == NULL) { - return i; - } - } - return r->capacity; -} - -static uint64_t allocate_index(roaring64_bitmap_t *r) { - uint64_t first_free = r->first_free; - if (first_free == r->capacity) { - extend_containers(r); - } - r->first_free = next_free_container_idx(r); - return first_free; -} - -static leaf_t add_container(roaring64_bitmap_t *r, container_t *container, - uint8_t typecode) { - uint64_t index = allocate_index(r); - r->containers[index] = container; - return create_leaf(index, typecode); -} - -static void remove_container(roaring64_bitmap_t *r, leaf_t leaf) { - uint64_t index = get_index(leaf); - r->containers[index] = NULL; - if (index < r->first_free) { - r->first_free = index; - } -} - -// Copies the container referenced by `leaf` from `r1` to `r2`. -static inline leaf_t copy_leaf_container(const roaring64_bitmap_t *r1, - roaring64_bitmap_t *r2, leaf_t leaf) { - uint8_t typecode = get_typecode(leaf); +static inline leaf_t *copy_leaf_container(const leaf_t *leaf) { + leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); + result_leaf->typecode = leaf->typecode; // get_copy_of_container modifies the typecode passed in. 
- container_t *container = get_copy_of_container( - get_container(r1, leaf), &typecode, /*copy_on_write=*/false); - return add_container(r2, container, typecode); + result_leaf->container = get_copy_of_container( + leaf->container, &result_leaf->typecode, /*copy_on_write=*/false); + return result_leaf; } +static inline void free_leaf(leaf_t *leaf) { roaring_free(leaf); } + static inline int compare_high48(art_key_chunk_t key1[], art_key_chunk_t key2[]) { return art_compare_keys(key1, key2); @@ -23473,10 +22823,10 @@ static inline int compare_high48(art_key_chunk_t key1[], static inline bool roaring64_iterator_init_at_leaf_first( roaring64_iterator_t *it) { it->high48 = combine_key(it->art_it.key, 0); - leaf_t leaf = (leaf_t)*it->art_it.value; + leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = 0; - it->container_it = container_init_iterator(get_container(it->r, leaf), - get_typecode(leaf), &low16); + it->container_it = + container_init_iterator(leaf->container, leaf->typecode, &low16); it->value = it->high48 | low16; return (it->has_value = true); } @@ -23484,18 +22834,18 @@ static inline bool roaring64_iterator_init_at_leaf_first( static inline bool roaring64_iterator_init_at_leaf_last( roaring64_iterator_t *it) { it->high48 = combine_key(it->art_it.key, 0); - leaf_t leaf = (leaf_t)*it->art_it.value; + leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = 0; - it->container_it = container_init_iterator_last(get_container(it->r, leaf), - get_typecode(leaf), &low16); + it->container_it = + container_init_iterator_last(leaf->container, leaf->typecode, &low16); it->value = it->high48 | low16; return (it->has_value = true); } static inline roaring64_iterator_t *roaring64_iterator_init_at( const roaring64_bitmap_t *r, roaring64_iterator_t *it, bool first) { - it->r = r; - it->art_it = art_init_iterator((art_t *)&r->art, first); + it->parent = r; + it->art_it = art_init_iterator(&r->art, first); it->has_value = it->art_it.value != NULL; if (it->has_value) 
{ if (first) { @@ -23512,11 +22862,8 @@ static inline roaring64_iterator_t *roaring64_iterator_init_at( roaring64_bitmap_t *roaring64_bitmap_create(void) { roaring64_bitmap_t *r = (roaring64_bitmap_t *)roaring_malloc(sizeof(roaring64_bitmap_t)); - art_init_cleared(&r->art); + r->art.root = NULL; r->flags = 0; - r->capacity = 0; - r->first_free = 0; - r->containers = NULL; return r; } @@ -23526,35 +22873,26 @@ void roaring64_bitmap_free(roaring64_bitmap_t *r) { } art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - if (is_frozen64(r)) { - // Only free the container itself, not the buffer-backed contents - // within. - roaring_free(get_container(r, leaf)); - } else { - container_free(get_container(r, leaf), get_typecode(leaf)); - } + leaf_t *leaf = (leaf_t *)it.value; + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); art_iterator_next(&it); } - if (!is_frozen64(r)) { - art_free(&r->art); - } - roaring_free(r->containers); + art_free(&r->art); roaring_free(r); } roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t result_typecode = get_typecode(leaf); + leaf_t *leaf = (leaf_t *)it.value; + uint8_t result_typecode = leaf->typecode; container_t *result_container = get_copy_of_container( - get_container(r, leaf), &result_typecode, /*copy_on_write=*/false); - leaf_t result_leaf = - add_container(result, result_container, result_typecode); - art_insert(&result->art, it.key, (art_val_t)result_leaf); + leaf->container, &result_typecode, /*copy_on_write=*/false); + leaf_t *result_leaf = create_leaf(result_container, result_typecode); + art_insert(&result->art, it.key, (art_val_t *)result_leaf); 
art_iterator_next(&it); } return result; @@ -23581,8 +22919,8 @@ static void move_from_roaring32_offset(roaring64_bitmap_t *dst, uint8_t high48[ART_KEY_BYTES]; uint64_t high48_bits = key_base | ((uint64_t)key << 16); split_key(high48_bits, high48); - leaf_t leaf = add_container(dst, container, typecode); - art_insert(&dst->art, high48, (art_val_t)leaf); + leaf_t *leaf = create_leaf(container, typecode); + art_insert(&dst->art, high48, (art_val_t *)leaf); } // We stole all the containers, so leave behind a size of zero src->high_low_container.size = 0; @@ -23624,8 +22962,8 @@ roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, uint8_t high48[ART_KEY_BYTES]; split_key(min, high48); - leaf_t leaf = add_container(r, container, typecode); - art_insert(&r->art, high48, (art_val_t)leaf); + leaf_t *leaf = create_leaf(container, typecode); + art_insert(&r->art, high48, (art_val_t *)leaf); uint64_t gap = container_max - container_min + step - 1; uint64_t increment = gap - (gap % step); @@ -23644,19 +22982,31 @@ roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, return r; } +roaring64_bitmap_t *roaring64_bitmap_of(size_t n_args, ...) 
{ + roaring64_bitmap_t *r = roaring64_bitmap_create(); + roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; + va_list ap; + va_start(ap, n_args); + for (size_t i = 0; i < n_args; i++) { + uint64_t val = va_arg(ap, uint64_t); + roaring64_bitmap_add_bulk(r, &context, val); + } + va_end(ap); + return r; +} + static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, uint8_t *high48, uint16_t low16, leaf_t *leaf) { if (leaf != NULL) { - uint8_t typecode = get_typecode(*leaf); - container_t *container = get_container(r, *leaf); uint8_t typecode2; container_t *container2 = - container_add(container, low16, typecode, &typecode2); - if (container2 != container) { - container_free(container, typecode); - replace_container(r, leaf, container2, typecode2); + container_add(leaf->container, low16, leaf->typecode, &typecode2); + if (container2 != leaf->container) { + container_free(leaf->container, leaf->typecode); + leaf->container = container2; + leaf->typecode = typecode2; } return leaf; } else { @@ -23665,8 +23015,9 @@ static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, container_t *container = container_add(ac, low16, ARRAY_CONTAINER_TYPE, &typecode); assert(ac == container); - leaf_t new_leaf = add_container(r, container, typecode); - return (leaf_t *)art_insert(&r->art, high48, (art_val_t)new_leaf); + leaf = create_leaf(container, typecode); + art_insert(&r->art, high48, (art_val_t *)leaf); + return leaf; } } @@ -23684,12 +23035,12 @@ bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) { int old_cardinality = 0; if (leaf != NULL) { - old_cardinality = container_get_cardinality(get_container(r, *leaf), - get_typecode(*leaf)); + old_cardinality = + container_get_cardinality(leaf->container, leaf->typecode); } leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); int new_cardinality = - container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + 
container_get_cardinality(leaf->container, leaf->typecode); return old_cardinality != new_cardinality; } @@ -23698,22 +23049,22 @@ void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, uint64_t val) { uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - leaf_t *leaf = context->leaf; - if (leaf != NULL && compare_high48(context->high_bytes, high48) == 0) { + if (context->leaf != NULL && + compare_high48(context->high_bytes, high48) == 0) { // We're at a container with the correct high bits. - uint8_t typecode1 = get_typecode(*leaf); - container_t *container1 = get_container(r, *leaf); uint8_t typecode2; container_t *container2 = - container_add(container1, low16, typecode1, &typecode2); - if (container2 != container1) { - container_free(container1, typecode1); - replace_container(r, leaf, container2, typecode2); + container_add(context->leaf->container, low16, + context->leaf->typecode, &typecode2); + if (container2 != context->leaf->container) { + container_free(context->leaf->container, context->leaf->typecode); + context->leaf->container = container2; + context->leaf->typecode = typecode2; } } else { // We're not positioned anywhere yet or the high bits of the key // differ. 
- leaf = (leaf_t *)art_find(&r->art, high48); + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); context->leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); memcpy(context->high_bytes, high48, ART_KEY_BYTES); @@ -23733,19 +23084,17 @@ void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, } } -static inline void add_range_closed_at(roaring64_bitmap_t *r, art_t *art, - uint8_t *high48, uint16_t min, - uint16_t max) { +static inline void add_range_closed_at(art_t *art, uint8_t *high48, + uint16_t min, uint16_t max) { leaf_t *leaf = (leaf_t *)art_find(art, high48); if (leaf != NULL) { - uint8_t typecode1 = get_typecode(*leaf); - container_t *container1 = get_container(r, *leaf); uint8_t typecode2; - container_t *container2 = - container_add_range(container1, typecode1, min, max, &typecode2); - if (container2 != container1) { - container_free(container1, typecode1); - replace_container(r, leaf, container2, typecode2); + container_t *container2 = container_add_range( + leaf->container, leaf->typecode, min, max, &typecode2); + if (container2 != leaf->container) { + container_free(leaf->container, leaf->typecode); + leaf->container = container2; + leaf->typecode = typecode2; } return; } @@ -23753,8 +23102,8 @@ static inline void add_range_closed_at(roaring64_bitmap_t *r, art_t *art, // container_add_range is inclusive, but `container_range_of_ones` is // exclusive. 
container_t *container = container_range_of_ones(min, max + 1, &typecode); - leaf_t new_leaf = add_container(r, container, typecode); - art_insert(art, high48, (art_val_t)new_leaf); + leaf = create_leaf(container, typecode); + art_insert(art, high48, (art_val_t *)leaf); } void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, @@ -23778,22 +23127,22 @@ void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, max_high48); if (compare_high48(min_high48, max_high48) == 0) { // Only populate range within one container. - add_range_closed_at(r, art, min_high48, min_low16, max_low16); + add_range_closed_at(art, min_high48, min_low16, max_low16); return; } // Populate a range across containers. Fill intermediate containers // entirely. - add_range_closed_at(r, art, min_high48, min_low16, 0xffff); + add_range_closed_at(art, min_high48, min_low16, 0xffff); uint64_t min_high_bits = min >> 16; uint64_t max_high_bits = max >> 16; for (uint64_t current = min_high_bits + 1; current < max_high_bits; ++current) { uint8_t current_high48[ART_KEY_BYTES]; split_key(current << 16, current_high48); - add_range_closed_at(r, art, current_high48, 0, 0xffff); + add_range_closed_at(art, current_high48, 0, 0xffff); } - add_range_closed_at(r, art, max_high48, 0, max_low16); + add_range_closed_at(art, max_high48, 0, max_low16); } bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { @@ -23801,8 +23150,7 @@ bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { uint16_t low16 = split_key(val, high48); leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); if (leaf != NULL) { - return container_contains(get_container(r, *leaf), low16, - get_typecode(*leaf)); + return container_contains(leaf->container, low16, leaf->typecode); } return false; } @@ -23819,7 +23167,7 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, 
max_high48); uint64_t max_high48_bits = (max - 1) & 0xFFFFFFFFFFFF0000; // Inclusive - art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); + art_iterator_t it = art_lower_bound(&r->art, min_high48); if (it.value == NULL || combine_key(it.key, 0) > min) { return false; } @@ -23836,7 +23184,7 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, return false; } - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; uint32_t container_min = 0; if (compare_high48(it.key, min_high48) == 0) { container_min = min_low16; @@ -23849,13 +23197,11 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, // For the first and last containers we use container_contains_range, // for the intermediate containers we can use container_is_full. if (container_min == 0 && container_max == 0xFFFF + 1) { - if (!container_is_full(get_container(r, leaf), - get_typecode(leaf))) { + if (!container_is_full(leaf->container, leaf->typecode)) { return false; } - } else if (!container_contains_range(get_container(r, leaf), - container_min, container_max, - get_typecode(leaf))) { + } else if (!container_contains_range(leaf->container, container_min, + container_max, leaf->typecode)) { return false; } prev_high48_bits = current_high48_bits; @@ -23881,24 +23227,24 @@ bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, context->leaf = leaf; memcpy(context->high_bytes, high48, ART_KEY_BYTES); } - return container_contains(get_container(r, *context->leaf), low16, - get_typecode(*context->leaf)); + return container_contains(context->leaf->container, low16, + context->leaf->typecode); } bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, uint64_t *element) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t start_rank = 0; while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - 
uint64_t cardinality = container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + leaf_t *leaf = (leaf_t *)it.value; + uint64_t cardinality = + container_get_cardinality(leaf->container, leaf->typecode); if (start_rank + cardinality > rank) { uint32_t uint32_start = 0; uint32_t uint32_rank = rank - start_rank; uint32_t uint32_element = 0; - if (container_select(get_container(r, leaf), get_typecode(leaf), - &uint32_start, uint32_rank, &uint32_element)) { + if (container_select(leaf->container, leaf->typecode, &uint32_start, + uint32_rank, &uint32_element)) { *element = combine_key(it.key, (uint16_t)uint32_element); return true; } @@ -23914,17 +23260,16 @@ uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) { uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t rank = 0; while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; int compare_result = compare_high48(it.key, high48); if (compare_result < 0) { - rank += container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + rank += container_get_cardinality(leaf->container, leaf->typecode); } else if (compare_result == 0) { - return rank + container_rank(get_container(r, leaf), - get_typecode(leaf), low16); + return rank + + container_rank(leaf->container, leaf->typecode, low16); } else { return rank; } @@ -23938,17 +23283,16 @@ bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t index = 0; while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; int compare_result = 
compare_high48(it.key, high48); if (compare_result < 0) { - index += container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + index += container_get_cardinality(leaf->container, leaf->typecode); } else if (compare_result == 0) { - int index16 = container_get_index(get_container(r, leaf), - get_typecode(leaf), low16); + int index16 = + container_get_index(leaf->container, leaf->typecode, low16); if (index16 < 0) { return false; } @@ -23962,31 +23306,31 @@ bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, return false; } -// Returns true if a container was removed. -static inline bool containerptr_roaring64_bitmap_remove(roaring64_bitmap_t *r, - uint8_t *high48, - uint16_t low16, - leaf_t *leaf) { +static inline leaf_t *containerptr_roaring64_bitmap_remove( + roaring64_bitmap_t *r, uint8_t *high48, uint16_t low16, leaf_t *leaf) { if (leaf == NULL) { - return false; + return NULL; } - uint8_t typecode = get_typecode(*leaf); - container_t *container = get_container(r, *leaf); + container_t *container = leaf->container; + uint8_t typecode = leaf->typecode; uint8_t typecode2; container_t *container2 = container_remove(container, low16, typecode, &typecode2); if (container2 != container) { container_free(container, typecode); - replace_container(r, leaf, container2, typecode2); + leaf->container = container2; + leaf->typecode = typecode2; } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - bool erased = art_erase(&r->art, high48, (art_val_t *)leaf); - assert(erased); - return true; + leaf = (leaf_t *)art_erase(&r->art, high48); + if (leaf != NULL) { + free_leaf(leaf); + } + return NULL; } - return false; + return leaf; } void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) { @@ -24008,12 +23352,13 @@ bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) { return false; } int old_cardinality = - container_get_cardinality(get_container(r, *leaf), 
get_typecode(*leaf)); - if (containerptr_roaring64_bitmap_remove(r, high48, low16, leaf)) { + container_get_cardinality(leaf->container, leaf->typecode); + leaf = containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); + if (leaf == NULL) { return true; } int new_cardinality = - container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + container_get_cardinality(leaf->container, leaf->typecode); return new_cardinality != old_cardinality; } @@ -24026,28 +23371,26 @@ void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, if (context->leaf != NULL && compare_high48(context->high_bytes, high48) == 0) { // We're at a container with the correct high bits. - uint8_t typecode = get_typecode(*context->leaf); - container_t *container = get_container(r, *context->leaf); uint8_t typecode2; container_t *container2 = - container_remove(container, low16, typecode, &typecode2); - if (container2 != container) { - container_free(container, typecode); - replace_container(r, context->leaf, container2, typecode2); + container_remove(context->leaf->container, low16, + context->leaf->typecode, &typecode2); + if (container2 != context->leaf->container) { + container_free(context->leaf->container, context->leaf->typecode); + context->leaf->container = container2; + context->leaf->typecode = typecode2; } if (!container_nonzero_cardinality(container2, typecode2)) { + leaf_t *leaf = (leaf_t *)art_erase(art, high48); container_free(container2, typecode2); - leaf_t leaf; - bool erased = art_erase(art, high48, (art_val_t *)&leaf); - assert(erased); - remove_container(r, leaf); + free_leaf(leaf); } } else { // We're not positioned anywhere yet or the high bits of the key // differ. 
leaf_t *leaf = (leaf_t *)art_find(art, high48); - containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); - context->leaf = leaf; + context->leaf = + containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); memcpy(context->high_bytes, high48, ART_KEY_BYTES); } } @@ -24065,26 +23408,23 @@ void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, } } -static inline void remove_range_closed_at(roaring64_bitmap_t *r, art_t *art, - uint8_t *high48, uint16_t min, - uint16_t max) { +static inline void remove_range_closed_at(art_t *art, uint8_t *high48, + uint16_t min, uint16_t max) { leaf_t *leaf = (leaf_t *)art_find(art, high48); if (leaf == NULL) { return; } - uint8_t typecode = get_typecode(*leaf); - container_t *container = get_container(r, *leaf); uint8_t typecode2; - container_t *container2 = - container_remove_range(container, typecode, min, max, &typecode2); - if (container2 != container) { - container_free(container, typecode); + container_t *container2 = container_remove_range( + leaf->container, leaf->typecode, min, max, &typecode2); + if (container2 != leaf->container) { + container_free(leaf->container, leaf->typecode); if (container2 != NULL) { - replace_container(r, leaf, container2, typecode2); + leaf->container = container2; + leaf->typecode = typecode2; } else { - bool erased = art_erase(art, high48, NULL); - assert(erased); - remove_container(r, *leaf); + art_erase(art, high48); + free_leaf(leaf); } } } @@ -24110,23 +23450,21 @@ void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, max_high48); if (compare_high48(min_high48, max_high48) == 0) { // Only remove a range within one container. - remove_range_closed_at(r, art, min_high48, min_low16, max_low16); + remove_range_closed_at(art, min_high48, min_low16, max_low16); return; } // Remove a range across containers. Remove intermediate containers // entirely. 
- remove_range_closed_at(r, art, min_high48, min_low16, 0xffff); + remove_range_closed_at(art, min_high48, min_low16, 0xffff); art_iterator_t it = art_upper_bound(art, min_high48); while (it.value != NULL && art_compare_keys(it.key, max_high48) < 0) { - leaf_t leaf; - bool erased = art_iterator_erase(&it, (art_val_t *)&leaf); - assert(erased); - container_free(get_container(r, leaf), get_typecode(leaf)); - remove_container(r, leaf); + leaf_t *leaf = (leaf_t *)art_iterator_erase(art, &it); + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); } - remove_range_closed_at(r, art, max_high48, 0, max_low16); + remove_range_closed_at(art, max_high48, 0, max_low16); } void roaring64_bitmap_clear(roaring64_bitmap_t *r) { @@ -24134,12 +23472,12 @@ void roaring64_bitmap_clear(roaring64_bitmap_t *r) { } uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t cardinality = 0; while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - cardinality += container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + leaf_t *leaf = (leaf_t *)it.value; + cardinality += + container_get_cardinality(leaf->container, leaf->typecode); art_iterator_next(&it); } return cardinality; @@ -24168,7 +23506,7 @@ uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, uint8_t max_high48[ART_KEY_BYTES]; uint16_t max_low16 = split_key(max, max_high48); - art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); + art_iterator_t it = art_lower_bound(&r->art, min_high48); while (it.value != NULL) { int max_compare_result = compare_high48(it.key, max_high48); if (max_compare_result > 0) { @@ -24176,22 +23514,23 @@ uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, break; } - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); 
- container_t *container = get_container(r, leaf); + leaf_t *leaf = (leaf_t *)it.value; if (max_compare_result == 0) { // We're at the max high key, add only the range up to the low // 16 bits of max. - cardinality += container_rank(container, typecode, max_low16); + cardinality += + container_rank(leaf->container, leaf->typecode, max_low16); } else { // We're not yet at the max high key, add the full container // range. - cardinality += container_get_cardinality(container, typecode); + cardinality += + container_get_cardinality(leaf->container, leaf->typecode); } if (compare_high48(it.key, min_high48) == 0 && min_low16 > 0) { // We're at the min high key, remove the range up to the low 16 // bits of min. - cardinality -= container_rank(container, typecode, min_low16 - 1); + cardinality -= + container_rank(leaf->container, leaf->typecode, min_low16 - 1); } art_iterator_next(&it); } @@ -24203,23 +23542,23 @@ bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) { } uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); if (it.value == NULL) { return UINT64_MAX; } - leaf_t leaf = (leaf_t)*it.value; - return combine_key( - it.key, container_minimum(get_container(r, leaf), get_typecode(leaf))); + leaf_t *leaf = (leaf_t *)it.value; + return combine_key(it.key, + container_minimum(leaf->container, leaf->typecode)); } uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/false); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/false); if (it.value == NULL) { return 0; } - leaf_t leaf = (leaf_t)*it.value; - return combine_key( - it.key, container_maximum(get_container(r, leaf), get_typecode(leaf))); + leaf_t *leaf = (leaf_t *)it.value; + return combine_key(it.key, + container_maximum(leaf->container, leaf->typecode)); } bool 
roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { @@ -24230,53 +23569,15 @@ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { uint8_t new_typecode; // We don't need to free the existing container if a new one was // created, convert_run_optimize does that internally. - container_t *new_container = convert_run_optimize( - get_container(r, *leaf), get_typecode(*leaf), &new_typecode); - replace_container(r, leaf, new_container, new_typecode); + leaf->container = convert_run_optimize(leaf->container, leaf->typecode, + &new_typecode); + leaf->typecode = new_typecode; has_run_container |= new_typecode == RUN_CONTAINER_TYPE; art_iterator_next(&it); } return has_run_container; } -static void move_to_shrink(roaring64_bitmap_t *r, leaf_t *leaf) { - uint64_t idx = get_index(*leaf); - if (idx < r->first_free) { - return; - } - r->containers[r->first_free] = get_container(r, *leaf); - r->containers[idx] = NULL; - *leaf = create_leaf(r->first_free, get_typecode(*leaf)); - r->first_free = next_free_container_idx(r); -} - -static inline bool is_shrunken(const roaring64_bitmap_t *r) { - return art_is_shrunken(&r->art) && r->first_free == r->capacity; -} - -size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { - size_t freed = art_shrink_to_fit(&r->art); - art_iterator_t it = art_init_iterator(&r->art, true); - while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - freed += container_shrink_to_fit(get_container(r, *leaf), - get_typecode(*leaf)); - move_to_shrink(r, leaf); - art_iterator_next(&it); - } - if (is_shrunken(r)) { - return freed; - } - uint64_t new_capacity = r->first_free; - if (new_capacity < r->capacity) { - r->containers = roaring_realloc(r->containers, - new_capacity * sizeof(container_t *)); - freed += (r->capacity - new_capacity) * sizeof(container_t *); - r->capacity = new_capacity; - } - return freed; -} - /** * (For advanced users.) 
* Collect statistics about the bitmap @@ -24287,16 +23588,15 @@ void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, stat->min_value = roaring64_bitmap_minimum(r); stat->max_value = roaring64_bitmap_maximum(r); - art_iterator_t it = art_init_iterator((art_t *)&r->art, true); + art_iterator_t it = art_init_iterator(&r->art, true); while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; stat->n_containers++; - uint8_t truetype = - get_container_type(get_container(r, leaf), get_typecode(leaf)); - uint32_t card = container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + uint8_t truetype = get_container_type(leaf->container, leaf->typecode); + uint32_t card = + container_get_cardinality(leaf->container, leaf->typecode); uint32_t sbytes = - container_size_in_bytes(get_container(r, leaf), get_typecode(leaf)); + container_size_in_bytes(leaf->container, leaf->typecode); stat->cardinality += card; switch (truetype) { case BITSET_CONTAINER_TYPE: @@ -24322,34 +23622,31 @@ void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, } } -static bool roaring64_leaf_internal_validate(const art_val_t val, - const char **reason, - void *context) { - leaf_t leaf = (leaf_t)val; - roaring64_bitmap_t *r = (roaring64_bitmap_t *)context; - return container_internal_validate(get_container(r, leaf), - get_typecode(leaf), reason); +static bool roaring64_leaf_internal_validate(const art_val_t *val, + const char **reason) { + leaf_t *leaf = (leaf_t *)val; + return container_internal_validate(leaf->container, leaf->typecode, reason); } bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, const char **reason) { return art_internal_validate(&r->art, reason, - roaring64_leaf_internal_validate, (void *)r); + roaring64_leaf_internal_validate); } bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - 
art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { if (compare_high48(it1.key, it2.key) != 0) { return false; } - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - if (!container_equals(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2))) { + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + if (!container_equals(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode)) { return false; } art_iterator_next(&it1); @@ -24360,8 +23657,8 @@ bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { bool it2_present = it2.value != NULL; @@ -24370,11 +23667,10 @@ bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, if (it2_present) { compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - if (!container_is_subset( - get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2))) { + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + if (!container_is_subset(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode)) { return false; } art_iterator_next(&it1); @@ -24401,8 +23697,8 @@ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, const roaring64_bitmap_t 
*r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -24412,20 +23708,19 @@ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = - container_and(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - if (container_nonzero_cardinality(result_container, - result_typecode)) { - leaf_t result_leaf = - add_container(result, result_container, result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + result_leaf->container = container_and( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + + if (container_nonzero_cardinality(result_leaf->container, + result_leaf->typecode)) { + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); } else { - container_free(result_container, result_typecode); + container_free(result_leaf->container, result_leaf->typecode); + free_leaf(result_leaf); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -24444,8 +23739,8 @@ uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { uint64_t result = 0; - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); 
- art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -24455,11 +23750,11 @@ uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - result += container_and_cardinality( - get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2)); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + result += + container_and_cardinality(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode); art_iterator_next(&it1); art_iterator_next(&it2); } else if (compare_result < 0) { @@ -24480,7 +23775,7 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, return; } art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -24496,7 +23791,7 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 2a: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; + leaf_t *leaf2 = (leaf_t *)it2.value; // We do the computation "in place" only when c1 is not a // shared container. Rationale: using a shared container @@ -24504,31 +23799,28 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, // copy and then doing the computation in place which is // likely less efficient than avoiding in place entirely and // always generating a new container. 
- uint8_t typecode = get_typecode(*leaf1); - container_t *container = get_container(r1, *leaf1); uint8_t typecode2; container_t *container2; - if (typecode == SHARED_CONTAINER_TYPE) { - container2 = container_and(container, typecode, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + container2 = container_and( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); } else { container2 = container_iand( - container, typecode, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); } - if (container2 != container) { - container_free(container, typecode); + if (container2 != leaf1->container) { + container_free(leaf1->container, leaf1->typecode); + leaf1->container = container2; + leaf1->typecode = typecode2; } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - art_iterator_erase(&it1, NULL); - remove_container(r1, *leaf1); + art_iterator_erase(&r1->art, &it1); + free_leaf(leaf1); } else { - if (container2 != container) { - replace_container(r1, leaf1, container2, typecode2); - } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -24539,11 +23831,10 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, if (!it2_present || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t leaf; - bool erased = art_iterator_erase(&it1, (art_val_t *)&leaf); - assert(erased); - container_free(get_container(r1, leaf), get_typecode(leaf)); - remove_container(r1, leaf); + leaf_t *leaf = (leaf_t *)art_iterator_erase(&r1->art, &it1); + assert(leaf != NULL); + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); } else if (compare_result > 0) { // Case 2c: it1 is after it2. 
art_iterator_lower_bound(&it2, it1.key); @@ -24554,8 +23845,8 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { bool intersect = false; - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -24565,11 +23856,10 @@ bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - intersect |= container_intersect( - get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2)); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + intersect |= container_intersect(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode); art_iterator_next(&it1); art_iterator_next(&it2); } else if (compare_result < 0) { @@ -24609,8 +23899,8 @@ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -24628,31 +23918,26 @@ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if 
(compare_result == 0) { // Case 3b: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = - container_or(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - leaf_t result_leaf = - add_container(result, result_container, result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); + result_leaf->container = container_or( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); art_iterator_next(&it1); art_iterator_next(&it2); } } if ((it1_present && !it2_present) || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t result_leaf = - copy_leaf_container(r1, result, (leaf_t)*it1.value); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. 
- leaf_t result_leaf = - copy_leaf_container(r2, result, (leaf_t)*it2.value); - art_insert(&result->art, it2.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + art_insert(&result->art, it2.key, (art_val_t *)result_leaf); art_iterator_next(&it2); } } @@ -24673,7 +23958,7 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, return; } art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -24692,23 +23977,22 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 3b: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t typecode1 = get_typecode(*leaf1); - container_t *container1 = get_container(r1, *leaf1); + leaf_t *leaf2 = (leaf_t *)it2.value; uint8_t typecode2; container_t *container2; - if (get_typecode(*leaf1) == SHARED_CONTAINER_TYPE) { - container2 = container_or(container1, typecode1, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + container2 = container_or(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode, + &typecode2); } else { - container2 = container_ior(container1, typecode1, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + container2 = container_ior( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); } - if (container2 != container1) { - container_free(container1, typecode1); - replace_container(r1, leaf1, container2, typecode2); + if (container2 != leaf1->container) { + container_free(leaf1->container, leaf1->typecode); + leaf1->container = container2; + leaf1->typecode = typecode2; } art_iterator_next(&it1); 
art_iterator_next(&it2); @@ -24719,9 +24003,9 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t result_leaf = - copy_leaf_container(r2, r1, (leaf_t)*it2.value); - art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + art_iterator_insert(&r1->art, &it1, it2.key, + (art_val_t *)result_leaf); art_iterator_next(&it2); } } @@ -24731,8 +24015,8 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -24750,20 +24034,19 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 3b: iterators at the same high key position. 
- leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = - container_xor(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - if (container_nonzero_cardinality(result_container, - result_typecode)) { - leaf_t result_leaf = add_container(result, result_container, - result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); + result_leaf->container = container_xor( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + if (container_nonzero_cardinality(result_leaf->container, + result_leaf->typecode)) { + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); } else { - container_free(result_container, result_typecode); + container_free(result_leaf->container, + result_leaf->typecode); + free_leaf(result_leaf); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -24771,15 +24054,13 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, } if ((it1_present && !it2_present) || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t result_leaf = - copy_leaf_container(r1, result, (leaf_t)*it1.value); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. 
- leaf_t result_leaf = - copy_leaf_container(r2, result, (leaf_t)*it2.value); - art_insert(&result->art, it2.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + art_insert(&result->art, it2.key, (art_val_t *)result_leaf); art_iterator_next(&it2); } } @@ -24798,7 +24079,7 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { assert(r1 != r2); art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -24817,15 +24098,15 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 3b: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t typecode1 = get_typecode(*leaf1); - container_t *container1 = get_container(r1, *leaf1); + leaf_t *leaf2 = (leaf_t *)it2.value; + container_t *container1 = leaf1->container; + uint8_t typecode1 = leaf1->typecode; uint8_t typecode2; container_t *container2; - if (typecode1 == SHARED_CONTAINER_TYPE) { - container2 = container_xor(container1, typecode1, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + container2 = container_xor( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); if (container2 != container1) { // We only free when doing container_xor, not // container_ixor, as ixor frees the original @@ -24834,19 +24115,17 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, } } else { container2 = container_ixor( - container1, typecode1, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); } + 
leaf1->container = container2; + leaf1->typecode = typecode2; if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - bool erased = art_iterator_erase(&it1, NULL); - assert(erased); - remove_container(r1, *leaf1); + art_iterator_erase(&r1->art, &it1); + free_leaf(leaf1); } else { - if (container2 != container1) { - replace_container(r1, leaf1, container2, typecode2); - } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -24859,13 +24138,13 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t result_leaf = - copy_leaf_container(r2, r1, (leaf_t)*it2.value); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); if (it1_present) { - art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); + art_iterator_insert(&r1->art, &it1, it2.key, + (art_val_t *)result_leaf); art_iterator_next(&it1); } else { - art_insert(&r1->art, it2.key, (art_val_t)result_leaf); + art_insert(&r1->art, it2.key, (art_val_t *)result_leaf); } art_iterator_next(&it2); } @@ -24876,8 +24155,8 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -24892,21 +24171,20 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2b: iterators at the same high key position. 
+ leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = container_andnot( - get_container(r1, *leaf1), get_typecode(*leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - - if (container_nonzero_cardinality(result_container, - result_typecode)) { - leaf_t result_leaf = add_container(result, result_container, - result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *leaf2 = (leaf_t *)it2.value; + result_leaf->container = container_andnot( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + + if (container_nonzero_cardinality(result_leaf->container, + result_leaf->typecode)) { + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); } else { - container_free(result_container, result_typecode); + container_free(result_leaf->container, + result_leaf->typecode); + free_leaf(result_leaf); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -24914,9 +24192,8 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, } if (!it2_present || compare_result < 0) { // Cases 1 and 2a: it1 is the only iterator or is before it2. - leaf_t result_leaf = - copy_leaf_container(r1, result, (leaf_t)*it1.value); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); art_iterator_next(&it1); } else if (compare_result > 0) { // Case 2c: it1 is after it2. 
@@ -24936,7 +24213,7 @@ uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -24952,15 +24229,15 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 2b: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t typecode1 = get_typecode(*leaf1); - container_t *container1 = get_container(r1, *leaf1); + leaf_t *leaf2 = (leaf_t *)it2.value; + container_t *container1 = leaf1->container; + uint8_t typecode1 = leaf1->typecode; uint8_t typecode2; container_t *container2; - if (typecode1 == SHARED_CONTAINER_TYPE) { + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { container2 = container_andnot( - container1, typecode1, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); if (container2 != container1) { // We only free when doing container_andnot, not // container_iandnot, as iandnot frees the original @@ -24969,19 +24246,19 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, } } else { container2 = container_iandnot( - container1, typecode1, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); + } + if (container2 != container1) { + leaf1->container = container2; + leaf1->typecode = typecode2; } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - bool erased = art_iterator_erase(&it1, NULL); - assert(erased); - remove_container(r1, *leaf1); + 
art_iterator_erase(&r1->art, &it1); + free_leaf(leaf1); } else { - if (container2 != container1) { - replace_container(r1, leaf1, container2, typecode2); - } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -25000,39 +24277,38 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, } /** - * Flips the leaf at high48 in the range [min, max), adding the result to - * `r2`. If the high48 key is not found in `r1`, a new container is created. - */ -static void roaring64_flip_leaf(const roaring64_bitmap_t *r1, - roaring64_bitmap_t *r2, uint8_t high48[], - uint32_t min, uint32_t max) { - leaf_t *leaf1 = (leaf_t *)art_find(&r1->art, high48); - uint8_t typecode2; + * Flips the leaf at high48 in the range [min, max), returning a new leaf with a + * new container. If the high48 key is not found in the existing bitmap, a new + * container is created. Returns null if the negation results in an empty range. + */ +static leaf_t *roaring64_flip_leaf(const roaring64_bitmap_t *r, + uint8_t high48[], uint32_t min, + uint32_t max) { + leaf_t *leaf1 = (leaf_t *)art_find(&r->art, high48); container_t *container2; + uint8_t typecode2; if (leaf1 == NULL) { // No container at this key, create a full container. container2 = container_range_of_ones(min, max, &typecode2); } else if (min == 0 && max > 0xFFFF) { // Flip whole container. - container2 = container_not(get_container(r1, *leaf1), - get_typecode(*leaf1), &typecode2); + container2 = + container_not(leaf1->container, leaf1->typecode, &typecode2); } else { // Partially flip a container. 
- container2 = - container_not_range(get_container(r1, *leaf1), get_typecode(*leaf1), - min, max, &typecode2); + container2 = container_not_range(leaf1->container, leaf1->typecode, min, + max, &typecode2); } if (container_nonzero_cardinality(container2, typecode2)) { - leaf_t leaf2 = add_container(r2, container2, typecode2); - art_insert(&r2->art, high48, (art_val_t)leaf2); - } else { - container_free(container2, typecode2); + return create_leaf(container2, typecode2); } + container_free(container2, typecode2); + return NULL; } /** - * Flips the leaf at high48 in the range [min, max). If the high48 key is - * not found in the bitmap, a new container is created. Deletes the leaf and + * Flips the leaf at high48 in the range [min, max). If the high48 key is not + * found in the bitmap, a new container is created. Deletes the leaf and * associated container if the negation results in an empty range. */ static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], @@ -25043,28 +24319,28 @@ static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], if (leaf == NULL) { // No container at this key, insert a full container. container2 = container_range_of_ones(min, max, &typecode2); - leaf_t new_leaf = add_container(r, container2, typecode2); - art_insert(&r->art, high48, (art_val_t)new_leaf); + art_insert(&r->art, high48, + (art_val_t *)create_leaf(container2, typecode2)); return; } if (min == 0 && max > 0xFFFF) { // Flip whole container. - container2 = container_inot(get_container(r, *leaf), - get_typecode(*leaf), &typecode2); + container2 = + container_inot(leaf->container, leaf->typecode, &typecode2); } else { // Partially flip a container. 
- container2 = container_inot_range( - get_container(r, *leaf), get_typecode(*leaf), min, max, &typecode2); + container2 = container_inot_range(leaf->container, leaf->typecode, min, + max, &typecode2); } - if (container_nonzero_cardinality(container2, typecode2)) { - replace_container(r, leaf, container2, typecode2); - } else { - bool erased = art_erase(&r->art, high48, NULL); - assert(erased); - container_free(container2, typecode2); - remove_container(r, *leaf); + leaf->container = container2; + leaf->typecode = typecode2; + + if (!container_nonzero_cardinality(leaf->container, leaf->typecode)) { + art_erase(&r->art, high48); + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); } } @@ -25089,21 +24365,20 @@ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; roaring64_bitmap_t *r2 = roaring64_bitmap_create(); - art_iterator_t it = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r1->art, /*first=*/true); // Copy the containers before min unchanged. while (it.value != NULL && compare_high48(it.key, min_high48_key) < 0) { - leaf_t leaf1 = (leaf_t)*it.value; - uint8_t typecode2 = get_typecode(leaf1); + leaf_t *leaf1 = (leaf_t *)it.value; + uint8_t typecode2 = leaf1->typecode; container_t *container2 = get_copy_of_container( - get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); - leaf_t leaf2 = add_container(r2, container2, typecode2); - art_insert(&r2->art, it.key, (art_val_t)leaf2); + leaf1->container, &typecode2, /*copy_on_write=*/false); + art_insert(&r2->art, it.key, + (art_val_t *)create_leaf(container2, typecode2)); art_iterator_next(&it); } - // Flip the range (including non-existent containers!) between min and - // max. + // Flip the range (including non-existent containers!) between min and max. 
for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; high48_bits++) { uint8_t current_high48_key[ART_KEY_BYTES]; @@ -25118,19 +24393,22 @@ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, max_container = max_low16 + 1; // Exclusive. } - roaring64_flip_leaf(r1, r2, current_high48_key, min_container, - max_container); + leaf_t *leaf = roaring64_flip_leaf(r1, current_high48_key, + min_container, max_container); + if (leaf != NULL) { + art_insert(&r2->art, current_high48_key, (art_val_t *)leaf); + } } // Copy the containers after max unchanged. - it = art_upper_bound((art_t *)&r1->art, max_high48_key); + it = art_upper_bound(&r1->art, max_high48_key); while (it.value != NULL) { - leaf_t leaf1 = (leaf_t)*it.value; - uint8_t typecode2 = get_typecode(leaf1); + leaf_t *leaf1 = (leaf_t *)it.value; + uint8_t typecode2 = leaf1->typecode; container_t *container2 = get_copy_of_container( - get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); - leaf_t leaf2 = add_container(r2, container2, typecode2); - art_insert(&r2->art, it.key, (art_val_t)leaf2); + leaf1->container, &typecode2, /*copy_on_write=*/false); + art_insert(&r2->art, it.key, + (art_val_t *)create_leaf(container2, typecode2)); art_iterator_next(&it); } @@ -25155,8 +24433,7 @@ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; - // Flip the range (including non-existent containers!) between min and - // max. + // Flip the range (including non-existent containers!) between min and max. for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; high48_bits++) { uint8_t current_high48_key[ART_KEY_BYTES]; @@ -25178,7 +24455,7 @@ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, // Returns the number of distinct high 32-bit entries in the bitmap. 
static inline uint64_t count_high32(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t high32_count = 0; uint32_t prev_high32 = 0; while (it.value != NULL) { @@ -25207,7 +24484,7 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { uint64_t high32_count; size += sizeof(high32_count); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint32_t prev_high32 = 0; roaring_bitmap_t *bitmap32 = NULL; @@ -25216,8 +24493,7 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); if (bitmap32 == NULL || prev_high32 != current_high32) { if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the - // bucket. + // Write as uint32 the most significant 32 bits of the bucket. 
size += sizeof(prev_high32); // Write the 32-bit Roaring bitmaps representing the least @@ -25239,10 +24515,10 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { prev_high32 = current_high32; } - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high32 >> 16), get_container(r, leaf), - get_typecode(leaf)); + (uint16_t)(current_high32 >> 16), leaf->container, + leaf->typecode); art_iterator_next(&it); } @@ -25273,7 +24549,7 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, memcpy(buf, &high32_count, sizeof(high32_count)); buf += sizeof(high32_count); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint32_t prev_high32 = 0; roaring_bitmap_t *bitmap32 = NULL; @@ -25283,8 +24559,7 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, uint32_t current_high32 = (uint32_t)(current_high48 >> 32); if (bitmap32 == NULL || prev_high32 != current_high32) { if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the - // bucket. + // Write as uint32 the most significant 32 bits of the bucket. 
memcpy(buf, &prev_high32, sizeof(prev_high32)); buf += sizeof(prev_high32); @@ -25307,10 +24582,10 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, prev_high32 = current_high32; } - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high48 >> 16), get_container(r, leaf), - get_typecode(leaf)); + (uint16_t)(current_high48 >> 16), leaf->container, + leaf->typecode); art_iterator_next(&it); } @@ -25361,8 +24636,8 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, buf += sizeof(high32); read_bytes += sizeof(high32); - // Read the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. + // Read the 32-bit Roaring bitmaps representing the least significant + // bits of a set of elements. size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( buf, maxbytes - read_bytes); if (bitmap32_size == 0) { @@ -25417,8 +24692,8 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( } previous_high32 = high32; - // Read the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. + // Read the 32-bit Roaring bitmaps representing the least significant + // bits of a set of elements. size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( buf, maxbytes - read_bytes); if (bitmap32_size == 0) { @@ -25460,364 +24735,16 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( return r; } -// Returns an "element count" for the given container. This has a different -// meaning for each container type, but the purpose is the minimal information -// required to serialize the container metadata. 
-static inline uint32_t container_get_element_count(const container_t *c, - uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - return ((bitset_container_t *)c)->cardinality; - } - case ARRAY_CONTAINER_TYPE: { - return ((array_container_t *)c)->cardinality; - } - case RUN_CONTAINER_TYPE: { - return ((run_container_t *)c)->n_runs; - } - default: { - assert(false); - roaring_unreachable; - return 0; - } - } -} - -static inline size_t container_get_frozen_size(const container_t *c, - uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - } - case ARRAY_CONTAINER_TYPE: { - return container_get_element_count(c, typecode) * sizeof(uint16_t); - } - case RUN_CONTAINER_TYPE: { - return container_get_element_count(c, typecode) * sizeof(rle16_t); - } - default: { - assert(false); - roaring_unreachable; - return 0; - } - } -} - -uint64_t align_size(uint64_t size, uint64_t alignment) { - return (size + alignment - 1) & ~(alignment - 1); -} - -size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r) { - if (!is_shrunken(r)) { - return 0; - } - // Flags. - uint64_t size = sizeof(r->flags); - // Container count. - size += sizeof(r->capacity); - // Container element counts. - size += r->capacity * sizeof(uint16_t); - // Total container sizes. - size += 3 * sizeof(uint64_t); - // ART (8 byte aligned). - size = align_size(size, 8); - size += art_size_in_bytes(&r->art); - - uint64_t total_sizes[4] = - CROARING_ZERO_INITIALIZER; // Indexed by typecode. - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - total_sizes[typecode] += - container_get_frozen_size(get_container(r, leaf), typecode); - art_iterator_next(&it); - } - // Containers (aligned). 
- size = align_size(size, CROARING_BITSET_ALIGNMENT); - size += total_sizes[BITSET_CONTAINER_TYPE]; - size = align_size(size, alignof(rle16_t)); - size += total_sizes[ARRAY_CONTAINER_TYPE]; - size = align_size(size, alignof(uint16_t)); - size += total_sizes[RUN_CONTAINER_TYPE]; - // Padding to make overall size a multiple of required alignment. - size = align_size(size, CROARING_BITSET_ALIGNMENT); - return size; -} - -static inline void container_frozen_serialize(const container_t *container, - uint8_t typecode, - uint64_t **bitsets, - uint16_t **arrays, - rle16_t **runs) { - size_t size = container_get_frozen_size(container, typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - bitset_container_t *bitset = (bitset_container_t *)container; - memcpy(*bitsets, bitset->words, size); - *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; - break; - } - case ARRAY_CONTAINER_TYPE: { - array_container_t *array = (array_container_t *)container; - memcpy(*arrays, array->array, size); - *arrays += container_get_element_count(container, typecode); - break; - } - case RUN_CONTAINER_TYPE: { - run_container_t *run = (run_container_t *)container; - memcpy(*runs, run->runs, size); - *runs += container_get_element_count(container, typecode); - break; - } - default: { - assert(false); - roaring_unreachable; - } - } -} - -static inline char *pad_align(char *buf, const char *initial_buf, - size_t alignment) { - uint64_t buf_size = buf - initial_buf; - uint64_t pad = align_size(buf_size, alignment) - buf_size; - memset(buf, 0, pad); - return buf + pad; -} - -size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, - char *buf) { - if (buf == NULL) { - return 0; - } - if (!is_shrunken(r)) { - return 0; - } - const char *initial_buf = buf; - - // Flags. - memcpy(buf, &r->flags, sizeof(r->flags)); - buf += sizeof(r->flags); - - // Container count. - memcpy(buf, &r->capacity, sizeof(r->capacity)); - buf += sizeof(r->capacity); - - // Container element counts. 
- uint64_t total_sizes[4] = - CROARING_ZERO_INITIALIZER; // Indexed by typecode. - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - container_t *container = get_container(r, leaf); - - uint32_t elem_count = container_get_element_count(container, typecode); - uint16_t compressed_elem_count = (uint16_t)(elem_count - 1); - memcpy(buf, &compressed_elem_count, sizeof(compressed_elem_count)); - buf += sizeof(compressed_elem_count); - - total_sizes[typecode] += container_get_frozen_size(container, typecode); - art_iterator_next(&it); - } - - // Total container sizes. - memcpy(buf, &(total_sizes[BITSET_CONTAINER_TYPE]), sizeof(uint64_t)); - buf += sizeof(uint64_t); - memcpy(buf, &(total_sizes[RUN_CONTAINER_TYPE]), sizeof(uint64_t)); - buf += sizeof(uint64_t); - memcpy(buf, &(total_sizes[ARRAY_CONTAINER_TYPE]), sizeof(uint64_t)); - buf += sizeof(uint64_t); - - // ART. - buf = pad_align(buf, initial_buf, 8); - buf += art_serialize(&r->art, buf); - - // Containers (aligned). - // Runs before arrays as run elements are larger than array elements and - // smaller than bitset elements. 
- buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); - uint64_t *bitsets = (uint64_t *)buf; - buf += total_sizes[BITSET_CONTAINER_TYPE]; - buf = pad_align(buf, initial_buf, alignof(rle16_t)); - rle16_t *runs = (rle16_t *)buf; - buf += total_sizes[RUN_CONTAINER_TYPE]; - buf = pad_align(buf, initial_buf, alignof(uint16_t)); - uint16_t *arrays = (uint16_t *)buf; - buf += total_sizes[ARRAY_CONTAINER_TYPE]; - - it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - container_t *container = get_container(r, leaf); - container_frozen_serialize(container, typecode, &bitsets, &arrays, - &runs); - art_iterator_next(&it); - } - - // Padding to make overall size a multiple of required alignment. - buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); - - return buf - initial_buf; -} - -static container_t *container_frozen_view(uint8_t typecode, uint32_t elem_count, - const uint64_t **bitsets, - const uint16_t **arrays, - const rle16_t **runs) { - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - bitset_container_t *c = (bitset_container_t *)roaring_malloc( - sizeof(bitset_container_t)); - c->cardinality = elem_count; - c->words = (uint64_t *)*bitsets; - *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; - return (container_t *)c; - } - case ARRAY_CONTAINER_TYPE: { - array_container_t *c = - (array_container_t *)roaring_malloc(sizeof(array_container_t)); - c->cardinality = elem_count; - c->capacity = elem_count; - c->array = (uint16_t *)*arrays; - *arrays += elem_count; - return (container_t *)c; - } - case RUN_CONTAINER_TYPE: { - run_container_t *c = - (run_container_t *)roaring_malloc(sizeof(run_container_t)); - c->n_runs = elem_count; - c->capacity = elem_count; - c->runs = (rle16_t *)*runs; - *runs += elem_count; - return (container_t *)c; - } - default: { - assert(false); - roaring_unreachable; - return NULL; - } - } -} - -roaring64_bitmap_t 
*roaring64_bitmap_frozen_view(const char *buf, - size_t maxbytes) { - if (buf == NULL) { - return NULL; - } - - roaring64_bitmap_t *r = roaring64_bitmap_create(); - - // Flags. - if (maxbytes < sizeof(r->flags)) { - roaring64_bitmap_free(r); - return NULL; - } - memcpy(&r->flags, buf, sizeof(r->flags)); - buf += sizeof(r->flags); - maxbytes -= sizeof(r->flags); - r->flags |= ROARING_FLAG_FROZEN; - - // Container count. - if (maxbytes < sizeof(r->capacity)) { - roaring64_bitmap_free(r); - return NULL; - } - memcpy(&r->capacity, buf, sizeof(r->capacity)); - buf += sizeof(r->capacity); - maxbytes -= sizeof(r->capacity); - - r->containers = - (container_t *)roaring_malloc(r->capacity * sizeof(container_t *)); - - // Container element counts. - if (maxbytes < r->capacity * sizeof(uint16_t)) { - roaring64_bitmap_free(r); - return NULL; - } - const char *elem_counts = buf; - buf += r->capacity * sizeof(uint16_t); - maxbytes -= r->capacity * sizeof(uint16_t); - - // Total container sizes. - uint64_t total_sizes[4]; - if (maxbytes < sizeof(uint64_t) * 3) { - roaring64_bitmap_free(r); - return NULL; - } - memcpy(&(total_sizes[BITSET_CONTAINER_TYPE]), buf, sizeof(uint64_t)); - buf += sizeof(uint64_t); - maxbytes -= sizeof(uint64_t); - memcpy(&(total_sizes[RUN_CONTAINER_TYPE]), buf, sizeof(uint64_t)); - buf += sizeof(uint64_t); - maxbytes -= sizeof(uint64_t); - memcpy(&(total_sizes[ARRAY_CONTAINER_TYPE]), buf, sizeof(uint64_t)); - buf += sizeof(uint64_t); - maxbytes -= sizeof(uint64_t); - - // ART (8 byte aligned). - buf = CROARING_ALIGN_BUF(buf, 8); - size_t art_size = art_frozen_view(buf, maxbytes, &r->art); - if (art_size == 0) { - roaring64_bitmap_free(r); - return NULL; - } - buf += art_size; - maxbytes -= art_size; - - // Containers (aligned). 
- const char *before_containers = buf; - buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); - const uint64_t *bitsets = (const uint64_t *)buf; - buf += total_sizes[BITSET_CONTAINER_TYPE]; - buf = CROARING_ALIGN_BUF(buf, alignof(rle16_t)); - const rle16_t *runs = (const rle16_t *)buf; - buf += total_sizes[RUN_CONTAINER_TYPE]; - buf = CROARING_ALIGN_BUF(buf, alignof(uint16_t)); - const uint16_t *arrays = (const uint16_t *)buf; - buf += total_sizes[ARRAY_CONTAINER_TYPE]; - if (maxbytes < (uint64_t)(buf - before_containers)) { - roaring64_bitmap_free(r); - return NULL; - } - maxbytes -= buf - before_containers; - - // Deserialize in ART iteration order. - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - for (size_t i = 0; it.value != NULL; ++i) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - - uint16_t compressed_elem_count; - memcpy(&compressed_elem_count, elem_counts + (i * sizeof(uint16_t)), - sizeof(compressed_elem_count)); - uint32_t elem_count = (uint32_t)(compressed_elem_count) + 1; - - // The container index is unrelated to the iteration order. - uint64_t index = get_index(leaf); - r->containers[index] = container_frozen_view(typecode, elem_count, - &bitsets, &arrays, &runs); - - art_iterator_next(&it); - } - - // Padding to make overall size a multiple of required alignment. 
- buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); - - return r; -} - bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, roaring_iterator64 iterator, void *ptr) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { uint64_t high48 = combine_key(it.key, 0); uint64_t high32 = high48 & 0xFFFFFFFF00000000ULL; uint32_t low32 = high48; - leaf_t leaf = (leaf_t)*it.value; - if (!container_iterate64(get_container(r, leaf), get_typecode(leaf), - low32, iterator, high32, ptr)) { + leaf_t *leaf = (leaf_t *)it.value; + if (!container_iterate64(leaf->container, leaf->typecode, low32, + iterator, high32, ptr)) { return false; } art_iterator_next(&it); @@ -25877,12 +24804,12 @@ bool roaring64_iterator_advance(roaring64_iterator_t *it) { if (it->saturated_forward) { return (it->has_value = false); } - roaring64_iterator_init_at(it->r, it, /*first=*/true); + roaring64_iterator_init_at(it->parent, it, /*first=*/true); return it->has_value; } - leaf_t leaf = (leaf_t)*it->art_it.value; + leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_next(get_container(it->r, leaf), get_typecode(leaf), + if (container_iterator_next(leaf->container, leaf->typecode, &it->container_it, &low16)) { it->value = it->high48 | low16; return (it->has_value = true); @@ -25900,12 +24827,12 @@ bool roaring64_iterator_previous(roaring64_iterator_t *it) { // Saturated backward. 
return (it->has_value = false); } - roaring64_iterator_init_at(it->r, it, /*first=*/false); + roaring64_iterator_init_at(it->parent, it, /*first=*/false); return it->has_value; } - leaf_t leaf = (leaf_t)*it->art_it.value; + leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_prev(get_container(it->r, leaf), get_typecode(leaf), + if (container_iterator_prev(leaf->container, leaf->typecode, &it->container_it, &low16)) { it->value = it->high48 | low16; return (it->has_value = true); @@ -25923,8 +24850,8 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, uint16_t val_low16 = split_key(val, val_high48); if (!it->has_value || it->high48 != (val & 0xFFFFFFFFFFFF0000)) { // The ART iterator is before or after the high48 bits of `val` (or - // beyond the ART altogether), so we need to move to a leaf with a - // key equal or greater. + // beyond the ART altogether), so we need to move to a leaf with a key + // equal or greater. if (!art_iterator_lower_bound(&it->art_it, val_high48)) { // Only smaller keys found. it->saturated_forward = true; @@ -25935,13 +24862,13 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, } if (it->high48 == (val & 0xFFFFFFFFFFFF0000)) { - // We're at equal high bits, check if a suitable value can be found - // in this container. - leaf_t leaf = (leaf_t)*it->art_it.value; + // We're at equal high bits, check if a suitable value can be found in + // this container. 
+ leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_lower_bound( - get_container(it->r, leaf), get_typecode(leaf), - &it->container_it, &low16, val_low16)) { + if (container_iterator_lower_bound(leaf->container, leaf->typecode, + &it->container_it, &low16, + val_low16)) { it->value = it->high48 | low16; return (it->has_value = true); } @@ -25952,8 +24879,8 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, } } - // We're at a leaf with high bits greater than `val`, so the first entry - // in this container is our result. + // We're at a leaf with high bits greater than `val`, so the first entry in + // this container is our result. return roaring64_iterator_init_at_leaf_first(it); } @@ -25962,15 +24889,15 @@ uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, uint64_t consumed = 0; while (it->has_value && consumed < count) { uint32_t container_consumed; - leaf_t leaf = (leaf_t)*it->art_it.value; + leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = (uint16_t)it->value; uint32_t container_count = UINT32_MAX; if (count - consumed < (uint64_t)UINT32_MAX) { container_count = count - consumed; } bool has_value = container_iterator_read_into_uint64( - get_container(it->r, leaf), get_typecode(leaf), &it->container_it, - it->high48, buf, container_count, &container_consumed, &low16); + leaf->container, leaf->typecode, &it->container_it, it->high48, buf, + container_count, &container_consumed, &low16); consumed += container_consumed; buf += container_consumed; if (has_value) { diff --git a/pyroaring/roaring.h b/pyroaring/roaring.h index 9965870..1e2b9ac 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2025-02-28T15:35:21Z +// Created by amalgamation.sh on 2024-09-30T21:45:33Z /* * The CRoaring project is under a dual license (Apache/MIT). 
@@ -59,10 +59,10 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.3.0" +#define ROARING_VERSION "4.2.0" enum { ROARING_VERSION_MAJOR = 4, - ROARING_VERSION_MINOR = 3, + ROARING_VERSION_MINOR = 2, ROARING_VERSION_REVISION = 0 }; #endif // ROARING_INCLUDE_ROARING_VERSION @@ -86,10 +86,9 @@ enum { #ifndef CROARING_INCLUDE_PORTABILITY_H_ #define CROARING_INCLUDE_PORTABILITY_H_ -// Users who need _GNU_SOURCE should define it? -// #ifndef _GNU_SOURCE -// #define _GNU_SOURCE 1 -// #endif // _GNU_SOURCE +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif // _GNU_SOURCE #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS 1 #endif // __STDC_FORMAT_MACROS @@ -119,6 +118,21 @@ enum { #define CROARING_REGULAR_VISUAL_STUDIO 0 #endif +#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) +#undef _POSIX_C_SOURCE +#endif + +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) +#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) +#define _XOPEN_SOURCE 700 +#endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) + +#ifdef __illumos__ +#define __EXTENSIONS__ +#endif + #include #include #include // will provide posix_memalign with _POSIX_C_SOURCE as defined above @@ -1058,8 +1072,7 @@ inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer, return 0; // nothing more to iterate over } uint64_t w = bitset->array[x]; - // unset low bits inside the word less than *startfrom - w &= ~((UINT64_C(1) << (*startfrom & 63)) - 1); + w >>= (*startfrom & 63); size_t howmany = 0; size_t base = x << 6; while (howmany < capacity) { @@ -1766,14 +1779,11 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * order. This is is guaranteed to happen when serializing an existing bitmap, * but not for random inputs. 
* - * If the source is untrusted, you should call - * roaring_bitmap_internal_validate to check the validity of the - * bitmap prior to using it. Only after calling roaring_bitmap_internal_validate - * is the bitmap considered safe for use. + * You may use roaring_bitmap_internal_validate to check the validity of the + * bitmap prior to using it. * - * We also recommend that you use checksums to check that serialized data - * corresponds to the serialized bitmap. The CRoaring library does not provide - * checksumming. + * We recommend that you use checksums to check that serialized data corresponds + * to a serialized bitmap. * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not @@ -2303,10 +2313,6 @@ CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator( using namespace ::roaring::api; #endif #endif - -// roaring64 will include roaring.h, but we would -// prefer to avoid having our users include roaring64.h -// in addition to roaring.h. /* end file include/roaring/roaring.h */ /* begin file include/roaring/memory.h */ #ifndef INCLUDE_ROARING_MEMORY_H_ @@ -2365,7 +2371,7 @@ namespace api { #endif typedef struct roaring64_bitmap_s roaring64_bitmap_t; -typedef uint64_t roaring64_leaf_t; +typedef struct roaring64_leaf_s roaring64_leaf_t; typedef struct roaring64_iterator_s roaring64_iterator_t; /** @@ -2660,12 +2666,6 @@ uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r); */ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r); -/** - * Shrinks internal arrays to eliminate any unused capacity. Returns the number - * of bytes freed. - */ -size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r); - /** * (For advanced users.) * Collect statistics about the bitmap @@ -2902,14 +2902,11 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, * order. 
This is is guaranteed to happen when serializing an existing bitmap, * but not for random inputs. * - * If the source is untrusted, you should call - * roaring64_bitmap_internal_validate to check the validity of the - * bitmap prior to using it. Only after calling - * roaring64_bitmap_internal_validate is the bitmap considered safe for use. + * You may use roaring64_bitmap_internal_validate to check the validity of the + * bitmap prior to using it. * - * We also recommend that you use checksums to check that serialized data - * corresponds to the serialized bitmap. The CRoaring library does not provide - * checksumming. + * We recommend that you use checksums to check that serialized data corresponds + * to a serialized bitmap. * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not @@ -2918,53 +2915,6 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); -/** - * Returns the number of bytes required to serialize this bitmap in a "frozen" - * format. This is not compatible with any other serialization formats. - * - * `roaring64_bitmap_shrink_to_fit()` must be called before this method. - */ -size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r); - -/** - * Serializes the bitmap in a "frozen" format. The given buffer must be at least - * `roaring64_bitmap_frozen_size_in_bytes()` in size. Returns the number of - * bytes used for serialization. - * - * `roaring64_bitmap_shrink_to_fit()` must be called before this method. - * - * The frozen format is optimized for speed of (de)serialization, as well as - * allowing the user to create a bitmap based on a memory mapped file, which is - * possible because the format mimics the memory layout of the bitmap. 
- * - * Because the format mimics the memory layout of the bitmap, the format is not - * fixed across releases of Roaring Bitmaps, and may change in future releases. - * - * This function is endian-sensitive. If you have a big-endian system (e.g., a - * mainframe IBM s390x), the data format is going to be big-endian and not - * compatible with little-endian systems. - */ -size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, - char *buf); - -/** - * Creates a readonly bitmap that is a view of the given buffer. The buffer - * must be created with `roaring64_bitmap_frozen_serialize()`, and must be - * aligned by 64 bytes. - * - * Returns NULL if deserialization fails. - * - * The returned bitmap must only be used in a readonly manner. The bitmap must - * be freed using `roaring64_bitmap_free()` as normal. The backing buffer must - * only be freed after the bitmap. - * - * This function is endian-sensitive. If you have a big-endian system (e.g., a - * mainframe IBM s390x), the data format is going to be big-endian and not - * compatible with little-endian systems. - */ -roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, - size_t maxbytes); - /** * Iterate over the bitmap elements. The function `iterator` is called once for * all the values with `ptr` (can be NULL) as the second parameter of each call. 
From a572ab7b3a3fc6448fc86f7549824685faf73ea0 Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Sat, 28 Jun 2025 22:34:40 +0200 Subject: [PATCH 04/11] Version v4.1.6 --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 96 +++++++++------------------------- pyroaring/roaring.h | 53 +++---------------- 3 files changed, 34 insertions(+), 117 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index 084686f..19a707d 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.2.0" \ No newline at end of file +__croaring_version__ = "v4.1.6" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index 324bfa5..12434c3 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-30T21:45:33Z +// Created by amalgamation.sh on 2024-09-20T14:21:41Z /* * The CRoaring project is under a dual license (Apache/MIT). 
@@ -11215,15 +11215,6 @@ bool bitset_inplace_union(bitset_t *CROARING_CBITSET_RESTRICT b1, return true; } -bool bitset_empty(const bitset_t *bitset) { - for (size_t k = 0; k < bitset->arraysize; k++) { - if (bitset->array[k] != 0) { - return false; - } - } - return true; -} - size_t bitset_minimum(const bitset_t *bitset) { for (size_t k = 0; k < bitset->arraysize; k++) { uint64_t w = bitset->array[k]; @@ -11231,7 +11222,7 @@ size_t bitset_minimum(const bitset_t *bitset) { return roaring_trailing_zeroes(w) + k * 64; } } - return SIZE_MAX; + return 0; } bool bitset_grow(bitset_t *bitset, size_t newarraysize) { @@ -19328,7 +19319,6 @@ void roaring_aligned_free(void* p) { global_memory_hook.aligned_free(p); } /* begin file src/roaring.c */ #include #include -#include #include #include #include @@ -20654,22 +20644,15 @@ uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) { uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { - if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { - return 0; - } - return roaring_bitmap_range_cardinality_closed(r, (uint32_t)range_start, - (uint32_t)(range_end - 1)); -} - -uint64_t roaring_bitmap_range_cardinality_closed(const roaring_bitmap_t *r, - uint32_t range_start, - uint32_t range_end) { const roaring_array_t *ra = &r->high_low_container; - if (range_start > range_end) { + if (range_end > UINT32_MAX) { + range_end = UINT32_MAX + UINT64_C(1); + } + if (range_start >= range_end) { return 0; } - + range_end--; // make range_end inclusive // now we have: 0 <= range_start <= range_end <= UINT32_MAX uint16_t minhb = (uint16_t)(range_start >> 16); @@ -21336,18 +21319,11 @@ static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, uint64_t range_start, uint64_t range_end) { - if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { + if 
(range_start >= range_end) { return roaring_bitmap_copy(x1); } - return roaring_bitmap_flip_closed(x1, (uint32_t)range_start, - (uint32_t)(range_end - 1)); -} - -roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, - uint32_t range_start, - uint32_t range_end) { - if (range_start > range_end) { - return roaring_bitmap_copy(x1); + if (range_end >= UINT64_C(0x100000000)) { + range_end = UINT64_C(0x100000000); } roaring_bitmap_t *ans = roaring_bitmap_create(); @@ -21355,8 +21331,8 @@ roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, uint16_t hb_start = (uint16_t)(range_start >> 16); const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; - uint16_t hb_end = (uint16_t)(range_end >> 16); - const uint16_t lb_end = (uint16_t)range_end; // & 0xFFFF; + uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); + const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, hb_start, is_cow(x1)); @@ -21397,24 +21373,17 @@ roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, uint64_t range_end) { - if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { - return; - } - roaring_bitmap_flip_inplace_closed(x1, (uint32_t)range_start, - (uint32_t)(range_end - 1)); -} - -void roaring_bitmap_flip_inplace_closed(roaring_bitmap_t *x1, - uint32_t range_start, - uint32_t range_end) { - if (range_start > range_end) { + if (range_start >= range_end) { return; // empty range } + if (range_end >= UINT64_C(0x100000000)) { + range_end = UINT64_C(0x100000000); + } uint16_t hb_start = (uint16_t)(range_start >> 16); const uint16_t lb_start = (uint16_t)range_start; - uint16_t hb_end = (uint16_t)(range_end >> 16); - const uint16_t lb_end = (uint16_t)range_end; + uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); + const uint16_t lb_end = (uint16_t)(range_end - 1); 
if (hb_start == hb_end) { inplace_flip_container(&x1->high_low_container, hb_start, lb_start, @@ -22172,28 +22141,15 @@ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { */ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { - if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { - return true; + if (range_end >= UINT64_C(0x100000000)) { + range_end = UINT64_C(0x100000000); } - return roaring_bitmap_contains_range_closed(r, (uint32_t)range_start, - (uint32_t)(range_end - 1)); -} - -/** - * Check whether a range of values from range_start (included) to range_end - * (included) is present - */ -bool roaring_bitmap_contains_range_closed(const roaring_bitmap_t *r, - uint32_t range_start, - uint32_t range_end) { - if (range_start > range_end) { - return true; - } // empty range are always contained! - if (range_end == range_start) { + if (range_start >= range_end) + return true; // empty range are always contained! 
+ if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start); - } uint16_t hb_rs = (uint16_t)(range_start >> 16); - uint16_t hb_re = (uint16_t)(range_end >> 16); + uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); const int32_t span = hb_re - hb_rs; const int32_t hlc_sz = ra_get_size(&r->high_low_container); if (hlc_sz < span + 1) { @@ -22205,7 +22161,7 @@ bool roaring_bitmap_contains_range_closed(const roaring_bitmap_t *r, return false; } const uint32_t lb_rs = range_start & 0xFFFF; - const uint32_t lb_re = (range_end & 0xFFFF) + 1; + const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; uint8_t type; container_t *c = ra_get_container_at_index(&r->high_low_container, (uint16_t)is, &type); @@ -25556,7 +25512,7 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { memcpy(&size, buf, sizeof(int32_t)); buf += sizeof(uint32_t); } - if (size > (1 << 16) || size < 0) { + if (size > (1 << 16)) { return 0; } char *bitmapOfRunContainers = NULL; diff --git a/pyroaring/roaring.h b/pyroaring/roaring.h index 1e2b9ac..a3111cd 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-30T21:45:33Z +// Created by amalgamation.sh on 2024-09-20T14:21:41Z /* * The CRoaring project is under a dual license (Apache/MIT). 
@@ -59,11 +59,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.2.0" +#define ROARING_VERSION "4.1.6" enum { ROARING_VERSION_MAJOR = 4, - ROARING_VERSION_MINOR = 2, - ROARING_VERSION_REVISION = 0 + ROARING_VERSION_MINOR = 1, + ROARING_VERSION_REVISION = 6 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ @@ -967,10 +967,7 @@ inline bool bitset_get(const bitset_t *bitset, size_t i) { /* Count number of bits set. */ size_t bitset_count(const bitset_t *bitset); -/* Returns true if no bit is set. */ -bool bitset_empty(const bitset_t *bitset); - -/* Find the index of the first bit set. Or SIZE_MAX if the bitset is empty. */ +/* Find the index of the first bit set. Or zero if the bitset is empty. */ size_t bitset_minimum(const bitset_t *bitset); /* Find the index of the last bit set. Or zero if the bitset is empty. 
*/ @@ -1517,9 +1514,7 @@ void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, */ inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max) { - if (max <= min || min > (uint64_t)UINT32_MAX + 1) { - return; - } + if (max <= min) return; roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } @@ -1539,9 +1534,7 @@ void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, */ inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max) { - if (max <= min || min > (uint64_t)UINT32_MAX + 1) { - return; - } + if (max <= min) return; roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } @@ -1569,14 +1562,6 @@ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val); bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); -/** - * Check whether a range of values from range_start (included) - * to range_end (included) is present - */ -bool roaring_bitmap_contains_range_closed(const roaring_bitmap_t *r, - uint32_t range_start, - uint32_t range_end); - /** * Check if an items is present, using context from a previous insert or search * for speed optimization. @@ -1608,12 +1593,6 @@ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); -/** - * Returns the number of elements in the range [range_start, range_end]. - */ -uint64_t roaring_bitmap_range_cardinality_closed(const roaring_bitmap_t *r, - uint32_t range_start, - uint32_t range_end); /** * Returns true if the bitmap is empty (cardinality is zero). */ @@ -2015,14 +1994,6 @@ void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); -/** - * Compute the negation of the bitmap in the interval [range_start, range_end]. 
- * The number of negated values is range_end - range_start + 1. - * Areas outside the range are passed through unchanged. - */ -roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, - uint32_t range_start, - uint32_t range_end); /** * compute (in place) the negation of the roaring bitmap within a specified * interval: [range_start, range_end). The number of negated values is @@ -2032,16 +2003,6 @@ roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); -/** - * compute (in place) the negation of the roaring bitmap within a specified - * interval: [range_start, range_end]. The number of negated values is - * range_end - range_start + 1. - * Areas outside the range are passed through unchanged. - */ -void roaring_bitmap_flip_inplace_closed(roaring_bitmap_t *r1, - uint32_t range_start, - uint32_t range_end); - /** * Selects the element at index 'rank' where the smallest element is at index 0. * If the size of the roaring bitmap is strictly greater than rank, then this From 87167f0b1301a270f0e6384fd2ef34a55b544a3f Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Sat, 28 Jun 2025 22:34:57 +0200 Subject: [PATCH 05/11] Version v4.1.3 --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 27 +++----------------- pyroaring/roaring.h | 46 ++++++---------------------------- 3 files changed, 12 insertions(+), 63 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index 19a707d..429bb37 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.1.6" \ No newline at end of file +__croaring_version__ = "v4.1.3" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index 12434c3..41cddd2 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! 
DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-20T14:21:41Z +// Created by amalgamation.sh on 2024-09-19T00:48:39Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -22633,7 +22633,7 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) { bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { uint32_t max_value = roaring_bitmap_maximum(r); - size_t new_array_size = (size_t)(max_value / 64 + 1); + size_t new_array_size = (size_t)(((uint64_t)max_value + 63) / 64); bool resize_ok = bitset_resize(bitset, new_array_size, true); if (!resize_ok) { return false; @@ -24641,13 +24641,10 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( memcpy(&high32, buf, sizeof(high32)); buf += sizeof(high32); read_bytes += sizeof(high32); - // High 32 bits must be strictly increasing. - if (high32 <= previous_high32) { + if(high32 < previous_high32) { roaring64_bitmap_free(r); return NULL; } - previous_high32 = high32; - // Read the 32-bit Roaring bitmaps representing the least significant // bits of a set of elements. size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( @@ -24666,24 +24663,6 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( buf += bitmap32_size; read_bytes += bitmap32_size; - // While we don't attempt to validate much, we must ensure that there - // is no duplication in the high 48 bits - inserting into the ART - // assumes (or UB) no duplicate keys. 
The top 32 bits must be unique - // because we check for strict increasing values of high32, but we - // must also ensure the top 16 bits within each 32-bit bitmap are also - // at least unique (we ensure they're strictly increasing as well, - // which they must be for a _valid_ bitmap, since it's cheaper to check) - int32_t last_bitmap_key = -1; - for (int i = 0; i < bitmap32->high_low_container.size; i++) { - uint16_t key = bitmap32->high_low_container.keys[i]; - if (key <= last_bitmap_key) { - roaring_bitmap_free(bitmap32); - roaring64_bitmap_free(r); - return NULL; - } - last_bitmap_key = key; - } - // Insert all containers of the 32-bit bitmap into the 64-bit bitmap. move_from_roaring32_offset(r, bitmap32, high32); roaring_bitmap_free(bitmap32); diff --git a/pyroaring/roaring.h b/pyroaring/roaring.h index a3111cd..4588ccf 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-20T14:21:41Z +// Created by amalgamation.sh on 2024-09-19T00:48:39Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -59,11 +59,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.1.6" +#define ROARING_VERSION "4.1.3" enum { ROARING_VERSION_MAJOR = 4, ROARING_VERSION_MINOR = 1, - ROARING_VERSION_REVISION = 6 + ROARING_VERSION_REVISION = 3 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ @@ -1679,10 +1679,6 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. 
- * - * When serializing data to a file, we recommend that you also use - * checksums so that, at deserialization, you can be confident - * that you are recovering the correct data. */ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); @@ -1746,10 +1742,7 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * https://github.com/RoaringBitmap/RoaringFormatSpec * * The function itself is safe in the sense that it will not cause buffer - * overflows: it will not read beyond the scope of the provided buffer - * (buf,maxbytes). - * - * However, for correct operations, it is assumed that the bitmap + * overflows. However, for correct operations, it is assumed that the bitmap * read was once serialized from a valid bitmap (i.e., it follows the format * specification). If you provided an incorrect input (garbage), then the bitmap * read may not be in a valid state and following operations may not lead to @@ -1759,10 +1752,8 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * but not for random inputs. * * You may use roaring_bitmap_internal_validate to check the validity of the - * bitmap prior to using it. - * - * We recommend that you use checksums to check that serialized data corresponds - * to a serialized bitmap. + * bitmap prior to using it. You may also use other strategies to check for + * corrupted inputs (e.g., checksums). * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not @@ -1824,10 +1815,6 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. 
- * - * When serializing data to a file, we recommend that you also use - * checksums so that, at deserialization, you can be confident - * that you are recovering the correct data. */ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); @@ -1862,10 +1849,6 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. - * - * When serializing data to a file, we recommend that you also use - * checksums so that, at deserialization, you can be confident - * that you are recovering the correct data. */ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); @@ -2826,10 +2809,6 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. - * - * When serializing data to a file, we recommend that you also use - * checksums so that, at deserialization, you can be confident - * that you are recovering the correct data. */ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, char *buf); @@ -2844,17 +2823,14 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes); /** - * Read a bitmap from a serialized buffer (reading up to maxbytes). + * Read a bitmap from a serialized buffer safely (reading up to maxbytes). * In case of failure, NULL is returned. * * This is meant to be compatible with other languages * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations * * The function itself is safe in the sense that it will not cause buffer - * overflows: it will not read beyond the scope of the provided buffer - * (buf,maxbytes). 
- * - * However, for correct operations, it is assumed that the bitmap + * overflows. However, for correct operations, it is assumed that the bitmap * read was once serialized from a valid bitmap (i.e., it follows the format * specification). If you provided an incorrect input (garbage), then the bitmap * read may not be in a valid state and following operations may not lead to @@ -2863,12 +2839,6 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, * order. This is is guaranteed to happen when serializing an existing bitmap, * but not for random inputs. * - * You may use roaring64_bitmap_internal_validate to check the validity of the - * bitmap prior to using it. - * - * We recommend that you use checksums to check that serialized data corresponds - * to a serialized bitmap. - * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. From d3eb4dc08db39e812366bb70dd3bef7df11ec465 Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Sat, 28 Jun 2025 22:53:13 +0200 Subject: [PATCH 06/11] Version v4.1.4 --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 6 ++--- pyroaring/roaring.h | 46 ++++++++++++++++++++++++++++------ 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index 429bb37..34ababf 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.1.3" \ No newline at end of file +__croaring_version__ = "v4.1.4" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index 41cddd2..878e53f 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! 
-// Created by amalgamation.sh on 2024-09-19T00:48:39Z +// Created by amalgamation.sh on 2024-09-19T15:00:26Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -22633,7 +22633,7 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) { bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { uint32_t max_value = roaring_bitmap_maximum(r); - size_t new_array_size = (size_t)(((uint64_t)max_value + 63) / 64); + size_t new_array_size = (size_t)(max_value / 64 + 1); bool resize_ok = bitset_resize(bitset, new_array_size, true); if (!resize_ok) { return false; @@ -24641,7 +24641,7 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( memcpy(&high32, buf, sizeof(high32)); buf += sizeof(high32); read_bytes += sizeof(high32); - if(high32 < previous_high32) { + if (high32 < previous_high32) { roaring64_bitmap_free(r); return NULL; } diff --git a/pyroaring/roaring.h b/pyroaring/roaring.h index 4588ccf..9afcc19 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-19T00:48:39Z +// Created by amalgamation.sh on 2024-09-19T15:00:26Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -59,11 +59,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.1.3" +#define ROARING_VERSION "4.1.4" enum { ROARING_VERSION_MAJOR = 4, ROARING_VERSION_MINOR = 1, - ROARING_VERSION_REVISION = 3 + ROARING_VERSION_REVISION = 4 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ @@ -1679,6 +1679,10 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); * This function is endian-sensitive. 
If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * When serializing data to a file, we recommend that you also use + * checksums so that, at deserialization, you can be confident + * that you are recovering the correct data. */ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); @@ -1742,7 +1746,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * https://github.com/RoaringBitmap/RoaringFormatSpec * * The function itself is safe in the sense that it will not cause buffer - * overflows. However, for correct operations, it is assumed that the bitmap + * overflows: it will not read beyond the scope of the provided buffer + * (buf,maxbytes). + * + * However, for correct operations, it is assumed that the bitmap * read was once serialized from a valid bitmap (i.e., it follows the format * specification). If you provided an incorrect input (garbage), then the bitmap * read may not be in a valid state and following operations may not lead to @@ -1752,8 +1759,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * but not for random inputs. * * You may use roaring_bitmap_internal_validate to check the validity of the - * bitmap prior to using it. You may also use other strategies to check for - * corrupted inputs (e.g., checksums). + * bitmap prior to using it. + * + * We recommend that you use checksums to check that serialized data corresponds + * to a serialized bitmap. * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not @@ -1815,6 +1824,10 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); * This function is endian-sensitive. 
If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * When serializing data to a file, we recommend that you also use + * checksums so that, at deserialization, you can be confident + * that you are recovering the correct data. */ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); @@ -1849,6 +1862,10 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * When serializing data to a file, we recommend that you also use + * checksums so that, at deserialization, you can be confident + * that you are recovering the correct data. */ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); @@ -2809,6 +2826,10 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r); * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. + * + * When serializing data to a file, we recommend that you also use + * checksums so that, at deserialization, you can be confident + * that you are recovering the correct data. */ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, char *buf); @@ -2823,14 +2844,17 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes); /** - * Read a bitmap from a serialized buffer safely (reading up to maxbytes). + * Read a bitmap from a serialized buffer (reading up to maxbytes). * In case of failure, NULL is returned. 
* * This is meant to be compatible with other languages * https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations * * The function itself is safe in the sense that it will not cause buffer - * overflows. However, for correct operations, it is assumed that the bitmap + * overflows: it will not read beyond the scope of the provided buffer + * (buf,maxbytes). + * + * However, for correct operations, it is assumed that the bitmap * read was once serialized from a valid bitmap (i.e., it follows the format * specification). If you provided an incorrect input (garbage), then the bitmap * read may not be in a valid state and following operations may not lead to @@ -2839,6 +2863,12 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, * order. This is is guaranteed to happen when serializing an existing bitmap, * but not for random inputs. * + * You may use roaring64_bitmap_internal_validate to check the validity of the + * bitmap prior to using it. + * + * We recommend that you use checksums to check that serialized data corresponds + * to a serialized bitmap. + * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not * compatible with little-endian systems. 
From 61df09d5de5a5ceaf0d009e69f31edee59c3ce03 Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Sat, 28 Jun 2025 22:53:36 +0200 Subject: [PATCH 07/11] Version v4.1.5 --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 23 +++++++++++++++++++++-- pyroaring/roaring.h | 6 +++--- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index 34ababf..ec94454 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.1.4" \ No newline at end of file +__croaring_version__ = "v4.1.5" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index 878e53f..a60e796 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-19T15:00:26Z +// Created by amalgamation.sh on 2024-09-19T23:52:46Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -24641,7 +24641,8 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( memcpy(&high32, buf, sizeof(high32)); buf += sizeof(high32); read_bytes += sizeof(high32); - if (high32 < previous_high32) { + // High 32 bits must be strictly increasing. + if (high32 <= previous_high32) { roaring64_bitmap_free(r); return NULL; } @@ -24663,6 +24664,24 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( buf += bitmap32_size; read_bytes += bitmap32_size; + // While we don't attempt to validate much, we must ensure that there + // is no duplication in the high 48 bits - inserting into the ART + // assumes (or UB) no duplicate keys. 
The top 32 bits must be unique + // because we check for strict increasing values of high32, but we + // must also ensure the top 16 bits within each 32-bit bitmap are also + // at least unique (we ensure they're strictly increasing as well, + // which they must be for a _valid_ bitmap, since it's cheaper to check) + int32_t last_bitmap_key = -1; + for (int i = 0; i < bitmap32->high_low_container.size; i++) { + uint16_t key = bitmap32->high_low_container.keys[i]; + if (key <= last_bitmap_key) { + roaring_bitmap_free(bitmap32); + roaring64_bitmap_free(r); + return NULL; + } + last_bitmap_key = key; + } + // Insert all containers of the 32-bit bitmap into the 64-bit bitmap. move_from_roaring32_offset(r, bitmap32, high32); roaring_bitmap_free(bitmap32); diff --git a/pyroaring/roaring.h b/pyroaring/roaring.h index 9afcc19..db0c893 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-19T15:00:26Z +// Created by amalgamation.sh on 2024-09-19T23:52:46Z /* * The CRoaring project is under a dual license (Apache/MIT). 
@@ -59,11 +59,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.1.4" +#define ROARING_VERSION "4.1.5" enum { ROARING_VERSION_MAJOR = 4, ROARING_VERSION_MINOR = 1, - ROARING_VERSION_REVISION = 4 + ROARING_VERSION_REVISION = 5 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ From ff135981acfc0cc33f3ce8f89c9b32ea09679c2e Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Sun, 29 Jun 2025 22:07:04 +0200 Subject: [PATCH 08/11] Version v4.2.0 (with test fix) --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 98 +++++++++++++++++++++++++--------- pyroaring/roaring.h | 53 +++++++++++++++--- test.py | 2 +- 4 files changed, 120 insertions(+), 35 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index ec94454..084686f 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.1.5" \ No newline at end of file +__croaring_version__ = "v4.2.0" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index a60e796..324bfa5 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-19T23:52:46Z +// Created by amalgamation.sh on 2024-09-30T21:45:33Z /* * The CRoaring project is under a dual license (Apache/MIT). 
@@ -11215,6 +11215,15 @@ bool bitset_inplace_union(bitset_t *CROARING_CBITSET_RESTRICT b1, return true; } +bool bitset_empty(const bitset_t *bitset) { + for (size_t k = 0; k < bitset->arraysize; k++) { + if (bitset->array[k] != 0) { + return false; + } + } + return true; +} + size_t bitset_minimum(const bitset_t *bitset) { for (size_t k = 0; k < bitset->arraysize; k++) { uint64_t w = bitset->array[k]; @@ -11222,7 +11231,7 @@ size_t bitset_minimum(const bitset_t *bitset) { return roaring_trailing_zeroes(w) + k * 64; } } - return 0; + return SIZE_MAX; } bool bitset_grow(bitset_t *bitset, size_t newarraysize) { @@ -19319,6 +19328,7 @@ void roaring_aligned_free(void* p) { global_memory_hook.aligned_free(p); } /* begin file src/roaring.c */ #include #include +#include #include #include #include @@ -20644,15 +20654,22 @@ uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) { uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { + if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { + return 0; + } + return roaring_bitmap_range_cardinality_closed(r, (uint32_t)range_start, + (uint32_t)(range_end - 1)); +} + +uint64_t roaring_bitmap_range_cardinality_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end) { const roaring_array_t *ra = &r->high_low_container; - if (range_end > UINT32_MAX) { - range_end = UINT32_MAX + UINT64_C(1); - } - if (range_start >= range_end) { + if (range_start > range_end) { return 0; } - range_end--; // make range_end inclusive + // now we have: 0 <= range_start <= range_end <= UINT32_MAX uint16_t minhb = (uint16_t)(range_start >> 16); @@ -21319,11 +21336,18 @@ static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, uint64_t range_start, uint64_t range_end) { - if (range_start >= range_end) { + if (range_start >= range_end || range_start > 
(uint64_t)UINT32_MAX + 1) { return roaring_bitmap_copy(x1); } - if (range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); + return roaring_bitmap_flip_closed(x1, (uint32_t)range_start, + (uint32_t)(range_end - 1)); +} + +roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, + uint32_t range_start, + uint32_t range_end) { + if (range_start > range_end) { + return roaring_bitmap_copy(x1); } roaring_bitmap_t *ans = roaring_bitmap_create(); @@ -21331,8 +21355,8 @@ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, uint16_t hb_start = (uint16_t)(range_start >> 16); const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; + uint16_t hb_end = (uint16_t)(range_end >> 16); + const uint16_t lb_end = (uint16_t)range_end; // & 0xFFFF; ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, hb_start, is_cow(x1)); @@ -21373,17 +21397,24 @@ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, uint64_t range_end) { - if (range_start >= range_end) { - return; // empty range + if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { + return; } - if (range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); + roaring_bitmap_flip_inplace_closed(x1, (uint32_t)range_start, + (uint32_t)(range_end - 1)); +} + +void roaring_bitmap_flip_inplace_closed(roaring_bitmap_t *x1, + uint32_t range_start, + uint32_t range_end) { + if (range_start > range_end) { + return; // empty range } uint16_t hb_start = (uint16_t)(range_start >> 16); const uint16_t lb_start = (uint16_t)range_start; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); + uint16_t hb_end = (uint16_t)(range_end >> 16); + const uint16_t lb_end = 
(uint16_t)range_end; if (hb_start == hb_end) { inplace_flip_container(&x1->high_low_container, hb_start, lb_start, @@ -22141,15 +22172,28 @@ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { */ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { - if (range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); + if (range_start >= range_end || range_start > (uint64_t)UINT32_MAX + 1) { + return true; } - if (range_start >= range_end) - return true; // empty range are always contained! - if (range_end - range_start == 1) + return roaring_bitmap_contains_range_closed(r, (uint32_t)range_start, + (uint32_t)(range_end - 1)); +} + +/** + * Check whether a range of values from range_start (included) to range_end + * (included) is present + */ +bool roaring_bitmap_contains_range_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end) { + if (range_start > range_end) { + return true; + } // empty range are always contained! 
+ if (range_end == range_start) { return roaring_bitmap_contains(r, (uint32_t)range_start); + } uint16_t hb_rs = (uint16_t)(range_start >> 16); - uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); + uint16_t hb_re = (uint16_t)(range_end >> 16); const int32_t span = hb_re - hb_rs; const int32_t hlc_sz = ra_get_size(&r->high_low_container); if (hlc_sz < span + 1) { @@ -22161,7 +22205,7 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, return false; } const uint32_t lb_rs = range_start & 0xFFFF; - const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; + const uint32_t lb_re = (range_end & 0xFFFF) + 1; uint8_t type; container_t *c = ra_get_container_at_index(&r->high_low_container, (uint16_t)is, &type); @@ -24646,6 +24690,8 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( roaring64_bitmap_free(r); return NULL; } + previous_high32 = high32; + // Read the 32-bit Roaring bitmaps representing the least significant // bits of a set of elements. size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( @@ -25510,7 +25556,7 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { memcpy(&size, buf, sizeof(int32_t)); buf += sizeof(uint32_t); } - if (size > (1 << 16)) { + if (size > (1 << 16) || size < 0) { return 0; } char *bitmapOfRunContainers = NULL; diff --git a/pyroaring/roaring.h b/pyroaring/roaring.h index db0c893..1e2b9ac 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-19T23:52:46Z +// Created by amalgamation.sh on 2024-09-30T21:45:33Z /* * The CRoaring project is under a dual license (Apache/MIT). 
@@ -59,11 +59,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.1.5" +#define ROARING_VERSION "4.2.0" enum { ROARING_VERSION_MAJOR = 4, - ROARING_VERSION_MINOR = 1, - ROARING_VERSION_REVISION = 5 + ROARING_VERSION_MINOR = 2, + ROARING_VERSION_REVISION = 0 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ @@ -967,7 +967,10 @@ inline bool bitset_get(const bitset_t *bitset, size_t i) { /* Count number of bits set. */ size_t bitset_count(const bitset_t *bitset); -/* Find the index of the first bit set. Or zero if the bitset is empty. */ +/* Returns true if no bit is set. */ +bool bitset_empty(const bitset_t *bitset); + +/* Find the index of the first bit set. Or SIZE_MAX if the bitset is empty. */ size_t bitset_minimum(const bitset_t *bitset); /* Find the index of the last bit set. Or zero if the bitset is empty. 
*/ @@ -1514,7 +1517,9 @@ void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, */ inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max) { - if (max <= min) return; + if (max <= min || min > (uint64_t)UINT32_MAX + 1) { + return; + } roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } @@ -1534,7 +1539,9 @@ void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, */ inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max) { - if (max <= min) return; + if (max <= min || min > (uint64_t)UINT32_MAX + 1) { + return; + } roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } @@ -1562,6 +1569,14 @@ bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val); bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); +/** + * Check whether a range of values from range_start (included) + * to range_end (included) is present + */ +bool roaring_bitmap_contains_range_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end); + /** * Check if an items is present, using context from a previous insert or search * for speed optimization. @@ -1593,6 +1608,12 @@ uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); +/** + * Returns the number of elements in the range [range_start, range_end]. + */ +uint64_t roaring_bitmap_range_cardinality_closed(const roaring_bitmap_t *r, + uint32_t range_start, + uint32_t range_end); /** * Returns true if the bitmap is empty (cardinality is zero). */ @@ -1994,6 +2015,14 @@ void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); +/** + * Compute the negation of the bitmap in the interval [range_start, range_end]. 
+ * The number of negated values is range_end - range_start + 1. + * Areas outside the range are passed through unchanged. + */ +roaring_bitmap_t *roaring_bitmap_flip_closed(const roaring_bitmap_t *x1, + uint32_t range_start, + uint32_t range_end); /** * compute (in place) the negation of the roaring bitmap within a specified * interval: [range_start, range_end). The number of negated values is @@ -2003,6 +2032,16 @@ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); +/** + * compute (in place) the negation of the roaring bitmap within a specified + * interval: [range_start, range_end]. The number of negated values is + * range_end - range_start + 1. + * Areas outside the range are passed through unchanged. + */ +void roaring_bitmap_flip_inplace_closed(roaring_bitmap_t *r1, + uint32_t range_start, + uint32_t range_end); + /** * Selects the element at index 'rank' where the smallest element is at index 0. 
* If the size of the roaring bitmap is strictly greater than rank, then this diff --git a/test.py b/test.py index ef3186c..e57369d 100755 --- a/test.py +++ b/test.py @@ -905,7 +905,7 @@ def test_invalid_deserialization( bm = cls(list(range(0, 1000000, 3))) bitmap_bytes = bm.serialize() bitmap_bytes = bitmap_bytes[:42] + wrong_input + bitmap_bytes[42:] - with pytest.raises(ValueError, match='Invalid bitmap after deserialization'): + with pytest.raises(ValueError, match='Invalid bitmap after deserialization|Could not deserialize bitmap'): bitmap = cls.deserialize(bitmap_bytes) From cd2152f039323476ac8e37a27034be3b94b74bdc Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Sun, 29 Jun 2025 22:20:24 +0200 Subject: [PATCH 09/11] Version v4.2.3 --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 21 +++------------ pyroaring/roaring.h | 47 +++++++++++++++++++++------------- 3 files changed, 34 insertions(+), 36 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index 084686f..0e32cf3 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.2.0" \ No newline at end of file +__croaring_version__ = "v4.2.3" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index 324bfa5..07a5f2b 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-30T21:45:33Z +// Created by amalgamation.sh on 2025-02-26T22:28:04Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -10418,7 +10418,7 @@ static art_val_t *art_find_at(const art_node_t *node, } // Returns the size in bytes of the subtrie. 
-size_t art_size_in_bytes_at(const art_node_t *node) { +static size_t art_size_in_bytes_at(const art_node_t *node) { if (art_is_leaf(node)) { return 0; } @@ -10472,7 +10472,7 @@ static void art_node_print_type(const art_node_t *node) { } } -void art_node_printf(const art_node_t *node, uint8_t depth) { +static void art_node_printf(const art_node_t *node, uint8_t depth) { if (art_is_leaf(node)) { printf("{ type: Leaf, key: "); art_leaf_t *leaf = CROARING_CAST_LEAF(node); @@ -14106,7 +14106,7 @@ int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ } \ int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ const bitset_container_t *src_2) { \ - printf("A1\n"); const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_1 = src_1->words; \ const uint64_t * __restrict__ words_2 = src_2->words; \ int32_t sum = 0; \ for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ @@ -22982,19 +22982,6 @@ roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, return r; } -roaring64_bitmap_t *roaring64_bitmap_of(size_t n_args, ...) { - roaring64_bitmap_t *r = roaring64_bitmap_create(); - roaring64_bulk_context_t context = CROARING_ZERO_INITIALIZER; - va_list ap; - va_start(ap, n_args); - for (size_t i = 0; i < n_args; i++) { - uint64_t val = va_arg(ap, uint64_t); - roaring64_bitmap_add_bulk(r, &context, val); - } - va_end(ap); - return r; -} - static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, uint8_t *high48, uint16_t low16, diff --git a/pyroaring/roaring.h b/pyroaring/roaring.h index 1e2b9ac..7caeb1e 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2024-09-30T21:45:33Z +// Created by amalgamation.sh on 2025-02-26T22:28:04Z /* * The CRoaring project is under a dual license (Apache/MIT). 
@@ -59,11 +59,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.2.0" +#define ROARING_VERSION "4.2.3" enum { ROARING_VERSION_MAJOR = 4, ROARING_VERSION_MINOR = 2, - ROARING_VERSION_REVISION = 0 + ROARING_VERSION_REVISION = 3 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ @@ -86,9 +86,10 @@ enum { #ifndef CROARING_INCLUDE_PORTABILITY_H_ #define CROARING_INCLUDE_PORTABILITY_H_ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE 1 -#endif // _GNU_SOURCE +// Users who need _GNU_SOURCE should define it? +// #ifndef _GNU_SOURCE +// #define _GNU_SOURCE 1 +// #endif // _GNU_SOURCE #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS 1 #endif // __STDC_FORMAT_MACROS @@ -125,12 +126,11 @@ enum { #ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200809L #endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) -#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) -#define _XOPEN_SOURCE 700 -#endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) #ifdef __illumos__ +#ifndef __EXTENSIONS__ #define __EXTENSIONS__ +#endif // __EXTENSIONS__ #endif #include @@ -1072,7 +1072,8 @@ inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer, return 0; // nothing more to iterate over } uint64_t w = bitset->array[x]; - w >>= (*startfrom & 63); + // unset low bits inside the word less than *startfrom + w &= ~((UINT64_C(1) << (*startfrom & 63)) - 1); size_t howmany = 0; size_t base = x << 6; while (howmany < capacity) { @@ -1779,11 +1780,14 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * order. This is is guaranteed to happen when serializing an existing bitmap, * but not for random inputs. * - * You may use roaring_bitmap_internal_validate to check the validity of the - * bitmap prior to using it. 
+ * If the source is untrusted, you should call + * roaring_bitmap_internal_validate to check the validity of the + * bitmap prior to using it. Only after calling roaring_bitmap_internal_validate + * is the bitmap considered safe for use. * - * We recommend that you use checksums to check that serialized data corresponds - * to a serialized bitmap. + * We also recommend that you use checksums to check that serialized data + * corresponds to the serialized bitmap. The CRoaring library does not provide + * checksumming. * * This function is endian-sensitive. If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not @@ -2313,6 +2317,10 @@ CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator( using namespace ::roaring::api; #endif #endif + +// roaring64 will include roaring.h, but we would +// prefer to avoid having our users include roaring64.h +// in addition to roaring.h. /* end file include/roaring/roaring.h */ /* begin file include/roaring/memory.h */ #ifndef INCLUDE_ROARING_MEMORY_H_ @@ -2902,11 +2910,14 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, * order. This is is guaranteed to happen when serializing an existing bitmap, * but not for random inputs. * - * You may use roaring64_bitmap_internal_validate to check the validity of the - * bitmap prior to using it. + * If the source is untrusted, you should call + * roaring64_bitmap_internal_validate to check the validity of the + * bitmap prior to using it. Only after calling + * roaring64_bitmap_internal_validate is the bitmap considered safe for use. * - * We recommend that you use checksums to check that serialized data corresponds - * to a serialized bitmap. + * We also recommend that you use checksums to check that serialized data + * corresponds to the serialized bitmap. The CRoaring library does not provide + * checksumming. * * This function is endian-sensitive. 
If you have a big-endian system (e.g., a * mainframe IBM s390x), the data format is going to be big-endian and not From e8e5105fbb12783ff6f6a4cfe267e7dc06611c49 Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Sun, 29 Jun 2025 22:20:50 +0200 Subject: [PATCH 10/11] Version v4.3.0 --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 3182 +++++++++++++++++++++----------- pyroaring/roaring.h | 77 +- 3 files changed, 2193 insertions(+), 1068 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index 0e32cf3..007b74b 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.2.3" \ No newline at end of file +__croaring_version__ = "v4.3.0" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index 07a5f2b..2e76a6d 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2025-02-26T22:28:04Z +// Created by amalgamation.sh on 2025-02-28T15:35:21Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -6758,8 +6758,8 @@ void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); * chunks _differ_. This means that if there are two entries with different * high 48 bits, then there is only one inner node containing the common key * prefix, and two leaves. - * * Intrusive leaves: the leaf struct is included in user values. This removes - * a layer of indirection. + * * Mostly pointer-free: nodes are referred to by index rather than pointer, + * so that the structure can be deserialized with a backing buffer. */ // Fixed length of keys in the ART. All keys are assumed to be of this length. @@ -6772,25 +6772,33 @@ namespace internal { #endif typedef uint8_t art_key_chunk_t; -typedef struct art_node_s art_node_t; + +// Internal node reference type. 
Contains the node typecode in the low 8 bits, +// and the index in the relevant node array in the high 48 bits. Has a value of +// CROARING_ART_NULL_REF when pointing to a non-existent node. +typedef uint64_t art_ref_t; + +typedef void art_node_t; /** - * Wrapper to allow an empty tree. + * The ART is empty when root is a null ref. + * + * Each node type has its own dynamic array of node structs, indexed by + * art_ref_t. The arrays are expanded as needed, and shrink only when + * `shrink_to_fit` is called. */ typedef struct art_s { - art_node_t *root; + art_ref_t root; + + // Indexed by node typecode, thus 1 larger than they need to be for + // convenience. `first_free` indicates the index where the first free node + // lives, which may be equal to the capacity. + uint64_t first_free[6]; + uint64_t capacities[6]; + art_node_t *nodes[6]; } art_t; -/** - * Values inserted into the tree have to be cast-able to art_val_t. This - * improves performance by reducing indirection. - * - * NOTE: Value pointers must be unique! This is because each value struct - * contains the key corresponding to the value. - */ -typedef struct art_val_s { - art_key_chunk_t key[ART_KEY_BYTES]; -} art_val_t; +typedef uint64_t art_val_t; /** * Compares two keys, returns their relative order: @@ -6802,14 +6810,21 @@ int art_compare_keys(const art_key_chunk_t key1[], const art_key_chunk_t key2[]); /** - * Inserts the given key and value. + * Initializes the ART. + */ +void art_init_cleared(art_t *art); + +/** + * Inserts the given key and value. Returns a pointer to the value inserted, + * valid as long as the ART is not modified. */ -void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val); +art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val); /** - * Returns the value erased, NULL if not found. + * Returns true if a value was erased. Sets `*erased_val` to the value erased, + * if any. 
*/ -art_val_t *art_erase(art_t *art, const art_key_chunk_t *key); +bool art_erase(art_t *art, const art_key_chunk_t *key, art_val_t *erased_val); /** * Returns the value associated with the given key, NULL if not found. @@ -6822,42 +6837,39 @@ art_val_t *art_find(const art_t *art, const art_key_chunk_t *key); bool art_is_empty(const art_t *art); /** - * Frees the nodes of the ART except the values, which the user is expected to - * free. + * Frees the contents of the ART. Should not be called when using + * `art_deserialize_frozen_safe`. */ void art_free(art_t *art); -/** - * Returns the size in bytes of the ART. Includes size of pointers to values, - * but not the values themselves. - */ -size_t art_size_in_bytes(const art_t *art); - /** * Prints the ART using printf, useful for debugging. */ void art_printf(const art_t *art); /** - * Callback for validating the value stored in a leaf. + * Callback for validating the value stored in a leaf. `context` is a + * user-provided value passed to the callback without modification. * * Should return true if the value is valid, false otherwise * If false is returned, `*reason` should be set to a static string describing * the reason for the failure. */ -typedef bool (*art_validate_cb_t)(const art_val_t *val, const char **reason); +typedef bool (*art_validate_cb_t)(const art_val_t val, const char **reason, + void *context); /** - * Validate the ART tree, ensuring it is internally consistent. + * Validate the ART tree, ensuring it is internally consistent. `context` is a + * user-provided value passed to the callback without modification. */ bool art_internal_validate(const art_t *art, const char **reason, - art_validate_cb_t validate_cb); + art_validate_cb_t validate_cb, void *context); /** * ART-internal iterator bookkeeping. Users should treat this as an opaque type. 
*/ typedef struct art_iterator_frame_s { - art_node_t *node; + art_ref_t ref; uint8_t index_in_node; } art_iterator_frame_t; @@ -6869,6 +6881,8 @@ typedef struct art_iterator_s { art_key_chunk_t key[ART_KEY_BYTES]; art_val_t *value; + art_t *art; + uint8_t depth; // Key depth uint8_t frame; // Node depth @@ -6882,19 +6896,19 @@ typedef struct art_iterator_s { * depending on `first`. The iterator is not valid if there are no entries in * the ART. */ -art_iterator_t art_init_iterator(const art_t *art, bool first); +art_iterator_t art_init_iterator(art_t *art, bool first); /** * Returns an initialized iterator positioned at a key equal to or greater than * the given key, if it exists. */ -art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key); +art_iterator_t art_lower_bound(art_t *art, const art_key_chunk_t *key); /** * Returns an initialized iterator positioned at a key greater than the given * key, if it exists. */ -art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key); +art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key); /** * The following iterator movement functions return true if a new entry was @@ -6913,14 +6927,49 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, /** * Insert the value and positions the iterator at the key. */ -void art_iterator_insert(art_t *art, art_iterator_t *iterator, - const art_key_chunk_t *key, art_val_t *val); +void art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, + art_val_t val); /** * Erase the value pointed at by the iterator. Moves the iterator to the next - * leaf. Returns the value erased or NULL if nothing was erased. + * leaf. + * Returns true if a value was erased. Sets `*erased_val` to the value erased, + * if any. + */ +bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val); + +/** + * Shrinks the internal arrays in the ART to remove any unused elements. Returns + * the number of bytes freed. 
+ */ +size_t art_shrink_to_fit(art_t *art); + +/** + * Returns true if the ART has no unused elements. + */ +bool art_is_shrunken(const art_t *art); + +/** + * Returns the serialized size in bytes. + * Requires `art_shrink_to_fit` to be called first. + */ +size_t art_size_in_bytes(const art_t *art); + +/** + * Serializes the ART and returns the number of bytes written. Returns 0 on + * error. Requires `art_shrink_to_fit` to be called first. + */ +size_t art_serialize(const art_t *art, char *buf); + +/** + * Deserializes the ART from a serialized buffer, reading up to `maxbytes` + * bytes. Returns 0 on error. Requires `buf` to be 8 byte aligned. + * + * An ART deserialized in this way should only be used in a readonly context.The + * underlying buffer must not be freed before the ART. `art_free` should not be + * called on the ART deserialized in this way. */ -art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator); +size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art); #ifdef __cplusplus } // extern "C" @@ -9102,37 +9151,36 @@ CROARING_UNTARGET_AVX512 #endif/* end file src/array_util.c */ /* begin file src/art/art.c */ #include +#include #include #include -#define CROARING_ART_NODE4_TYPE 0 -#define CROARING_ART_NODE16_TYPE 1 -#define CROARING_ART_NODE48_TYPE 2 -#define CROARING_ART_NODE256_TYPE 3 -#define CROARING_ART_NUM_TYPES 4 +#define CROARING_ART_NULL_REF 0 + +#define CROARING_ART_LEAF_TYPE 1 +#define CROARING_ART_NODE4_TYPE 2 +#define CROARING_ART_NODE16_TYPE 3 +#define CROARING_ART_NODE48_TYPE 4 +#define CROARING_ART_NODE256_TYPE 5 + +#define CROARING_ART_MIN_TYPE CROARING_ART_LEAF_TYPE +#define CROARING_ART_MAX_TYPE CROARING_ART_NODE256_TYPE // Node48 placeholder value to indicate no child is present at this key index. 
#define CROARING_ART_NODE48_EMPTY_VAL 48 +#define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) -// We use the least significant bit of node pointers to indicate whether a node -// is a leaf or an inner node. This is never surfaced to the user. -// -// Using pointer tagging to indicate leaves not only saves a bit of memory by -// sparing the typecode, but also allows us to use an intrusive leaf struct. -// Using an intrusive leaf struct leaves leaf allocation up to the user. Upon -// deallocation of the ART, we know not to free the leaves without having to -// dereference the leaf pointers. -// -// All internal operations on leaves should use CROARING_CAST_LEAF before using -// the leaf. The only places that use CROARING_SET_LEAF are locations where a -// field is directly assigned to a leaf pointer. After using CROARING_SET_LEAF, -// the leaf should be treated as a node of unknown type. -#define CROARING_IS_LEAF(p) (((uintptr_t)(p) & 1)) -#define CROARING_SET_LEAF(p) ((art_node_t *)((uintptr_t)(p) | 1)) -#define CROARING_CAST_LEAF(p) ((art_leaf_t *)((void *)((uintptr_t)(p) & ~1))) +#define CROARING_ART_ALIGN_BUF(buf, alignment) \ + (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1))) -#define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) +// Gives the byte difference needed to align the current buffer to the +// alignment, relative to the start of the buffer. +#define CROARING_ART_ALIGN_SIZE_RELATIVE(buf_cur, buf_start, alignment) \ + ((((ptrdiff_t)((buf_cur) - (buf_start)) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1))) - \ + (ptrdiff_t)((buf_cur) - (buf_start))) #ifdef __cplusplus extern "C" { @@ -9142,30 +9190,20 @@ namespace internal { typedef uint8_t art_typecode_t; -// Aliasing with a "leaf" naming so that its purpose is clearer in the context -// of the trie internals. 
-typedef art_val_t art_leaf_t; - -typedef struct art_internal_validate_s { - const char **reason; - art_validate_cb_t validate_cb; - - int depth; - art_key_chunk_t current_key[ART_KEY_BYTES]; -} art_internal_validate_t; - -// Set the reason message, and return false for convenience. -static inline bool art_validate_fail(const art_internal_validate_t *validate, - const char *msg) { - *validate->reason = msg; - return false; -} +typedef struct art_leaf_s { + union { + struct { + art_key_chunk_t key[ART_KEY_BYTES]; + art_val_t val; + }; + uint64_t next_free; + }; +} art_leaf_t; // Inner node, with prefix. // // We use a fixed-length array as a pointer would be larger than the array. typedef struct art_inner_node_s { - art_typecode_t typecode; uint8_t prefix_size; uint8_t prefix[ART_KEY_BYTES - 1]; } art_inner_node_t; @@ -9174,119 +9212,232 @@ typedef struct art_inner_node_s { // Node4: key[i] corresponds with children[i]. Keys are sorted. typedef struct art_node4_s { - art_inner_node_t base; - uint8_t count; - uint8_t keys[4]; - art_node_t *children[4]; + union { + struct { + art_inner_node_t base; + uint8_t count; + uint8_t keys[4]; + art_ref_t children[4]; + }; + uint64_t next_free; + }; } art_node4_t; // Node16: key[i] corresponds with children[i]. Keys are sorted. typedef struct art_node16_s { - art_inner_node_t base; - uint8_t count; - uint8_t keys[16]; - art_node_t *children[16]; + union { + struct { + art_inner_node_t base; + uint8_t count; + uint8_t keys[16]; + art_ref_t children[16]; + }; + uint64_t next_free; + }; } art_node16_t; // Node48: key[i] corresponds with children[key[i]] if key[i] != // CROARING_ART_NODE48_EMPTY_VAL. Keys are naturally sorted due to direct // indexing. typedef struct art_node48_s { - art_inner_node_t base; - uint8_t count; - // Bitset where the ith bit is set if children[i] is available - // Because there are at most 48 children, only the bottom 48 bits are used. 
- uint64_t available_children; - uint8_t keys[256]; - art_node_t *children[48]; + union { + struct { + art_inner_node_t base; + uint8_t count; + // Bitset where the ith bit is set if children[i] is available + // Because there are at most 48 children, only the bottom 48 bits + // are used. + uint64_t available_children; + uint8_t keys[256]; + art_ref_t children[48]; + }; + uint64_t next_free; + }; } art_node48_t; // Node256: children[i] is directly indexed by key chunk. A child is present if // children[i] != NULL. typedef struct art_node256_s { - art_inner_node_t base; - uint16_t count; - art_node_t *children[256]; + union { + struct { + art_inner_node_t base; + uint16_t count; + art_ref_t children[256]; + }; + uint64_t next_free; + }; } art_node256_t; +// Size of each node type, indexed by typecode for convenience. +static const size_t ART_NODE_SIZES[] = { + 0, + sizeof(art_leaf_t), + sizeof(art_node4_t), + sizeof(art_node16_t), + sizeof(art_node48_t), + sizeof(art_node256_t), +}; + // Helper struct to refer to a child within a node at a specific index. typedef struct art_indexed_child_s { - art_node_t *child; + art_ref_t child; uint8_t index; art_key_chunk_t key_chunk; } art_indexed_child_t; -static inline bool art_is_leaf(const art_node_t *node) { - return CROARING_IS_LEAF(node); +typedef struct art_internal_validate_s { + const char **reason; + art_validate_cb_t validate_cb; + void *context; + + int depth; + art_key_chunk_t current_key[ART_KEY_BYTES]; +} art_internal_validate_t; + +// Set the reason message, and return false for convenience. 
+static inline bool art_validate_fail(const art_internal_validate_t *validate, + const char *msg) { + *validate->reason = msg; + return false; } -static void art_leaf_populate(art_leaf_t *leaf, const art_key_chunk_t key[]) { - memcpy(leaf->key, key, ART_KEY_BYTES); +static inline art_ref_t art_to_ref(uint64_t index, art_typecode_t typecode) { + return ((art_ref_t)index) << 16 | typecode; +} + +static inline uint64_t art_ref_index(art_ref_t ref) { + return ((uint64_t)ref) >> 16; +} + +static inline art_typecode_t art_ref_typecode(art_ref_t ref) { + return (art_typecode_t)ref; +} + +/** + * Gets a pointer to a node from its reference. The pointer only remains valid + * under non-mutating operations. If any mutating operations occur, this + * function should be called again to get a valid pointer to the node. + */ +static art_node_t *art_deref(const art_t *art, art_ref_t ref) { + assert(ref != CROARING_ART_NULL_REF); + art_typecode_t typecode = art_ref_typecode(ref); + return (art_node_t *)((char *)art->nodes[typecode] + + art_ref_index(ref) * ART_NODE_SIZES[typecode]); +} + +static inline art_node_t *art_get_node(const art_t *art, uint64_t index, + art_typecode_t typecode) { + return art_deref(art, art_to_ref(index, typecode)); +} + +static inline uint64_t art_get_index(const art_t *art, const art_node_t *node, + art_typecode_t typecode) { + art_node_t *nodes = art->nodes[typecode]; + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_leaf_t *)node - (art_leaf_t *)nodes; + case CROARING_ART_NODE4_TYPE: + return (art_node4_t *)node - (art_node4_t *)nodes; + case CROARING_ART_NODE16_TYPE: + return (art_node16_t *)node - (art_node16_t *)nodes; + case CROARING_ART_NODE48_TYPE: + return (art_node48_t *)node - (art_node48_t *)nodes; + case CROARING_ART_NODE256_TYPE: + return (art_node256_t *)node - (art_node256_t *)nodes; + default: + assert(false); + return 0; + } +} + +/** + * Creates a reference from a pointer. 
+ */ +static inline art_ref_t art_get_ref(const art_t *art, const art_node_t *node, + art_typecode_t typecode) { + return art_to_ref(art_get_index(art, node, typecode), typecode); } -static inline uint8_t art_get_type(const art_inner_node_t *node) { - return node->typecode; +static inline bool art_is_leaf(art_ref_t ref) { + return art_ref_typecode(ref) == CROARING_ART_LEAF_TYPE; } static inline void art_init_inner_node(art_inner_node_t *node, - art_typecode_t typecode, const art_key_chunk_t prefix[], uint8_t prefix_size) { - node->typecode = typecode; node->prefix_size = prefix_size; memcpy(node->prefix, prefix, prefix_size * sizeof(art_key_chunk_t)); } -static void art_free_node(art_node_t *node); +static void art_node_free(art_t *art, art_node_t *node, + art_typecode_t typecode); + +static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode); // ===================== Start of node-specific functions ====================== -static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], +static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], + art_val_t val) { + uint64_t index = art_allocate_index(art, CROARING_ART_LEAF_TYPE); + art_leaf_t *leaf = + ((art_leaf_t *)art->nodes[CROARING_ART_LEAF_TYPE]) + index; + memcpy(leaf->key, key, ART_KEY_BYTES); + leaf->val = val; + return art_to_ref(index, CROARING_ART_LEAF_TYPE); +} + +static inline void art_leaf_clear(art_leaf_t *leaf, art_ref_t next_free) { + leaf->next_free = next_free; +} + +static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], +static art_node16_t *art_node16_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], +static art_node48_t *art_node48_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node256_t 
*art_node256_create(const art_key_chunk_t prefix[], +static art_node256_t *art_node256_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, - uint8_t key); -static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, - uint8_t key); -static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, - uint8_t key); -static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, - uint8_t key); +static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, + art_ref_t child, uint8_t key); +static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, + art_ref_t child, uint8_t key); +static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, + art_ref_t child, uint8_t key); +static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, + art_ref_t child, uint8_t key); -static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], +static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size) { - art_node4_t *node = (art_node4_t *)roaring_malloc(sizeof(art_node4_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE4_TYPE, prefix, - prefix_size); + uint64_t index = art_allocate_index(art, CROARING_ART_NODE4_TYPE); + art_node4_t *node = + ((art_node4_t *)art->nodes[CROARING_ART_NODE4_TYPE]) + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; return node; } -static void art_free_node4(art_node4_t *node) { - for (size_t i = 0; i < node->count; ++i) { - art_free_node(node->children[i]); - } - roaring_free(node); +static inline void art_node4_clear(art_node4_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; } -static inline art_node_t *art_node4_find_child(const art_node4_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node4_find_child(const art_node4_t *node, + art_key_chunk_t key) { for 
(size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key) { return node->children[i]; } } - return NULL; + return CROARING_ART_NULL_REF; } -static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, + art_ref_t child, uint8_t key) { if (node->count < 4) { size_t idx = 0; for (; idx < node->count; ++idx) { @@ -9299,26 +9450,26 @@ static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, memmove(node->keys + idx + 1, node->keys + idx, after * sizeof(art_key_chunk_t)); memmove(node->children + idx + 1, node->children + idx, - after * sizeof(art_node_t *)); + after * sizeof(art_ref_t)); node->children[idx] = child; node->keys[idx] = key; node->count++; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); } art_node16_t *new_node = - art_node16_create(node->base.prefix, node->base.prefix_size); + art_node16_create(art, node->base.prefix, node->base.prefix_size); // Instead of calling insert, this could be specialized to 2x memcpy and // setting the count. 
for (size_t i = 0; i < 4; ++i) { - art_node16_insert(new_node, node->children[i], node->keys[i]); + art_node16_insert(art, new_node, node->children[i], node->keys[i]); } - roaring_free(node); - return art_node16_insert(new_node, child, key); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + return art_node16_insert(art, new_node, child, key); } -static inline art_node_t *art_node4_erase(art_node4_t *node, - art_key_chunk_t key_chunk) { +static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, + art_key_chunk_t key_chunk) { int idx = -1; for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { @@ -9326,17 +9477,18 @@ static inline art_node_t *art_node4_erase(art_node4_t *node, } } if (idx == -1) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); } if (node->count == 2) { // Only one child remains after erasing, so compress the path by // removing this node. uint8_t other_idx = idx ^ 1; - art_node_t *remaining_child = node->children[other_idx]; + art_ref_t remaining_child = node->children[other_idx]; art_key_chunk_t remaining_child_key = node->keys[other_idx]; if (!art_is_leaf(remaining_child)) { // Correct the prefix of the child node. - art_inner_node_t *inner_node = (art_inner_node_t *)remaining_child; + art_inner_node_t *inner_node = + (art_inner_node_t *)art_deref(art, remaining_child); memmove(inner_node->prefix + node->base.prefix_size + 1, inner_node->prefix, inner_node->prefix_size); memcpy(inner_node->prefix, node->base.prefix, @@ -9344,7 +9496,7 @@ static inline art_node_t *art_node4_erase(art_node4_t *node, inner_node->prefix[node->base.prefix_size] = remaining_child_key; inner_node->prefix_size += node->base.prefix_size + 1; } - roaring_free(node); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); return remaining_child; } // Shift other keys to maintain sorted order. 
@@ -9352,14 +9504,14 @@ static inline art_node_t *art_node4_erase(art_node4_t *node, memmove(node->keys + idx, node->keys + idx + 1, after_next * sizeof(art_key_chunk_t)); memmove(node->children + idx, node->children + idx + 1, - after_next * sizeof(art_node_t *)); + after_next * sizeof(art_ref_t)); node->count--; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); } static inline void art_node4_replace(art_node4_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { node->children[i] = new_child; @@ -9373,7 +9525,7 @@ static inline art_indexed_child_t art_node4_next_child(const art_node4_t *node, art_indexed_child_t indexed_child; index++; if (index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9390,7 +9542,7 @@ static inline art_indexed_child_t art_node4_prev_child(const art_node4_t *node, index--; art_indexed_child_t indexed_child; if (index < 0) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9403,7 +9555,7 @@ static inline art_indexed_child_t art_node4_child_at(const art_node4_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9423,14 +9575,15 @@ static inline art_indexed_child_t art_node4_lower_bound( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_internal_validate_at(const art_node_t *node, +static bool art_internal_validate_at(const art_t *art, art_ref_t ref, art_internal_validate_t validator); -static bool 
art_node4_internal_validate(const art_node4_t *node, +static bool art_node4_internal_validate(const art_t *art, + const art_node4_t *node, art_internal_validate_t validator) { if (node->count == 0) { return art_validate_fail(&validator, "Node4 has no children"); @@ -9457,41 +9610,41 @@ static bool art_node4_internal_validate(const art_node4_t *node, } } validator.current_key[validator.depth - 1] = node->keys[i]; - if (!art_internal_validate_at(node->children[i], validator)) { + if (!art_internal_validate_at(art, node->children[i], validator)) { return false; } } return true; } -static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], +static art_node16_t *art_node16_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size) { - art_node16_t *node = (art_node16_t *)roaring_malloc(sizeof(art_node16_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE16_TYPE, prefix, - prefix_size); + uint64_t index = art_allocate_index(art, CROARING_ART_NODE16_TYPE); + art_node16_t *node = + ((art_node16_t *)art->nodes[CROARING_ART_NODE16_TYPE]) + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; return node; } -static void art_free_node16(art_node16_t *node) { - for (size_t i = 0; i < node->count; ++i) { - art_free_node(node->children[i]); - } - roaring_free(node); +static inline void art_node16_clear(art_node16_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; } -static inline art_node_t *art_node16_find_child(const art_node16_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node16_find_child(const art_node16_t *node, + art_key_chunk_t key) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key) { return node->children[i]; } } - return NULL; + return CROARING_ART_NULL_REF; } -static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, + art_ref_t child, 
uint8_t key) { if (node->count < 16) { size_t idx = 0; for (; idx < node->count; ++idx) { @@ -9504,24 +9657,24 @@ static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, memmove(node->keys + idx + 1, node->keys + idx, after * sizeof(art_key_chunk_t)); memmove(node->children + idx + 1, node->children + idx, - after * sizeof(art_node_t *)); + after * sizeof(art_ref_t)); node->children[idx] = child; node->keys[idx] = key; node->count++; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); } art_node48_t *new_node = - art_node48_create(node->base.prefix, node->base.prefix_size); + art_node48_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 16; ++i) { - art_node48_insert(new_node, node->children[i], node->keys[i]); + art_node48_insert(art, new_node, node->children[i], node->keys[i]); } - roaring_free(node); - return art_node48_insert(new_node, child, key); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); + return art_node48_insert(art, new_node, child, key); } -static inline art_node_t *art_node16_erase(art_node16_t *node, - uint8_t key_chunk) { +static inline art_ref_t art_node16_erase(art_t *art, art_node16_t *node, + uint8_t key_chunk) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { // Shift other keys to maintain sorted order. 
@@ -9529,28 +9682,28 @@ static inline art_node_t *art_node16_erase(art_node16_t *node, memmove(node->keys + i, node->keys + i + 1, after_next * sizeof(key_chunk)); memmove(node->children + i, node->children + i + 1, - after_next * sizeof(art_node_t *)); + after_next * sizeof(art_ref_t)); node->count--; break; } } if (node->count > 4) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); } art_node4_t *new_node = - art_node4_create(node->base.prefix, node->base.prefix_size); + art_node4_create(art, node->base.prefix, node->base.prefix_size); // Instead of calling insert, this could be specialized to 2x memcpy and // setting the count. for (size_t i = 0; i < 4; ++i) { - art_node4_insert(new_node, node->children[i], node->keys[i]); + art_node4_insert(art, new_node, node->children[i], node->keys[i]); } - roaring_free(node); - return (art_node_t *)new_node; + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); + return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE4_TYPE); } static inline void art_node16_replace(art_node16_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { for (uint8_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { node->children[i] = new_child; @@ -9564,7 +9717,7 @@ static inline art_indexed_child_t art_node16_next_child( art_indexed_child_t indexed_child; index++; if (index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9581,7 +9734,7 @@ static inline art_indexed_child_t art_node16_prev_child( index--; art_indexed_child_t indexed_child; if (index < 0) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9594,7 +9747,7 @@ static inline art_indexed_child_t art_node16_child_at(const art_node16_t *node, int index) { art_indexed_child_t 
indexed_child; if (index < 0 || index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9614,11 +9767,12 @@ static inline art_indexed_child_t art_node16_lower_bound( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_node16_internal_validate(const art_node16_t *node, +static bool art_node16_internal_validate(const art_t *art, + const art_node16_t *node, art_internal_validate_t validator) { if (node->count <= 4) { return art_validate_fail(&validator, "Node16 has too few children"); @@ -9641,18 +9795,20 @@ static bool art_node16_internal_validate(const art_node16_t *node, } } validator.current_key[validator.depth - 1] = node->keys[i]; - if (!art_internal_validate_at(node->children[i], validator)) { + if (!art_internal_validate_at(art, node->children[i], validator)) { return false; } } return true; } -static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], +static art_node48_t *art_node48_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size) { - art_node48_t *node = (art_node48_t *)roaring_malloc(sizeof(art_node48_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE48_TYPE, prefix, - prefix_size); + uint64_t index = art_allocate_index(art, CROARING_ART_NODE48_TYPE); + art_node48_t *node = + ((art_node48_t *)art->nodes[CROARING_ART_NODE48_TYPE]) + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; node->available_children = CROARING_NODE48_AVAILABLE_CHILDREN_MASK; for (size_t i = 0; i < 256; ++i) { @@ -9661,29 +9817,22 @@ static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], return node; } -static void art_free_node48(art_node48_t *node) { - uint64_t used_children = - (node->available_children) ^ CROARING_NODE48_AVAILABLE_CHILDREN_MASK; - while (used_children != 0) { - // We checked above 
that used_children is not zero - uint8_t child_idx = roaring_trailing_zeroes(used_children); - art_free_node(node->children[child_idx]); - used_children &= ~(UINT64_C(1) << child_idx); - } - roaring_free(node); +static inline void art_node48_clear(art_node48_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; } -static inline art_node_t *art_node48_find_child(const art_node48_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node48_find_child(const art_node48_t *node, + art_key_chunk_t key) { uint8_t val_idx = node->keys[key]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { return node->children[val_idx]; } - return NULL; + return CROARING_ART_NULL_REF; } -static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, + art_ref_t child, uint8_t key) { if (node->count < 48) { // node->available_children is only zero when the node is full (count == // 48), we just checked count < 48 @@ -9692,48 +9841,48 @@ static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, node->children[val_idx] = child; node->count++; node->available_children &= ~(UINT64_C(1) << val_idx); - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); } art_node256_t *new_node = - art_node256_create(node->base.prefix, node->base.prefix_size); + art_node256_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { uint8_t val_idx = node->keys[i]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { - art_node256_insert(new_node, node->children[val_idx], i); + art_node256_insert(art, new_node, node->children[val_idx], i); } } - roaring_free(node); - return art_node256_insert(new_node, child, key); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return art_node256_insert(art, new_node, child, key); } -static inline art_node_t 
*art_node48_erase(art_node48_t *node, - uint8_t key_chunk) { +static inline art_ref_t art_node48_erase(art_t *art, art_node48_t *node, + uint8_t key_chunk) { uint8_t val_idx = node->keys[key_chunk]; if (val_idx == CROARING_ART_NODE48_EMPTY_VAL) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); } node->keys[key_chunk] = CROARING_ART_NODE48_EMPTY_VAL; node->available_children |= UINT64_C(1) << val_idx; node->count--; if (node->count > 16) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); } art_node16_t *new_node = - art_node16_create(node->base.prefix, node->base.prefix_size); + art_node16_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { val_idx = node->keys[i]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { - art_node16_insert(new_node, node->children[val_idx], i); + art_node16_insert(art, new_node, node->children[val_idx], i); } } - roaring_free(node); - return (art_node_t *)new_node; + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE16_TYPE); } static inline void art_node48_replace(art_node48_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { uint8_t val_idx = node->keys[key_chunk]; assert(val_idx != CROARING_ART_NODE48_EMPTY_VAL); node->children[val_idx] = new_child; @@ -9751,7 +9900,7 @@ static inline art_indexed_child_t art_node48_next_child( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -9770,7 +9919,7 @@ static inline art_indexed_child_t art_node48_prev_child( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -9778,7 +9927,7 @@ static inline art_indexed_child_t art_node48_child_at(const art_node48_t *node, int index) { 
art_indexed_child_t indexed_child; if (index < 0 || index >= 256) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9798,11 +9947,12 @@ static inline art_indexed_child_t art_node48_lower_bound( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_node48_internal_validate(const art_node48_t *node, +static bool art_node48_internal_validate(const art_t *art, + const art_node48_t *node, art_internal_validate_t validator) { if (node->count <= 16) { return art_validate_fail(&validator, "Node48 has too few children"); @@ -9819,8 +9969,8 @@ static bool art_node48_internal_validate(const art_node48_t *node, &validator, "Node48 keys point to the same child index"); } - art_node_t *child = node->children[child_idx]; - if (child == NULL) { + art_ref_t child = node->children[child_idx]; + if (child == CROARING_ART_NULL_REF) { return art_validate_fail(&validator, "Node48 has a NULL child"); } used_children |= UINT64_C(1) << child_idx; @@ -9852,7 +10002,7 @@ static bool art_node48_internal_validate(const art_node48_t *node, for (int i = 0; i < 256; ++i) { if (node->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { validator.current_key[validator.depth - 1] = i; - if (!art_internal_validate_at(node->children[node->keys[i]], + if (!art_internal_validate_at(art, node->children[node->keys[i]], validator)) { return false; } @@ -9861,62 +10011,59 @@ static bool art_node48_internal_validate(const art_node48_t *node, return true; } -static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], +static art_node256_t *art_node256_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size) { + uint64_t index = art_allocate_index(art, CROARING_ART_NODE256_TYPE); art_node256_t *node = - (art_node256_t *)roaring_malloc(sizeof(art_node256_t)); - art_init_inner_node(&node->base, 
CROARING_ART_NODE256_TYPE, prefix, - prefix_size); + ((art_node256_t *)art->nodes[CROARING_ART_NODE256_TYPE]) + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; for (size_t i = 0; i < 256; ++i) { - node->children[i] = NULL; + node->children[i] = CROARING_ART_NULL_REF; } return node; } -static void art_free_node256(art_node256_t *node) { - for (size_t i = 0; i < 256; ++i) { - if (node->children[i] != NULL) { - art_free_node(node->children[i]); - } - } - roaring_free(node); +static inline void art_node256_clear(art_node256_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; } -static inline art_node_t *art_node256_find_child(const art_node256_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node256_find_child(const art_node256_t *node, + art_key_chunk_t key) { return node->children[key]; } -static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, + art_ref_t child, uint8_t key) { node->children[key] = child; node->count++; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); } -static inline art_node_t *art_node256_erase(art_node256_t *node, - uint8_t key_chunk) { - node->children[key_chunk] = NULL; +static inline art_ref_t art_node256_erase(art_t *art, art_node256_t *node, + uint8_t key_chunk) { + node->children[key_chunk] = CROARING_ART_NULL_REF; node->count--; if (node->count > 48) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); } art_node48_t *new_node = - art_node48_create(node->base.prefix, node->base.prefix_size); + art_node48_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { - if (node->children[i] != NULL) { - art_node48_insert(new_node, node->children[i], i); + if (node->children[i] != CROARING_ART_NULL_REF) { + 
art_node48_insert(art, new_node, node->children[i], i); } } - roaring_free(node); - return (art_node_t *)new_node; + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); + return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE48_TYPE); } static inline void art_node256_replace(art_node256_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { node->children[key_chunk] = new_child; } @@ -9925,14 +10072,14 @@ static inline art_indexed_child_t art_node256_next_child( art_indexed_child_t indexed_child; index++; for (size_t i = index; i < 256; ++i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -9944,14 +10091,14 @@ static inline art_indexed_child_t art_node256_prev_child( index--; art_indexed_child_t indexed_child; for (int i = index; i >= 0; --i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -9959,7 +10106,7 @@ static inline art_indexed_child_t art_node256_child_at( const art_node256_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= 256) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -9972,18 +10119,19 @@ static inline art_indexed_child_t art_node256_lower_bound( art_node256_t *node, art_key_chunk_t key_chunk) { art_indexed_child_t indexed_child; for (size_t i = key_chunk; i < 256; ++i) { - if (node->children[i] != NULL) { + if (node->children[i] != 
CROARING_ART_NULL_REF) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_node256_internal_validate(const art_node256_t *node, +static bool art_node256_internal_validate(const art_t *art, + const art_node256_t *node, art_internal_validate_t validator) { if (node->count <= 48) { return art_validate_fail(&validator, "Node256 has too few children"); @@ -9994,7 +10142,7 @@ static bool art_node256_internal_validate(const art_node256_t *node, validator.depth++; int actual_count = 0; for (int i = 0; i < 256; ++i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { actual_count++; for (int j = i + 1; j < 256; ++j) { @@ -10005,7 +10153,7 @@ static bool art_node256_internal_validate(const art_node256_t *node, } validator.current_key[validator.depth - 1] = i; - if (!art_internal_validate_at(node->children[i], validator)) { + if (!art_internal_validate_at(art, node->children[i], validator)) { return false; } } @@ -10019,9 +10167,10 @@ static bool art_node256_internal_validate(const art_node256_t *node, // Finds the child with the given key chunk in the inner node, returns NULL if // no such child is found. 
-static art_node_t *art_find_child(const art_inner_node_t *node, - art_key_chunk_t key_chunk) { - switch (art_get_type(node)) { +static art_ref_t art_find_child(const art_inner_node_t *node, + art_typecode_t typecode, + art_key_chunk_t key_chunk) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: return art_node4_find_child((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: @@ -10032,14 +10181,14 @@ static art_node_t *art_find_child(const art_inner_node_t *node, return art_node256_find_child((art_node256_t *)node, key_chunk); default: assert(false); - return NULL; + return CROARING_ART_NULL_REF; } } // Replaces the child with the given key chunk in the inner node. -static void art_replace(art_inner_node_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { - switch (art_get_type(node)) { +static void art_replace(art_inner_node_t *node, art_typecode_t typecode, + art_key_chunk_t key_chunk, art_ref_t new_child) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: art_node4_replace((art_node4_t *)node, key_chunk, new_child); break; @@ -10059,78 +10208,112 @@ static void art_replace(art_inner_node_t *node, art_key_chunk_t key_chunk, // Erases the child with the given key chunk from the inner node, returns the // updated node (the same as the initial node if it was not shrunk). 
-static art_node_t *art_node_erase(art_inner_node_t *node, - art_key_chunk_t key_chunk) { - switch (art_get_type(node)) { +static art_ref_t art_node_erase(art_t *art, art_inner_node_t *node, + art_typecode_t typecode, + art_key_chunk_t key_chunk) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: - return art_node4_erase((art_node4_t *)node, key_chunk); + return art_node4_erase(art, (art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: - return art_node16_erase((art_node16_t *)node, key_chunk); + return art_node16_erase(art, (art_node16_t *)node, key_chunk); case CROARING_ART_NODE48_TYPE: - return art_node48_erase((art_node48_t *)node, key_chunk); + return art_node48_erase(art, (art_node48_t *)node, key_chunk); case CROARING_ART_NODE256_TYPE: - return art_node256_erase((art_node256_t *)node, key_chunk); + return art_node256_erase(art, (art_node256_t *)node, key_chunk); default: assert(false); - return NULL; + return CROARING_ART_NULL_REF; } } // Inserts the leaf with the given key chunk in the inner node, returns a // pointer to the (possibly expanded) node. 
-static art_node_t *art_node_insert_leaf(art_inner_node_t *node, - art_key_chunk_t key_chunk, - art_leaf_t *leaf) { - art_node_t *child = (art_node_t *)(CROARING_SET_LEAF(leaf)); - switch (art_get_type(node)) { +static art_ref_t art_node_insert_leaf(art_t *art, art_inner_node_t *node, + art_typecode_t typecode, + art_key_chunk_t key_chunk, + art_ref_t leaf) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: - return art_node4_insert((art_node4_t *)node, child, key_chunk); + return art_node4_insert(art, (art_node4_t *)node, leaf, key_chunk); case CROARING_ART_NODE16_TYPE: - return art_node16_insert((art_node16_t *)node, child, key_chunk); + return art_node16_insert(art, (art_node16_t *)node, leaf, + key_chunk); case CROARING_ART_NODE48_TYPE: - return art_node48_insert((art_node48_t *)node, child, key_chunk); + return art_node48_insert(art, (art_node48_t *)node, leaf, + key_chunk); case CROARING_ART_NODE256_TYPE: - return art_node256_insert((art_node256_t *)node, child, key_chunk); + return art_node256_insert(art, (art_node256_t *)node, leaf, + key_chunk); default: assert(false); - return NULL; + return CROARING_ART_NULL_REF; } } -// Frees the node and its children. Leaves are freed by the user. -static void art_free_node(art_node_t *node) { - if (art_is_leaf(node)) { - // We leave it up to the user to free leaves. 
- return; +static uint64_t art_node_get_next_free(const art_t *art, art_ref_t ref) { + art_node_t *node = art_deref(art, ref); + art_typecode_t typecode = art_ref_typecode(ref); + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return ((art_leaf_t *)node)->next_free; + case CROARING_ART_NODE4_TYPE: + return ((art_node4_t *)node)->next_free; + case CROARING_ART_NODE16_TYPE: + return ((art_node16_t *)node)->next_free; + case CROARING_ART_NODE48_TYPE: + return ((art_node48_t *)node)->next_free; + case CROARING_ART_NODE256_TYPE: + return ((art_node256_t *)node)->next_free; + default: + assert(false); + return 0; } - switch (art_get_type((art_inner_node_t *)node)) { +} + +static void art_node_set_next_free(art_node_t *node, art_typecode_t typecode, + uint64_t next_free) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + ((art_leaf_t *)node)->next_free = next_free; + break; case CROARING_ART_NODE4_TYPE: - art_free_node4((art_node4_t *)node); + ((art_node4_t *)node)->next_free = next_free; break; case CROARING_ART_NODE16_TYPE: - art_free_node16((art_node16_t *)node); + ((art_node16_t *)node)->next_free = next_free; break; case CROARING_ART_NODE48_TYPE: - art_free_node48((art_node48_t *)node); + ((art_node48_t *)node)->next_free = next_free; break; case CROARING_ART_NODE256_TYPE: - art_free_node256((art_node256_t *)node); + ((art_node256_t *)node)->next_free = next_free; break; default: assert(false); } } +// Marks the node as unoccupied and frees its index. +static void art_node_free(art_t *art, art_node_t *node, + art_typecode_t typecode) { + uint64_t index = art_get_index(art, node, typecode); + uint64_t next_free = art->first_free[typecode]; + art_node_set_next_free(node, typecode, next_free); + art->first_free[typecode] = index; +} + // Returns the next child in key order, or NULL if called on a leaf. // Provided index may be in the range [-1, 255].
static art_indexed_child_t art_node_next_child(const art_node_t *node, + art_typecode_t typecode, int index) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_next_child((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10148,13 +10331,15 @@ static art_indexed_child_t art_node_next_child(const art_node_t *node, // Returns the previous child in key order, or NULL if called on a leaf. // Provided index may be in the range [0, 256]. static art_indexed_child_t art_node_prev_child(const art_node_t *node, + art_typecode_t typecode, int index) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_prev_child((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10169,16 +10354,19 @@ static art_indexed_child_t art_node_prev_child(const art_node_t *node, } } -// Returns the child found at the provided index, or NULL if called on a leaf. -// Provided index is only valid if returned by art_node_(next|prev)_child. +// Returns the child found at the provided index, or NULL if called on a +// leaf. Provided index is only valid if returned by +// art_node_(next|prev)_child. 
static art_indexed_child_t art_node_child_at(const art_node_t *node, + art_typecode_t typecode, int index) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_child_at((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10193,16 +10381,18 @@ static art_indexed_child_t art_node_child_at(const art_node_t *node, } } -// Returns the child with the smallest key equal to or greater than the given -// key chunk, NULL if called on a leaf or no such child was found. +// Returns the child with the smallest key equal to or greater than the +// given key chunk, NULL if called on a leaf or no such child was found. static art_indexed_child_t art_node_lower_bound(const art_node_t *node, + art_typecode_t typecode, art_key_chunk_t key_chunk) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_lower_bound((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: @@ -10217,7 +10407,7 @@ static art_indexed_child_t art_node_lower_bound(const art_node_t *node, } } -// ====================== End of node-specific functions ======================= +// ====================== End of node-specific functions ====================== // Compares the given ranges of two keys, returns their relative order: // * Key range 1 < key range 2: a negative value @@ -10255,45 +10445,112 @@ static uint8_t art_common_prefix(const art_key_chunk_t 
key1[], return offset; } -// Returns a pointer to the rootmost node where the value was inserted, may not -// be equal to `node`. -static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], - uint8_t depth, art_leaf_t *new_leaf) { - if (art_is_leaf(node)) { - art_leaf_t *leaf = CROARING_CAST_LEAF(node); +/** + * Extends the array of nodes of the given typecode. Invalidates pointers into + * the array obtained by `art_deref`. + */ +static void art_extend(art_t *art, art_typecode_t typecode) { + uint64_t size = art->first_free[typecode]; + uint64_t capacity = art->capacities[typecode]; + if (size < capacity) { + return; + } + uint64_t new_capacity; + if (capacity == 0) { + new_capacity = 2; + } else if (capacity < 1024) { + new_capacity = 2 * capacity; + } else { + new_capacity = 5 * capacity / 4; + } + art->capacities[typecode] = new_capacity; + art->nodes[typecode] = roaring_realloc( + art->nodes[typecode], new_capacity * ART_NODE_SIZES[typecode]); + uint64_t increase = new_capacity - capacity; + memset(art_get_node(art, capacity, typecode), 0, + increase * ART_NODE_SIZES[typecode]); + for (uint64_t i = capacity; i < new_capacity; ++i) { + art_node_set_next_free(art_get_node(art, i, typecode), typecode, i + 1); + } +} + +/** + * Returns the next free index for the given typecode, may be equal to the + * capacity of the array. + */ +static uint64_t art_next_free(const art_t *art, art_typecode_t typecode) { + uint64_t index = art->first_free[typecode]; + return art_node_get_next_free(art, art_to_ref(index, typecode)); +} + +/** + * Marks an index for the given typecode as used, expanding the relevant node + * array if necessary. 
+ */ +static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode) { + uint64_t first_free = art->first_free[typecode]; + if (first_free == art->capacities[typecode]) { + art_extend(art, typecode); + art->first_free[typecode]++; + return first_free; + } + art->first_free[typecode] = art_next_free(art, typecode); + return first_free; +} + +// Returns a pointer to the rootmost node where the value was inserted, may +// not be equal to `node`. +static art_ref_t art_insert_at(art_t *art, art_ref_t ref, + const art_key_chunk_t key[], uint8_t depth, + art_ref_t new_leaf) { + if (art_is_leaf(ref)) { + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix( leaf->key, depth, ART_KEY_BYTES, key, depth, ART_KEY_BYTES); - // Previously this was a leaf, create an inner node instead and add both - // the existing and new leaf to it. + // Previously this was a leaf, create an inner node instead and add + // both the existing and new leaf to it. art_node_t *new_node = - (art_node_t *)art_node4_create(key + depth, common_prefix); + (art_node_t *)art_node4_create(art, key + depth, common_prefix); - new_node = art_node_insert_leaf((art_inner_node_t *)new_node, - leaf->key[depth + common_prefix], leaf); - new_node = art_node_insert_leaf((art_inner_node_t *)new_node, - key[depth + common_prefix], new_leaf); + art_ref_t new_ref = art_node_insert_leaf( + art, (art_inner_node_t *)new_node, CROARING_ART_NODE4_TYPE, + leaf->key[depth + common_prefix], ref); + new_ref = art_node_insert_leaf(art, (art_inner_node_t *)new_node, + CROARING_ART_NODE4_TYPE, + key[depth + common_prefix], new_leaf); // The new inner node is now the rootmost node. 
- return new_node; + return new_ref; } - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); // Not a leaf: inner node uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); if (common_prefix != inner_node->prefix_size) { - // Partial prefix match. Create a new internal node to hold the common + // Partial prefix match. Create a new internal node to hold the common // prefix. - art_node4_t *node4 = - art_node4_create(inner_node->prefix, common_prefix); + // We create a copy of the node's prefix as the creation of a new + // node may invalidate the prefix pointer. + art_key_chunk_t *prefix_copy = (art_key_chunk_t *)roaring_malloc( + common_prefix * sizeof(art_key_chunk_t)); + memcpy(prefix_copy, inner_node->prefix, + common_prefix * sizeof(art_key_chunk_t)); + art_node4_t *node4 = art_node4_create(art, prefix_copy, common_prefix); + roaring_free(prefix_copy); + + // Deref as a new node was created. + inner_node = (art_inner_node_t *)art_deref(art, ref); // Make the existing internal node a child of the new internal node. - node4 = (art_node4_t *)art_node4_insert( - node4, node, inner_node->prefix[common_prefix]); + art_node4_insert(art, node4, ref, inner_node->prefix[common_prefix]); - // Correct the prefix of the moved internal node, trimming off the chunk - // inserted into the new internal node. + // Deref again as a new node was created. + inner_node = (art_inner_node_t *)art_deref(art, ref); + + // Correct the prefix of the moved internal node, trimming off the + // chunk inserted into the new internal node. inner_node->prefix_size = inner_node->prefix_size - common_prefix - 1; if (inner_node->prefix_size > 0) { // Move the remaining prefix to the correct position. @@ -10302,55 +10559,67 @@ static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], } // Insert the value in the new internal node. 
- return art_node_insert_leaf(&node4->base, key[common_prefix + depth], - new_leaf); + return art_node_insert_leaf(art, (art_inner_node_t *)node4, + CROARING_ART_NODE4_TYPE, + key[common_prefix + depth], new_leaf); } // Prefix matches entirely or node has no prefix. Look for an existing // child. art_key_chunk_t key_chunk = key[depth + common_prefix]; - art_node_t *child = art_find_child(inner_node, key_chunk); - if (child != NULL) { - art_node_t *new_child = - art_insert_at(child, key, depth + common_prefix + 1, new_leaf); + art_ref_t child = + art_find_child(inner_node, art_ref_typecode(ref), key_chunk); + if (child != CROARING_ART_NULL_REF) { + art_ref_t new_child = + art_insert_at(art, child, key, depth + common_prefix + 1, new_leaf); if (new_child != child) { + // Deref again as a new node may have been created. + inner_node = (art_inner_node_t *)art_deref(art, ref); // Node type changed. - art_replace(inner_node, key_chunk, new_child); + art_replace(inner_node, art_ref_typecode(ref), key_chunk, + new_child); } - return node; + return ref; } - return art_node_insert_leaf(inner_node, key_chunk, new_leaf); + return art_node_insert_leaf(art, inner_node, art_ref_typecode(ref), + key_chunk, new_leaf); } // Erase helper struct. typedef struct art_erase_result_s { - // The rootmost node where the value was erased, may not be equal to `node`. - // If no value was removed, this is null. - art_node_t *rootmost_node; + // The rootmost node where the value was erased, may not be equal to + // the original node. If no value was removed, this is + // CROARING_ART_NULL_REF. + art_ref_t rootmost_node; + + // True if a value was erased. + bool erased; - // Value removed, null if not removed. - art_val_t *value_erased; + // Value removed, if any. + art_val_t value_erased; } art_erase_result_t; // Searches for the given key starting at `node`, erases it if found. 
-static art_erase_result_t art_erase_at(art_node_t *node, +static art_erase_result_t art_erase_at(art_t *art, art_ref_t ref, const art_key_chunk_t *key, uint8_t depth) { art_erase_result_t result; - result.rootmost_node = NULL; - result.value_erased = NULL; + result.rootmost_node = CROARING_ART_NULL_REF; + result.erased = false; - if (art_is_leaf(node)) { - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + if (art_is_leaf(ref)) { + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); if (common_prefix != ART_KEY_BYTES) { // Leaf key mismatch. return result; } - result.value_erased = (art_val_t *)leaf; + result.erased = true; + result.value_erased = leaf->val; + art_node_free(art, (art_node_t *)leaf, CROARING_ART_LEAF_TYPE); return result; } - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); @@ -10359,101 +10628,76 @@ static art_erase_result_t art_erase_at(art_node_t *node, return result; } art_key_chunk_t key_chunk = key[depth + common_prefix]; - art_node_t *child = art_find_child(inner_node, key_chunk); - if (child == NULL) { + art_ref_t child = + art_find_child(inner_node, art_ref_typecode(ref), key_chunk); + if (child == CROARING_ART_NULL_REF) { // No child with key chunk. return result; } - // Try to erase the key further down. Skip the key chunk associated with the - // child in the node. + // Try to erase the key further down. Skip the key chunk associated with + // the child in the node. 
art_erase_result_t child_result = - art_erase_at(child, key, depth + common_prefix + 1); - if (child_result.value_erased == NULL) { + art_erase_at(art, child, key, depth + common_prefix + 1); + if (!child_result.erased) { return result; } + result.erased = true; result.value_erased = child_result.value_erased; - result.rootmost_node = node; - if (child_result.rootmost_node == NULL) { + result.rootmost_node = ref; + + // Deref again as nodes may have changed location. + inner_node = (art_inner_node_t *)art_deref(art, ref); + if (child_result.rootmost_node == CROARING_ART_NULL_REF) { // Child node was fully erased, erase it from this node's children. - result.rootmost_node = art_node_erase(inner_node, key_chunk); + result.rootmost_node = + art_node_erase(art, inner_node, art_ref_typecode(ref), key_chunk); } else if (child_result.rootmost_node != child) { // Child node was not fully erased, update the pointer to it in this // node. - art_replace(inner_node, key_chunk, child_result.rootmost_node); + art_replace(inner_node, art_ref_typecode(ref), key_chunk, + child_result.rootmost_node); } return result; } -// Searches for the given key starting at `node`, returns NULL if the key was -// not found. -static art_val_t *art_find_at(const art_node_t *node, +// Searches for the given key starting at `node`, returns NULL if the key +// was not found. 
+static art_val_t *art_find_at(const art_t *art, art_ref_t ref, const art_key_chunk_t *key, uint8_t depth) { - while (!art_is_leaf(node)) { - art_inner_node_t *inner_node = (art_inner_node_t *)node; + while (!art_is_leaf(ref)) { + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); if (common_prefix != inner_node->prefix_size) { return NULL; } - art_node_t *child = - art_find_child(inner_node, key[depth + inner_node->prefix_size]); - if (child == NULL) { + art_ref_t child = art_find_child(inner_node, art_ref_typecode(ref), + key[depth + inner_node->prefix_size]); + if (child == CROARING_ART_NULL_REF) { return NULL; } - node = child; + ref = child; // Include both the prefix and the child key chunk in the depth. depth += inner_node->prefix_size + 1; } - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); if (depth >= ART_KEY_BYTES) { - return (art_val_t *)leaf; + return &leaf->val; } uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); if (common_prefix == ART_KEY_BYTES) { - return (art_val_t *)leaf; + return &leaf->val; } return NULL; } -// Returns the size in bytes of the subtrie. 
-static size_t art_size_in_bytes_at(const art_node_t *node) { - if (art_is_leaf(node)) { - return 0; - } - size_t size = 0; - switch (art_get_type((art_inner_node_t *)node)) { - case CROARING_ART_NODE4_TYPE: { - size += sizeof(art_node4_t); - } break; - case CROARING_ART_NODE16_TYPE: { - size += sizeof(art_node16_t); - } break; - case CROARING_ART_NODE48_TYPE: { - size += sizeof(art_node48_t); - } break; - case CROARING_ART_NODE256_TYPE: { - size += sizeof(art_node256_t); - } break; - default: - assert(false); - break; - } - art_indexed_child_t indexed_child = art_node_next_child(node, -1); - while (indexed_child.child != NULL) { - size += art_size_in_bytes_at(indexed_child.child); - indexed_child = art_node_next_child(node, indexed_child.index); - } - return size; -} - -static void art_node_print_type(const art_node_t *node) { - if (art_is_leaf(node)) { - printf("Leaf"); - return; - } - switch (art_get_type((art_inner_node_t *)node)) { +static void art_node_print_type(art_ref_t ref) { + switch (art_ref_typecode(ref)) { + case CROARING_ART_LEAF_TYPE: + printf("Leaf"); + return; case CROARING_ART_NODE4_TYPE: printf("Node4"); return; @@ -10472,10 +10716,10 @@ static void art_node_print_type(const art_node_t *node) { } } -static void art_node_printf(const art_node_t *node, uint8_t depth) { - if (art_is_leaf(node)) { +void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { + if (art_is_leaf(ref)) { printf("{ type: Leaf, key: "); - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); for (size_t i = 0; i < ART_KEY_BYTES; ++i) { printf("%02x", leaf->key[i]); } @@ -10487,10 +10731,10 @@ static void art_node_printf(const art_node_t *node, uint8_t depth) { printf("%*s", depth, ""); printf("type: "); - art_node_print_type(node); + art_node_print_type(ref); printf("\n"); - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); 
printf("%*s", depth, ""); printf("prefix_size: %d\n", inner_node->prefix_size); @@ -10501,41 +10745,42 @@ static void art_node_printf(const art_node_t *node, uint8_t depth) { } printf("\n"); - switch (art_get_type(inner_node)) { + switch (art_ref_typecode(ref)) { case CROARING_ART_NODE4_TYPE: { - art_node4_t *node4 = (art_node4_t *)node; + art_node4_t *node4 = (art_node4_t *)inner_node; for (uint8_t i = 0; i < node4->count; ++i) { printf("%*s", depth, ""); printf("key: %02x ", node4->keys[i]); - art_node_printf(node4->children[i], depth); + art_node_printf(art, node4->children[i], depth); } } break; case CROARING_ART_NODE16_TYPE: { - art_node16_t *node16 = (art_node16_t *)node; + art_node16_t *node16 = (art_node16_t *)inner_node; for (uint8_t i = 0; i < node16->count; ++i) { printf("%*s", depth, ""); printf("key: %02x ", node16->keys[i]); - art_node_printf(node16->children[i], depth); + art_node_printf(art, node16->children[i], depth); } } break; case CROARING_ART_NODE48_TYPE: { - art_node48_t *node48 = (art_node48_t *)node; + art_node48_t *node48 = (art_node48_t *)inner_node; for (int i = 0; i < 256; ++i) { if (node48->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { printf("%*s", depth, ""); printf("key: %02x ", i); printf("child: %02x ", node48->keys[i]); - art_node_printf(node48->children[node48->keys[i]], depth); + art_node_printf(art, node48->children[node48->keys[i]], + depth); } } } break; case CROARING_ART_NODE256_TYPE: { - art_node256_t *node256 = (art_node256_t *)node; + art_node256_t *node256 = (art_node256_t *)inner_node; for (int i = 0; i < 256; ++i) { - if (node256->children[i] != NULL) { + if (node256->children[i] != CROARING_ART_NULL_REF) { printf("%*s", depth, ""); printf("key: %02x ", i); - art_node_printf(node256->children[i], depth); + art_node_printf(art, node256->children[i], depth); } } } break; @@ -10548,118 +10793,310 @@ static void art_node_printf(const art_node_t *node, uint8_t depth) { printf("}\n"); } -void art_insert(art_t *art, const 
art_key_chunk_t *key, art_val_t *val) { - art_leaf_t *leaf = (art_leaf_t *)val; - art_leaf_populate(leaf, key); - if (art->root == NULL) { - art->root = (art_node_t *)CROARING_SET_LEAF(leaf); - return; - } - art->root = art_insert_at(art->root, key, 0, leaf); -} - -art_val_t *art_erase(art_t *art, const art_key_chunk_t *key) { - if (art->root == NULL) { - return NULL; - } - art_erase_result_t result = art_erase_at(art->root, key, 0); - if (result.value_erased == NULL) { - return NULL; - } - art->root = result.rootmost_node; - return result.value_erased; -} - -art_val_t *art_find(const art_t *art, const art_key_chunk_t *key) { - if (art->root == NULL) { - return NULL; - } - return art_find_at(art->root, key, 0); +/** + * Moves the node at `ref` to the earliest free index before it (if any), + * returns the new ref. Assumes `art->first_free[typecode]` points to the + * smallest free index. + */ +static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { + uint64_t idx = art_ref_index(ref); + art_typecode_t typecode = art_ref_typecode(ref); + uint64_t first_free = art->first_free[typecode]; + assert(idx != first_free); + if (idx < first_free) { + return ref; + } + uint64_t from = idx; + uint64_t to = first_free; + uint64_t next_free = art_node_get_next_free(art, art_to_ref(to, typecode)); + memcpy(art_get_node(art, to, typecode), art_get_node(art, from, typecode), + ART_NODE_SIZES[typecode]); + + // With an integer representing the next free index, and an `x` representing + // an occupied index, assume the following scenario at the start of this + // function: + // nodes = [1,2,5,x,x] + // first_free = 0 + // + // We just moved a node from index 3 to 0: + // nodes = [x,2,5,?,x] + // + // We need to modify the free list so that the free indices are ascending. + // This can be done by traversing the list until we find a node with a + // `next_free` greater than the index we copied the node from, and inserting + // the new index in between. 
This leads to the following: + // nodes = [x,2,3,5,x] + // first_free = 1 + uint64_t initial_next_free = next_free; + uint64_t current = next_free; + while (next_free < from) { + current = next_free; + next_free = + art_node_get_next_free(art, art_to_ref(next_free, typecode)); + } + art_node_set_next_free(art_deref(art, ref), typecode, next_free); + if (current < from) { + art_node_set_next_free(art_get_node(art, current, typecode), typecode, + from); + } + art->first_free[typecode] = + from < initial_next_free ? from : initial_next_free; + return art_to_ref(to, typecode); } -bool art_is_empty(const art_t *art) { return art->root == NULL; } - -void art_free(art_t *art) { - if (art->root == NULL) { - return; +/** + * Sorts the free lists pointed to by art->first_free in ascending index order. + */ +static void art_sort_free_lists(art_t *art) { + for (art_typecode_t type = CROARING_ART_LEAF_TYPE; + type <= CROARING_ART_NODE256_TYPE; ++type) { + bool *free_indices = + (bool *)roaring_calloc(art->capacities[type], sizeof(bool)); + + for (uint64_t i = art->first_free[type]; i < art->capacities[type]; + i = art_node_get_next_free(art, art_to_ref(i, type))) { + free_indices[i] = true; + } + + uint64_t first_free = art->capacities[type]; + for (uint64_t i = art->capacities[type]; i > 0; --i) { + uint64_t index = i - 1; + if (free_indices[index]) { + art_node_set_next_free(art_get_node(art, index, type), type, + first_free); + first_free = index; + } + } + art->first_free[type] = first_free; + roaring_free(free_indices); } - art_free_node(art->root); } -size_t art_size_in_bytes(const art_t *art) { - size_t size = sizeof(art_t); - if (art->root != NULL) { - size += art_size_in_bytes_at(art->root); +/** + * Shrinks all node arrays to `first_free`. Assumes all indices after + * `first_free` are unused. 
+ */ +static size_t art_shrink_node_arrays(art_t *art) { + size_t freed = 0; + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->first_free[t] < art->capacities[t]) { + uint64_t new_capacity = art->first_free[t]; + art->nodes[t] = roaring_realloc(art->nodes[t], + new_capacity * ART_NODE_SIZES[t]); + freed += (art->capacities[t] - new_capacity) * ART_NODE_SIZES[t]; + art->capacities[t] = new_capacity; + } } - return size; + return freed; } -void art_printf(const art_t *art) { - if (art->root == NULL) { +/** + * Traverses the ART, moving nodes to earlier free indices and modifying their + * references along the way. + */ +static void art_shrink_at(art_t *art, art_ref_t ref) { + if (art_is_leaf(ref)) { return; } - art_node_printf(art->root, 0); -} - -// Returns the current node that the iterator is positioned at. -static inline art_node_t *art_iterator_node(art_iterator_t *iterator) { - return iterator->frames[iterator->frame].node; -} - -// Sets the iterator key and value to the leaf's key and value. Always returns -// true for convenience. 
-static inline bool art_iterator_valid_loc(art_iterator_t *iterator, - art_leaf_t *leaf) { - iterator->frames[iterator->frame].node = CROARING_SET_LEAF(leaf); - iterator->frames[iterator->frame].index_in_node = 0; - memcpy(iterator->key, leaf->key, ART_KEY_BYTES); - iterator->value = (art_val_t *)leaf; - return true; + switch (art_ref_typecode(ref)) { + case CROARING_ART_NODE4_TYPE: { + art_node4_t *node4 = (art_node4_t *)art_deref(art, ref); + for (uint8_t i = 0; i < node4->count; ++i) { + node4->children[i] = + art_move_node_to_shrink(art, node4->children[i]); + art_shrink_at(art, node4->children[i]); + } + } break; + case CROARING_ART_NODE16_TYPE: { + art_node16_t *node16 = (art_node16_t *)art_deref(art, ref); + for (uint8_t i = 0; i < node16->count; ++i) { + node16->children[i] = + art_move_node_to_shrink(art, node16->children[i]); + art_shrink_at(art, node16->children[i]); + } + } break; + case CROARING_ART_NODE48_TYPE: { + art_node48_t *node48 = (art_node48_t *)art_deref(art, ref); + for (int i = 0; i < 256; ++i) { + if (node48->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { + uint8_t idx = node48->keys[i]; + node48->children[idx] = + art_move_node_to_shrink(art, node48->children[idx]); + art_shrink_at(art, node48->children[idx]); + } + } + } break; + case CROARING_ART_NODE256_TYPE: { + art_node256_t *node256 = (art_node256_t *)art_deref(art, ref); + for (int i = 0; i < 256; ++i) { + if (node256->children[i] != CROARING_ART_NULL_REF) { + node256->children[i] = + art_move_node_to_shrink(art, node256->children[i]); + art_shrink_at(art, node256->children[i]); + } + } + } break; + default: + assert(false); + break; + } +} + +void art_init_cleared(art_t *art) { + art->root = CROARING_ART_NULL_REF; + memset(art->first_free, 0, sizeof(art->first_free)); + memset(art->capacities, 0, sizeof(art->capacities)); + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + art->nodes[t] = NULL; + } } -// Invalidates the iterator key and value. 
Always returns false for convenience. +size_t art_shrink_to_fit(art_t *art) { + if (art_is_shrunken(art)) { + return 0; + } + if (art->root != CROARING_ART_NULL_REF) { + art_sort_free_lists(art); + art->root = art_move_node_to_shrink(art, art->root); + art_shrink_at(art, art->root); + } + return art_shrink_node_arrays(art); +} + +bool art_is_shrunken(const art_t *art) { + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->first_free[t] != art->capacities[t]) { + return false; + } + } + return true; +} + +art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val) { + art_ref_t leaf = art_leaf_create(art, key, val); + if (art->root == CROARING_ART_NULL_REF) { + art->root = leaf; + return &((art_leaf_t *)art_deref(art, leaf))->val; + } + art->root = art_insert_at(art, art->root, key, 0, leaf); + return &((art_leaf_t *)art_deref(art, leaf))->val; +} + +bool art_erase(art_t *art, const art_key_chunk_t *key, art_val_t *erased_val) { + art_val_t erased_val_local; + if (erased_val == NULL) { + erased_val = &erased_val_local; + } + if (art->root == CROARING_ART_NULL_REF) { + return false; + } + art_erase_result_t result = art_erase_at(art, art->root, key, 0); + if (!result.erased) { + return false; + } + art->root = result.rootmost_node; + *erased_val = result.value_erased; + return true; +} + +art_val_t *art_find(const art_t *art, const art_key_chunk_t *key) { + if (art->root == CROARING_ART_NULL_REF) { + return NULL; + } + return art_find_at(art, art->root, key, 0); +} + +bool art_is_empty(const art_t *art) { + return art->root == CROARING_ART_NULL_REF; +} + +void art_free(art_t *art) { + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + roaring_free(art->nodes[t]); + } +} + +void art_printf(const art_t *art) { + if (art->root == CROARING_ART_NULL_REF) { + return; + } + art_node_printf(art, art->root, 0); +} + +// Returns a reference to the current node that the iterator is 
positioned +// at. +static inline art_ref_t art_iterator_ref(art_iterator_t *iterator) { + return iterator->frames[iterator->frame].ref; +} + +// Returns the current node that the iterator is positioned at. +static inline art_node_t *art_iterator_node(art_iterator_t *iterator) { + return art_deref(iterator->art, art_iterator_ref(iterator)); +} + +// Sets the iterator key and value to the leaf's key and value. Always +// returns true for convenience. +static inline bool art_iterator_valid_loc(art_iterator_t *iterator, + art_ref_t leaf_ref) { + iterator->frames[iterator->frame].ref = leaf_ref; + iterator->frames[iterator->frame].index_in_node = 0; + art_leaf_t *leaf = (art_leaf_t *)art_deref(iterator->art, leaf_ref); + memcpy(iterator->key, leaf->key, ART_KEY_BYTES); + iterator->value = &leaf->val; + return true; +} + +// Invalidates the iterator key and value. Always returns false for +// convenience. static inline bool art_iterator_invalid_loc(art_iterator_t *iterator) { memset(iterator->key, 0, ART_KEY_BYTES); iterator->value = NULL; return false; } -// Moves the iterator one level down in the tree, given a node at the current -// level and the index of the child that we're going down to. +// Moves the iterator one level down in the tree, given a node at the +// current level and the index of the child that we're going down to. // // Note: does not set the index at the new level. 
-static void art_iterator_down(art_iterator_t *iterator, - const art_inner_node_t *node, +static void art_iterator_down(art_iterator_t *iterator, art_ref_t ref, uint8_t index_in_node) { - iterator->frames[iterator->frame].node = (art_node_t *)node; + iterator->frames[iterator->frame].ref = ref; iterator->frames[iterator->frame].index_in_node = index_in_node; iterator->frame++; - art_indexed_child_t indexed_child = - art_node_child_at((art_node_t *)node, index_in_node); - assert(indexed_child.child != NULL); - iterator->frames[iterator->frame].node = indexed_child.child; + art_inner_node_t *node = (art_inner_node_t *)art_deref(iterator->art, ref); + art_indexed_child_t indexed_child = art_node_child_at( + (art_node_t *)node, art_ref_typecode(ref), index_in_node); + assert(indexed_child.child != CROARING_ART_NULL_REF); + iterator->frames[iterator->frame].ref = indexed_child.child; iterator->depth += node->prefix_size + 1; } -// Moves the iterator to the next/previous child of the current node. Returns -// the child moved to, or NULL if there is no neighboring child. -static art_node_t *art_iterator_neighbor_child( - art_iterator_t *iterator, const art_inner_node_t *inner_node, - bool forward) { +// Moves the iterator to the next/previous child of the current node. +// Returns the child moved to, or NULL if there is no neighboring child. 
+static art_ref_t art_iterator_neighbor_child(art_iterator_t *iterator, + bool forward) { art_iterator_frame_t frame = iterator->frames[iterator->frame]; + art_node_t *node = art_deref(iterator->art, frame.ref); art_indexed_child_t indexed_child; if (forward) { - indexed_child = art_node_next_child(frame.node, frame.index_in_node); + indexed_child = art_node_next_child(node, art_ref_typecode(frame.ref), + frame.index_in_node); } else { - indexed_child = art_node_prev_child(frame.node, frame.index_in_node); + indexed_child = art_node_prev_child(node, art_ref_typecode(frame.ref), + frame.index_in_node); } - if (indexed_child.child != NULL) { - art_iterator_down(iterator, inner_node, indexed_child.index); + if (indexed_child.child != CROARING_ART_NULL_REF) { + art_iterator_down(iterator, frame.ref, indexed_child.index); } return indexed_child.child; } -// Moves the iterator one level up in the tree, returns false if not possible. +// Moves the iterator one level up in the tree, returns false if not +// possible. static bool art_iterator_up(art_iterator_t *iterator) { if (iterator->frame == 0) { return false; @@ -10671,8 +11108,8 @@ static bool art_iterator_up(art_iterator_t *iterator) { return true; } -// Moves the iterator one level, followed by a move to the next / previous leaf. -// Sets the status of the iterator. +// Moves the iterator one level, followed by a move to the next / previous +// leaf. Sets the status of the iterator. static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) { if (!art_iterator_up(iterator)) { // We're at the root. @@ -10683,27 +11120,29 @@ static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) { // Initializes the iterator at the first / last leaf of the given node. // Returns true for convenience. 
-static bool art_node_init_iterator(const art_node_t *node, - art_iterator_t *iterator, bool first) { - while (!art_is_leaf(node)) { +static bool art_node_init_iterator(art_ref_t ref, art_iterator_t *iterator, + bool first) { + while (!art_is_leaf(ref)) { + art_node_t *node = art_deref(iterator->art, ref); art_indexed_child_t indexed_child; if (first) { - indexed_child = art_node_next_child(node, -1); + indexed_child = + art_node_next_child(node, art_ref_typecode(ref), -1); } else { - indexed_child = art_node_prev_child(node, 256); + indexed_child = + art_node_prev_child(node, art_ref_typecode(ref), 256); } - art_iterator_down(iterator, (art_inner_node_t *)node, - indexed_child.index); - node = indexed_child.child; + art_iterator_down(iterator, ref, indexed_child.index); + ref = indexed_child.child; } // We're at a leaf. - iterator->frames[iterator->frame].node = (art_node_t *)node; + iterator->frames[iterator->frame].ref = ref; iterator->frames[iterator->frame].index_in_node = 0; // Should not matter. - return art_iterator_valid_loc(iterator, CROARING_CAST_LEAF(node)); + return art_iterator_valid_loc(iterator, ref); } bool art_iterator_move(art_iterator_t *iterator, bool forward) { - if (art_is_leaf(art_iterator_node(iterator))) { + if (art_is_leaf(art_iterator_ref(iterator))) { bool went_up = art_iterator_up(iterator); if (!went_up) { // This leaf is the root, we're done. @@ -10711,67 +11150,69 @@ bool art_iterator_move(art_iterator_t *iterator, bool forward) { } } // Advance within inner node. - art_node_t *neighbor_child = art_iterator_neighbor_child( - iterator, (art_inner_node_t *)art_iterator_node(iterator), forward); - if (neighbor_child != NULL) { - // There is another child at this level, go down to the first or last - // leaf. + art_ref_t neighbor_child = art_iterator_neighbor_child(iterator, forward); + if (neighbor_child != CROARING_ART_NULL_REF) { + // There is another child at this level, go down to the first or + // last leaf. 
return art_node_init_iterator(neighbor_child, iterator, forward); } // No more children at this level, go up. return art_iterator_up_and_move(iterator, forward); } -// Assumes the iterator is positioned at a node with an equal prefix path up to -// the depth of the iterator. -static bool art_node_iterator_lower_bound(const art_node_t *node, +// Assumes the iterator is positioned at a node with an equal prefix path up +// to the depth of the iterator. +static bool art_node_iterator_lower_bound(art_ref_t ref, art_iterator_t *iterator, const art_key_chunk_t key[]) { - while (!art_is_leaf(node)) { - art_inner_node_t *inner_node = (art_inner_node_t *)node; + while (!art_is_leaf(ref)) { + art_inner_node_t *inner_node = + (art_inner_node_t *)art_deref(iterator->art, ref); int prefix_comparison = art_compare_prefix(inner_node->prefix, 0, key, iterator->depth, inner_node->prefix_size); if (prefix_comparison < 0) { // Prefix so far has been equal, but we've found a smaller key. - // Since we take the lower bound within each node, we can return the - // next leaf. + // Since we take the lower bound within each node, we can return + // the next leaf. return art_iterator_up_and_move(iterator, true); } else if (prefix_comparison > 0) { - // No key equal to the key we're looking for, return the first leaf. - return art_node_init_iterator(node, iterator, true); + // No key equal to the key we're looking for, return the first + // leaf. + return art_node_init_iterator(ref, iterator, true); } // Prefix is equal, move to lower bound child. art_key_chunk_t key_chunk = key[iterator->depth + inner_node->prefix_size]; - art_indexed_child_t indexed_child = - art_node_lower_bound(node, key_chunk); - if (indexed_child.child == NULL) { + art_indexed_child_t indexed_child = art_node_lower_bound( + (art_node_t *)inner_node, art_ref_typecode(ref), key_chunk); + if (indexed_child.child == CROARING_ART_NULL_REF) { // Only smaller keys among children. 
return art_iterator_up_and_move(iterator, true); } if (indexed_child.key_chunk > key_chunk) { // Only larger children, return the first larger child. - art_iterator_down(iterator, inner_node, indexed_child.index); + art_iterator_down(iterator, ref, indexed_child.index); return art_node_init_iterator(indexed_child.child, iterator, true); } // We found a child with an equal prefix. - art_iterator_down(iterator, inner_node, indexed_child.index); - node = indexed_child.child; + art_iterator_down(iterator, ref, indexed_child.index); + ref = indexed_child.child; } - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + art_leaf_t *leaf = (art_leaf_t *)art_deref(iterator->art, ref); if (art_compare_keys(leaf->key, key) >= 0) { // Leaf has an equal or larger key. - return art_iterator_valid_loc(iterator, leaf); + return art_iterator_valid_loc(iterator, ref); } - // Leaf has an equal prefix, but the full key is smaller. Move to the next - // leaf. + // Leaf has an equal prefix, but the full key is smaller. Move to the + // next leaf. return art_iterator_up_and_move(iterator, true); } -art_iterator_t art_init_iterator(const art_t *art, bool first) { +art_iterator_t art_init_iterator(art_t *art, bool first) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - if (art->root == NULL) { + iterator.art = art; + if (art->root == CROARING_ART_NULL_REF) { return iterator; } art_node_init_iterator(art->root, &iterator, first); @@ -10789,12 +11230,12 @@ bool art_iterator_prev(art_iterator_t *iterator) { bool art_iterator_lower_bound(art_iterator_t *iterator, const art_key_chunk_t *key) { if (iterator->value == NULL) { - // We're beyond the end / start of the ART so the iterator does not have - // a valid key. Start from the root. + // We're beyond the end / start of the ART so the iterator does not + // have a valid key. Start from the root. 
iterator->frame = 0; iterator->depth = 0; - art_node_t *root = art_iterator_node(iterator); - if (root == NULL) { + art_ref_t root = art_iterator_ref(iterator); + if (root == CROARING_ART_NULL_REF) { return false; } return art_node_iterator_lower_bound(root, iterator, key); @@ -10809,7 +11250,7 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, // Only smaller keys found. return art_iterator_invalid_loc(iterator); } else { - return art_node_init_iterator(art_iterator_node(iterator), + return art_node_init_iterator(art_iterator_ref(iterator), iterator, true); } } @@ -10822,24 +11263,26 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, iterator->depth + inner_node->prefix_size); } if (compare_result > 0) { - return art_node_init_iterator(art_iterator_node(iterator), iterator, + return art_node_init_iterator(art_iterator_ref(iterator), iterator, true); } - return art_node_iterator_lower_bound(art_iterator_node(iterator), iterator, + return art_node_iterator_lower_bound(art_iterator_ref(iterator), iterator, key); } -art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) { +art_iterator_t art_lower_bound(art_t *art, const art_key_chunk_t *key) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - if (art->root != NULL) { + iterator.art = art; + if (art->root != CROARING_ART_NULL_REF) { art_node_iterator_lower_bound(art->root, &iterator, key); } return iterator; } -art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { +art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - if (art->root != NULL) { + iterator.art = art; + if (art->root != CROARING_ART_NULL_REF) { if (art_node_iterator_lower_bound(art->root, &iterator, key) && art_compare_keys(iterator.key, key) == 0) { art_iterator_next(&iterator); @@ -10848,90 +11291,100 @@ art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { return iterator; } -void 
art_iterator_insert(art_t *art, art_iterator_t *iterator, - const art_key_chunk_t *key, art_val_t *val) { +void art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, + art_val_t val) { // TODO: This can likely be faster. - art_insert(art, key, val); - assert(art->root != NULL); + art_insert(iterator->art, key, val); + assert(iterator->art->root != CROARING_ART_NULL_REF); iterator->frame = 0; iterator->depth = 0; - art_node_iterator_lower_bound(art->root, iterator, key); + art_node_iterator_lower_bound(iterator->art->root, iterator, key); } -// TODO: consider keeping `art_t *art` in the iterator. -art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator) { +bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val) { + art_val_t erased_val_local; + if (erased_val == NULL) { + erased_val = &erased_val_local; + } if (iterator->value == NULL) { - return NULL; + return false; } art_key_chunk_t initial_key[ART_KEY_BYTES]; memcpy(initial_key, iterator->key, ART_KEY_BYTES); - art_val_t *value_erased = iterator->value; + *erased_val = *iterator->value; + // Erase the leaf. + art_node_free(iterator->art, art_iterator_node(iterator), + art_ref_typecode(art_iterator_ref(iterator))); bool went_up = art_iterator_up(iterator); if (!went_up) { // We're erasing the root. - art->root = NULL; + iterator->art->root = CROARING_ART_NULL_REF; art_iterator_invalid_loc(iterator); - return value_erased; + return true; } - // Erase the leaf. + // Erase the leaf in its parent. 
+ art_ref_t parent_ref = art_iterator_ref(iterator); art_inner_node_t *parent_node = (art_inner_node_t *)art_iterator_node(iterator); art_key_chunk_t key_chunk_in_parent = iterator->key[iterator->depth + parent_node->prefix_size]; - art_node_t *new_parent_node = - art_node_erase(parent_node, key_chunk_in_parent); + art_ref_t new_parent_ref = + art_node_erase(iterator->art, parent_node, art_ref_typecode(parent_ref), + key_chunk_in_parent); - if (new_parent_node != ((art_node_t *)parent_node)) { + if (new_parent_ref != parent_ref) { // Replace the pointer to the inner node we erased from in its // parent (it may be a leaf now). - iterator->frames[iterator->frame].node = new_parent_node; + iterator->frames[iterator->frame].ref = new_parent_ref; went_up = art_iterator_up(iterator); if (went_up) { + art_ref_t grandparent_ref = art_iterator_ref(iterator); art_inner_node_t *grandparent_node = (art_inner_node_t *)art_iterator_node(iterator); art_key_chunk_t key_chunk_in_grandparent = iterator->key[iterator->depth + grandparent_node->prefix_size]; - art_replace(grandparent_node, key_chunk_in_grandparent, - new_parent_node); + art_replace(grandparent_node, art_ref_typecode(grandparent_ref), + key_chunk_in_grandparent, new_parent_ref); } else { // We were already at the rootmost node. - art->root = new_parent_node; + iterator->art->root = new_parent_ref; } } iterator->frame = 0; iterator->depth = 0; - // Do a lower bound search for the initial key, which will find the first - // greater key if it exists. This can likely be mildly faster if we instead - // start from the current position. - art_node_iterator_lower_bound(art->root, iterator, initial_key); - return value_erased; + // Do a lower bound search for the initial key, which will find the + // first greater key if it exists. This can likely be mildly faster if + // we instead start from the current position. 
+ art_node_iterator_lower_bound(iterator->art->root, iterator, initial_key); + return true; } -static bool art_internal_validate_at(const art_node_t *node, +static bool art_internal_validate_at(const art_t *art, art_ref_t ref, art_internal_validate_t validator) { - if (node == NULL) { + if (ref == CROARING_ART_NULL_REF) { return art_validate_fail(&validator, "node is null"); } - if (art_is_leaf(node)) { - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + if (art_is_leaf(ref)) { + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); if (art_compare_prefix(leaf->key, 0, validator.current_key, 0, validator.depth) != 0) { - return art_validate_fail( - &validator, - "leaf key does not match its position's prefix in the tree"); + return art_validate_fail(&validator, + "leaf key does not match its " + "position's prefix in the tree"); } if (validator.validate_cb != NULL && - !validator.validate_cb(leaf, validator.reason)) { + !validator.validate_cb(leaf->val, validator.reason, + validator.context)) { if (*validator.reason == NULL) { *validator.reason = "leaf validation failed"; } return false; } } else { - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); if (validator.depth + inner_node->prefix_size + 1 > ART_KEY_BYTES) { return art_validate_fail(&validator, @@ -10941,28 +11394,28 @@ static bool art_internal_validate_at(const art_node_t *node, inner_node->prefix_size); validator.depth += inner_node->prefix_size; - switch (inner_node->typecode) { + switch (art_ref_typecode(ref)) { case CROARING_ART_NODE4_TYPE: - if (!art_node4_internal_validate((art_node4_t *)inner_node, + if (!art_node4_internal_validate(art, (art_node4_t *)inner_node, validator)) { return false; } break; case CROARING_ART_NODE16_TYPE: - if (!art_node16_internal_validate((art_node16_t *)inner_node, - validator)) { + if (!art_node16_internal_validate( + art, (art_node16_t *)inner_node, validator)) { return false; } break; case 
CROARING_ART_NODE48_TYPE: - if (!art_node48_internal_validate((art_node48_t *)inner_node, - validator)) { + if (!art_node48_internal_validate( + art, (art_node48_t *)inner_node, validator)) { return false; } break; case CROARING_ART_NODE256_TYPE: - if (!art_node256_internal_validate((art_node256_t *)inner_node, - validator)) { + if (!art_node256_internal_validate( + art, (art_node256_t *)inner_node, validator)) { return false; } break; @@ -10974,23 +11427,143 @@ static bool art_internal_validate_at(const art_node_t *node, } bool art_internal_validate(const art_t *art, const char **reason, - art_validate_cb_t validate_cb) { + art_validate_cb_t validate_cb, void *context) { const char *reason_local; if (reason == NULL) { // Always allow assigning through *reason reason = &reason_local; } *reason = NULL; - if (art->root == NULL) { + if (art->root == CROARING_ART_NULL_REF) { return true; } art_internal_validate_t validator = { .reason = reason, .validate_cb = validate_cb, + .context = context, .depth = 0, - .current_key = {0}, + .current_key = CROARING_ZERO_INITIALIZER, }; - return art_internal_validate_at(art->root, validator); + for (art_typecode_t type = CROARING_ART_LEAF_TYPE; + type <= CROARING_ART_NODE256_TYPE; ++type) { + uint64_t capacity = art->capacities[type]; + for (uint64_t i = 0; i < capacity; ++i) { + uint64_t first_free = art->first_free[type]; + if (first_free > capacity) { + return art_validate_fail(&validator, "first_free > capacity"); + } + } + } + return art_internal_validate_at(art, art->root, validator); +} + +_Static_assert(alignof(art_leaf_t) == alignof(art_node4_t), + "Serialization assumes node type alignment is equal"); +_Static_assert(alignof(art_leaf_t) == alignof(art_node16_t), + "Serialization assumes node type alignment is equal"); +_Static_assert(alignof(art_leaf_t) == alignof(art_node48_t), + "Serialization assumes node type alignment is equal"); +_Static_assert(alignof(art_leaf_t) == alignof(art_node256_t), + "Serialization assumes 
node type alignment is equal"); + +size_t art_size_in_bytes(const art_t *art) { + if (!art_is_shrunken(art)) { + return 0; + } + // Root. + size_t size = sizeof(art->root); + // Node counts. + size += sizeof(art->capacities); + // Alignment for leaves. The rest of the nodes are aligned the same way. + size += + ((size + alignof(art_leaf_t) - 1) & ~(alignof(art_leaf_t) - 1)) - size; + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + size += art->capacities[t] * ART_NODE_SIZES[t]; + } + return size; +} + +size_t art_serialize(const art_t *art, char *buf) { + if (buf == NULL) { + return 0; + } + if (!art_is_shrunken(art)) { + return 0; + } + const char *initial_buf = buf; + + // Root. + memcpy(buf, &art->root, sizeof(art->root)); + buf += sizeof(art->root); + + // Node counts. + memcpy(buf, art->capacities, sizeof(art->capacities)); + buf += sizeof(art->capacities); + + // Alignment for leaves. The rest of the nodes are aligned the same way. + size_t align_bytes = + CROARING_ART_ALIGN_SIZE_RELATIVE(buf, initial_buf, alignof(art_leaf_t)); + memset(buf, 0, align_bytes); + buf += align_bytes; + + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->capacities[t] > 0) { + size_t size = art->capacities[t] * ART_NODE_SIZES[t]; + memcpy(buf, art->nodes[t], size); + buf += size; + } + } + + return buf - initial_buf; +} + +size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art) { + if (buf == NULL || art == NULL) { + return 0; + } + const char *initial_buf = buf; + art_init_cleared(art); + + if (maxbytes < sizeof(art->root)) { + return 0; + } + memcpy(&art->root, buf, sizeof(art->root)); + buf += sizeof(art->root); + maxbytes -= sizeof(art->root); + + if (maxbytes < sizeof(art->capacities)) { + return 0; + } + _Static_assert(sizeof(art->first_free) == sizeof(art->capacities), + "first_free is read from capacities"); + memcpy(art->first_free, buf, sizeof(art->capacities)); + 
memcpy(art->capacities, buf, sizeof(art->capacities)); + buf += sizeof(art->capacities); + maxbytes -= sizeof(art->capacities); + + // Alignment for leaves. The rest of the nodes are aligned the same way. + const char *before_align = buf; + buf = CROARING_ART_ALIGN_BUF(buf, alignof(art_leaf_t)); + if (maxbytes < (size_t)(buf - before_align)) { + return 0; + } + maxbytes -= buf - before_align; + + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->capacities[t] > 0) { + size_t size = art->capacities[t] * ART_NODE_SIZES[t]; + if (maxbytes < size) { + return 0; + } + art->nodes[t] = (char *)buf; + buf += size; + maxbytes -= size; + } + } + return buf - initial_buf; } #ifdef __cplusplus @@ -13210,9 +13783,6 @@ bool array_container_iterate64(const array_container_t *cont, uint32_t base, * bitset.c * */ -#ifndef _POSIX_C_SOURCE -#define _POSIX_C_SOURCE 200809L -#endif #include #include #include @@ -22725,6 +23295,7 @@ bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { /* end file src/roaring.c */ /* begin file src/roaring64.c */ #include +#include #include #include #include @@ -22733,6 +23304,12 @@ bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { // For serialization / deserialization // containers.h last to avoid conflict with ROARING_CONTAINER_T. +#define CROARING_ALIGN_BUF(buf, alignment) \ + (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1))) + +#define CROARING_BITSET_ALIGNMENT 64 + #ifdef __cplusplus using namespace ::roaring::internal; @@ -22747,22 +23324,19 @@ namespace api { typedef struct roaring64_bitmap_s { art_t art; uint8_t flags; + uint64_t first_free; + uint64_t capacity; + container_t **containers; } roaring64_bitmap_t; // Leaf type of the ART used to keep the high 48 bits of each entry. 
-typedef struct roaring64_leaf_s { - art_val_t _pad; - uint8_t typecode; - container_t *container; -} roaring64_leaf_t; - -// Alias to make it easier to work with, since it's an internal-only type -// anyway. -typedef struct roaring64_leaf_s leaf_t; +// Low 8 bits: typecode +// High 56 bits: container index +typedef roaring64_leaf_t leaf_t; // Iterator struct to hold iteration state. typedef struct roaring64_iterator_s { - const roaring64_bitmap_t *parent; + const roaring64_bitmap_t *r; art_iterator_t art_it; roaring_container_iterator_t container_it; uint64_t high48; // Key that art_it points to. @@ -22777,6 +23351,10 @@ typedef struct roaring64_iterator_s { bool saturated_forward; } roaring64_iterator_t; +static inline bool is_frozen64(const roaring64_bitmap_t *r) { + return r->flags & ROARING_FLAG_FROZEN; +} + // Splits the given uint64 key into high 48 bit and low 16 bit components. // Expects high48_out to be of length ART_KEY_BYTES. static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { @@ -22797,23 +23375,95 @@ static inline uint64_t minimum(uint64_t a, uint64_t b) { return (a < b) ? a : b; } -static inline leaf_t *create_leaf(container_t *container, uint8_t typecode) { - leaf_t *leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - leaf->container = container; - leaf->typecode = typecode; - return leaf; +static inline leaf_t create_leaf(uint64_t container_index, uint8_t typecode) { + return (container_index << 8) | typecode; } -static inline leaf_t *copy_leaf_container(const leaf_t *leaf) { - leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - result_leaf->typecode = leaf->typecode; - // get_copy_of_container modifies the typecode passed in. 
- result_leaf->container = get_copy_of_container( - leaf->container, &result_leaf->typecode, /*copy_on_write=*/false); - return result_leaf; +static inline uint8_t get_typecode(leaf_t leaf) { return (uint8_t)leaf; } + +static inline uint64_t get_index(leaf_t leaf) { return leaf >> 8; } + +static inline container_t *get_container(const roaring64_bitmap_t *r, + leaf_t leaf) { + return r->containers[get_index(leaf)]; } -static inline void free_leaf(leaf_t *leaf) { roaring_free(leaf); } +// Replaces the container of `leaf` with the given container. Returns the +// modified leaf for convenience. +static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, + container_t *container, + uint8_t typecode) { + uint64_t index = get_index(*leaf); + r->containers[index] = container; + *leaf = create_leaf(index, typecode); + return *leaf; +} + +/** + * Extends the array of container pointers. + */ +static void extend_containers(roaring64_bitmap_t *r) { + uint64_t size = r->first_free; + if (size < r->capacity) { + return; + } + uint64_t new_capacity; + if (r->capacity == 0) { + new_capacity = 2; + } else if (r->capacity < 1024) { + new_capacity = 2 * r->capacity; + } else { + new_capacity = 5 * r->capacity / 4; + } + uint64_t increase = new_capacity - r->capacity; + r->containers = + roaring_realloc(r->containers, new_capacity * sizeof(container_t *)); + memset(r->containers + r->capacity, 0, increase * sizeof(container_t *)); + r->capacity = new_capacity; +} + +static uint64_t next_free_container_idx(const roaring64_bitmap_t *r) { + for (uint64_t i = r->first_free + 1; i < r->capacity; ++i) { + if (r->containers[i] == NULL) { + return i; + } + } + return r->capacity; +} + +static uint64_t allocate_index(roaring64_bitmap_t *r) { + uint64_t first_free = r->first_free; + if (first_free == r->capacity) { + extend_containers(r); + } + r->first_free = next_free_container_idx(r); + return first_free; +} + +static leaf_t add_container(roaring64_bitmap_t *r, container_t 
*container, + uint8_t typecode) { + uint64_t index = allocate_index(r); + r->containers[index] = container; + return create_leaf(index, typecode); +} + +static void remove_container(roaring64_bitmap_t *r, leaf_t leaf) { + uint64_t index = get_index(leaf); + r->containers[index] = NULL; + if (index < r->first_free) { + r->first_free = index; + } +} + +// Copies the container referenced by `leaf` from `r1` to `r2`. +static inline leaf_t copy_leaf_container(const roaring64_bitmap_t *r1, + roaring64_bitmap_t *r2, leaf_t leaf) { + uint8_t typecode = get_typecode(leaf); + // get_copy_of_container modifies the typecode passed in. + container_t *container = get_copy_of_container( + get_container(r1, leaf), &typecode, /*copy_on_write=*/false); + return add_container(r2, container, typecode); +} static inline int compare_high48(art_key_chunk_t key1[], art_key_chunk_t key2[]) { @@ -22823,10 +23473,10 @@ static inline int compare_high48(art_key_chunk_t key1[], static inline bool roaring64_iterator_init_at_leaf_first( roaring64_iterator_t *it) { it->high48 = combine_key(it->art_it.key, 0); - leaf_t *leaf = (leaf_t *)it->art_it.value; + leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = 0; - it->container_it = - container_init_iterator(leaf->container, leaf->typecode, &low16); + it->container_it = container_init_iterator(get_container(it->r, leaf), + get_typecode(leaf), &low16); it->value = it->high48 | low16; return (it->has_value = true); } @@ -22834,18 +23484,18 @@ static inline bool roaring64_iterator_init_at_leaf_first( static inline bool roaring64_iterator_init_at_leaf_last( roaring64_iterator_t *it) { it->high48 = combine_key(it->art_it.key, 0); - leaf_t *leaf = (leaf_t *)it->art_it.value; + leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = 0; - it->container_it = - container_init_iterator_last(leaf->container, leaf->typecode, &low16); + it->container_it = container_init_iterator_last(get_container(it->r, leaf), + get_typecode(leaf), &low16); it->value = 
it->high48 | low16; return (it->has_value = true); } static inline roaring64_iterator_t *roaring64_iterator_init_at( const roaring64_bitmap_t *r, roaring64_iterator_t *it, bool first) { - it->parent = r; - it->art_it = art_init_iterator(&r->art, first); + it->r = r; + it->art_it = art_init_iterator((art_t *)&r->art, first); it->has_value = it->art_it.value != NULL; if (it->has_value) { if (first) { @@ -22862,8 +23512,11 @@ static inline roaring64_iterator_t *roaring64_iterator_init_at( roaring64_bitmap_t *roaring64_bitmap_create(void) { roaring64_bitmap_t *r = (roaring64_bitmap_t *)roaring_malloc(sizeof(roaring64_bitmap_t)); - r->art.root = NULL; + art_init_cleared(&r->art); r->flags = 0; + r->capacity = 0; + r->first_free = 0; + r->containers = NULL; return r; } @@ -22873,26 +23526,35 @@ void roaring64_bitmap_free(roaring64_bitmap_t *r) { } art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); + leaf_t leaf = (leaf_t)*it.value; + if (is_frozen64(r)) { + // Only free the container itself, not the buffer-backed contents + // within. 
+ roaring_free(get_container(r, leaf)); + } else { + container_free(get_container(r, leaf), get_typecode(leaf)); + } art_iterator_next(&it); } - art_free(&r->art); + if (!is_frozen64(r)) { + art_free(&r->art); + } + roaring_free(r->containers); roaring_free(r); } roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - uint8_t result_typecode = leaf->typecode; + leaf_t leaf = (leaf_t)*it.value; + uint8_t result_typecode = get_typecode(leaf); container_t *result_container = get_copy_of_container( - leaf->container, &result_typecode, /*copy_on_write=*/false); - leaf_t *result_leaf = create_leaf(result_container, result_typecode); - art_insert(&result->art, it.key, (art_val_t *)result_leaf); + get_container(r, leaf), &result_typecode, /*copy_on_write=*/false); + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it.key, (art_val_t)result_leaf); art_iterator_next(&it); } return result; @@ -22919,8 +23581,8 @@ static void move_from_roaring32_offset(roaring64_bitmap_t *dst, uint8_t high48[ART_KEY_BYTES]; uint64_t high48_bits = key_base | ((uint64_t)key << 16); split_key(high48_bits, high48); - leaf_t *leaf = create_leaf(container, typecode); - art_insert(&dst->art, high48, (art_val_t *)leaf); + leaf_t leaf = add_container(dst, container, typecode); + art_insert(&dst->art, high48, (art_val_t)leaf); } // We stole all the containers, so leave behind a size of zero src->high_low_container.size = 0; @@ -22962,8 +23624,8 @@ roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, uint8_t high48[ART_KEY_BYTES]; split_key(min, high48); - leaf_t *leaf = create_leaf(container, typecode); - art_insert(&r->art, high48, (art_val_t 
*)leaf); + leaf_t leaf = add_container(r, container, typecode); + art_insert(&r->art, high48, (art_val_t)leaf); uint64_t gap = container_max - container_min + step - 1; uint64_t increment = gap - (gap % step); @@ -22987,13 +23649,14 @@ static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, uint16_t low16, leaf_t *leaf) { if (leaf != NULL) { + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); uint8_t typecode2; container_t *container2 = - container_add(leaf->container, low16, leaf->typecode, &typecode2); - if (container2 != leaf->container) { - container_free(leaf->container, leaf->typecode); - leaf->container = container2; - leaf->typecode = typecode2; + container_add(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + replace_container(r, leaf, container2, typecode2); } return leaf; } else { @@ -23002,9 +23665,8 @@ static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, container_t *container = container_add(ac, low16, ARRAY_CONTAINER_TYPE, &typecode); assert(ac == container); - leaf = create_leaf(container, typecode); - art_insert(&r->art, high48, (art_val_t *)leaf); - return leaf; + leaf_t new_leaf = add_container(r, container, typecode); + return (leaf_t *)art_insert(&r->art, high48, (art_val_t)new_leaf); } } @@ -23022,12 +23684,12 @@ bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) { int old_cardinality = 0; if (leaf != NULL) { - old_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); + old_cardinality = container_get_cardinality(get_container(r, *leaf), + get_typecode(*leaf)); } leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); int new_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); return old_cardinality != new_cardinality; } @@ -23036,22 
+23698,22 @@ void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, uint64_t val) { uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - if (context->leaf != NULL && - compare_high48(context->high_bytes, high48) == 0) { + leaf_t *leaf = context->leaf; + if (leaf != NULL && compare_high48(context->high_bytes, high48) == 0) { // We're at a container with the correct high bits. + uint8_t typecode1 = get_typecode(*leaf); + container_t *container1 = get_container(r, *leaf); uint8_t typecode2; container_t *container2 = - container_add(context->leaf->container, low16, - context->leaf->typecode, &typecode2); - if (container2 != context->leaf->container) { - container_free(context->leaf->container, context->leaf->typecode); - context->leaf->container = container2; - context->leaf->typecode = typecode2; + container_add(container1, low16, typecode1, &typecode2); + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r, leaf, container2, typecode2); } } else { // We're not positioned anywhere yet or the high bits of the key // differ. 
- leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + leaf = (leaf_t *)art_find(&r->art, high48); context->leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); memcpy(context->high_bytes, high48, ART_KEY_BYTES); @@ -23071,17 +23733,19 @@ void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, } } -static inline void add_range_closed_at(art_t *art, uint8_t *high48, - uint16_t min, uint16_t max) { +static inline void add_range_closed_at(roaring64_bitmap_t *r, art_t *art, + uint8_t *high48, uint16_t min, + uint16_t max) { leaf_t *leaf = (leaf_t *)art_find(art, high48); if (leaf != NULL) { + uint8_t typecode1 = get_typecode(*leaf); + container_t *container1 = get_container(r, *leaf); uint8_t typecode2; - container_t *container2 = container_add_range( - leaf->container, leaf->typecode, min, max, &typecode2); - if (container2 != leaf->container) { - container_free(leaf->container, leaf->typecode); - leaf->container = container2; - leaf->typecode = typecode2; + container_t *container2 = + container_add_range(container1, typecode1, min, max, &typecode2); + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r, leaf, container2, typecode2); } return; } @@ -23089,8 +23753,8 @@ static inline void add_range_closed_at(art_t *art, uint8_t *high48, // container_add_range is inclusive, but `container_range_of_ones` is // exclusive. container_t *container = container_range_of_ones(min, max + 1, &typecode); - leaf = create_leaf(container, typecode); - art_insert(art, high48, (art_val_t *)leaf); + leaf_t new_leaf = add_container(r, container, typecode); + art_insert(art, high48, (art_val_t)new_leaf); } void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, @@ -23114,22 +23778,22 @@ void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, max_high48); if (compare_high48(min_high48, max_high48) == 0) { // Only populate range within one container. 
- add_range_closed_at(art, min_high48, min_low16, max_low16); + add_range_closed_at(r, art, min_high48, min_low16, max_low16); return; } // Populate a range across containers. Fill intermediate containers // entirely. - add_range_closed_at(art, min_high48, min_low16, 0xffff); + add_range_closed_at(r, art, min_high48, min_low16, 0xffff); uint64_t min_high_bits = min >> 16; uint64_t max_high_bits = max >> 16; for (uint64_t current = min_high_bits + 1; current < max_high_bits; ++current) { uint8_t current_high48[ART_KEY_BYTES]; split_key(current << 16, current_high48); - add_range_closed_at(art, current_high48, 0, 0xffff); + add_range_closed_at(r, art, current_high48, 0, 0xffff); } - add_range_closed_at(art, max_high48, 0, max_low16); + add_range_closed_at(r, art, max_high48, 0, max_low16); } bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { @@ -23137,7 +23801,8 @@ bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { uint16_t low16 = split_key(val, high48); leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); if (leaf != NULL) { - return container_contains(leaf->container, low16, leaf->typecode); + return container_contains(get_container(r, *leaf), low16, + get_typecode(*leaf)); } return false; } @@ -23154,7 +23819,7 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, max_high48); uint64_t max_high48_bits = (max - 1) & 0xFFFFFFFFFFFF0000; // Inclusive - art_iterator_t it = art_lower_bound(&r->art, min_high48); + art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); if (it.value == NULL || combine_key(it.key, 0) > min) { return false; } @@ -23171,7 +23836,7 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, return false; } - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; uint32_t container_min = 0; if (compare_high48(it.key, min_high48) == 0) { container_min = min_low16; @@ -23184,11 
+23849,13 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, // For the first and last containers we use container_contains_range, // for the intermediate containers we can use container_is_full. if (container_min == 0 && container_max == 0xFFFF + 1) { - if (!container_is_full(leaf->container, leaf->typecode)) { + if (!container_is_full(get_container(r, leaf), + get_typecode(leaf))) { return false; } - } else if (!container_contains_range(leaf->container, container_min, - container_max, leaf->typecode)) { + } else if (!container_contains_range(get_container(r, leaf), + container_min, container_max, + get_typecode(leaf))) { return false; } prev_high48_bits = current_high48_bits; @@ -23214,24 +23881,24 @@ bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, context->leaf = leaf; memcpy(context->high_bytes, high48, ART_KEY_BYTES); } - return container_contains(context->leaf->container, low16, - context->leaf->typecode); + return container_contains(get_container(r, *context->leaf), low16, + get_typecode(*context->leaf)); } bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, uint64_t *element) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint64_t start_rank = 0; while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - uint64_t cardinality = - container_get_cardinality(leaf->container, leaf->typecode); + leaf_t leaf = (leaf_t)*it.value; + uint64_t cardinality = container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); if (start_rank + cardinality > rank) { uint32_t uint32_start = 0; uint32_t uint32_rank = rank - start_rank; uint32_t uint32_element = 0; - if (container_select(leaf->container, leaf->typecode, &uint32_start, - uint32_rank, &uint32_element)) { + if (container_select(get_container(r, leaf), get_typecode(leaf), + &uint32_start, uint32_rank, &uint32_element)) { *element = 
combine_key(it.key, (uint16_t)uint32_element); return true; } @@ -23247,16 +23914,17 @@ uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) { uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint64_t rank = 0; while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; int compare_result = compare_high48(it.key, high48); if (compare_result < 0) { - rank += container_get_cardinality(leaf->container, leaf->typecode); + rank += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); } else if (compare_result == 0) { - return rank + - container_rank(leaf->container, leaf->typecode, low16); + return rank + container_rank(get_container(r, leaf), + get_typecode(leaf), low16); } else { return rank; } @@ -23270,16 +23938,17 @@ bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint64_t index = 0; while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; int compare_result = compare_high48(it.key, high48); if (compare_result < 0) { - index += container_get_cardinality(leaf->container, leaf->typecode); + index += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); } else if (compare_result == 0) { - int index16 = - container_get_index(leaf->container, leaf->typecode, low16); + int index16 = container_get_index(get_container(r, leaf), + get_typecode(leaf), low16); if (index16 < 0) { return false; } @@ -23293,31 +23962,31 @@ bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, return false; } -static inline leaf_t 
*containerptr_roaring64_bitmap_remove( - roaring64_bitmap_t *r, uint8_t *high48, uint16_t low16, leaf_t *leaf) { +// Returns true if a container was removed. +static inline bool containerptr_roaring64_bitmap_remove(roaring64_bitmap_t *r, + uint8_t *high48, + uint16_t low16, + leaf_t *leaf) { if (leaf == NULL) { - return NULL; + return false; } - container_t *container = leaf->container; - uint8_t typecode = leaf->typecode; + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); uint8_t typecode2; container_t *container2 = container_remove(container, low16, typecode, &typecode2); if (container2 != container) { container_free(container, typecode); - leaf->container = container2; - leaf->typecode = typecode2; + replace_container(r, leaf, container2, typecode2); } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - leaf = (leaf_t *)art_erase(&r->art, high48); - if (leaf != NULL) { - free_leaf(leaf); - } - return NULL; + bool erased = art_erase(&r->art, high48, (art_val_t *)leaf); + assert(erased); + return true; } - return leaf; + return false; } void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) { @@ -23339,13 +24008,12 @@ bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) { return false; } int old_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); - leaf = containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); - if (leaf == NULL) { + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + if (containerptr_roaring64_bitmap_remove(r, high48, low16, leaf)) { return true; } int new_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); return new_cardinality != old_cardinality; } @@ -23358,26 +24026,28 @@ void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, if (context->leaf != NULL && 
compare_high48(context->high_bytes, high48) == 0) { // We're at a container with the correct high bits. + uint8_t typecode = get_typecode(*context->leaf); + container_t *container = get_container(r, *context->leaf); uint8_t typecode2; container_t *container2 = - container_remove(context->leaf->container, low16, - context->leaf->typecode, &typecode2); - if (container2 != context->leaf->container) { - container_free(context->leaf->container, context->leaf->typecode); - context->leaf->container = container2; - context->leaf->typecode = typecode2; + container_remove(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + replace_container(r, context->leaf, container2, typecode2); } if (!container_nonzero_cardinality(container2, typecode2)) { - leaf_t *leaf = (leaf_t *)art_erase(art, high48); container_free(container2, typecode2); - free_leaf(leaf); + leaf_t leaf; + bool erased = art_erase(art, high48, (art_val_t *)&leaf); + assert(erased); + remove_container(r, leaf); } } else { // We're not positioned anywhere yet or the high bits of the key // differ. 
leaf_t *leaf = (leaf_t *)art_find(art, high48); - context->leaf = - containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); + containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); + context->leaf = leaf; memcpy(context->high_bytes, high48, ART_KEY_BYTES); } } @@ -23395,23 +24065,26 @@ void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, } } -static inline void remove_range_closed_at(art_t *art, uint8_t *high48, - uint16_t min, uint16_t max) { +static inline void remove_range_closed_at(roaring64_bitmap_t *r, art_t *art, + uint8_t *high48, uint16_t min, + uint16_t max) { leaf_t *leaf = (leaf_t *)art_find(art, high48); if (leaf == NULL) { return; } + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); uint8_t typecode2; - container_t *container2 = container_remove_range( - leaf->container, leaf->typecode, min, max, &typecode2); - if (container2 != leaf->container) { - container_free(leaf->container, leaf->typecode); + container_t *container2 = + container_remove_range(container, typecode, min, max, &typecode2); + if (container2 != container) { + container_free(container, typecode); if (container2 != NULL) { - leaf->container = container2; - leaf->typecode = typecode2; + replace_container(r, leaf, container2, typecode2); } else { - art_erase(art, high48); - free_leaf(leaf); + bool erased = art_erase(art, high48, NULL); + assert(erased); + remove_container(r, *leaf); } } } @@ -23437,21 +24110,23 @@ void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, max_high48); if (compare_high48(min_high48, max_high48) == 0) { // Only remove a range within one container. - remove_range_closed_at(art, min_high48, min_low16, max_low16); + remove_range_closed_at(r, art, min_high48, min_low16, max_low16); return; } // Remove a range across containers. Remove intermediate containers // entirely. 
- remove_range_closed_at(art, min_high48, min_low16, 0xffff); + remove_range_closed_at(r, art, min_high48, min_low16, 0xffff); art_iterator_t it = art_upper_bound(art, min_high48); while (it.value != NULL && art_compare_keys(it.key, max_high48) < 0) { - leaf_t *leaf = (leaf_t *)art_iterator_erase(art, &it); - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); + leaf_t leaf; + bool erased = art_iterator_erase(&it, (art_val_t *)&leaf); + assert(erased); + container_free(get_container(r, leaf), get_typecode(leaf)); + remove_container(r, leaf); } - remove_range_closed_at(art, max_high48, 0, max_low16); + remove_range_closed_at(r, art, max_high48, 0, max_low16); } void roaring64_bitmap_clear(roaring64_bitmap_t *r) { @@ -23459,12 +24134,12 @@ void roaring64_bitmap_clear(roaring64_bitmap_t *r) { } uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint64_t cardinality = 0; while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - cardinality += - container_get_cardinality(leaf->container, leaf->typecode); + leaf_t leaf = (leaf_t)*it.value; + cardinality += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); art_iterator_next(&it); } return cardinality; @@ -23493,7 +24168,7 @@ uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, uint8_t max_high48[ART_KEY_BYTES]; uint16_t max_low16 = split_key(max, max_high48); - art_iterator_t it = art_lower_bound(&r->art, min_high48); + art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); while (it.value != NULL) { int max_compare_result = compare_high48(it.key, max_high48); if (max_compare_result > 0) { @@ -23501,23 +24176,22 @@ uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, break; } - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; + 
uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); if (max_compare_result == 0) { // We're at the max high key, add only the range up to the low // 16 bits of max. - cardinality += - container_rank(leaf->container, leaf->typecode, max_low16); + cardinality += container_rank(container, typecode, max_low16); } else { // We're not yet at the max high key, add the full container // range. - cardinality += - container_get_cardinality(leaf->container, leaf->typecode); + cardinality += container_get_cardinality(container, typecode); } if (compare_high48(it.key, min_high48) == 0 && min_low16 > 0) { // We're at the min high key, remove the range up to the low 16 // bits of min. - cardinality -= - container_rank(leaf->container, leaf->typecode, min_low16 - 1); + cardinality -= container_rank(container, typecode, min_low16 - 1); } art_iterator_next(&it); } @@ -23529,23 +24203,23 @@ bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) { } uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); if (it.value == NULL) { return UINT64_MAX; } - leaf_t *leaf = (leaf_t *)it.value; - return combine_key(it.key, - container_minimum(leaf->container, leaf->typecode)); + leaf_t leaf = (leaf_t)*it.value; + return combine_key( + it.key, container_minimum(get_container(r, leaf), get_typecode(leaf))); } uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/false); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/false); if (it.value == NULL) { return 0; } - leaf_t *leaf = (leaf_t *)it.value; - return combine_key(it.key, - container_maximum(leaf->container, leaf->typecode)); + leaf_t leaf = (leaf_t)*it.value; + return combine_key( + it.key, container_maximum(get_container(r, leaf), get_typecode(leaf))); } bool 
roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { @@ -23556,15 +24230,53 @@ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { uint8_t new_typecode; // We don't need to free the existing container if a new one was // created, convert_run_optimize does that internally. - leaf->container = convert_run_optimize(leaf->container, leaf->typecode, - &new_typecode); - leaf->typecode = new_typecode; + container_t *new_container = convert_run_optimize( + get_container(r, *leaf), get_typecode(*leaf), &new_typecode); + replace_container(r, leaf, new_container, new_typecode); has_run_container |= new_typecode == RUN_CONTAINER_TYPE; art_iterator_next(&it); } return has_run_container; } +static void move_to_shrink(roaring64_bitmap_t *r, leaf_t *leaf) { + uint64_t idx = get_index(*leaf); + if (idx < r->first_free) { + return; + } + r->containers[r->first_free] = get_container(r, *leaf); + r->containers[idx] = NULL; + *leaf = create_leaf(r->first_free, get_typecode(*leaf)); + r->first_free = next_free_container_idx(r); +} + +static inline bool is_shrunken(const roaring64_bitmap_t *r) { + return art_is_shrunken(&r->art) && r->first_free == r->capacity; +} + +size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { + size_t freed = art_shrink_to_fit(&r->art); + art_iterator_t it = art_init_iterator(&r->art, true); + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + freed += container_shrink_to_fit(get_container(r, *leaf), + get_typecode(*leaf)); + move_to_shrink(r, leaf); + art_iterator_next(&it); + } + if (is_shrunken(r)) { + return freed; + } + uint64_t new_capacity = r->first_free; + if (new_capacity < r->capacity) { + r->containers = roaring_realloc(r->containers, + new_capacity * sizeof(container_t *)); + freed += (r->capacity - new_capacity) * sizeof(container_t *); + r->capacity = new_capacity; + } + return freed; +} + /** * (For advanced users.) 
* Collect statistics about the bitmap @@ -23575,15 +24287,16 @@ void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, stat->min_value = roaring64_bitmap_minimum(r); stat->max_value = roaring64_bitmap_maximum(r); - art_iterator_t it = art_init_iterator(&r->art, true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, true); while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; stat->n_containers++; - uint8_t truetype = get_container_type(leaf->container, leaf->typecode); - uint32_t card = - container_get_cardinality(leaf->container, leaf->typecode); + uint8_t truetype = + get_container_type(get_container(r, leaf), get_typecode(leaf)); + uint32_t card = container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); uint32_t sbytes = - container_size_in_bytes(leaf->container, leaf->typecode); + container_size_in_bytes(get_container(r, leaf), get_typecode(leaf)); stat->cardinality += card; switch (truetype) { case BITSET_CONTAINER_TYPE: @@ -23609,31 +24322,34 @@ void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, } } -static bool roaring64_leaf_internal_validate(const art_val_t *val, - const char **reason) { - leaf_t *leaf = (leaf_t *)val; - return container_internal_validate(leaf->container, leaf->typecode, reason); +static bool roaring64_leaf_internal_validate(const art_val_t val, + const char **reason, + void *context) { + leaf_t leaf = (leaf_t)val; + roaring64_bitmap_t *r = (roaring64_bitmap_t *)context; + return container_internal_validate(get_container(r, leaf), + get_typecode(leaf), reason); } bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, const char **reason) { return art_internal_validate(&r->art, reason, - roaring64_leaf_internal_validate); + roaring64_leaf_internal_validate, (void *)r); } bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - 
art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { if (compare_high48(it1.key, it2.key) != 0) { return false; } - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - if (!container_equals(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode)) { + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + if (!container_equals(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2))) { return false; } art_iterator_next(&it1); @@ -23644,8 +24360,8 @@ bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL) { bool it2_present = it2.value != NULL; @@ -23654,10 +24370,11 @@ bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, if (it2_present) { compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - if (!container_is_subset(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode)) { + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + if (!container_is_subset( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2))) { return false; } art_iterator_next(&it1); @@ -23684,8 +24401,8 @@ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, const 
roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -23695,19 +24412,20 @@ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. - leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - result_leaf->container = container_and( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - - if (container_nonzero_cardinality(result_leaf->container, - result_leaf->typecode)) { - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_and(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); } else { - container_free(result_leaf->container, result_leaf->typecode); - free_leaf(result_leaf); + container_free(result_container, result_typecode); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -23726,8 +24444,8 @@ uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { uint64_t result = 0; - art_iterator_t it1 = art_init_iterator(&r1->art, 
/*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -23737,11 +24455,11 @@ uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - result += - container_and_cardinality(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode); + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + result += container_and_cardinality( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2)); art_iterator_next(&it1); art_iterator_next(&it2); } else if (compare_result < 0) { @@ -23762,7 +24480,7 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, return; } art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -23778,7 +24496,7 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 2a: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; + leaf_t leaf2 = (leaf_t)*it2.value; // We do the computation "in place" only when c1 is not a // shared container. Rationale: using a shared container @@ -23786,28 +24504,31 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, // copy and then doing the computation in place which is // likely less efficient than avoiding in place entirely and // always generating a new container. 
+ uint8_t typecode = get_typecode(*leaf1); + container_t *container = get_container(r1, *leaf1); uint8_t typecode2; container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { - container2 = container_and( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + if (typecode == SHARED_CONTAINER_TYPE) { + container2 = container_and(container, typecode, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } else { container2 = container_iand( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + container, typecode, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } - if (container2 != leaf1->container) { - container_free(leaf1->container, leaf1->typecode); - leaf1->container = container2; - leaf1->typecode = typecode2; + if (container2 != container) { + container_free(container, typecode); } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - art_iterator_erase(&r1->art, &it1); - free_leaf(leaf1); + art_iterator_erase(&it1, NULL); + remove_container(r1, *leaf1); } else { + if (container2 != container) { + replace_container(r1, leaf1, container2, typecode2); + } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -23818,10 +24539,11 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, if (!it2_present || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t *leaf = (leaf_t *)art_iterator_erase(&r1->art, &it1); - assert(leaf != NULL); - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); + leaf_t leaf; + bool erased = art_iterator_erase(&it1, (art_val_t *)&leaf); + assert(erased); + container_free(get_container(r1, leaf), get_typecode(leaf)); + remove_container(r1, leaf); } else if (compare_result > 0) { // Case 2c: it1 is after it2. 
art_iterator_lower_bound(&it2, it1.key); @@ -23832,8 +24554,8 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { bool intersect = false; - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -23843,10 +24565,11 @@ bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - intersect |= container_intersect(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode); + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + intersect |= container_intersect( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2)); art_iterator_next(&it1); art_iterator_next(&it2); } else if (compare_result < 0) { @@ -23886,8 +24609,8 @@ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -23905,26 +24628,31 @@ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if 
(compare_result == 0) { // Case 3b: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - result_leaf->container = container_or( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_or(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); art_iterator_next(&it1); art_iterator_next(&it2); } } if ((it1_present && !it2_present) || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. 
- leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); - art_insert(&result->art, it2.key, (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r2, result, (leaf_t)*it2.value); + art_insert(&result->art, it2.key, (art_val_t)result_leaf); art_iterator_next(&it2); } } @@ -23945,7 +24673,7 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, return; } art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -23964,22 +24692,23 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 3b: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); uint8_t typecode2; container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { - container2 = container_or(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode, - &typecode2); + if (get_typecode(*leaf1) == SHARED_CONTAINER_TYPE) { + container2 = container_or(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } else { - container2 = container_ior( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + container2 = container_ior(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } - if (container2 != leaf1->container) { - container_free(leaf1->container, leaf1->typecode); - leaf1->container = container2; - leaf1->typecode = typecode2; + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r1, leaf1, container2, typecode2); } art_iterator_next(&it1); 
art_iterator_next(&it2); @@ -23990,9 +24719,9 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); - art_iterator_insert(&r1->art, &it1, it2.key, - (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r2, r1, (leaf_t)*it2.value); + art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); art_iterator_next(&it2); } } @@ -24002,8 +24731,8 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -24021,19 +24750,20 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 3b: iterators at the same high key position. 
- leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - result_leaf->container = container_xor( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - if (container_nonzero_cardinality(result_leaf->container, - result_leaf->typecode)) { - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_xor(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = add_container(result, result_container, + result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); } else { - container_free(result_leaf->container, - result_leaf->typecode); - free_leaf(result_leaf); + container_free(result_container, result_typecode); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -24041,13 +24771,15 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, } if ((it1_present && !it2_present) || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. 
- leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); - art_insert(&result->art, it2.key, (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r2, result, (leaf_t)*it2.value); + art_insert(&result->art, it2.key, (art_val_t)result_leaf); art_iterator_next(&it2); } } @@ -24066,7 +24798,7 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { assert(r1 != r2); art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -24085,15 +24817,15 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 3b: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - container_t *container1 = leaf1->container; - uint8_t typecode1 = leaf1->typecode; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); uint8_t typecode2; container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { - container2 = container_xor( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + if (typecode1 == SHARED_CONTAINER_TYPE) { + container2 = container_xor(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); if (container2 != container1) { // We only free when doing container_xor, not // container_ixor, as ixor frees the original @@ -24102,17 +24834,19 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, } } else { container2 = container_ixor( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } - 
leaf1->container = container2; - leaf1->typecode = typecode2; if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - art_iterator_erase(&r1->art, &it1); - free_leaf(leaf1); + bool erased = art_iterator_erase(&it1, NULL); + assert(erased); + remove_container(r1, *leaf1); } else { + if (container2 != container1) { + replace_container(r1, leaf1, container2, typecode2); + } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -24125,13 +24859,13 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + leaf_t result_leaf = + copy_leaf_container(r2, r1, (leaf_t)*it2.value); if (it1_present) { - art_iterator_insert(&r1->art, &it1, it2.key, - (art_val_t *)result_leaf); + art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); art_iterator_next(&it1); } else { - art_insert(&r1->art, it2.key, (art_val_t *)result_leaf); + art_insert(&r1->art, it2.key, (art_val_t)result_leaf); } art_iterator_next(&it2); } @@ -24142,8 +24876,8 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -24158,20 +24892,21 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2b: iterators at the same high key position. 
- leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - result_leaf->container = container_andnot( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - - if (container_nonzero_cardinality(result_leaf->container, - result_leaf->typecode)) { - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = container_andnot( + get_container(r1, *leaf1), get_typecode(*leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = add_container(result, result_container, + result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); } else { - container_free(result_leaf->container, - result_leaf->typecode); - free_leaf(result_leaf); + container_free(result_container, result_typecode); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -24179,8 +24914,9 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, } if (!it2_present || compare_result < 0) { // Cases 1 and 2a: it1 is the only iterator or is before it2. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); art_iterator_next(&it1); } else if (compare_result > 0) { // Case 2c: it1 is after it2. 
@@ -24200,7 +24936,7 @@ uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -24216,15 +24952,15 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 2b: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - container_t *container1 = leaf1->container; - uint8_t typecode1 = leaf1->typecode; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); uint8_t typecode2; container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + if (typecode1 == SHARED_CONTAINER_TYPE) { container2 = container_andnot( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); if (container2 != container1) { // We only free when doing container_andnot, not // container_iandnot, as iandnot frees the original @@ -24233,19 +24969,19 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, } } else { container2 = container_iandnot( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); - } - if (container2 != container1) { - leaf1->container = container2; - leaf1->typecode = typecode2; + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - art_iterator_erase(&r1->art, &it1); - free_leaf(leaf1); + bool erased = art_iterator_erase(&it1, NULL); + 
assert(erased); + remove_container(r1, *leaf1); } else { + if (container2 != container1) { + replace_container(r1, leaf1, container2, typecode2); + } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -24264,38 +25000,39 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, } /** - * Flips the leaf at high48 in the range [min, max), returning a new leaf with a - * new container. If the high48 key is not found in the existing bitmap, a new - * container is created. Returns null if the negation results in an empty range. - */ -static leaf_t *roaring64_flip_leaf(const roaring64_bitmap_t *r, - uint8_t high48[], uint32_t min, - uint32_t max) { - leaf_t *leaf1 = (leaf_t *)art_find(&r->art, high48); - container_t *container2; + * Flips the leaf at high48 in the range [min, max), adding the result to + * `r2`. If the high48 key is not found in `r1`, a new container is created. + */ +static void roaring64_flip_leaf(const roaring64_bitmap_t *r1, + roaring64_bitmap_t *r2, uint8_t high48[], + uint32_t min, uint32_t max) { + leaf_t *leaf1 = (leaf_t *)art_find(&r1->art, high48); uint8_t typecode2; + container_t *container2; if (leaf1 == NULL) { // No container at this key, create a full container. container2 = container_range_of_ones(min, max, &typecode2); } else if (min == 0 && max > 0xFFFF) { // Flip whole container. - container2 = - container_not(leaf1->container, leaf1->typecode, &typecode2); + container2 = container_not(get_container(r1, *leaf1), + get_typecode(*leaf1), &typecode2); } else { // Partially flip a container. 
- container2 = container_not_range(leaf1->container, leaf1->typecode, min, - max, &typecode2); + container2 = + container_not_range(get_container(r1, *leaf1), get_typecode(*leaf1), + min, max, &typecode2); } if (container_nonzero_cardinality(container2, typecode2)) { - return create_leaf(container2, typecode2); + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, high48, (art_val_t)leaf2); + } else { + container_free(container2, typecode2); } - container_free(container2, typecode2); - return NULL; } /** - * Flips the leaf at high48 in the range [min, max). If the high48 key is not - * found in the bitmap, a new container is created. Deletes the leaf and + * Flips the leaf at high48 in the range [min, max). If the high48 key is + * not found in the bitmap, a new container is created. Deletes the leaf and * associated container if the negation results in an empty range. */ static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], @@ -24306,28 +25043,28 @@ static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], if (leaf == NULL) { // No container at this key, insert a full container. container2 = container_range_of_ones(min, max, &typecode2); - art_insert(&r->art, high48, - (art_val_t *)create_leaf(container2, typecode2)); + leaf_t new_leaf = add_container(r, container2, typecode2); + art_insert(&r->art, high48, (art_val_t)new_leaf); return; } if (min == 0 && max > 0xFFFF) { // Flip whole container. - container2 = - container_inot(leaf->container, leaf->typecode, &typecode2); + container2 = container_inot(get_container(r, *leaf), + get_typecode(*leaf), &typecode2); } else { // Partially flip a container. 
- container2 = container_inot_range(leaf->container, leaf->typecode, min, - max, &typecode2); + container2 = container_inot_range( + get_container(r, *leaf), get_typecode(*leaf), min, max, &typecode2); } - leaf->container = container2; - leaf->typecode = typecode2; - - if (!container_nonzero_cardinality(leaf->container, leaf->typecode)) { - art_erase(&r->art, high48); - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); + if (container_nonzero_cardinality(container2, typecode2)) { + replace_container(r, leaf, container2, typecode2); + } else { + bool erased = art_erase(&r->art, high48, NULL); + assert(erased); + container_free(container2, typecode2); + remove_container(r, *leaf); } } @@ -24352,20 +25089,21 @@ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; roaring64_bitmap_t *r2 = roaring64_bitmap_create(); - art_iterator_t it = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r1->art, /*first=*/true); // Copy the containers before min unchanged. while (it.value != NULL && compare_high48(it.key, min_high48_key) < 0) { - leaf_t *leaf1 = (leaf_t *)it.value; - uint8_t typecode2 = leaf1->typecode; + leaf_t leaf1 = (leaf_t)*it.value; + uint8_t typecode2 = get_typecode(leaf1); container_t *container2 = get_copy_of_container( - leaf1->container, &typecode2, /*copy_on_write=*/false); - art_insert(&r2->art, it.key, - (art_val_t *)create_leaf(container2, typecode2)); + get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, it.key, (art_val_t)leaf2); art_iterator_next(&it); } - // Flip the range (including non-existent containers!) between min and max. + // Flip the range (including non-existent containers!) between min and + // max. 
for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; high48_bits++) { uint8_t current_high48_key[ART_KEY_BYTES]; @@ -24380,22 +25118,19 @@ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, max_container = max_low16 + 1; // Exclusive. } - leaf_t *leaf = roaring64_flip_leaf(r1, current_high48_key, - min_container, max_container); - if (leaf != NULL) { - art_insert(&r2->art, current_high48_key, (art_val_t *)leaf); - } + roaring64_flip_leaf(r1, r2, current_high48_key, min_container, + max_container); } // Copy the containers after max unchanged. - it = art_upper_bound(&r1->art, max_high48_key); + it = art_upper_bound((art_t *)&r1->art, max_high48_key); while (it.value != NULL) { - leaf_t *leaf1 = (leaf_t *)it.value; - uint8_t typecode2 = leaf1->typecode; + leaf_t leaf1 = (leaf_t)*it.value; + uint8_t typecode2 = get_typecode(leaf1); container_t *container2 = get_copy_of_container( - leaf1->container, &typecode2, /*copy_on_write=*/false); - art_insert(&r2->art, it.key, - (art_val_t *)create_leaf(container2, typecode2)); + get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, it.key, (art_val_t)leaf2); art_iterator_next(&it); } @@ -24420,7 +25155,8 @@ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; - // Flip the range (including non-existent containers!) between min and max. + // Flip the range (including non-existent containers!) between min and + // max. for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; high48_bits++) { uint8_t current_high48_key[ART_KEY_BYTES]; @@ -24442,7 +25178,7 @@ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, // Returns the number of distinct high 32-bit entries in the bitmap. 
static inline uint64_t count_high32(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint64_t high32_count = 0; uint32_t prev_high32 = 0; while (it.value != NULL) { @@ -24471,7 +25207,7 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { uint64_t high32_count; size += sizeof(high32_count); - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint32_t prev_high32 = 0; roaring_bitmap_t *bitmap32 = NULL; @@ -24480,7 +25216,8 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); if (bitmap32 == NULL || prev_high32 != current_high32) { if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the bucket. + // Write as uint32 the most significant 32 bits of the + // bucket. 
size += sizeof(prev_high32); // Write the 32-bit Roaring bitmaps representing the least @@ -24502,10 +25239,10 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { prev_high32 = current_high32; } - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high32 >> 16), leaf->container, - leaf->typecode); + (uint16_t)(current_high32 >> 16), get_container(r, leaf), + get_typecode(leaf)); art_iterator_next(&it); } @@ -24536,7 +25273,7 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, memcpy(buf, &high32_count, sizeof(high32_count)); buf += sizeof(high32_count); - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint32_t prev_high32 = 0; roaring_bitmap_t *bitmap32 = NULL; @@ -24546,7 +25283,8 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, uint32_t current_high32 = (uint32_t)(current_high48 >> 32); if (bitmap32 == NULL || prev_high32 != current_high32) { if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the bucket. + // Write as uint32 the most significant 32 bits of the + // bucket. 
memcpy(buf, &prev_high32, sizeof(prev_high32)); buf += sizeof(prev_high32); @@ -24569,10 +25307,10 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, prev_high32 = current_high32; } - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high48 >> 16), leaf->container, - leaf->typecode); + (uint16_t)(current_high48 >> 16), get_container(r, leaf), + get_typecode(leaf)); art_iterator_next(&it); } @@ -24623,8 +25361,8 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, buf += sizeof(high32); read_bytes += sizeof(high32); - // Read the 32-bit Roaring bitmaps representing the least significant - // bits of a set of elements. + // Read the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( buf, maxbytes - read_bytes); if (bitmap32_size == 0) { @@ -24679,8 +25417,8 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( } previous_high32 = high32; - // Read the 32-bit Roaring bitmaps representing the least significant - // bits of a set of elements. + // Read the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( buf, maxbytes - read_bytes); if (bitmap32_size == 0) { @@ -24722,16 +25460,364 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( return r; } +// Returns an "element count" for the given container. This has a different +// meaning for each container type, but the purpose is the minimal information +// required to serialize the container metadata. 
+static inline uint32_t container_get_element_count(const container_t *c, + uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + return ((bitset_container_t *)c)->cardinality; + } + case ARRAY_CONTAINER_TYPE: { + return ((array_container_t *)c)->cardinality; + } + case RUN_CONTAINER_TYPE: { + return ((run_container_t *)c)->n_runs; + } + default: { + assert(false); + roaring_unreachable; + return 0; + } + } +} + +static inline size_t container_get_frozen_size(const container_t *c, + uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + } + case ARRAY_CONTAINER_TYPE: { + return container_get_element_count(c, typecode) * sizeof(uint16_t); + } + case RUN_CONTAINER_TYPE: { + return container_get_element_count(c, typecode) * sizeof(rle16_t); + } + default: { + assert(false); + roaring_unreachable; + return 0; + } + } +} + +uint64_t align_size(uint64_t size, uint64_t alignment) { + return (size + alignment - 1) & ~(alignment - 1); +} + +size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r) { + if (!is_shrunken(r)) { + return 0; + } + // Flags. + uint64_t size = sizeof(r->flags); + // Container count. + size += sizeof(r->capacity); + // Container element counts. + size += r->capacity * sizeof(uint16_t); + // Total container sizes. + size += 3 * sizeof(uint64_t); + // ART (8 byte aligned). + size = align_size(size, 8); + size += art_size_in_bytes(&r->art); + + uint64_t total_sizes[4] = + CROARING_ZERO_INITIALIZER; // Indexed by typecode. + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + total_sizes[typecode] += + container_get_frozen_size(get_container(r, leaf), typecode); + art_iterator_next(&it); + } + // Containers (aligned). 
+ size = align_size(size, CROARING_BITSET_ALIGNMENT); + size += total_sizes[BITSET_CONTAINER_TYPE]; + size = align_size(size, alignof(rle16_t)); + size += total_sizes[ARRAY_CONTAINER_TYPE]; + size = align_size(size, alignof(uint16_t)); + size += total_sizes[RUN_CONTAINER_TYPE]; + // Padding to make overall size a multiple of required alignment. + size = align_size(size, CROARING_BITSET_ALIGNMENT); + return size; +} + +static inline void container_frozen_serialize(const container_t *container, + uint8_t typecode, + uint64_t **bitsets, + uint16_t **arrays, + rle16_t **runs) { + size_t size = container_get_frozen_size(container, typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *bitset = (bitset_container_t *)container; + memcpy(*bitsets, bitset->words, size); + *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; + break; + } + case ARRAY_CONTAINER_TYPE: { + array_container_t *array = (array_container_t *)container; + memcpy(*arrays, array->array, size); + *arrays += container_get_element_count(container, typecode); + break; + } + case RUN_CONTAINER_TYPE: { + run_container_t *run = (run_container_t *)container; + memcpy(*runs, run->runs, size); + *runs += container_get_element_count(container, typecode); + break; + } + default: { + assert(false); + roaring_unreachable; + } + } +} + +static inline char *pad_align(char *buf, const char *initial_buf, + size_t alignment) { + uint64_t buf_size = buf - initial_buf; + uint64_t pad = align_size(buf_size, alignment) - buf_size; + memset(buf, 0, pad); + return buf + pad; +} + +size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, + char *buf) { + if (buf == NULL) { + return 0; + } + if (!is_shrunken(r)) { + return 0; + } + const char *initial_buf = buf; + + // Flags. + memcpy(buf, &r->flags, sizeof(r->flags)); + buf += sizeof(r->flags); + + // Container count. + memcpy(buf, &r->capacity, sizeof(r->capacity)); + buf += sizeof(r->capacity); + + // Container element counts. 
+ uint64_t total_sizes[4] = + CROARING_ZERO_INITIALIZER; // Indexed by typecode. + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); + + uint32_t elem_count = container_get_element_count(container, typecode); + uint16_t compressed_elem_count = (uint16_t)(elem_count - 1); + memcpy(buf, &compressed_elem_count, sizeof(compressed_elem_count)); + buf += sizeof(compressed_elem_count); + + total_sizes[typecode] += container_get_frozen_size(container, typecode); + art_iterator_next(&it); + } + + // Total container sizes. + memcpy(buf, &(total_sizes[BITSET_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); + memcpy(buf, &(total_sizes[RUN_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); + memcpy(buf, &(total_sizes[ARRAY_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); + + // ART. + buf = pad_align(buf, initial_buf, 8); + buf += art_serialize(&r->art, buf); + + // Containers (aligned). + // Runs before arrays as run elements are larger than array elements and + // smaller than bitset elements. 
+ buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); + uint64_t *bitsets = (uint64_t *)buf; + buf += total_sizes[BITSET_CONTAINER_TYPE]; + buf = pad_align(buf, initial_buf, alignof(rle16_t)); + rle16_t *runs = (rle16_t *)buf; + buf += total_sizes[RUN_CONTAINER_TYPE]; + buf = pad_align(buf, initial_buf, alignof(uint16_t)); + uint16_t *arrays = (uint16_t *)buf; + buf += total_sizes[ARRAY_CONTAINER_TYPE]; + + it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); + container_frozen_serialize(container, typecode, &bitsets, &arrays, + &runs); + art_iterator_next(&it); + } + + // Padding to make overall size a multiple of required alignment. + buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); + + return buf - initial_buf; +} + +static container_t *container_frozen_view(uint8_t typecode, uint32_t elem_count, + const uint64_t **bitsets, + const uint16_t **arrays, + const rle16_t **runs) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *c = (bitset_container_t *)roaring_malloc( + sizeof(bitset_container_t)); + c->cardinality = elem_count; + c->words = (uint64_t *)*bitsets; + *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; + return (container_t *)c; + } + case ARRAY_CONTAINER_TYPE: { + array_container_t *c = + (array_container_t *)roaring_malloc(sizeof(array_container_t)); + c->cardinality = elem_count; + c->capacity = elem_count; + c->array = (uint16_t *)*arrays; + *arrays += elem_count; + return (container_t *)c; + } + case RUN_CONTAINER_TYPE: { + run_container_t *c = + (run_container_t *)roaring_malloc(sizeof(run_container_t)); + c->n_runs = elem_count; + c->capacity = elem_count; + c->runs = (rle16_t *)*runs; + *runs += elem_count; + return (container_t *)c; + } + default: { + assert(false); + roaring_unreachable; + return NULL; + } + } +} + +roaring64_bitmap_t 
*roaring64_bitmap_frozen_view(const char *buf, + size_t maxbytes) { + if (buf == NULL) { + return NULL; + } + + roaring64_bitmap_t *r = roaring64_bitmap_create(); + + // Flags. + if (maxbytes < sizeof(r->flags)) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&r->flags, buf, sizeof(r->flags)); + buf += sizeof(r->flags); + maxbytes -= sizeof(r->flags); + r->flags |= ROARING_FLAG_FROZEN; + + // Container count. + if (maxbytes < sizeof(r->capacity)) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&r->capacity, buf, sizeof(r->capacity)); + buf += sizeof(r->capacity); + maxbytes -= sizeof(r->capacity); + + r->containers = + (container_t *)roaring_malloc(r->capacity * sizeof(container_t *)); + + // Container element counts. + if (maxbytes < r->capacity * sizeof(uint16_t)) { + roaring64_bitmap_free(r); + return NULL; + } + const char *elem_counts = buf; + buf += r->capacity * sizeof(uint16_t); + maxbytes -= r->capacity * sizeof(uint16_t); + + // Total container sizes. + uint64_t total_sizes[4]; + if (maxbytes < sizeof(uint64_t) * 3) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&(total_sizes[BITSET_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); + memcpy(&(total_sizes[RUN_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); + memcpy(&(total_sizes[ARRAY_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); + + // ART (8 byte aligned). + buf = CROARING_ALIGN_BUF(buf, 8); + size_t art_size = art_frozen_view(buf, maxbytes, &r->art); + if (art_size == 0) { + roaring64_bitmap_free(r); + return NULL; + } + buf += art_size; + maxbytes -= art_size; + + // Containers (aligned). 
+ const char *before_containers = buf; + buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); + const uint64_t *bitsets = (const uint64_t *)buf; + buf += total_sizes[BITSET_CONTAINER_TYPE]; + buf = CROARING_ALIGN_BUF(buf, alignof(rle16_t)); + const rle16_t *runs = (const rle16_t *)buf; + buf += total_sizes[RUN_CONTAINER_TYPE]; + buf = CROARING_ALIGN_BUF(buf, alignof(uint16_t)); + const uint16_t *arrays = (const uint16_t *)buf; + buf += total_sizes[ARRAY_CONTAINER_TYPE]; + if (maxbytes < (uint64_t)(buf - before_containers)) { + roaring64_bitmap_free(r); + return NULL; + } + maxbytes -= buf - before_containers; + + // Deserialize in ART iteration order. + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + for (size_t i = 0; it.value != NULL; ++i) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + + uint16_t compressed_elem_count; + memcpy(&compressed_elem_count, elem_counts + (i * sizeof(uint16_t)), + sizeof(compressed_elem_count)); + uint32_t elem_count = (uint32_t)(compressed_elem_count) + 1; + + // The container index is unrelated to the iteration order. + uint64_t index = get_index(leaf); + r->containers[index] = container_frozen_view(typecode, elem_count, + &bitsets, &arrays, &runs); + + art_iterator_next(&it); + } + + // Padding to make overall size a multiple of required alignment. 
+ buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); + + return r; +} + bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, roaring_iterator64 iterator, void *ptr) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); while (it.value != NULL) { uint64_t high48 = combine_key(it.key, 0); uint64_t high32 = high48 & 0xFFFFFFFF00000000ULL; uint32_t low32 = high48; - leaf_t *leaf = (leaf_t *)it.value; - if (!container_iterate64(leaf->container, leaf->typecode, low32, - iterator, high32, ptr)) { + leaf_t leaf = (leaf_t)*it.value; + if (!container_iterate64(get_container(r, leaf), get_typecode(leaf), + low32, iterator, high32, ptr)) { return false; } art_iterator_next(&it); @@ -24791,12 +25877,12 @@ bool roaring64_iterator_advance(roaring64_iterator_t *it) { if (it->saturated_forward) { return (it->has_value = false); } - roaring64_iterator_init_at(it->parent, it, /*first=*/true); + roaring64_iterator_init_at(it->r, it, /*first=*/true); return it->has_value; } - leaf_t *leaf = (leaf_t *)it->art_it.value; + leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_next(leaf->container, leaf->typecode, + if (container_iterator_next(get_container(it->r, leaf), get_typecode(leaf), &it->container_it, &low16)) { it->value = it->high48 | low16; return (it->has_value = true); @@ -24814,12 +25900,12 @@ bool roaring64_iterator_previous(roaring64_iterator_t *it) { // Saturated backward. 
return (it->has_value = false); } - roaring64_iterator_init_at(it->parent, it, /*first=*/false); + roaring64_iterator_init_at(it->r, it, /*first=*/false); return it->has_value; } - leaf_t *leaf = (leaf_t *)it->art_it.value; + leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_prev(leaf->container, leaf->typecode, + if (container_iterator_prev(get_container(it->r, leaf), get_typecode(leaf), &it->container_it, &low16)) { it->value = it->high48 | low16; return (it->has_value = true); @@ -24837,8 +25923,8 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, uint16_t val_low16 = split_key(val, val_high48); if (!it->has_value || it->high48 != (val & 0xFFFFFFFFFFFF0000)) { // The ART iterator is before or after the high48 bits of `val` (or - // beyond the ART altogether), so we need to move to a leaf with a key - // equal or greater. + // beyond the ART altogether), so we need to move to a leaf with a + // key equal or greater. if (!art_iterator_lower_bound(&it->art_it, val_high48)) { // Only smaller keys found. it->saturated_forward = true; @@ -24849,13 +25935,13 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, } if (it->high48 == (val & 0xFFFFFFFFFFFF0000)) { - // We're at equal high bits, check if a suitable value can be found in - // this container. - leaf_t *leaf = (leaf_t *)it->art_it.value; + // We're at equal high bits, check if a suitable value can be found + // in this container. 
+ leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_lower_bound(leaf->container, leaf->typecode, - &it->container_it, &low16, - val_low16)) { + if (container_iterator_lower_bound( + get_container(it->r, leaf), get_typecode(leaf), + &it->container_it, &low16, val_low16)) { it->value = it->high48 | low16; return (it->has_value = true); } @@ -24866,8 +25952,8 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, } } - // We're at a leaf with high bits greater than `val`, so the first entry in - // this container is our result. + // We're at a leaf with high bits greater than `val`, so the first entry + // in this container is our result. return roaring64_iterator_init_at_leaf_first(it); } @@ -24876,15 +25962,15 @@ uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, uint64_t consumed = 0; while (it->has_value && consumed < count) { uint32_t container_consumed; - leaf_t *leaf = (leaf_t *)it->art_it.value; + leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = (uint16_t)it->value; uint32_t container_count = UINT32_MAX; if (count - consumed < (uint64_t)UINT32_MAX) { container_count = count - consumed; } bool has_value = container_iterator_read_into_uint64( - leaf->container, leaf->typecode, &it->container_it, it->high48, buf, - container_count, &container_consumed, &low16); + get_container(it->r, leaf), get_typecode(leaf), &it->container_it, + it->high48, buf, container_count, &container_consumed, &low16); consumed += container_consumed; buf += container_consumed; if (has_value) { diff --git a/pyroaring/roaring.h b/pyroaring/roaring.h index 7caeb1e..9965870 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2025-02-26T22:28:04Z +// Created by amalgamation.sh on 2025-02-28T15:35:21Z /* * The CRoaring project is under a dual license (Apache/MIT). 
@@ -59,11 +59,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.2.3" +#define ROARING_VERSION "4.3.0" enum { ROARING_VERSION_MAJOR = 4, - ROARING_VERSION_MINOR = 2, - ROARING_VERSION_REVISION = 3 + ROARING_VERSION_MINOR = 3, + ROARING_VERSION_REVISION = 0 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ @@ -119,20 +119,6 @@ enum { #define CROARING_REGULAR_VISUAL_STUDIO 0 #endif -#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) -#undef _POSIX_C_SOURCE -#endif - -#ifndef _POSIX_C_SOURCE -#define _POSIX_C_SOURCE 200809L -#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) - -#ifdef __illumos__ -#ifndef __EXTENSIONS__ -#define __EXTENSIONS__ -#endif // __EXTENSIONS__ -#endif - #include #include #include // will provide posix_memalign with _POSIX_C_SOURCE as defined above @@ -2379,7 +2365,7 @@ namespace api { #endif typedef struct roaring64_bitmap_s roaring64_bitmap_t; -typedef struct roaring64_leaf_s roaring64_leaf_t; +typedef uint64_t roaring64_leaf_t; typedef struct roaring64_iterator_s roaring64_iterator_t; /** @@ -2674,6 +2660,12 @@ uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r); */ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r); +/** + * Shrinks internal arrays to eliminate any unused capacity. Returns the number + * of bytes freed. + */ +size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r); + /** * (For advanced users.) * Collect statistics about the bitmap @@ -2926,6 +2918,53 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); +/** + * Returns the number of bytes required to serialize this bitmap in a "frozen" + * format. 
This is not compatible with any other serialization formats. + * + * `roaring64_bitmap_shrink_to_fit()` must be called before this method. + */ +size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r); + +/** + * Serializes the bitmap in a "frozen" format. The given buffer must be at least + * `roaring64_bitmap_frozen_size_in_bytes()` in size. Returns the number of + * bytes used for serialization. + * + * `roaring64_bitmap_shrink_to_fit()` must be called before this method. + * + * The frozen format is optimized for speed of (de)serialization, as well as + * allowing the user to create a bitmap based on a memory mapped file, which is + * possible because the format mimics the memory layout of the bitmap. + * + * Because the format mimics the memory layout of the bitmap, the format is not + * fixed across releases of Roaring Bitmaps, and may change in future releases. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. + */ +size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, + char *buf); + +/** + * Creates a readonly bitmap that is a view of the given buffer. The buffer + * must be created with `roaring64_bitmap_frozen_serialize()`, and must be + * aligned by 64 bytes. + * + * Returns NULL if deserialization fails. + * + * The returned bitmap must only be used in a readonly manner. The bitmap must + * be freed using `roaring64_bitmap_free()` as normal. The backing buffer must + * only be freed after the bitmap. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. + */ +roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, + size_t maxbytes); + /** * Iterate over the bitmap elements. 
The function `iterator` is called once for * all the values with `ptr` (can be NULL) as the second parameter of each call. From d3658bf1861b8992ca6576fb465198e59dfb3dea Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Sun, 29 Jun 2025 22:36:46 +0200 Subject: [PATCH 11/11] Version v4.2.3 --- pyroaring/croaring_version.pxi | 2 +- pyroaring/roaring.c | 3146 +++++++++++--------------------- pyroaring/roaring.h | 77 +- 3 files changed, 1050 insertions(+), 2175 deletions(-) diff --git a/pyroaring/croaring_version.pxi b/pyroaring/croaring_version.pxi index 007b74b..0e32cf3 100644 --- a/pyroaring/croaring_version.pxi +++ b/pyroaring/croaring_version.pxi @@ -1 +1 @@ -__croaring_version__ = "v4.3.0" \ No newline at end of file +__croaring_version__ = "v4.2.3" \ No newline at end of file diff --git a/pyroaring/roaring.c b/pyroaring/roaring.c index 2e76a6d..07a5f2b 100644 --- a/pyroaring/roaring.c +++ b/pyroaring/roaring.c @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2025-02-28T15:35:21Z +// Created by amalgamation.sh on 2025-02-26T22:28:04Z /* * The CRoaring project is under a dual license (Apache/MIT). @@ -6758,8 +6758,8 @@ void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); * chunks _differ_. This means that if there are two entries with different * high 48 bits, then there is only one inner node containing the common key * prefix, and two leaves. - * * Mostly pointer-free: nodes are referred to by index rather than pointer, - * so that the structure can be deserialized with a backing buffer. + * * Intrusive leaves: the leaf struct is included in user values. This removes + * a layer of indirection. */ // Fixed length of keys in the ART. All keys are assumed to be of this length. @@ -6772,33 +6772,25 @@ namespace internal { #endif typedef uint8_t art_key_chunk_t; - -// Internal node reference type. 
Contains the node typecode in the low 8 bits, -// and the index in the relevant node array in the high 48 bits. Has a value of -// CROARING_ART_NULL_REF when pointing to a non-existent node. -typedef uint64_t art_ref_t; - -typedef void art_node_t; +typedef struct art_node_s art_node_t; /** - * The ART is empty when root is a null ref. - * - * Each node type has its own dynamic array of node structs, indexed by - * art_ref_t. The arrays are expanded as needed, and shrink only when - * `shrink_to_fit` is called. + * Wrapper to allow an empty tree. */ typedef struct art_s { - art_ref_t root; - - // Indexed by node typecode, thus 1 larger than they need to be for - // convenience. `first_free` indicates the index where the first free node - // lives, which may be equal to the capacity. - uint64_t first_free[6]; - uint64_t capacities[6]; - art_node_t *nodes[6]; + art_node_t *root; } art_t; -typedef uint64_t art_val_t; +/** + * Values inserted into the tree have to be cast-able to art_val_t. This + * improves performance by reducing indirection. + * + * NOTE: Value pointers must be unique! This is because each value struct + * contains the key corresponding to the value. + */ +typedef struct art_val_s { + art_key_chunk_t key[ART_KEY_BYTES]; +} art_val_t; /** * Compares two keys, returns their relative order: @@ -6810,21 +6802,14 @@ int art_compare_keys(const art_key_chunk_t key1[], const art_key_chunk_t key2[]); /** - * Initializes the ART. - */ -void art_init_cleared(art_t *art); - -/** - * Inserts the given key and value. Returns a pointer to the value inserted, - * valid as long as the ART is not modified. + * Inserts the given key and value. */ -art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val); +void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val); /** - * Returns true if a value was erased. Sets `*erased_val` to the value erased, - * if any. + * Returns the value erased, NULL if not found. 
*/ -bool art_erase(art_t *art, const art_key_chunk_t *key, art_val_t *erased_val); +art_val_t *art_erase(art_t *art, const art_key_chunk_t *key); /** * Returns the value associated with the given key, NULL if not found. @@ -6837,39 +6822,42 @@ art_val_t *art_find(const art_t *art, const art_key_chunk_t *key); bool art_is_empty(const art_t *art); /** - * Frees the contents of the ART. Should not be called when using - * `art_deserialize_frozen_safe`. + * Frees the nodes of the ART except the values, which the user is expected to + * free. */ void art_free(art_t *art); +/** + * Returns the size in bytes of the ART. Includes size of pointers to values, + * but not the values themselves. + */ +size_t art_size_in_bytes(const art_t *art); + /** * Prints the ART using printf, useful for debugging. */ void art_printf(const art_t *art); /** - * Callback for validating the value stored in a leaf. `context` is a - * user-provided value passed to the callback without modification. + * Callback for validating the value stored in a leaf. * * Should return true if the value is valid, false otherwise * If false is returned, `*reason` should be set to a static string describing * the reason for the failure. */ -typedef bool (*art_validate_cb_t)(const art_val_t val, const char **reason, - void *context); +typedef bool (*art_validate_cb_t)(const art_val_t *val, const char **reason); /** - * Validate the ART tree, ensuring it is internally consistent. `context` is a - * user-provided value passed to the callback without modification. + * Validate the ART tree, ensuring it is internally consistent. */ bool art_internal_validate(const art_t *art, const char **reason, - art_validate_cb_t validate_cb, void *context); + art_validate_cb_t validate_cb); /** * ART-internal iterator bookkeeping. Users should treat this as an opaque type. 
*/ typedef struct art_iterator_frame_s { - art_ref_t ref; + art_node_t *node; uint8_t index_in_node; } art_iterator_frame_t; @@ -6881,8 +6869,6 @@ typedef struct art_iterator_s { art_key_chunk_t key[ART_KEY_BYTES]; art_val_t *value; - art_t *art; - uint8_t depth; // Key depth uint8_t frame; // Node depth @@ -6896,19 +6882,19 @@ typedef struct art_iterator_s { * depending on `first`. The iterator is not valid if there are no entries in * the ART. */ -art_iterator_t art_init_iterator(art_t *art, bool first); +art_iterator_t art_init_iterator(const art_t *art, bool first); /** * Returns an initialized iterator positioned at a key equal to or greater than * the given key, if it exists. */ -art_iterator_t art_lower_bound(art_t *art, const art_key_chunk_t *key); +art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key); /** * Returns an initialized iterator positioned at a key greater than the given * key, if it exists. */ -art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key); +art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key); /** * The following iterator movement functions return true if a new entry was @@ -6927,49 +6913,14 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, /** * Insert the value and positions the iterator at the key. */ -void art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, - art_val_t val); +void art_iterator_insert(art_t *art, art_iterator_t *iterator, + const art_key_chunk_t *key, art_val_t *val); /** * Erase the value pointed at by the iterator. Moves the iterator to the next - * leaf. - * Returns true if a value was erased. Sets `*erased_val` to the value erased, - * if any. - */ -bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val); - -/** - * Shrinks the internal arrays in the ART to remove any unused elements. Returns - * the number of bytes freed. 
- */ -size_t art_shrink_to_fit(art_t *art); - -/** - * Returns true if the ART has no unused elements. - */ -bool art_is_shrunken(const art_t *art); - -/** - * Returns the serialized size in bytes. - * Requires `art_shrink_to_fit` to be called first. - */ -size_t art_size_in_bytes(const art_t *art); - -/** - * Serializes the ART and returns the number of bytes written. Returns 0 on - * error. Requires `art_shrink_to_fit` to be called first. - */ -size_t art_serialize(const art_t *art, char *buf); - -/** - * Deserializes the ART from a serialized buffer, reading up to `maxbytes` - * bytes. Returns 0 on error. Requires `buf` to be 8 byte aligned. - * - * An ART deserialized in this way should only be used in a readonly context.The - * underlying buffer must not be freed before the ART. `art_free` should not be - * called on the ART deserialized in this way. + * leaf. Returns the value erased or NULL if nothing was erased. */ -size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art); +art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator); #ifdef __cplusplus } // extern "C" @@ -9151,36 +9102,37 @@ CROARING_UNTARGET_AVX512 #endif/* end file src/array_util.c */ /* begin file src/art/art.c */ #include -#include #include #include -#define CROARING_ART_NULL_REF 0 - -#define CROARING_ART_LEAF_TYPE 1 -#define CROARING_ART_NODE4_TYPE 2 -#define CROARING_ART_NODE16_TYPE 3 -#define CROARING_ART_NODE48_TYPE 4 -#define CROARING_ART_NODE256_TYPE 5 - -#define CROARING_ART_MIN_TYPE CROARING_ART_LEAF_TYPE -#define CROARING_ART_MAX_TYPE CROARING_ART_NODE256_TYPE +#define CROARING_ART_NODE4_TYPE 0 +#define CROARING_ART_NODE16_TYPE 1 +#define CROARING_ART_NODE48_TYPE 2 +#define CROARING_ART_NODE256_TYPE 3 +#define CROARING_ART_NUM_TYPES 4 // Node48 placeholder value to indicate no child is present at this key index. 
#define CROARING_ART_NODE48_EMPTY_VAL 48 -#define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) -#define CROARING_ART_ALIGN_BUF(buf, alignment) \ - (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ - (ptrdiff_t)(~((alignment)-1))) +// We use the least significant bit of node pointers to indicate whether a node +// is a leaf or an inner node. This is never surfaced to the user. +// +// Using pointer tagging to indicate leaves not only saves a bit of memory by +// sparing the typecode, but also allows us to use an intrusive leaf struct. +// Using an intrusive leaf struct leaves leaf allocation up to the user. Upon +// deallocation of the ART, we know not to free the leaves without having to +// dereference the leaf pointers. +// +// All internal operations on leaves should use CROARING_CAST_LEAF before using +// the leaf. The only places that use CROARING_SET_LEAF are locations where a +// field is directly assigned to a leaf pointer. After using CROARING_SET_LEAF, +// the leaf should be treated as a node of unknown type. +#define CROARING_IS_LEAF(p) (((uintptr_t)(p) & 1)) +#define CROARING_SET_LEAF(p) ((art_node_t *)((uintptr_t)(p) | 1)) +#define CROARING_CAST_LEAF(p) ((art_leaf_t *)((void *)((uintptr_t)(p) & ~1))) -// Gives the byte difference needed to align the current buffer to the -// alignment, relative to the start of the buffer. 
-#define CROARING_ART_ALIGN_SIZE_RELATIVE(buf_cur, buf_start, alignment) \ - ((((ptrdiff_t)((buf_cur) - (buf_start)) + ((alignment)-1)) & \ - (ptrdiff_t)(~((alignment)-1))) - \ - (ptrdiff_t)((buf_cur) - (buf_start))) +#define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) #ifdef __cplusplus extern "C" { @@ -9190,20 +9142,30 @@ namespace internal { typedef uint8_t art_typecode_t; -typedef struct art_leaf_s { - union { - struct { - art_key_chunk_t key[ART_KEY_BYTES]; - art_val_t val; - }; - uint64_t next_free; - }; -} art_leaf_t; +// Aliasing with a "leaf" naming so that its purpose is clearer in the context +// of the trie internals. +typedef art_val_t art_leaf_t; + +typedef struct art_internal_validate_s { + const char **reason; + art_validate_cb_t validate_cb; + + int depth; + art_key_chunk_t current_key[ART_KEY_BYTES]; +} art_internal_validate_t; + +// Set the reason message, and return false for convenience. +static inline bool art_validate_fail(const art_internal_validate_t *validate, + const char *msg) { + *validate->reason = msg; + return false; +} // Inner node, with prefix. // // We use a fixed-length array as a pointer would be larger than the array. typedef struct art_inner_node_s { + art_typecode_t typecode; uint8_t prefix_size; uint8_t prefix[ART_KEY_BYTES - 1]; } art_inner_node_t; @@ -9212,232 +9174,119 @@ typedef struct art_inner_node_s { // Node4: key[i] corresponds with children[i]. Keys are sorted. typedef struct art_node4_s { - union { - struct { - art_inner_node_t base; - uint8_t count; - uint8_t keys[4]; - art_ref_t children[4]; - }; - uint64_t next_free; - }; + art_inner_node_t base; + uint8_t count; + uint8_t keys[4]; + art_node_t *children[4]; } art_node4_t; // Node16: key[i] corresponds with children[i]. Keys are sorted. 
typedef struct art_node16_s { - union { - struct { - art_inner_node_t base; - uint8_t count; - uint8_t keys[16]; - art_ref_t children[16]; - }; - uint64_t next_free; - }; + art_inner_node_t base; + uint8_t count; + uint8_t keys[16]; + art_node_t *children[16]; } art_node16_t; // Node48: key[i] corresponds with children[key[i]] if key[i] != // CROARING_ART_NODE48_EMPTY_VAL. Keys are naturally sorted due to direct // indexing. typedef struct art_node48_s { - union { - struct { - art_inner_node_t base; - uint8_t count; - // Bitset where the ith bit is set if children[i] is available - // Because there are at most 48 children, only the bottom 48 bits - // are used. - uint64_t available_children; - uint8_t keys[256]; - art_ref_t children[48]; - }; - uint64_t next_free; - }; + art_inner_node_t base; + uint8_t count; + // Bitset where the ith bit is set if children[i] is available + // Because there are at most 48 children, only the bottom 48 bits are used. + uint64_t available_children; + uint8_t keys[256]; + art_node_t *children[48]; } art_node48_t; // Node256: children[i] is directly indexed by key chunk. A child is present if // children[i] != NULL. typedef struct art_node256_s { - union { - struct { - art_inner_node_t base; - uint16_t count; - art_ref_t children[256]; - }; - uint64_t next_free; - }; + art_inner_node_t base; + uint16_t count; + art_node_t *children[256]; } art_node256_t; -// Size of each node type, indexed by typecode for convenience. -static const size_t ART_NODE_SIZES[] = { - 0, - sizeof(art_leaf_t), - sizeof(art_node4_t), - sizeof(art_node16_t), - sizeof(art_node48_t), - sizeof(art_node256_t), -}; - // Helper struct to refer to a child within a node at a specific index. 
typedef struct art_indexed_child_s { - art_ref_t child; + art_node_t *child; uint8_t index; art_key_chunk_t key_chunk; } art_indexed_child_t; -typedef struct art_internal_validate_s { - const char **reason; - art_validate_cb_t validate_cb; - void *context; - - int depth; - art_key_chunk_t current_key[ART_KEY_BYTES]; -} art_internal_validate_t; - -// Set the reason message, and return false for convenience. -static inline bool art_validate_fail(const art_internal_validate_t *validate, - const char *msg) { - *validate->reason = msg; - return false; -} - -static inline art_ref_t art_to_ref(uint64_t index, art_typecode_t typecode) { - return ((art_ref_t)index) << 16 | typecode; -} - -static inline uint64_t art_ref_index(art_ref_t ref) { - return ((uint64_t)ref) >> 16; -} - -static inline art_typecode_t art_ref_typecode(art_ref_t ref) { - return (art_typecode_t)ref; -} - -/** - * Gets a pointer to a node from its reference. The pointer only remains valid - * under non-mutating operations. If any mutating operations occur, this - * function should be called again to get a valid pointer to the node. 
- */ -static art_node_t *art_deref(const art_t *art, art_ref_t ref) { - assert(ref != CROARING_ART_NULL_REF); - art_typecode_t typecode = art_ref_typecode(ref); - return (art_node_t *)((char *)art->nodes[typecode] + - art_ref_index(ref) * ART_NODE_SIZES[typecode]); -} - -static inline art_node_t *art_get_node(const art_t *art, uint64_t index, - art_typecode_t typecode) { - return art_deref(art, art_to_ref(index, typecode)); -} - -static inline uint64_t art_get_index(const art_t *art, const art_node_t *node, - art_typecode_t typecode) { - art_node_t *nodes = art->nodes[typecode]; - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return (art_leaf_t *)node - (art_leaf_t *)nodes; - case CROARING_ART_NODE4_TYPE: - return (art_node4_t *)node - (art_node4_t *)nodes; - case CROARING_ART_NODE16_TYPE: - return (art_node16_t *)node - (art_node16_t *)nodes; - case CROARING_ART_NODE48_TYPE: - return (art_node48_t *)node - (art_node48_t *)nodes; - case CROARING_ART_NODE256_TYPE: - return (art_node256_t *)node - (art_node256_t *)nodes; - default: - assert(false); - return 0; - } +static inline bool art_is_leaf(const art_node_t *node) { + return CROARING_IS_LEAF(node); } -/** - * Creates a reference from a pointer. 
- */ -static inline art_ref_t art_get_ref(const art_t *art, const art_node_t *node, - art_typecode_t typecode) { - return art_to_ref(art_get_index(art, node, typecode), typecode); +static void art_leaf_populate(art_leaf_t *leaf, const art_key_chunk_t key[]) { + memcpy(leaf->key, key, ART_KEY_BYTES); } -static inline bool art_is_leaf(art_ref_t ref) { - return art_ref_typecode(ref) == CROARING_ART_LEAF_TYPE; +static inline uint8_t art_get_type(const art_inner_node_t *node) { + return node->typecode; } static inline void art_init_inner_node(art_inner_node_t *node, + art_typecode_t typecode, const art_key_chunk_t prefix[], uint8_t prefix_size) { + node->typecode = typecode; node->prefix_size = prefix_size; memcpy(node->prefix, prefix, prefix_size * sizeof(art_key_chunk_t)); } -static void art_node_free(art_t *art, art_node_t *node, - art_typecode_t typecode); - -static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode); +static void art_free_node(art_node_t *node); // ===================== Start of node-specific functions ====================== -static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], - art_val_t val) { - uint64_t index = art_allocate_index(art, CROARING_ART_LEAF_TYPE); - art_leaf_t *leaf = - ((art_leaf_t *)art->nodes[CROARING_ART_LEAF_TYPE]) + index; - memcpy(leaf->key, key, ART_KEY_BYTES); - leaf->val = val; - return art_to_ref(index, CROARING_ART_LEAF_TYPE); -} - -static inline void art_leaf_clear(art_leaf_t *leaf, art_ref_t next_free) { - leaf->next_free = next_free; -} - -static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], +static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node16_t *art_node16_create(art_t *art, - const art_key_chunk_t prefix[], +static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node48_t *art_node48_create(art_t *art, - const art_key_chunk_t prefix[], +static 
art_node48_t *art_node48_create(const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node256_t *art_node256_create(art_t *art, - const art_key_chunk_t prefix[], +static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, - art_ref_t child, uint8_t key); -static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, - art_ref_t child, uint8_t key); -static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, - art_ref_t child, uint8_t key); -static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, - art_ref_t child, uint8_t key); +static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, + uint8_t key); +static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, + uint8_t key); +static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, + uint8_t key); +static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, + uint8_t key); -static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], +static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { - uint64_t index = art_allocate_index(art, CROARING_ART_NODE4_TYPE); - art_node4_t *node = - ((art_node4_t *)art->nodes[CROARING_ART_NODE4_TYPE]) + index; - art_init_inner_node(&node->base, prefix, prefix_size); + art_node4_t *node = (art_node4_t *)roaring_malloc(sizeof(art_node4_t)); + art_init_inner_node(&node->base, CROARING_ART_NODE4_TYPE, prefix, + prefix_size); node->count = 0; return node; } -static inline void art_node4_clear(art_node4_t *node, art_ref_t next_free) { - node->count = 0; - node->next_free = next_free; +static void art_free_node4(art_node4_t *node) { + for (size_t i = 0; i < node->count; ++i) { + art_free_node(node->children[i]); + } + roaring_free(node); } -static inline art_ref_t art_node4_find_child(const art_node4_t *node, - art_key_chunk_t 
key) { +static inline art_node_t *art_node4_find_child(const art_node4_t *node, + art_key_chunk_t key) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key) { return node->children[i]; } } - return CROARING_ART_NULL_REF; + return NULL; } -static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, - art_ref_t child, uint8_t key) { +static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, + uint8_t key) { if (node->count < 4) { size_t idx = 0; for (; idx < node->count; ++idx) { @@ -9450,26 +9299,26 @@ static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, memmove(node->keys + idx + 1, node->keys + idx, after * sizeof(art_key_chunk_t)); memmove(node->children + idx + 1, node->children + idx, - after * sizeof(art_ref_t)); + after * sizeof(art_node_t *)); node->children[idx] = child; node->keys[idx] = key; node->count++; - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + return (art_node_t *)node; } art_node16_t *new_node = - art_node16_create(art, node->base.prefix, node->base.prefix_size); + art_node16_create(node->base.prefix, node->base.prefix_size); // Instead of calling insert, this could be specialized to 2x memcpy and // setting the count. 
for (size_t i = 0; i < 4; ++i) { - art_node16_insert(art, new_node, node->children[i], node->keys[i]); + art_node16_insert(new_node, node->children[i], node->keys[i]); } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); - return art_node16_insert(art, new_node, child, key); + roaring_free(node); + return art_node16_insert(new_node, child, key); } -static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, - art_key_chunk_t key_chunk) { +static inline art_node_t *art_node4_erase(art_node4_t *node, + art_key_chunk_t key_chunk) { int idx = -1; for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { @@ -9477,18 +9326,17 @@ static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, } } if (idx == -1) { - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + return (art_node_t *)node; } if (node->count == 2) { // Only one child remains after erasing, so compress the path by // removing this node. uint8_t other_idx = idx ^ 1; - art_ref_t remaining_child = node->children[other_idx]; + art_node_t *remaining_child = node->children[other_idx]; art_key_chunk_t remaining_child_key = node->keys[other_idx]; if (!art_is_leaf(remaining_child)) { // Correct the prefix of the child node. - art_inner_node_t *inner_node = - (art_inner_node_t *)art_deref(art, remaining_child); + art_inner_node_t *inner_node = (art_inner_node_t *)remaining_child; memmove(inner_node->prefix + node->base.prefix_size + 1, inner_node->prefix, inner_node->prefix_size); memcpy(inner_node->prefix, node->base.prefix, @@ -9496,7 +9344,7 @@ static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, inner_node->prefix[node->base.prefix_size] = remaining_child_key; inner_node->prefix_size += node->base.prefix_size + 1; } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + roaring_free(node); return remaining_child; } // Shift other keys to maintain sorted order. 
@@ -9504,14 +9352,14 @@ static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, memmove(node->keys + idx, node->keys + idx + 1, after_next * sizeof(art_key_chunk_t)); memmove(node->children + idx, node->children + idx + 1, - after_next * sizeof(art_ref_t)); + after_next * sizeof(art_node_t *)); node->count--; - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + return (art_node_t *)node; } static inline void art_node4_replace(art_node4_t *node, art_key_chunk_t key_chunk, - art_ref_t new_child) { + art_node_t *new_child) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { node->children[i] = new_child; @@ -9525,7 +9373,7 @@ static inline art_indexed_child_t art_node4_next_child(const art_node4_t *node, art_indexed_child_t indexed_child; index++; if (index >= node->count) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9542,7 +9390,7 @@ static inline art_indexed_child_t art_node4_prev_child(const art_node4_t *node, index--; art_indexed_child_t indexed_child; if (index < 0) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9555,7 +9403,7 @@ static inline art_indexed_child_t art_node4_child_at(const art_node4_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= node->count) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9575,15 +9423,14 @@ static inline art_indexed_child_t art_node4_lower_bound( return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } -static bool art_internal_validate_at(const art_t *art, art_ref_t ref, +static bool art_internal_validate_at(const art_node_t *node, art_internal_validate_t validator); -static bool 
art_node4_internal_validate(const art_t *art, - const art_node4_t *node, +static bool art_node4_internal_validate(const art_node4_t *node, art_internal_validate_t validator) { if (node->count == 0) { return art_validate_fail(&validator, "Node4 has no children"); @@ -9610,41 +9457,41 @@ static bool art_node4_internal_validate(const art_t *art, } } validator.current_key[validator.depth - 1] = node->keys[i]; - if (!art_internal_validate_at(art, node->children[i], validator)) { + if (!art_internal_validate_at(node->children[i], validator)) { return false; } } return true; } -static art_node16_t *art_node16_create(art_t *art, - const art_key_chunk_t prefix[], +static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { - uint64_t index = art_allocate_index(art, CROARING_ART_NODE16_TYPE); - art_node16_t *node = - ((art_node16_t *)art->nodes[CROARING_ART_NODE16_TYPE]) + index; - art_init_inner_node(&node->base, prefix, prefix_size); + art_node16_t *node = (art_node16_t *)roaring_malloc(sizeof(art_node16_t)); + art_init_inner_node(&node->base, CROARING_ART_NODE16_TYPE, prefix, + prefix_size); node->count = 0; return node; } -static inline void art_node16_clear(art_node16_t *node, art_ref_t next_free) { - node->count = 0; - node->next_free = next_free; +static void art_free_node16(art_node16_t *node) { + for (size_t i = 0; i < node->count; ++i) { + art_free_node(node->children[i]); + } + roaring_free(node); } -static inline art_ref_t art_node16_find_child(const art_node16_t *node, - art_key_chunk_t key) { +static inline art_node_t *art_node16_find_child(const art_node16_t *node, + art_key_chunk_t key) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key) { return node->children[i]; } } - return CROARING_ART_NULL_REF; + return NULL; } -static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, - art_ref_t child, uint8_t key) { +static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, + uint8_t key) 
{ if (node->count < 16) { size_t idx = 0; for (; idx < node->count; ++idx) { @@ -9657,24 +9504,24 @@ static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, memmove(node->keys + idx + 1, node->keys + idx, after * sizeof(art_key_chunk_t)); memmove(node->children + idx + 1, node->children + idx, - after * sizeof(art_ref_t)); + after * sizeof(art_node_t *)); node->children[idx] = child; node->keys[idx] = key; node->count++; - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); + return (art_node_t *)node; } art_node48_t *new_node = - art_node48_create(art, node->base.prefix, node->base.prefix_size); + art_node48_create(node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 16; ++i) { - art_node48_insert(art, new_node, node->children[i], node->keys[i]); + art_node48_insert(new_node, node->children[i], node->keys[i]); } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); - return art_node48_insert(art, new_node, child, key); + roaring_free(node); + return art_node48_insert(new_node, child, key); } -static inline art_ref_t art_node16_erase(art_t *art, art_node16_t *node, - uint8_t key_chunk) { +static inline art_node_t *art_node16_erase(art_node16_t *node, + uint8_t key_chunk) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { // Shift other keys to maintain sorted order. 
@@ -9682,28 +9529,28 @@ static inline art_ref_t art_node16_erase(art_t *art, art_node16_t *node, memmove(node->keys + i, node->keys + i + 1, after_next * sizeof(key_chunk)); memmove(node->children + i, node->children + i + 1, - after_next * sizeof(art_ref_t)); + after_next * sizeof(art_node_t *)); node->count--; break; } } if (node->count > 4) { - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); + return (art_node_t *)node; } art_node4_t *new_node = - art_node4_create(art, node->base.prefix, node->base.prefix_size); + art_node4_create(node->base.prefix, node->base.prefix_size); // Instead of calling insert, this could be specialized to 2x memcpy and // setting the count. for (size_t i = 0; i < 4; ++i) { - art_node4_insert(art, new_node, node->children[i], node->keys[i]); + art_node4_insert(new_node, node->children[i], node->keys[i]); } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); - return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE4_TYPE); + roaring_free(node); + return (art_node_t *)new_node; } static inline void art_node16_replace(art_node16_t *node, art_key_chunk_t key_chunk, - art_ref_t new_child) { + art_node_t *new_child) { for (uint8_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { node->children[i] = new_child; @@ -9717,7 +9564,7 @@ static inline art_indexed_child_t art_node16_next_child( art_indexed_child_t indexed_child; index++; if (index >= node->count) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9734,7 +9581,7 @@ static inline art_indexed_child_t art_node16_prev_child( index--; art_indexed_child_t indexed_child; if (index < 0) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9747,7 +9594,7 @@ static inline art_indexed_child_t art_node16_child_at(const art_node16_t *node, int index) { 
art_indexed_child_t indexed_child; if (index < 0 || index >= node->count) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9767,12 +9614,11 @@ static inline art_indexed_child_t art_node16_lower_bound( return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } -static bool art_node16_internal_validate(const art_t *art, - const art_node16_t *node, +static bool art_node16_internal_validate(const art_node16_t *node, art_internal_validate_t validator) { if (node->count <= 4) { return art_validate_fail(&validator, "Node16 has too few children"); @@ -9795,20 +9641,18 @@ static bool art_node16_internal_validate(const art_t *art, } } validator.current_key[validator.depth - 1] = node->keys[i]; - if (!art_internal_validate_at(art, node->children[i], validator)) { + if (!art_internal_validate_at(node->children[i], validator)) { return false; } } return true; } -static art_node48_t *art_node48_create(art_t *art, - const art_key_chunk_t prefix[], +static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { - uint64_t index = art_allocate_index(art, CROARING_ART_NODE48_TYPE); - art_node48_t *node = - ((art_node48_t *)art->nodes[CROARING_ART_NODE48_TYPE]) + index; - art_init_inner_node(&node->base, prefix, prefix_size); + art_node48_t *node = (art_node48_t *)roaring_malloc(sizeof(art_node48_t)); + art_init_inner_node(&node->base, CROARING_ART_NODE48_TYPE, prefix, + prefix_size); node->count = 0; node->available_children = CROARING_NODE48_AVAILABLE_CHILDREN_MASK; for (size_t i = 0; i < 256; ++i) { @@ -9817,22 +9661,29 @@ static art_node48_t *art_node48_create(art_t *art, return node; } -static inline void art_node48_clear(art_node48_t *node, art_ref_t next_free) { - node->count = 0; - node->next_free = next_free; +static void art_free_node48(art_node48_t *node) { + uint64_t used_children = + 
(node->available_children) ^ CROARING_NODE48_AVAILABLE_CHILDREN_MASK; + while (used_children != 0) { + // We checked above that used_children is not zero + uint8_t child_idx = roaring_trailing_zeroes(used_children); + art_free_node(node->children[child_idx]); + used_children &= ~(UINT64_C(1) << child_idx); + } + roaring_free(node); } -static inline art_ref_t art_node48_find_child(const art_node48_t *node, - art_key_chunk_t key) { +static inline art_node_t *art_node48_find_child(const art_node48_t *node, + art_key_chunk_t key) { uint8_t val_idx = node->keys[key]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { return node->children[val_idx]; } - return CROARING_ART_NULL_REF; + return NULL; } -static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, - art_ref_t child, uint8_t key) { +static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, + uint8_t key) { if (node->count < 48) { // node->available_children is only zero when the node is full (count == // 48), we just checked count < 48 @@ -9841,48 +9692,48 @@ static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, node->children[val_idx] = child; node->count++; node->available_children &= ~(UINT64_C(1) << val_idx); - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return (art_node_t *)node; } art_node256_t *new_node = - art_node256_create(art, node->base.prefix, node->base.prefix_size); + art_node256_create(node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { uint8_t val_idx = node->keys[i]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { - art_node256_insert(art, new_node, node->children[val_idx], i); + art_node256_insert(new_node, node->children[val_idx], i); } } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); - return art_node256_insert(art, new_node, child, key); + roaring_free(node); + return art_node256_insert(new_node, child, key); } -static inline art_ref_t art_node48_erase(art_t *art, art_node48_t 
*node, - uint8_t key_chunk) { +static inline art_node_t *art_node48_erase(art_node48_t *node, + uint8_t key_chunk) { uint8_t val_idx = node->keys[key_chunk]; if (val_idx == CROARING_ART_NODE48_EMPTY_VAL) { - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return (art_node_t *)node; } node->keys[key_chunk] = CROARING_ART_NODE48_EMPTY_VAL; node->available_children |= UINT64_C(1) << val_idx; node->count--; if (node->count > 16) { - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return (art_node_t *)node; } art_node16_t *new_node = - art_node16_create(art, node->base.prefix, node->base.prefix_size); + art_node16_create(node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { val_idx = node->keys[i]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { - art_node16_insert(art, new_node, node->children[val_idx], i); + art_node16_insert(new_node, node->children[val_idx], i); } } - art_node_free(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); - return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE16_TYPE); + roaring_free(node); + return (art_node_t *)new_node; } static inline void art_node48_replace(art_node48_t *node, art_key_chunk_t key_chunk, - art_ref_t new_child) { + art_node_t *new_child) { uint8_t val_idx = node->keys[key_chunk]; assert(val_idx != CROARING_ART_NODE48_EMPTY_VAL); node->children[val_idx] = new_child; @@ -9900,7 +9751,7 @@ static inline art_indexed_child_t art_node48_next_child( return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } @@ -9919,7 +9770,7 @@ static inline art_indexed_child_t art_node48_prev_child( return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } @@ -9927,7 +9778,7 @@ static inline art_indexed_child_t art_node48_child_at(const art_node48_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || 
index >= 256) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -9947,12 +9798,11 @@ static inline art_indexed_child_t art_node48_lower_bound( return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } -static bool art_node48_internal_validate(const art_t *art, - const art_node48_t *node, +static bool art_node48_internal_validate(const art_node48_t *node, art_internal_validate_t validator) { if (node->count <= 16) { return art_validate_fail(&validator, "Node48 has too few children"); @@ -9969,8 +9819,8 @@ static bool art_node48_internal_validate(const art_t *art, &validator, "Node48 keys point to the same child index"); } - art_ref_t child = node->children[child_idx]; - if (child == CROARING_ART_NULL_REF) { + art_node_t *child = node->children[child_idx]; + if (child == NULL) { return art_validate_fail(&validator, "Node48 has a NULL child"); } used_children |= UINT64_C(1) << child_idx; @@ -10002,7 +9852,7 @@ static bool art_node48_internal_validate(const art_t *art, for (int i = 0; i < 256; ++i) { if (node->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { validator.current_key[validator.depth - 1] = i; - if (!art_internal_validate_at(art, node->children[node->keys[i]], + if (!art_internal_validate_at(node->children[node->keys[i]], validator)) { return false; } @@ -10011,59 +9861,62 @@ static bool art_node48_internal_validate(const art_t *art, return true; } -static art_node256_t *art_node256_create(art_t *art, - const art_key_chunk_t prefix[], +static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { - uint64_t index = art_allocate_index(art, CROARING_ART_NODE256_TYPE); art_node256_t *node = - ((art_node256_t *)art->nodes[CROARING_ART_NODE256_TYPE]) + index; - art_init_inner_node(&node->base, prefix, prefix_size); + (art_node256_t *)roaring_malloc(sizeof(art_node256_t)); + 
art_init_inner_node(&node->base, CROARING_ART_NODE256_TYPE, prefix, + prefix_size); node->count = 0; for (size_t i = 0; i < 256; ++i) { - node->children[i] = CROARING_ART_NULL_REF; + node->children[i] = NULL; } return node; } -static inline void art_node256_clear(art_node256_t *node, art_ref_t next_free) { - node->count = 0; - node->next_free = next_free; +static void art_free_node256(art_node256_t *node) { + for (size_t i = 0; i < 256; ++i) { + if (node->children[i] != NULL) { + art_free_node(node->children[i]); + } + } + roaring_free(node); } -static inline art_ref_t art_node256_find_child(const art_node256_t *node, - art_key_chunk_t key) { +static inline art_node_t *art_node256_find_child(const art_node256_t *node, + art_key_chunk_t key) { return node->children[key]; } -static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, - art_ref_t child, uint8_t key) { +static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, + uint8_t key) { node->children[key] = child; node->count++; - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); + return (art_node_t *)node; } -static inline art_ref_t art_node256_erase(art_t *art, art_node256_t *node, - uint8_t key_chunk) { - node->children[key_chunk] = CROARING_ART_NULL_REF; +static inline art_node_t *art_node256_erase(art_node256_t *node, + uint8_t key_chunk) { + node->children[key_chunk] = NULL; node->count--; if (node->count > 48) { - return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); + return (art_node_t *)node; } art_node48_t *new_node = - art_node48_create(art, node->base.prefix, node->base.prefix_size); + art_node48_create(node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { - if (node->children[i] != CROARING_ART_NULL_REF) { - art_node48_insert(art, new_node, node->children[i], i); + if (node->children[i] != NULL) { + art_node48_insert(new_node, node->children[i], i); } } - art_node_free(art, (art_node_t *)node, 
CROARING_ART_NODE256_TYPE); - return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE48_TYPE); + roaring_free(node); + return (art_node_t *)new_node; } static inline void art_node256_replace(art_node256_t *node, art_key_chunk_t key_chunk, - art_ref_t new_child) { + art_node_t *new_child) { node->children[key_chunk] = new_child; } @@ -10072,14 +9925,14 @@ static inline art_indexed_child_t art_node256_next_child( art_indexed_child_t indexed_child; index++; for (size_t i = index; i < 256; ++i) { - if (node->children[i] != CROARING_ART_NULL_REF) { + if (node->children[i] != NULL) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } @@ -10091,14 +9944,14 @@ static inline art_indexed_child_t art_node256_prev_child( index--; art_indexed_child_t indexed_child; for (int i = index; i >= 0; --i) { - if (node->children[i] != CROARING_ART_NULL_REF) { + if (node->children[i] != NULL) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } @@ -10106,7 +9959,7 @@ static inline art_indexed_child_t art_node256_child_at( const art_node256_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= 256) { - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } indexed_child.index = index; @@ -10119,19 +9972,18 @@ static inline art_indexed_child_t art_node256_lower_bound( art_node256_t *node, art_key_chunk_t key_chunk) { art_indexed_child_t indexed_child; for (size_t i = key_chunk; i < 256; ++i) { - if (node->children[i] != CROARING_ART_NULL_REF) { + if (node->children[i] != NULL) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; 
return indexed_child; } } - indexed_child.child = CROARING_ART_NULL_REF; + indexed_child.child = NULL; return indexed_child; } -static bool art_node256_internal_validate(const art_t *art, - const art_node256_t *node, +static bool art_node256_internal_validate(const art_node256_t *node, art_internal_validate_t validator) { if (node->count <= 48) { return art_validate_fail(&validator, "Node256 has too few children"); @@ -10142,7 +9994,7 @@ static bool art_node256_internal_validate(const art_t *art, validator.depth++; int actual_count = 0; for (int i = 0; i < 256; ++i) { - if (node->children[i] != CROARING_ART_NULL_REF) { + if (node->children[i] != NULL) { actual_count++; for (int j = i + 1; j < 256; ++j) { @@ -10153,7 +10005,7 @@ static bool art_node256_internal_validate(const art_t *art, } validator.current_key[validator.depth - 1] = i; - if (!art_internal_validate_at(art, node->children[i], validator)) { + if (!art_internal_validate_at(node->children[i], validator)) { return false; } } @@ -10167,10 +10019,9 @@ static bool art_node256_internal_validate(const art_t *art, // Finds the child with the given key chunk in the inner node, returns NULL if // no such child is found. -static art_ref_t art_find_child(const art_inner_node_t *node, - art_typecode_t typecode, - art_key_chunk_t key_chunk) { - switch (typecode) { +static art_node_t *art_find_child(const art_inner_node_t *node, + art_key_chunk_t key_chunk) { + switch (art_get_type(node)) { case CROARING_ART_NODE4_TYPE: return art_node4_find_child((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: @@ -10181,14 +10032,14 @@ static art_ref_t art_find_child(const art_inner_node_t *node, return art_node256_find_child((art_node256_t *)node, key_chunk); default: assert(false); - return CROARING_ART_NULL_REF; + return NULL; } } // Replaces the child with the given key chunk in the inner node. 
-static void art_replace(art_inner_node_t *node, art_typecode_t typecode, - art_key_chunk_t key_chunk, art_ref_t new_child) { - switch (typecode) { +static void art_replace(art_inner_node_t *node, art_key_chunk_t key_chunk, + art_node_t *new_child) { + switch (art_get_type(node)) { case CROARING_ART_NODE4_TYPE: art_node4_replace((art_node4_t *)node, key_chunk, new_child); break; @@ -10208,112 +10059,78 @@ static void art_replace(art_inner_node_t *node, art_typecode_t typecode, // Erases the child with the given key chunk from the inner node, returns the // updated node (the same as the initial node if it was not shrunk). -static art_ref_t art_node_erase(art_t *art, art_inner_node_t *node, - art_typecode_t typecode, - art_key_chunk_t key_chunk) { - switch (typecode) { +static art_node_t *art_node_erase(art_inner_node_t *node, + art_key_chunk_t key_chunk) { + switch (art_get_type(node)) { case CROARING_ART_NODE4_TYPE: - return art_node4_erase(art, (art_node4_t *)node, key_chunk); + return art_node4_erase((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: - return art_node16_erase(art, (art_node16_t *)node, key_chunk); + return art_node16_erase((art_node16_t *)node, key_chunk); case CROARING_ART_NODE48_TYPE: - return art_node48_erase(art, (art_node48_t *)node, key_chunk); + return art_node48_erase((art_node48_t *)node, key_chunk); case CROARING_ART_NODE256_TYPE: - return art_node256_erase(art, (art_node256_t *)node, key_chunk); + return art_node256_erase((art_node256_t *)node, key_chunk); default: assert(false); - return CROARING_ART_NULL_REF; + return NULL; } } // Inserts the leaf with the given key chunk in the inner node, returns a // pointer to the (possibly expanded) node. 
-static art_ref_t art_node_insert_leaf(art_t *art, art_inner_node_t *node, - art_typecode_t typecode, - art_key_chunk_t key_chunk, - art_ref_t leaf) { - switch (typecode) { +static art_node_t *art_node_insert_leaf(art_inner_node_t *node, + art_key_chunk_t key_chunk, + art_leaf_t *leaf) { + art_node_t *child = (art_node_t *)(CROARING_SET_LEAF(leaf)); + switch (art_get_type(node)) { case CROARING_ART_NODE4_TYPE: - return art_node4_insert(art, (art_node4_t *)node, leaf, key_chunk); + return art_node4_insert((art_node4_t *)node, child, key_chunk); case CROARING_ART_NODE16_TYPE: - return art_node16_insert(art, (art_node16_t *)node, leaf, - key_chunk); + return art_node16_insert((art_node16_t *)node, child, key_chunk); case CROARING_ART_NODE48_TYPE: - return art_node48_insert(art, (art_node48_t *)node, leaf, - key_chunk); + return art_node48_insert((art_node48_t *)node, child, key_chunk); case CROARING_ART_NODE256_TYPE: - return art_node256_insert(art, (art_node256_t *)node, leaf, - key_chunk); + return art_node256_insert((art_node256_t *)node, child, key_chunk); default: assert(false); - return CROARING_ART_NULL_REF; + return NULL; } } -static uint64_t art_node_get_next_free(const art_t *art, art_ref_t ref) { - art_node_t *node = art_deref(art, ref); - art_typecode_t typecode = art_ref_typecode(ref); - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return ((art_leaf_t *)node)->next_free; - case CROARING_ART_NODE4_TYPE: - return ((art_node4_t *)node)->next_free; - case CROARING_ART_NODE16_TYPE: - return ((art_node16_t *)node)->next_free; - case CROARING_ART_NODE48_TYPE: - return ((art_node48_t *)node)->next_free; - case CROARING_ART_NODE256_TYPE: - return ((art_node256_t *)node)->next_free; - default: - assert(false); - return 0; +// Frees the node and its children. Leaves are freed by the user. +static void art_free_node(art_node_t *node) { + if (art_is_leaf(node)) { + // We leave it up to the user to free leaves. 
+ return; } -} - -static void art_node_set_next_free(art_node_t *node, art_typecode_t typecode, - uint64_t next_free) { - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - ((art_leaf_t *)node)->next_free = next_free; - break; + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: - ((art_node4_t *)node)->next_free = next_free; + art_free_node4((art_node4_t *)node); break; case CROARING_ART_NODE16_TYPE: - ((art_node16_t *)node)->next_free = next_free; + art_free_node16((art_node16_t *)node); break; case CROARING_ART_NODE48_TYPE: - ((art_node48_t *)node)->next_free = next_free; + art_free_node48((art_node48_t *)node); break; case CROARING_ART_NODE256_TYPE: - ((art_node256_t *)node)->next_free = next_free; + art_free_node256((art_node256_t *)node); break; default: assert(false); } } -// Marks the node as unoccopied and frees its index. -static void art_node_free(art_t *art, art_node_t *node, - art_typecode_t typecode) { - uint64_t index = art_get_index(art, node, typecode); - uint64_t next_free = art->first_free[typecode]; - art_node_set_next_free(node, typecode, next_free); - art->first_free[typecode] = index; -} - // Returns the next child in key order, or NULL if called on a leaf. // Provided index may be in the range [-1, 255]. 
static art_indexed_child_t art_node_next_child(const art_node_t *node, - art_typecode_t typecode, int index) { - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return (art_indexed_child_t){ - .child = CROARING_ART_NULL_REF, - .index = 0, - .key_chunk = 0, - }; + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: return art_node4_next_child((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10331,15 +10148,13 @@ static art_indexed_child_t art_node_next_child(const art_node_t *node, // Returns the previous child in key order, or NULL if called on a leaf. // Provided index may be in the range [0, 256]. static art_indexed_child_t art_node_prev_child(const art_node_t *node, - art_typecode_t typecode, int index) { - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return (art_indexed_child_t){ - .child = CROARING_ART_NULL_REF, - .index = 0, - .key_chunk = 0, - }; + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: return art_node4_prev_child((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10354,19 +10169,16 @@ static art_indexed_child_t art_node_prev_child(const art_node_t *node, } } -// Returns the child found at the provided index, or NULL if called on a -// leaf. Provided index is only valid if returned by -// art_node_(next|prev)_child. +// Returns the child found at the provided index, or NULL if called on a leaf. +// Provided index is only valid if returned by art_node_(next|prev)_child. 
static art_indexed_child_t art_node_child_at(const art_node_t *node, - art_typecode_t typecode, int index) { - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return (art_indexed_child_t){ - .child = CROARING_ART_NULL_REF, - .index = 0, - .key_chunk = 0, - }; + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: return art_node4_child_at((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -10381,18 +10193,16 @@ static art_indexed_child_t art_node_child_at(const art_node_t *node, } } -// Returns the child with the smallest key equal to or greater than the -// given key chunk, NULL if called on a leaf or no such child was found. +// Returns the child with the smallest key equal to or greater than the given +// key chunk, NULL if called on a leaf or no such child was found. static art_indexed_child_t art_node_lower_bound(const art_node_t *node, - art_typecode_t typecode, art_key_chunk_t key_chunk) { - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - return (art_indexed_child_t){ - .child = CROARING_ART_NULL_REF, - .index = 0, - .key_chunk = 0, - }; + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: return art_node4_lower_bound((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: @@ -10407,7 +10217,7 @@ static art_indexed_child_t art_node_lower_bound(const art_node_t *node, } } -// ====================== End of node-specific functions ====================== +// ====================== End of node-specific functions ======================= // Compares the given ranges of two keys, returns their relative order: // * Key range 1 < key range 2: a negative value @@ -10445,112 +10255,45 @@ static uint8_t art_common_prefix(const art_key_chunk_t 
key1[], return offset; } -/** - * Extends the array of nodes of the given typecode. Invalidates pointers into - * the array obtained by `art_deref`. - */ -static void art_extend(art_t *art, art_typecode_t typecode) { - uint64_t size = art->first_free[typecode]; - uint64_t capacity = art->capacities[typecode]; - if (size < capacity) { - return; - } - uint64_t new_capacity; - if (capacity == 0) { - new_capacity = 2; - } else if (capacity < 1024) { - new_capacity = 2 * capacity; - } else { - new_capacity = 5 * capacity / 4; - } - art->capacities[typecode] = new_capacity; - art->nodes[typecode] = roaring_realloc( - art->nodes[typecode], new_capacity * ART_NODE_SIZES[typecode]); - uint64_t increase = new_capacity - capacity; - memset(art_get_node(art, capacity, typecode), 0, - increase * ART_NODE_SIZES[typecode]); - for (uint64_t i = capacity; i < new_capacity; ++i) { - art_node_set_next_free(art_get_node(art, i, typecode), typecode, i + 1); - } -} - -/** - * Returns the next free index for the given typecode, may be equal to the - * capacity of the array. - */ -static uint64_t art_next_free(const art_t *art, art_typecode_t typecode) { - uint64_t index = art->first_free[typecode]; - return art_node_get_next_free(art, art_to_ref(index, typecode)); -} - -/** - * Marks an index for the given typecode as used, expanding the relevant node - * array if necessary. - */ -static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode) { - uint64_t first_free = art->first_free[typecode]; - if (first_free == art->capacities[typecode]) { - art_extend(art, typecode); - art->first_free[typecode]++; - return first_free; - } - art->first_free[typecode] = art_next_free(art, typecode); - return first_free; -} - -// Returns a pointer to the rootmost node where the value was inserted, may -// not be equal to `node`. 
-static art_ref_t art_insert_at(art_t *art, art_ref_t ref, - const art_key_chunk_t key[], uint8_t depth, - art_ref_t new_leaf) { - if (art_is_leaf(ref)) { - art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); +// Returns a pointer to the rootmost node where the value was inserted, may not +// be equal to `node`. +static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], + uint8_t depth, art_leaf_t *new_leaf) { + if (art_is_leaf(node)) { + art_leaf_t *leaf = CROARING_CAST_LEAF(node); uint8_t common_prefix = art_common_prefix( leaf->key, depth, ART_KEY_BYTES, key, depth, ART_KEY_BYTES); - // Previously this was a leaf, create an inner node instead and add - // both the existing and new leaf to it. + // Previously this was a leaf, create an inner node instead and add both + // the existing and new leaf to it. art_node_t *new_node = - (art_node_t *)art_node4_create(art, key + depth, common_prefix); + (art_node_t *)art_node4_create(key + depth, common_prefix); - art_ref_t new_ref = art_node_insert_leaf( - art, (art_inner_node_t *)new_node, CROARING_ART_NODE4_TYPE, - leaf->key[depth + common_prefix], ref); - new_ref = art_node_insert_leaf(art, (art_inner_node_t *)new_node, - CROARING_ART_NODE4_TYPE, - key[depth + common_prefix], new_leaf); + new_node = art_node_insert_leaf((art_inner_node_t *)new_node, + leaf->key[depth + common_prefix], leaf); + new_node = art_node_insert_leaf((art_inner_node_t *)new_node, + key[depth + common_prefix], new_leaf); // The new inner node is now the rootmost node. - return new_ref; + return new_node; } - art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); + art_inner_node_t *inner_node = (art_inner_node_t *)node; // Not a leaf: inner node uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); if (common_prefix != inner_node->prefix_size) { - // Partial prefix match. 
Create a new internal node to hold the common + // Partial prefix match. Create a new internal node to hold the common // prefix. - // We create a copy of the node's prefix as the creation of a new - // node may invalidate the prefix pointer. - art_key_chunk_t *prefix_copy = (art_key_chunk_t *)roaring_malloc( - common_prefix * sizeof(art_key_chunk_t)); - memcpy(prefix_copy, inner_node->prefix, - common_prefix * sizeof(art_key_chunk_t)); - art_node4_t *node4 = art_node4_create(art, prefix_copy, common_prefix); - roaring_free(prefix_copy); - - // Deref as a new node was created. - inner_node = (art_inner_node_t *)art_deref(art, ref); + art_node4_t *node4 = + art_node4_create(inner_node->prefix, common_prefix); // Make the existing internal node a child of the new internal node. - art_node4_insert(art, node4, ref, inner_node->prefix[common_prefix]); + node4 = (art_node4_t *)art_node4_insert( + node4, node, inner_node->prefix[common_prefix]); - // Deref again as a new node was created. - inner_node = (art_inner_node_t *)art_deref(art, ref); - - // Correct the prefix of the moved internal node, trimming off the - // chunk inserted into the new internal node. + // Correct the prefix of the moved internal node, trimming off the chunk + // inserted into the new internal node. inner_node->prefix_size = inner_node->prefix_size - common_prefix - 1; if (inner_node->prefix_size > 0) { // Move the remaining prefix to the correct position. @@ -10559,67 +10302,55 @@ static art_ref_t art_insert_at(art_t *art, art_ref_t ref, } // Insert the value in the new internal node. - return art_node_insert_leaf(art, (art_inner_node_t *)node4, - CROARING_ART_NODE4_TYPE, - key[common_prefix + depth], new_leaf); + return art_node_insert_leaf(&node4->base, key[common_prefix + depth], + new_leaf); } // Prefix matches entirely or node has no prefix. Look for an existing // child. 
art_key_chunk_t key_chunk = key[depth + common_prefix]; - art_ref_t child = - art_find_child(inner_node, art_ref_typecode(ref), key_chunk); - if (child != CROARING_ART_NULL_REF) { - art_ref_t new_child = - art_insert_at(art, child, key, depth + common_prefix + 1, new_leaf); + art_node_t *child = art_find_child(inner_node, key_chunk); + if (child != NULL) { + art_node_t *new_child = + art_insert_at(child, key, depth + common_prefix + 1, new_leaf); if (new_child != child) { - // Deref again as a new node may have been created. - inner_node = (art_inner_node_t *)art_deref(art, ref); // Node type changed. - art_replace(inner_node, art_ref_typecode(ref), key_chunk, - new_child); + art_replace(inner_node, key_chunk, new_child); } - return ref; + return node; } - return art_node_insert_leaf(art, inner_node, art_ref_typecode(ref), - key_chunk, new_leaf); + return art_node_insert_leaf(inner_node, key_chunk, new_leaf); } // Erase helper struct. typedef struct art_erase_result_s { - // The rootmost node where the value was erased, may not be equal to - // the original node. If no value was removed, this is - // CROARING_ART_NULL_REF. - art_ref_t rootmost_node; - - // True if a value was erased. - bool erased; + // The rootmost node where the value was erased, may not be equal to `node`. + // If no value was removed, this is null. + art_node_t *rootmost_node; - // Value removed, if any. - art_val_t value_erased; + // Value removed, null if not removed. + art_val_t *value_erased; } art_erase_result_t; // Searches for the given key starting at `node`, erases it if found. 
-static art_erase_result_t art_erase_at(art_t *art, art_ref_t ref, +static art_erase_result_t art_erase_at(art_node_t *node, const art_key_chunk_t *key, uint8_t depth) { art_erase_result_t result; - result.rootmost_node = CROARING_ART_NULL_REF; - result.erased = false; + result.rootmost_node = NULL; + result.value_erased = NULL; - if (art_is_leaf(ref)) { - art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); + if (art_is_leaf(node)) { + art_leaf_t *leaf = CROARING_CAST_LEAF(node); uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); if (common_prefix != ART_KEY_BYTES) { // Leaf key mismatch. return result; } - result.erased = true; - result.value_erased = leaf->val; - art_node_free(art, (art_node_t *)leaf, CROARING_ART_LEAF_TYPE); + result.value_erased = (art_val_t *)leaf; return result; } - art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); + art_inner_node_t *inner_node = (art_inner_node_t *)node; uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); @@ -10628,76 +10359,101 @@ static art_erase_result_t art_erase_at(art_t *art, art_ref_t ref, return result; } art_key_chunk_t key_chunk = key[depth + common_prefix]; - art_ref_t child = - art_find_child(inner_node, art_ref_typecode(ref), key_chunk); - if (child == CROARING_ART_NULL_REF) { + art_node_t *child = art_find_child(inner_node, key_chunk); + if (child == NULL) { // No child with key chunk. return result; } - // Try to erase the key further down. Skip the key chunk associated with - // the child in the node. + // Try to erase the key further down. Skip the key chunk associated with the + // child in the node. 
art_erase_result_t child_result = - art_erase_at(art, child, key, depth + common_prefix + 1); - if (!child_result.erased) { + art_erase_at(child, key, depth + common_prefix + 1); + if (child_result.value_erased == NULL) { return result; } - result.erased = true; result.value_erased = child_result.value_erased; - result.rootmost_node = ref; - - // Deref again as nodes may have changed location. - inner_node = (art_inner_node_t *)art_deref(art, ref); - if (child_result.rootmost_node == CROARING_ART_NULL_REF) { + result.rootmost_node = node; + if (child_result.rootmost_node == NULL) { // Child node was fully erased, erase it from this node's children. - result.rootmost_node = - art_node_erase(art, inner_node, art_ref_typecode(ref), key_chunk); + result.rootmost_node = art_node_erase(inner_node, key_chunk); } else if (child_result.rootmost_node != child) { // Child node was not fully erased, update the pointer to it in this // node. - art_replace(inner_node, art_ref_typecode(ref), key_chunk, - child_result.rootmost_node); + art_replace(inner_node, key_chunk, child_result.rootmost_node); } return result; } -// Searches for the given key starting at `node`, returns NULL if the key -// was not found. -static art_val_t *art_find_at(const art_t *art, art_ref_t ref, +// Searches for the given key starting at `node`, returns NULL if the key was +// not found. 
+static art_val_t *art_find_at(const art_node_t *node, const art_key_chunk_t *key, uint8_t depth) { - while (!art_is_leaf(ref)) { - art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); + while (!art_is_leaf(node)) { + art_inner_node_t *inner_node = (art_inner_node_t *)node; uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); if (common_prefix != inner_node->prefix_size) { return NULL; } - art_ref_t child = art_find_child(inner_node, art_ref_typecode(ref), - key[depth + inner_node->prefix_size]); - if (child == CROARING_ART_NULL_REF) { + art_node_t *child = + art_find_child(inner_node, key[depth + inner_node->prefix_size]); + if (child == NULL) { return NULL; } - ref = child; + node = child; // Include both the prefix and the child key chunk in the depth. depth += inner_node->prefix_size + 1; } - art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); + art_leaf_t *leaf = CROARING_CAST_LEAF(node); if (depth >= ART_KEY_BYTES) { - return &leaf->val; + return (art_val_t *)leaf; } uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); if (common_prefix == ART_KEY_BYTES) { - return &leaf->val; + return (art_val_t *)leaf; } return NULL; } -static void art_node_print_type(art_ref_t ref) { - switch (art_ref_typecode(ref)) { - case CROARING_ART_LEAF_TYPE: - printf("Leaf"); - return; +// Returns the size in bytes of the subtrie. 
+static size_t art_size_in_bytes_at(const art_node_t *node) { + if (art_is_leaf(node)) { + return 0; + } + size_t size = 0; + switch (art_get_type((art_inner_node_t *)node)) { + case CROARING_ART_NODE4_TYPE: { + size += sizeof(art_node4_t); + } break; + case CROARING_ART_NODE16_TYPE: { + size += sizeof(art_node16_t); + } break; + case CROARING_ART_NODE48_TYPE: { + size += sizeof(art_node48_t); + } break; + case CROARING_ART_NODE256_TYPE: { + size += sizeof(art_node256_t); + } break; + default: + assert(false); + break; + } + art_indexed_child_t indexed_child = art_node_next_child(node, -1); + while (indexed_child.child != NULL) { + size += art_size_in_bytes_at(indexed_child.child); + indexed_child = art_node_next_child(node, indexed_child.index); + } + return size; +} + +static void art_node_print_type(const art_node_t *node) { + if (art_is_leaf(node)) { + printf("Leaf"); + return; + } + switch (art_get_type((art_inner_node_t *)node)) { case CROARING_ART_NODE4_TYPE: printf("Node4"); return; @@ -10716,10 +10472,10 @@ static void art_node_print_type(art_ref_t ref) { } } -void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { - if (art_is_leaf(ref)) { +static void art_node_printf(const art_node_t *node, uint8_t depth) { + if (art_is_leaf(node)) { printf("{ type: Leaf, key: "); - art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); + art_leaf_t *leaf = CROARING_CAST_LEAF(node); for (size_t i = 0; i < ART_KEY_BYTES; ++i) { printf("%02x", leaf->key[i]); } @@ -10731,10 +10487,10 @@ void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { printf("%*s", depth, ""); printf("type: "); - art_node_print_type(ref); + art_node_print_type(node); printf("\n"); - art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); + art_inner_node_t *inner_node = (art_inner_node_t *)node; printf("%*s", depth, ""); printf("prefix_size: %d\n", inner_node->prefix_size); @@ -10745,42 +10501,41 @@ void art_node_printf(const art_t *art, art_ref_t ref, 
uint8_t depth) { } printf("\n"); - switch (art_ref_typecode(ref)) { + switch (art_get_type(inner_node)) { case CROARING_ART_NODE4_TYPE: { - art_node4_t *node4 = (art_node4_t *)inner_node; + art_node4_t *node4 = (art_node4_t *)node; for (uint8_t i = 0; i < node4->count; ++i) { printf("%*s", depth, ""); printf("key: %02x ", node4->keys[i]); - art_node_printf(art, node4->children[i], depth); + art_node_printf(node4->children[i], depth); } } break; case CROARING_ART_NODE16_TYPE: { - art_node16_t *node16 = (art_node16_t *)inner_node; + art_node16_t *node16 = (art_node16_t *)node; for (uint8_t i = 0; i < node16->count; ++i) { printf("%*s", depth, ""); printf("key: %02x ", node16->keys[i]); - art_node_printf(art, node16->children[i], depth); + art_node_printf(node16->children[i], depth); } } break; case CROARING_ART_NODE48_TYPE: { - art_node48_t *node48 = (art_node48_t *)inner_node; + art_node48_t *node48 = (art_node48_t *)node; for (int i = 0; i < 256; ++i) { if (node48->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { printf("%*s", depth, ""); printf("key: %02x ", i); printf("child: %02x ", node48->keys[i]); - art_node_printf(art, node48->children[node48->keys[i]], - depth); + art_node_printf(node48->children[node48->keys[i]], depth); } } } break; case CROARING_ART_NODE256_TYPE: { - art_node256_t *node256 = (art_node256_t *)inner_node; + art_node256_t *node256 = (art_node256_t *)node; for (int i = 0; i < 256; ++i) { - if (node256->children[i] != CROARING_ART_NULL_REF) { + if (node256->children[i] != NULL) { printf("%*s", depth, ""); printf("key: %02x ", i); - art_node_printf(art, node256->children[i], depth); + art_node_printf(node256->children[i], depth); } } } break; @@ -10793,310 +10548,118 @@ void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { printf("}\n"); } -/** - * Moves the node at `ref` to the earliest free index before it (if any), - * returns the new ref. Assumes `art->first_free[typecode]` points to the - * smallest free index. 
- */ -static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { - uint64_t idx = art_ref_index(ref); - art_typecode_t typecode = art_ref_typecode(ref); - uint64_t first_free = art->first_free[typecode]; - assert(idx != first_free); - if (idx < first_free) { - return ref; - } - uint64_t from = idx; - uint64_t to = first_free; - uint64_t next_free = art_node_get_next_free(art, art_to_ref(to, typecode)); - memcpy(art_get_node(art, to, typecode), art_get_node(art, from, typecode), - ART_NODE_SIZES[typecode]); - - // With an integer representing the next free index, and an `x` representing - // an occupied index, assume the following scenario at the start of this - // function: - // nodes = [1,2,5,x,x] - // first_free = 0 - // - // We just moved a node from index 3 to 0: - // nodes = [x,2,5,?,x] - // - // We need to modify the free list so that the free indices are ascending. - // This can be done by traversing the list until we find a node with a - // `next_free` greater than the index we copied the node from, and inserting - // the new index in between. This leads to the following: - // nodes = [x,2,3,5,x] - // first_free = 1 - uint64_t initial_next_free = next_free; - uint64_t current = next_free; - while (next_free < from) { - current = next_free; - next_free = - art_node_get_next_free(art, art_to_ref(next_free, typecode)); - } - art_node_set_next_free(art_deref(art, ref), typecode, next_free); - if (current < from) { - art_node_set_next_free(art_get_node(art, current, typecode), typecode, - from); - } - art->first_free[typecode] = - from < initial_next_free ? 
from : initial_next_free; - return art_to_ref(to, typecode); +void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val) { + art_leaf_t *leaf = (art_leaf_t *)val; + art_leaf_populate(leaf, key); + if (art->root == NULL) { + art->root = (art_node_t *)CROARING_SET_LEAF(leaf); + return; + } + art->root = art_insert_at(art->root, key, 0, leaf); } -/** - * Sorts the free lists pointed to by art->first_free in ascending index order. - */ -static void art_sort_free_lists(art_t *art) { - for (art_typecode_t type = CROARING_ART_LEAF_TYPE; - type <= CROARING_ART_NODE256_TYPE; ++type) { - bool *free_indices = - (bool *)roaring_calloc(art->capacities[type], sizeof(bool)); - - for (uint64_t i = art->first_free[type]; i < art->capacities[type]; - i = art_node_get_next_free(art, art_to_ref(i, type))) { - free_indices[i] = true; - } - - uint64_t first_free = art->capacities[type]; - for (uint64_t i = art->capacities[type]; i > 0; --i) { - uint64_t index = i - 1; - if (free_indices[index]) { - art_node_set_next_free(art_get_node(art, index, type), type, - first_free); - first_free = index; - } - } - art->first_free[type] = first_free; - roaring_free(free_indices); +art_val_t *art_erase(art_t *art, const art_key_chunk_t *key) { + if (art->root == NULL) { + return NULL; } + art_erase_result_t result = art_erase_at(art->root, key, 0); + if (result.value_erased == NULL) { + return NULL; + } + art->root = result.rootmost_node; + return result.value_erased; } -/** - * Shrinks all node arrays to `first_free`. Assumes all indices after - * `first_free` are unused. 
- */ -static size_t art_shrink_node_arrays(art_t *art) { - size_t freed = 0; - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - if (art->first_free[t] < art->capacities[t]) { - uint64_t new_capacity = art->first_free[t]; - art->nodes[t] = roaring_realloc(art->nodes[t], - new_capacity * ART_NODE_SIZES[t]); - freed += (art->capacities[t] - new_capacity) * ART_NODE_SIZES[t]; - art->capacities[t] = new_capacity; - } +art_val_t *art_find(const art_t *art, const art_key_chunk_t *key) { + if (art->root == NULL) { + return NULL; } - return freed; + return art_find_at(art->root, key, 0); } -/** - * Traverses the ART, moving nodes to earlier free indices and modifying their - * references along the way. - */ -static void art_shrink_at(art_t *art, art_ref_t ref) { - if (art_is_leaf(ref)) { +bool art_is_empty(const art_t *art) { return art->root == NULL; } + +void art_free(art_t *art) { + if (art->root == NULL) { return; } - switch (art_ref_typecode(ref)) { - case CROARING_ART_NODE4_TYPE: { - art_node4_t *node4 = (art_node4_t *)art_deref(art, ref); - for (uint8_t i = 0; i < node4->count; ++i) { - node4->children[i] = - art_move_node_to_shrink(art, node4->children[i]); - art_shrink_at(art, node4->children[i]); - } - } break; - case CROARING_ART_NODE16_TYPE: { - art_node16_t *node16 = (art_node16_t *)art_deref(art, ref); - for (uint8_t i = 0; i < node16->count; ++i) { - node16->children[i] = - art_move_node_to_shrink(art, node16->children[i]); - art_shrink_at(art, node16->children[i]); - } - } break; - case CROARING_ART_NODE48_TYPE: { - art_node48_t *node48 = (art_node48_t *)art_deref(art, ref); - for (int i = 0; i < 256; ++i) { - if (node48->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { - uint8_t idx = node48->keys[i]; - node48->children[idx] = - art_move_node_to_shrink(art, node48->children[idx]); - art_shrink_at(art, node48->children[idx]); - } - } - } break; - case CROARING_ART_NODE256_TYPE: { - art_node256_t *node256 = (art_node256_t 
*)art_deref(art, ref); - for (int i = 0; i < 256; ++i) { - if (node256->children[i] != CROARING_ART_NULL_REF) { - node256->children[i] = - art_move_node_to_shrink(art, node256->children[i]); - art_shrink_at(art, node256->children[i]); - } - } - } break; - default: - assert(false); - break; - } -} - -void art_init_cleared(art_t *art) { - art->root = CROARING_ART_NULL_REF; - memset(art->first_free, 0, sizeof(art->first_free)); - memset(art->capacities, 0, sizeof(art->capacities)); - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - art->nodes[t] = NULL; - } + art_free_node(art->root); } -size_t art_shrink_to_fit(art_t *art) { - if (art_is_shrunken(art)) { - return 0; - } - if (art->root != CROARING_ART_NULL_REF) { - art_sort_free_lists(art); - art->root = art_move_node_to_shrink(art, art->root); - art_shrink_at(art, art->root); - } - return art_shrink_node_arrays(art); -} - -bool art_is_shrunken(const art_t *art) { - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - if (art->first_free[t] != art->capacities[t]) { - return false; - } - } - return true; -} - -art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val) { - art_ref_t leaf = art_leaf_create(art, key, val); - if (art->root == CROARING_ART_NULL_REF) { - art->root = leaf; - return &((art_leaf_t *)art_deref(art, leaf))->val; - } - art->root = art_insert_at(art, art->root, key, 0, leaf); - return &((art_leaf_t *)art_deref(art, leaf))->val; -} - -bool art_erase(art_t *art, const art_key_chunk_t *key, art_val_t *erased_val) { - art_val_t erased_val_local; - if (erased_val == NULL) { - erased_val = &erased_val_local; - } - if (art->root == CROARING_ART_NULL_REF) { - return false; - } - art_erase_result_t result = art_erase_at(art, art->root, key, 0); - if (!result.erased) { - return false; - } - art->root = result.rootmost_node; - *erased_val = result.value_erased; - return true; -} - -art_val_t *art_find(const art_t *art, 
const art_key_chunk_t *key) { - if (art->root == CROARING_ART_NULL_REF) { - return NULL; - } - return art_find_at(art, art->root, key, 0); -} - -bool art_is_empty(const art_t *art) { - return art->root == CROARING_ART_NULL_REF; -} - -void art_free(art_t *art) { - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - roaring_free(art->nodes[t]); +size_t art_size_in_bytes(const art_t *art) { + size_t size = sizeof(art_t); + if (art->root != NULL) { + size += art_size_in_bytes_at(art->root); } + return size; } void art_printf(const art_t *art) { - if (art->root == CROARING_ART_NULL_REF) { + if (art->root == NULL) { return; } - art_node_printf(art, art->root, 0); -} - -// Returns a reference to the current node that the iterator is positioned -// at. -static inline art_ref_t art_iterator_ref(art_iterator_t *iterator) { - return iterator->frames[iterator->frame].ref; + art_node_printf(art->root, 0); } // Returns the current node that the iterator is positioned at. static inline art_node_t *art_iterator_node(art_iterator_t *iterator) { - return art_deref(iterator->art, art_iterator_ref(iterator)); + return iterator->frames[iterator->frame].node; } -// Sets the iterator key and value to the leaf's key and value. Always -// returns true for convenience. +// Sets the iterator key and value to the leaf's key and value. Always returns +// true for convenience. static inline bool art_iterator_valid_loc(art_iterator_t *iterator, - art_ref_t leaf_ref) { - iterator->frames[iterator->frame].ref = leaf_ref; + art_leaf_t *leaf) { + iterator->frames[iterator->frame].node = CROARING_SET_LEAF(leaf); iterator->frames[iterator->frame].index_in_node = 0; - art_leaf_t *leaf = (art_leaf_t *)art_deref(iterator->art, leaf_ref); memcpy(iterator->key, leaf->key, ART_KEY_BYTES); - iterator->value = &leaf->val; + iterator->value = (art_val_t *)leaf; return true; } -// Invalidates the iterator key and value. Always returns false for -// convenience. 
+// Invalidates the iterator key and value. Always returns false for convenience. static inline bool art_iterator_invalid_loc(art_iterator_t *iterator) { memset(iterator->key, 0, ART_KEY_BYTES); iterator->value = NULL; return false; } -// Moves the iterator one level down in the tree, given a node at the -// current level and the index of the child that we're going down to. +// Moves the iterator one level down in the tree, given a node at the current +// level and the index of the child that we're going down to. // // Note: does not set the index at the new level. -static void art_iterator_down(art_iterator_t *iterator, art_ref_t ref, +static void art_iterator_down(art_iterator_t *iterator, + const art_inner_node_t *node, uint8_t index_in_node) { - iterator->frames[iterator->frame].ref = ref; + iterator->frames[iterator->frame].node = (art_node_t *)node; iterator->frames[iterator->frame].index_in_node = index_in_node; iterator->frame++; - art_inner_node_t *node = (art_inner_node_t *)art_deref(iterator->art, ref); - art_indexed_child_t indexed_child = art_node_child_at( - (art_node_t *)node, art_ref_typecode(ref), index_in_node); - assert(indexed_child.child != CROARING_ART_NULL_REF); - iterator->frames[iterator->frame].ref = indexed_child.child; + art_indexed_child_t indexed_child = + art_node_child_at((art_node_t *)node, index_in_node); + assert(indexed_child.child != NULL); + iterator->frames[iterator->frame].node = indexed_child.child; iterator->depth += node->prefix_size + 1; } -// Moves the iterator to the next/previous child of the current node. -// Returns the child moved to, or NULL if there is no neighboring child. -static art_ref_t art_iterator_neighbor_child(art_iterator_t *iterator, - bool forward) { +// Moves the iterator to the next/previous child of the current node. Returns +// the child moved to, or NULL if there is no neighboring child. 
+static art_node_t *art_iterator_neighbor_child( + art_iterator_t *iterator, const art_inner_node_t *inner_node, + bool forward) { art_iterator_frame_t frame = iterator->frames[iterator->frame]; - art_node_t *node = art_deref(iterator->art, frame.ref); art_indexed_child_t indexed_child; if (forward) { - indexed_child = art_node_next_child(node, art_ref_typecode(frame.ref), - frame.index_in_node); + indexed_child = art_node_next_child(frame.node, frame.index_in_node); } else { - indexed_child = art_node_prev_child(node, art_ref_typecode(frame.ref), - frame.index_in_node); + indexed_child = art_node_prev_child(frame.node, frame.index_in_node); } - if (indexed_child.child != CROARING_ART_NULL_REF) { - art_iterator_down(iterator, frame.ref, indexed_child.index); + if (indexed_child.child != NULL) { + art_iterator_down(iterator, inner_node, indexed_child.index); } return indexed_child.child; } -// Moves the iterator one level up in the tree, returns false if not -// possible. +// Moves the iterator one level up in the tree, returns false if not possible. static bool art_iterator_up(art_iterator_t *iterator) { if (iterator->frame == 0) { return false; @@ -11108,8 +10671,8 @@ static bool art_iterator_up(art_iterator_t *iterator) { return true; } -// Moves the iterator one level, followed by a move to the next / previous -// leaf. Sets the status of the iterator. +// Moves the iterator one level, followed by a move to the next / previous leaf. +// Sets the status of the iterator. static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) { if (!art_iterator_up(iterator)) { // We're at the root. @@ -11120,29 +10683,27 @@ static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) { // Initializes the iterator at the first / last leaf of the given node. // Returns true for convenience. 
-static bool art_node_init_iterator(art_ref_t ref, art_iterator_t *iterator, - bool first) { - while (!art_is_leaf(ref)) { - art_node_t *node = art_deref(iterator->art, ref); +static bool art_node_init_iterator(const art_node_t *node, + art_iterator_t *iterator, bool first) { + while (!art_is_leaf(node)) { art_indexed_child_t indexed_child; if (first) { - indexed_child = - art_node_next_child(node, art_ref_typecode(ref), -1); + indexed_child = art_node_next_child(node, -1); } else { - indexed_child = - art_node_prev_child(node, art_ref_typecode(ref), 256); + indexed_child = art_node_prev_child(node, 256); } - art_iterator_down(iterator, ref, indexed_child.index); - ref = indexed_child.child; + art_iterator_down(iterator, (art_inner_node_t *)node, + indexed_child.index); + node = indexed_child.child; } // We're at a leaf. - iterator->frames[iterator->frame].ref = ref; + iterator->frames[iterator->frame].node = (art_node_t *)node; iterator->frames[iterator->frame].index_in_node = 0; // Should not matter. - return art_iterator_valid_loc(iterator, ref); + return art_iterator_valid_loc(iterator, CROARING_CAST_LEAF(node)); } bool art_iterator_move(art_iterator_t *iterator, bool forward) { - if (art_is_leaf(art_iterator_ref(iterator))) { + if (art_is_leaf(art_iterator_node(iterator))) { bool went_up = art_iterator_up(iterator); if (!went_up) { // This leaf is the root, we're done. @@ -11150,69 +10711,67 @@ bool art_iterator_move(art_iterator_t *iterator, bool forward) { } } // Advance within inner node. - art_ref_t neighbor_child = art_iterator_neighbor_child(iterator, forward); - if (neighbor_child != CROARING_ART_NULL_REF) { - // There is another child at this level, go down to the first or - // last leaf. + art_node_t *neighbor_child = art_iterator_neighbor_child( + iterator, (art_inner_node_t *)art_iterator_node(iterator), forward); + if (neighbor_child != NULL) { + // There is another child at this level, go down to the first or last + // leaf. 
return art_node_init_iterator(neighbor_child, iterator, forward); } // No more children at this level, go up. return art_iterator_up_and_move(iterator, forward); } -// Assumes the iterator is positioned at a node with an equal prefix path up -// to the depth of the iterator. -static bool art_node_iterator_lower_bound(art_ref_t ref, +// Assumes the iterator is positioned at a node with an equal prefix path up to +// the depth of the iterator. +static bool art_node_iterator_lower_bound(const art_node_t *node, art_iterator_t *iterator, const art_key_chunk_t key[]) { - while (!art_is_leaf(ref)) { - art_inner_node_t *inner_node = - (art_inner_node_t *)art_deref(iterator->art, ref); + while (!art_is_leaf(node)) { + art_inner_node_t *inner_node = (art_inner_node_t *)node; int prefix_comparison = art_compare_prefix(inner_node->prefix, 0, key, iterator->depth, inner_node->prefix_size); if (prefix_comparison < 0) { // Prefix so far has been equal, but we've found a smaller key. - // Since we take the lower bound within each node, we can return - // the next leaf. + // Since we take the lower bound within each node, we can return the + // next leaf. return art_iterator_up_and_move(iterator, true); } else if (prefix_comparison > 0) { - // No key equal to the key we're looking for, return the first - // leaf. - return art_node_init_iterator(ref, iterator, true); + // No key equal to the key we're looking for, return the first leaf. + return art_node_init_iterator(node, iterator, true); } // Prefix is equal, move to lower bound child. art_key_chunk_t key_chunk = key[iterator->depth + inner_node->prefix_size]; - art_indexed_child_t indexed_child = art_node_lower_bound( - (art_node_t *)inner_node, art_ref_typecode(ref), key_chunk); - if (indexed_child.child == CROARING_ART_NULL_REF) { + art_indexed_child_t indexed_child = + art_node_lower_bound(node, key_chunk); + if (indexed_child.child == NULL) { // Only smaller keys among children. 
return art_iterator_up_and_move(iterator, true); } if (indexed_child.key_chunk > key_chunk) { // Only larger children, return the first larger child. - art_iterator_down(iterator, ref, indexed_child.index); + art_iterator_down(iterator, inner_node, indexed_child.index); return art_node_init_iterator(indexed_child.child, iterator, true); } // We found a child with an equal prefix. - art_iterator_down(iterator, ref, indexed_child.index); - ref = indexed_child.child; + art_iterator_down(iterator, inner_node, indexed_child.index); + node = indexed_child.child; } - art_leaf_t *leaf = (art_leaf_t *)art_deref(iterator->art, ref); + art_leaf_t *leaf = CROARING_CAST_LEAF(node); if (art_compare_keys(leaf->key, key) >= 0) { // Leaf has an equal or larger key. - return art_iterator_valid_loc(iterator, ref); + return art_iterator_valid_loc(iterator, leaf); } - // Leaf has an equal prefix, but the full key is smaller. Move to the - // next leaf. + // Leaf has an equal prefix, but the full key is smaller. Move to the next + // leaf. return art_iterator_up_and_move(iterator, true); } -art_iterator_t art_init_iterator(art_t *art, bool first) { +art_iterator_t art_init_iterator(const art_t *art, bool first) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - iterator.art = art; - if (art->root == CROARING_ART_NULL_REF) { + if (art->root == NULL) { return iterator; } art_node_init_iterator(art->root, &iterator, first); @@ -11230,12 +10789,12 @@ bool art_iterator_prev(art_iterator_t *iterator) { bool art_iterator_lower_bound(art_iterator_t *iterator, const art_key_chunk_t *key) { if (iterator->value == NULL) { - // We're beyond the end / start of the ART so the iterator does not - // have a valid key. Start from the root. + // We're beyond the end / start of the ART so the iterator does not have + // a valid key. Start from the root. 
iterator->frame = 0; iterator->depth = 0; - art_ref_t root = art_iterator_ref(iterator); - if (root == CROARING_ART_NULL_REF) { + art_node_t *root = art_iterator_node(iterator); + if (root == NULL) { return false; } return art_node_iterator_lower_bound(root, iterator, key); @@ -11250,7 +10809,7 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, // Only smaller keys found. return art_iterator_invalid_loc(iterator); } else { - return art_node_init_iterator(art_iterator_ref(iterator), + return art_node_init_iterator(art_iterator_node(iterator), iterator, true); } } @@ -11263,26 +10822,24 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, iterator->depth + inner_node->prefix_size); } if (compare_result > 0) { - return art_node_init_iterator(art_iterator_ref(iterator), iterator, + return art_node_init_iterator(art_iterator_node(iterator), iterator, true); } - return art_node_iterator_lower_bound(art_iterator_ref(iterator), iterator, + return art_node_iterator_lower_bound(art_iterator_node(iterator), iterator, key); } -art_iterator_t art_lower_bound(art_t *art, const art_key_chunk_t *key) { +art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - iterator.art = art; - if (art->root != CROARING_ART_NULL_REF) { + if (art->root != NULL) { art_node_iterator_lower_bound(art->root, &iterator, key); } return iterator; } -art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key) { +art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - iterator.art = art; - if (art->root != CROARING_ART_NULL_REF) { + if (art->root != NULL) { if (art_node_iterator_lower_bound(art->root, &iterator, key) && art_compare_keys(iterator.key, key) == 0) { art_iterator_next(&iterator); @@ -11291,100 +10848,90 @@ art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key) { return iterator; } -void 
art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, - art_val_t val) { +void art_iterator_insert(art_t *art, art_iterator_t *iterator, + const art_key_chunk_t *key, art_val_t *val) { // TODO: This can likely be faster. - art_insert(iterator->art, key, val); - assert(iterator->art->root != CROARING_ART_NULL_REF); + art_insert(art, key, val); + assert(art->root != NULL); iterator->frame = 0; iterator->depth = 0; - art_node_iterator_lower_bound(iterator->art->root, iterator, key); + art_node_iterator_lower_bound(art->root, iterator, key); } -bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val) { - art_val_t erased_val_local; - if (erased_val == NULL) { - erased_val = &erased_val_local; - } +// TODO: consider keeping `art_t *art` in the iterator. +art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator) { if (iterator->value == NULL) { - return false; + return NULL; } art_key_chunk_t initial_key[ART_KEY_BYTES]; memcpy(initial_key, iterator->key, ART_KEY_BYTES); - *erased_val = *iterator->value; - // Erase the leaf. - art_node_free(iterator->art, art_iterator_node(iterator), - art_ref_typecode(art_iterator_ref(iterator))); + art_val_t *value_erased = iterator->value; bool went_up = art_iterator_up(iterator); if (!went_up) { // We're erasing the root. - iterator->art->root = CROARING_ART_NULL_REF; + art->root = NULL; art_iterator_invalid_loc(iterator); - return true; + return value_erased; } - // Erase the leaf in its parent. - art_ref_t parent_ref = art_iterator_ref(iterator); + // Erase the leaf. 
art_inner_node_t *parent_node = (art_inner_node_t *)art_iterator_node(iterator); art_key_chunk_t key_chunk_in_parent = iterator->key[iterator->depth + parent_node->prefix_size]; - art_ref_t new_parent_ref = - art_node_erase(iterator->art, parent_node, art_ref_typecode(parent_ref), - key_chunk_in_parent); + art_node_t *new_parent_node = + art_node_erase(parent_node, key_chunk_in_parent); - if (new_parent_ref != parent_ref) { + if (new_parent_node != ((art_node_t *)parent_node)) { // Replace the pointer to the inner node we erased from in its // parent (it may be a leaf now). - iterator->frames[iterator->frame].ref = new_parent_ref; + iterator->frames[iterator->frame].node = new_parent_node; went_up = art_iterator_up(iterator); if (went_up) { - art_ref_t grandparent_ref = art_iterator_ref(iterator); art_inner_node_t *grandparent_node = (art_inner_node_t *)art_iterator_node(iterator); art_key_chunk_t key_chunk_in_grandparent = iterator->key[iterator->depth + grandparent_node->prefix_size]; - art_replace(grandparent_node, art_ref_typecode(grandparent_ref), - key_chunk_in_grandparent, new_parent_ref); + art_replace(grandparent_node, key_chunk_in_grandparent, + new_parent_node); } else { // We were already at the rootmost node. - iterator->art->root = new_parent_ref; + art->root = new_parent_node; } } iterator->frame = 0; iterator->depth = 0; - // Do a lower bound search for the initial key, which will find the - // first greater key if it exists. This can likely be mildly faster if - // we instead start from the current position. - art_node_iterator_lower_bound(iterator->art->root, iterator, initial_key); - return true; + // Do a lower bound search for the initial key, which will find the first + // greater key if it exists. This can likely be mildly faster if we instead + // start from the current position. 
+ art_node_iterator_lower_bound(art->root, iterator, initial_key); + return value_erased; } -static bool art_internal_validate_at(const art_t *art, art_ref_t ref, +static bool art_internal_validate_at(const art_node_t *node, art_internal_validate_t validator) { - if (ref == CROARING_ART_NULL_REF) { + if (node == NULL) { return art_validate_fail(&validator, "node is null"); } - if (art_is_leaf(ref)) { - art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); + if (art_is_leaf(node)) { + art_leaf_t *leaf = CROARING_CAST_LEAF(node); if (art_compare_prefix(leaf->key, 0, validator.current_key, 0, validator.depth) != 0) { - return art_validate_fail(&validator, - "leaf key does not match its " - "position's prefix in the tree"); + return art_validate_fail( + &validator, + "leaf key does not match its position's prefix in the tree"); } if (validator.validate_cb != NULL && - !validator.validate_cb(leaf->val, validator.reason, - validator.context)) { + !validator.validate_cb(leaf, validator.reason)) { if (*validator.reason == NULL) { *validator.reason = "leaf validation failed"; } return false; } } else { - art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); + art_inner_node_t *inner_node = (art_inner_node_t *)node; if (validator.depth + inner_node->prefix_size + 1 > ART_KEY_BYTES) { return art_validate_fail(&validator, @@ -11394,28 +10941,28 @@ static bool art_internal_validate_at(const art_t *art, art_ref_t ref, inner_node->prefix_size); validator.depth += inner_node->prefix_size; - switch (art_ref_typecode(ref)) { + switch (inner_node->typecode) { case CROARING_ART_NODE4_TYPE: - if (!art_node4_internal_validate(art, (art_node4_t *)inner_node, + if (!art_node4_internal_validate((art_node4_t *)inner_node, validator)) { return false; } break; case CROARING_ART_NODE16_TYPE: - if (!art_node16_internal_validate( - art, (art_node16_t *)inner_node, validator)) { + if (!art_node16_internal_validate((art_node16_t *)inner_node, + validator)) { return false; } 
break; case CROARING_ART_NODE48_TYPE: - if (!art_node48_internal_validate( - art, (art_node48_t *)inner_node, validator)) { + if (!art_node48_internal_validate((art_node48_t *)inner_node, + validator)) { return false; } break; case CROARING_ART_NODE256_TYPE: - if (!art_node256_internal_validate( - art, (art_node256_t *)inner_node, validator)) { + if (!art_node256_internal_validate((art_node256_t *)inner_node, + validator)) { return false; } break; @@ -11427,143 +10974,23 @@ static bool art_internal_validate_at(const art_t *art, art_ref_t ref, } bool art_internal_validate(const art_t *art, const char **reason, - art_validate_cb_t validate_cb, void *context) { + art_validate_cb_t validate_cb) { const char *reason_local; if (reason == NULL) { // Always allow assigning through *reason reason = &reason_local; } *reason = NULL; - if (art->root == CROARING_ART_NULL_REF) { + if (art->root == NULL) { return true; } art_internal_validate_t validator = { .reason = reason, .validate_cb = validate_cb, - .context = context, .depth = 0, - .current_key = CROARING_ZERO_INITIALIZER, + .current_key = {0}, }; - for (art_typecode_t type = CROARING_ART_LEAF_TYPE; - type <= CROARING_ART_NODE256_TYPE; ++type) { - uint64_t capacity = art->capacities[type]; - for (uint64_t i = 0; i < capacity; ++i) { - uint64_t first_free = art->first_free[type]; - if (first_free > capacity) { - return art_validate_fail(&validator, "first_free > capacity"); - } - } - } - return art_internal_validate_at(art, art->root, validator); -} - -_Static_assert(alignof(art_leaf_t) == alignof(art_node4_t), - "Serialization assumes node type alignment is equal"); -_Static_assert(alignof(art_leaf_t) == alignof(art_node16_t), - "Serialization assumes node type alignment is equal"); -_Static_assert(alignof(art_leaf_t) == alignof(art_node48_t), - "Serialization assumes node type alignment is equal"); -_Static_assert(alignof(art_leaf_t) == alignof(art_node256_t), - "Serialization assumes node type alignment is equal"); - 
-size_t art_size_in_bytes(const art_t *art) { - if (!art_is_shrunken(art)) { - return 0; - } - // Root. - size_t size = sizeof(art->root); - // Node counts. - size += sizeof(art->capacities); - // Alignment for leaves. The rest of the nodes are aligned the same way. - size += - ((size + alignof(art_leaf_t) - 1) & ~(alignof(art_leaf_t) - 1)) - size; - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - size += art->capacities[t] * ART_NODE_SIZES[t]; - } - return size; -} - -size_t art_serialize(const art_t *art, char *buf) { - if (buf == NULL) { - return 0; - } - if (!art_is_shrunken(art)) { - return 0; - } - const char *initial_buf = buf; - - // Root. - memcpy(buf, &art->root, sizeof(art->root)); - buf += sizeof(art->root); - - // Node counts. - memcpy(buf, art->capacities, sizeof(art->capacities)); - buf += sizeof(art->capacities); - - // Alignment for leaves. The rest of the nodes are aligned the same way. - size_t align_bytes = - CROARING_ART_ALIGN_SIZE_RELATIVE(buf, initial_buf, alignof(art_leaf_t)); - memset(buf, 0, align_bytes); - buf += align_bytes; - - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - if (art->capacities[t] > 0) { - size_t size = art->capacities[t] * ART_NODE_SIZES[t]; - memcpy(buf, art->nodes[t], size); - buf += size; - } - } - - return buf - initial_buf; -} - -size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art) { - if (buf == NULL || art == NULL) { - return 0; - } - const char *initial_buf = buf; - art_init_cleared(art); - - if (maxbytes < sizeof(art->root)) { - return 0; - } - memcpy(&art->root, buf, sizeof(art->root)); - buf += sizeof(art->root); - maxbytes -= sizeof(art->root); - - if (maxbytes < sizeof(art->capacities)) { - return 0; - } - _Static_assert(sizeof(art->first_free) == sizeof(art->capacities), - "first_free is read from capacities"); - memcpy(art->first_free, buf, sizeof(art->capacities)); - memcpy(art->capacities, buf, 
sizeof(art->capacities)); - buf += sizeof(art->capacities); - maxbytes -= sizeof(art->capacities); - - // Alignment for leaves. The rest of the nodes are aligned the same way. - const char *before_align = buf; - buf = CROARING_ART_ALIGN_BUF(buf, alignof(art_leaf_t)); - if (maxbytes < (size_t)(buf - before_align)) { - return 0; - } - maxbytes -= buf - before_align; - - for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; - ++t) { - if (art->capacities[t] > 0) { - size_t size = art->capacities[t] * ART_NODE_SIZES[t]; - if (maxbytes < size) { - return 0; - } - art->nodes[t] = (char *)buf; - buf += size; - maxbytes -= size; - } - } - return buf - initial_buf; + return art_internal_validate_at(art->root, validator); } #ifdef __cplusplus @@ -13783,6 +13210,9 @@ bool array_container_iterate64(const array_container_t *cont, uint32_t base, * bitset.c * */ +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif #include #include #include @@ -23295,7 +22725,6 @@ bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { /* end file src/roaring.c */ /* begin file src/roaring64.c */ #include -#include #include #include #include @@ -23304,12 +22733,6 @@ bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { // For serialization / deserialization // containers.h last to avoid conflict with ROARING_CONTAINER_T. -#define CROARING_ALIGN_BUF(buf, alignment) \ - (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ - (ptrdiff_t)(~((alignment)-1))) - -#define CROARING_BITSET_ALIGNMENT 64 - #ifdef __cplusplus using namespace ::roaring::internal; @@ -23324,19 +22747,22 @@ namespace api { typedef struct roaring64_bitmap_s { art_t art; uint8_t flags; - uint64_t first_free; - uint64_t capacity; - container_t **containers; } roaring64_bitmap_t; // Leaf type of the ART used to keep the high 48 bits of each entry. 
-// Low 8 bits: typecode -// High 56 bits: container index -typedef roaring64_leaf_t leaf_t; +typedef struct roaring64_leaf_s { + art_val_t _pad; + uint8_t typecode; + container_t *container; +} roaring64_leaf_t; + +// Alias to make it easier to work with, since it's an internal-only type +// anyway. +typedef struct roaring64_leaf_s leaf_t; // Iterator struct to hold iteration state. typedef struct roaring64_iterator_s { - const roaring64_bitmap_t *r; + const roaring64_bitmap_t *parent; art_iterator_t art_it; roaring_container_iterator_t container_it; uint64_t high48; // Key that art_it points to. @@ -23351,10 +22777,6 @@ typedef struct roaring64_iterator_s { bool saturated_forward; } roaring64_iterator_t; -static inline bool is_frozen64(const roaring64_bitmap_t *r) { - return r->flags & ROARING_FLAG_FROZEN; -} - // Splits the given uint64 key into high 48 bit and low 16 bit components. // Expects high48_out to be of length ART_KEY_BYTES. static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { @@ -23375,96 +22797,24 @@ static inline uint64_t minimum(uint64_t a, uint64_t b) { return (a < b) ? a : b; } -static inline leaf_t create_leaf(uint64_t container_index, uint8_t typecode) { - return (container_index << 8) | typecode; -} - -static inline uint8_t get_typecode(leaf_t leaf) { return (uint8_t)leaf; } - -static inline uint64_t get_index(leaf_t leaf) { return leaf >> 8; } - -static inline container_t *get_container(const roaring64_bitmap_t *r, - leaf_t leaf) { - return r->containers[get_index(leaf)]; +static inline leaf_t *create_leaf(container_t *container, uint8_t typecode) { + leaf_t *leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); + leaf->container = container; + leaf->typecode = typecode; + return leaf; } -// Replaces the container of `leaf` with the given container. Returns the -// modified leaf for convenience. 
-static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, - container_t *container, - uint8_t typecode) { - uint64_t index = get_index(*leaf); - r->containers[index] = container; - *leaf = create_leaf(index, typecode); - return *leaf; -} - -/** - * Extends the array of container pointers. - */ -static void extend_containers(roaring64_bitmap_t *r) { - uint64_t size = r->first_free; - if (size < r->capacity) { - return; - } - uint64_t new_capacity; - if (r->capacity == 0) { - new_capacity = 2; - } else if (r->capacity < 1024) { - new_capacity = 2 * r->capacity; - } else { - new_capacity = 5 * r->capacity / 4; - } - uint64_t increase = new_capacity - r->capacity; - r->containers = - roaring_realloc(r->containers, new_capacity * sizeof(container_t *)); - memset(r->containers + r->capacity, 0, increase * sizeof(container_t *)); - r->capacity = new_capacity; -} - -static uint64_t next_free_container_idx(const roaring64_bitmap_t *r) { - for (uint64_t i = r->first_free + 1; i < r->capacity; ++i) { - if (r->containers[i] == NULL) { - return i; - } - } - return r->capacity; -} - -static uint64_t allocate_index(roaring64_bitmap_t *r) { - uint64_t first_free = r->first_free; - if (first_free == r->capacity) { - extend_containers(r); - } - r->first_free = next_free_container_idx(r); - return first_free; -} - -static leaf_t add_container(roaring64_bitmap_t *r, container_t *container, - uint8_t typecode) { - uint64_t index = allocate_index(r); - r->containers[index] = container; - return create_leaf(index, typecode); -} - -static void remove_container(roaring64_bitmap_t *r, leaf_t leaf) { - uint64_t index = get_index(leaf); - r->containers[index] = NULL; - if (index < r->first_free) { - r->first_free = index; - } -} - -// Copies the container referenced by `leaf` from `r1` to `r2`. 
-static inline leaf_t copy_leaf_container(const roaring64_bitmap_t *r1, - roaring64_bitmap_t *r2, leaf_t leaf) { - uint8_t typecode = get_typecode(leaf); +static inline leaf_t *copy_leaf_container(const leaf_t *leaf) { + leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); + result_leaf->typecode = leaf->typecode; // get_copy_of_container modifies the typecode passed in. - container_t *container = get_copy_of_container( - get_container(r1, leaf), &typecode, /*copy_on_write=*/false); - return add_container(r2, container, typecode); + result_leaf->container = get_copy_of_container( + leaf->container, &result_leaf->typecode, /*copy_on_write=*/false); + return result_leaf; } +static inline void free_leaf(leaf_t *leaf) { roaring_free(leaf); } + static inline int compare_high48(art_key_chunk_t key1[], art_key_chunk_t key2[]) { return art_compare_keys(key1, key2); @@ -23473,10 +22823,10 @@ static inline int compare_high48(art_key_chunk_t key1[], static inline bool roaring64_iterator_init_at_leaf_first( roaring64_iterator_t *it) { it->high48 = combine_key(it->art_it.key, 0); - leaf_t leaf = (leaf_t)*it->art_it.value; + leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = 0; - it->container_it = container_init_iterator(get_container(it->r, leaf), - get_typecode(leaf), &low16); + it->container_it = + container_init_iterator(leaf->container, leaf->typecode, &low16); it->value = it->high48 | low16; return (it->has_value = true); } @@ -23484,18 +22834,18 @@ static inline bool roaring64_iterator_init_at_leaf_first( static inline bool roaring64_iterator_init_at_leaf_last( roaring64_iterator_t *it) { it->high48 = combine_key(it->art_it.key, 0); - leaf_t leaf = (leaf_t)*it->art_it.value; + leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = 0; - it->container_it = container_init_iterator_last(get_container(it->r, leaf), - get_typecode(leaf), &low16); + it->container_it = + container_init_iterator_last(leaf->container, leaf->typecode, &low16); it->value = 
it->high48 | low16; return (it->has_value = true); } static inline roaring64_iterator_t *roaring64_iterator_init_at( const roaring64_bitmap_t *r, roaring64_iterator_t *it, bool first) { - it->r = r; - it->art_it = art_init_iterator((art_t *)&r->art, first); + it->parent = r; + it->art_it = art_init_iterator(&r->art, first); it->has_value = it->art_it.value != NULL; if (it->has_value) { if (first) { @@ -23512,11 +22862,8 @@ static inline roaring64_iterator_t *roaring64_iterator_init_at( roaring64_bitmap_t *roaring64_bitmap_create(void) { roaring64_bitmap_t *r = (roaring64_bitmap_t *)roaring_malloc(sizeof(roaring64_bitmap_t)); - art_init_cleared(&r->art); + r->art.root = NULL; r->flags = 0; - r->capacity = 0; - r->first_free = 0; - r->containers = NULL; return r; } @@ -23526,35 +22873,26 @@ void roaring64_bitmap_free(roaring64_bitmap_t *r) { } art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - if (is_frozen64(r)) { - // Only free the container itself, not the buffer-backed contents - // within. 
- roaring_free(get_container(r, leaf)); - } else { - container_free(get_container(r, leaf), get_typecode(leaf)); - } + leaf_t *leaf = (leaf_t *)it.value; + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); art_iterator_next(&it); } - if (!is_frozen64(r)) { - art_free(&r->art); - } - roaring_free(r->containers); + art_free(&r->art); roaring_free(r); } roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t result_typecode = get_typecode(leaf); + leaf_t *leaf = (leaf_t *)it.value; + uint8_t result_typecode = leaf->typecode; container_t *result_container = get_copy_of_container( - get_container(r, leaf), &result_typecode, /*copy_on_write=*/false); - leaf_t result_leaf = - add_container(result, result_container, result_typecode); - art_insert(&result->art, it.key, (art_val_t)result_leaf); + leaf->container, &result_typecode, /*copy_on_write=*/false); + leaf_t *result_leaf = create_leaf(result_container, result_typecode); + art_insert(&result->art, it.key, (art_val_t *)result_leaf); art_iterator_next(&it); } return result; @@ -23581,8 +22919,8 @@ static void move_from_roaring32_offset(roaring64_bitmap_t *dst, uint8_t high48[ART_KEY_BYTES]; uint64_t high48_bits = key_base | ((uint64_t)key << 16); split_key(high48_bits, high48); - leaf_t leaf = add_container(dst, container, typecode); - art_insert(&dst->art, high48, (art_val_t)leaf); + leaf_t *leaf = create_leaf(container, typecode); + art_insert(&dst->art, high48, (art_val_t *)leaf); } // We stole all the containers, so leave behind a size of zero src->high_low_container.size = 0; @@ -23624,8 +22962,8 @@ roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, uint8_t high48[ART_KEY_BYTES]; 
split_key(min, high48); - leaf_t leaf = add_container(r, container, typecode); - art_insert(&r->art, high48, (art_val_t)leaf); + leaf_t *leaf = create_leaf(container, typecode); + art_insert(&r->art, high48, (art_val_t *)leaf); uint64_t gap = container_max - container_min + step - 1; uint64_t increment = gap - (gap % step); @@ -23649,14 +22987,13 @@ static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, uint16_t low16, leaf_t *leaf) { if (leaf != NULL) { - uint8_t typecode = get_typecode(*leaf); - container_t *container = get_container(r, *leaf); uint8_t typecode2; container_t *container2 = - container_add(container, low16, typecode, &typecode2); - if (container2 != container) { - container_free(container, typecode); - replace_container(r, leaf, container2, typecode2); + container_add(leaf->container, low16, leaf->typecode, &typecode2); + if (container2 != leaf->container) { + container_free(leaf->container, leaf->typecode); + leaf->container = container2; + leaf->typecode = typecode2; } return leaf; } else { @@ -23665,8 +23002,9 @@ static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, container_t *container = container_add(ac, low16, ARRAY_CONTAINER_TYPE, &typecode); assert(ac == container); - leaf_t new_leaf = add_container(r, container, typecode); - return (leaf_t *)art_insert(&r->art, high48, (art_val_t)new_leaf); + leaf = create_leaf(container, typecode); + art_insert(&r->art, high48, (art_val_t *)leaf); + return leaf; } } @@ -23684,12 +23022,12 @@ bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) { int old_cardinality = 0; if (leaf != NULL) { - old_cardinality = container_get_cardinality(get_container(r, *leaf), - get_typecode(*leaf)); + old_cardinality = + container_get_cardinality(leaf->container, leaf->typecode); } leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); int new_cardinality = - container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + 
container_get_cardinality(leaf->container, leaf->typecode); return old_cardinality != new_cardinality; } @@ -23698,22 +23036,22 @@ void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, uint64_t val) { uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - leaf_t *leaf = context->leaf; - if (leaf != NULL && compare_high48(context->high_bytes, high48) == 0) { + if (context->leaf != NULL && + compare_high48(context->high_bytes, high48) == 0) { // We're at a container with the correct high bits. - uint8_t typecode1 = get_typecode(*leaf); - container_t *container1 = get_container(r, *leaf); uint8_t typecode2; container_t *container2 = - container_add(container1, low16, typecode1, &typecode2); - if (container2 != container1) { - container_free(container1, typecode1); - replace_container(r, leaf, container2, typecode2); + container_add(context->leaf->container, low16, + context->leaf->typecode, &typecode2); + if (container2 != context->leaf->container) { + container_free(context->leaf->container, context->leaf->typecode); + context->leaf->container = container2; + context->leaf->typecode = typecode2; } } else { // We're not positioned anywhere yet or the high bits of the key // differ. 
- leaf = (leaf_t *)art_find(&r->art, high48); + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); context->leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); memcpy(context->high_bytes, high48, ART_KEY_BYTES); @@ -23733,19 +23071,17 @@ void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, } } -static inline void add_range_closed_at(roaring64_bitmap_t *r, art_t *art, - uint8_t *high48, uint16_t min, - uint16_t max) { +static inline void add_range_closed_at(art_t *art, uint8_t *high48, + uint16_t min, uint16_t max) { leaf_t *leaf = (leaf_t *)art_find(art, high48); if (leaf != NULL) { - uint8_t typecode1 = get_typecode(*leaf); - container_t *container1 = get_container(r, *leaf); uint8_t typecode2; - container_t *container2 = - container_add_range(container1, typecode1, min, max, &typecode2); - if (container2 != container1) { - container_free(container1, typecode1); - replace_container(r, leaf, container2, typecode2); + container_t *container2 = container_add_range( + leaf->container, leaf->typecode, min, max, &typecode2); + if (container2 != leaf->container) { + container_free(leaf->container, leaf->typecode); + leaf->container = container2; + leaf->typecode = typecode2; } return; } @@ -23753,8 +23089,8 @@ static inline void add_range_closed_at(roaring64_bitmap_t *r, art_t *art, // container_add_range is inclusive, but `container_range_of_ones` is // exclusive. 
container_t *container = container_range_of_ones(min, max + 1, &typecode); - leaf_t new_leaf = add_container(r, container, typecode); - art_insert(art, high48, (art_val_t)new_leaf); + leaf = create_leaf(container, typecode); + art_insert(art, high48, (art_val_t *)leaf); } void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, @@ -23778,22 +23114,22 @@ void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, max_high48); if (compare_high48(min_high48, max_high48) == 0) { // Only populate range within one container. - add_range_closed_at(r, art, min_high48, min_low16, max_low16); + add_range_closed_at(art, min_high48, min_low16, max_low16); return; } // Populate a range across containers. Fill intermediate containers // entirely. - add_range_closed_at(r, art, min_high48, min_low16, 0xffff); + add_range_closed_at(art, min_high48, min_low16, 0xffff); uint64_t min_high_bits = min >> 16; uint64_t max_high_bits = max >> 16; for (uint64_t current = min_high_bits + 1; current < max_high_bits; ++current) { uint8_t current_high48[ART_KEY_BYTES]; split_key(current << 16, current_high48); - add_range_closed_at(r, art, current_high48, 0, 0xffff); + add_range_closed_at(art, current_high48, 0, 0xffff); } - add_range_closed_at(r, art, max_high48, 0, max_low16); + add_range_closed_at(art, max_high48, 0, max_low16); } bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { @@ -23801,8 +23137,7 @@ bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { uint16_t low16 = split_key(val, high48); leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); if (leaf != NULL) { - return container_contains(get_container(r, *leaf), low16, - get_typecode(*leaf)); + return container_contains(leaf->container, low16, leaf->typecode); } return false; } @@ -23819,7 +23154,7 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, 
max_high48); uint64_t max_high48_bits = (max - 1) & 0xFFFFFFFFFFFF0000; // Inclusive - art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); + art_iterator_t it = art_lower_bound(&r->art, min_high48); if (it.value == NULL || combine_key(it.key, 0) > min) { return false; } @@ -23836,7 +23171,7 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, return false; } - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; uint32_t container_min = 0; if (compare_high48(it.key, min_high48) == 0) { container_min = min_low16; @@ -23849,13 +23184,11 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, // For the first and last containers we use container_contains_range, // for the intermediate containers we can use container_is_full. if (container_min == 0 && container_max == 0xFFFF + 1) { - if (!container_is_full(get_container(r, leaf), - get_typecode(leaf))) { + if (!container_is_full(leaf->container, leaf->typecode)) { return false; } - } else if (!container_contains_range(get_container(r, leaf), - container_min, container_max, - get_typecode(leaf))) { + } else if (!container_contains_range(leaf->container, container_min, + container_max, leaf->typecode)) { return false; } prev_high48_bits = current_high48_bits; @@ -23881,24 +23214,24 @@ bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, context->leaf = leaf; memcpy(context->high_bytes, high48, ART_KEY_BYTES); } - return container_contains(get_container(r, *context->leaf), low16, - get_typecode(*context->leaf)); + return container_contains(context->leaf->container, low16, + context->leaf->typecode); } bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, uint64_t *element) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t start_rank = 0; while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - 
uint64_t cardinality = container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + leaf_t *leaf = (leaf_t *)it.value; + uint64_t cardinality = + container_get_cardinality(leaf->container, leaf->typecode); if (start_rank + cardinality > rank) { uint32_t uint32_start = 0; uint32_t uint32_rank = rank - start_rank; uint32_t uint32_element = 0; - if (container_select(get_container(r, leaf), get_typecode(leaf), - &uint32_start, uint32_rank, &uint32_element)) { + if (container_select(leaf->container, leaf->typecode, &uint32_start, + uint32_rank, &uint32_element)) { *element = combine_key(it.key, (uint16_t)uint32_element); return true; } @@ -23914,17 +23247,16 @@ uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) { uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t rank = 0; while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; int compare_result = compare_high48(it.key, high48); if (compare_result < 0) { - rank += container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + rank += container_get_cardinality(leaf->container, leaf->typecode); } else if (compare_result == 0) { - return rank + container_rank(get_container(r, leaf), - get_typecode(leaf), low16); + return rank + + container_rank(leaf->container, leaf->typecode, low16); } else { return rank; } @@ -23938,17 +23270,16 @@ bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t index = 0; while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; int compare_result = 
compare_high48(it.key, high48); if (compare_result < 0) { - index += container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + index += container_get_cardinality(leaf->container, leaf->typecode); } else if (compare_result == 0) { - int index16 = container_get_index(get_container(r, leaf), - get_typecode(leaf), low16); + int index16 = + container_get_index(leaf->container, leaf->typecode, low16); if (index16 < 0) { return false; } @@ -23962,31 +23293,31 @@ bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, return false; } -// Returns true if a container was removed. -static inline bool containerptr_roaring64_bitmap_remove(roaring64_bitmap_t *r, - uint8_t *high48, - uint16_t low16, - leaf_t *leaf) { +static inline leaf_t *containerptr_roaring64_bitmap_remove( + roaring64_bitmap_t *r, uint8_t *high48, uint16_t low16, leaf_t *leaf) { if (leaf == NULL) { - return false; + return NULL; } - uint8_t typecode = get_typecode(*leaf); - container_t *container = get_container(r, *leaf); + container_t *container = leaf->container; + uint8_t typecode = leaf->typecode; uint8_t typecode2; container_t *container2 = container_remove(container, low16, typecode, &typecode2); if (container2 != container) { container_free(container, typecode); - replace_container(r, leaf, container2, typecode2); + leaf->container = container2; + leaf->typecode = typecode2; } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - bool erased = art_erase(&r->art, high48, (art_val_t *)leaf); - assert(erased); - return true; + leaf = (leaf_t *)art_erase(&r->art, high48); + if (leaf != NULL) { + free_leaf(leaf); + } + return NULL; } - return false; + return leaf; } void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) { @@ -24008,12 +23339,13 @@ bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) { return false; } int old_cardinality = - container_get_cardinality(get_container(r, *leaf), 
get_typecode(*leaf)); - if (containerptr_roaring64_bitmap_remove(r, high48, low16, leaf)) { + container_get_cardinality(leaf->container, leaf->typecode); + leaf = containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); + if (leaf == NULL) { return true; } int new_cardinality = - container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + container_get_cardinality(leaf->container, leaf->typecode); return new_cardinality != old_cardinality; } @@ -24026,28 +23358,26 @@ void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, if (context->leaf != NULL && compare_high48(context->high_bytes, high48) == 0) { // We're at a container with the correct high bits. - uint8_t typecode = get_typecode(*context->leaf); - container_t *container = get_container(r, *context->leaf); uint8_t typecode2; container_t *container2 = - container_remove(container, low16, typecode, &typecode2); - if (container2 != container) { - container_free(container, typecode); - replace_container(r, context->leaf, container2, typecode2); + container_remove(context->leaf->container, low16, + context->leaf->typecode, &typecode2); + if (container2 != context->leaf->container) { + container_free(context->leaf->container, context->leaf->typecode); + context->leaf->container = container2; + context->leaf->typecode = typecode2; } if (!container_nonzero_cardinality(container2, typecode2)) { + leaf_t *leaf = (leaf_t *)art_erase(art, high48); container_free(container2, typecode2); - leaf_t leaf; - bool erased = art_erase(art, high48, (art_val_t *)&leaf); - assert(erased); - remove_container(r, leaf); + free_leaf(leaf); } } else { // We're not positioned anywhere yet or the high bits of the key // differ. 
leaf_t *leaf = (leaf_t *)art_find(art, high48); - containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); - context->leaf = leaf; + context->leaf = + containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); memcpy(context->high_bytes, high48, ART_KEY_BYTES); } } @@ -24065,26 +23395,23 @@ void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, } } -static inline void remove_range_closed_at(roaring64_bitmap_t *r, art_t *art, - uint8_t *high48, uint16_t min, - uint16_t max) { +static inline void remove_range_closed_at(art_t *art, uint8_t *high48, + uint16_t min, uint16_t max) { leaf_t *leaf = (leaf_t *)art_find(art, high48); if (leaf == NULL) { return; } - uint8_t typecode = get_typecode(*leaf); - container_t *container = get_container(r, *leaf); uint8_t typecode2; - container_t *container2 = - container_remove_range(container, typecode, min, max, &typecode2); - if (container2 != container) { - container_free(container, typecode); + container_t *container2 = container_remove_range( + leaf->container, leaf->typecode, min, max, &typecode2); + if (container2 != leaf->container) { + container_free(leaf->container, leaf->typecode); if (container2 != NULL) { - replace_container(r, leaf, container2, typecode2); + leaf->container = container2; + leaf->typecode = typecode2; } else { - bool erased = art_erase(art, high48, NULL); - assert(erased); - remove_container(r, *leaf); + art_erase(art, high48); + free_leaf(leaf); } } } @@ -24110,23 +23437,21 @@ void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, max_high48); if (compare_high48(min_high48, max_high48) == 0) { // Only remove a range within one container. - remove_range_closed_at(r, art, min_high48, min_low16, max_low16); + remove_range_closed_at(art, min_high48, min_low16, max_low16); return; } // Remove a range across containers. Remove intermediate containers // entirely. 
- remove_range_closed_at(r, art, min_high48, min_low16, 0xffff); + remove_range_closed_at(art, min_high48, min_low16, 0xffff); art_iterator_t it = art_upper_bound(art, min_high48); while (it.value != NULL && art_compare_keys(it.key, max_high48) < 0) { - leaf_t leaf; - bool erased = art_iterator_erase(&it, (art_val_t *)&leaf); - assert(erased); - container_free(get_container(r, leaf), get_typecode(leaf)); - remove_container(r, leaf); + leaf_t *leaf = (leaf_t *)art_iterator_erase(art, &it); + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); } - remove_range_closed_at(r, art, max_high48, 0, max_low16); + remove_range_closed_at(art, max_high48, 0, max_low16); } void roaring64_bitmap_clear(roaring64_bitmap_t *r) { @@ -24134,12 +23459,12 @@ void roaring64_bitmap_clear(roaring64_bitmap_t *r) { } uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t cardinality = 0; while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - cardinality += container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + leaf_t *leaf = (leaf_t *)it.value; + cardinality += + container_get_cardinality(leaf->container, leaf->typecode); art_iterator_next(&it); } return cardinality; @@ -24168,7 +23493,7 @@ uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, uint8_t max_high48[ART_KEY_BYTES]; uint16_t max_low16 = split_key(max, max_high48); - art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); + art_iterator_t it = art_lower_bound(&r->art, min_high48); while (it.value != NULL) { int max_compare_result = compare_high48(it.key, max_high48); if (max_compare_result > 0) { @@ -24176,22 +23501,23 @@ uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, break; } - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); 
- container_t *container = get_container(r, leaf); + leaf_t *leaf = (leaf_t *)it.value; if (max_compare_result == 0) { // We're at the max high key, add only the range up to the low // 16 bits of max. - cardinality += container_rank(container, typecode, max_low16); + cardinality += + container_rank(leaf->container, leaf->typecode, max_low16); } else { // We're not yet at the max high key, add the full container // range. - cardinality += container_get_cardinality(container, typecode); + cardinality += + container_get_cardinality(leaf->container, leaf->typecode); } if (compare_high48(it.key, min_high48) == 0 && min_low16 > 0) { // We're at the min high key, remove the range up to the low 16 // bits of min. - cardinality -= container_rank(container, typecode, min_low16 - 1); + cardinality -= + container_rank(leaf->container, leaf->typecode, min_low16 - 1); } art_iterator_next(&it); } @@ -24203,23 +23529,23 @@ bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) { } uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); if (it.value == NULL) { return UINT64_MAX; } - leaf_t leaf = (leaf_t)*it.value; - return combine_key( - it.key, container_minimum(get_container(r, leaf), get_typecode(leaf))); + leaf_t *leaf = (leaf_t *)it.value; + return combine_key(it.key, + container_minimum(leaf->container, leaf->typecode)); } uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/false); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/false); if (it.value == NULL) { return 0; } - leaf_t leaf = (leaf_t)*it.value; - return combine_key( - it.key, container_maximum(get_container(r, leaf), get_typecode(leaf))); + leaf_t *leaf = (leaf_t *)it.value; + return combine_key(it.key, + container_maximum(leaf->container, leaf->typecode)); } bool 
roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { @@ -24230,53 +23556,15 @@ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { uint8_t new_typecode; // We don't need to free the existing container if a new one was // created, convert_run_optimize does that internally. - container_t *new_container = convert_run_optimize( - get_container(r, *leaf), get_typecode(*leaf), &new_typecode); - replace_container(r, leaf, new_container, new_typecode); + leaf->container = convert_run_optimize(leaf->container, leaf->typecode, + &new_typecode); + leaf->typecode = new_typecode; has_run_container |= new_typecode == RUN_CONTAINER_TYPE; art_iterator_next(&it); } return has_run_container; } -static void move_to_shrink(roaring64_bitmap_t *r, leaf_t *leaf) { - uint64_t idx = get_index(*leaf); - if (idx < r->first_free) { - return; - } - r->containers[r->first_free] = get_container(r, *leaf); - r->containers[idx] = NULL; - *leaf = create_leaf(r->first_free, get_typecode(*leaf)); - r->first_free = next_free_container_idx(r); -} - -static inline bool is_shrunken(const roaring64_bitmap_t *r) { - return art_is_shrunken(&r->art) && r->first_free == r->capacity; -} - -size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { - size_t freed = art_shrink_to_fit(&r->art); - art_iterator_t it = art_init_iterator(&r->art, true); - while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - freed += container_shrink_to_fit(get_container(r, *leaf), - get_typecode(*leaf)); - move_to_shrink(r, leaf); - art_iterator_next(&it); - } - if (is_shrunken(r)) { - return freed; - } - uint64_t new_capacity = r->first_free; - if (new_capacity < r->capacity) { - r->containers = roaring_realloc(r->containers, - new_capacity * sizeof(container_t *)); - freed += (r->capacity - new_capacity) * sizeof(container_t *); - r->capacity = new_capacity; - } - return freed; -} - /** * (For advanced users.) 
* Collect statistics about the bitmap @@ -24287,16 +23575,15 @@ void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, stat->min_value = roaring64_bitmap_minimum(r); stat->max_value = roaring64_bitmap_maximum(r); - art_iterator_t it = art_init_iterator((art_t *)&r->art, true); + art_iterator_t it = art_init_iterator(&r->art, true); while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; stat->n_containers++; - uint8_t truetype = - get_container_type(get_container(r, leaf), get_typecode(leaf)); - uint32_t card = container_get_cardinality(get_container(r, leaf), - get_typecode(leaf)); + uint8_t truetype = get_container_type(leaf->container, leaf->typecode); + uint32_t card = + container_get_cardinality(leaf->container, leaf->typecode); uint32_t sbytes = - container_size_in_bytes(get_container(r, leaf), get_typecode(leaf)); + container_size_in_bytes(leaf->container, leaf->typecode); stat->cardinality += card; switch (truetype) { case BITSET_CONTAINER_TYPE: @@ -24322,34 +23609,31 @@ void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, } } -static bool roaring64_leaf_internal_validate(const art_val_t val, - const char **reason, - void *context) { - leaf_t leaf = (leaf_t)val; - roaring64_bitmap_t *r = (roaring64_bitmap_t *)context; - return container_internal_validate(get_container(r, leaf), - get_typecode(leaf), reason); +static bool roaring64_leaf_internal_validate(const art_val_t *val, + const char **reason) { + leaf_t *leaf = (leaf_t *)val; + return container_internal_validate(leaf->container, leaf->typecode, reason); } bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, const char **reason) { return art_internal_validate(&r->art, reason, - roaring64_leaf_internal_validate, (void *)r); + roaring64_leaf_internal_validate); } bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - 
art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { if (compare_high48(it1.key, it2.key) != 0) { return false; } - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - if (!container_equals(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2))) { + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + if (!container_equals(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode)) { return false; } art_iterator_next(&it1); @@ -24360,8 +23644,8 @@ bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { bool it2_present = it2.value != NULL; @@ -24370,11 +23654,10 @@ bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, if (it2_present) { compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - if (!container_is_subset( - get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2))) { + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + if (!container_is_subset(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode)) { return false; } art_iterator_next(&it1); @@ -24401,8 +23684,8 @@ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, const roaring64_bitmap_t 
*r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -24412,20 +23695,19 @@ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = - container_and(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - if (container_nonzero_cardinality(result_container, - result_typecode)) { - leaf_t result_leaf = - add_container(result, result_container, result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + result_leaf->container = container_and( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + + if (container_nonzero_cardinality(result_leaf->container, + result_leaf->typecode)) { + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); } else { - container_free(result_container, result_typecode); + container_free(result_leaf->container, result_leaf->typecode); + free_leaf(result_leaf); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -24444,8 +23726,8 @@ uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { uint64_t result = 0; - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); 
- art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -24455,11 +23737,11 @@ uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - result += container_and_cardinality( - get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2)); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + result += + container_and_cardinality(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode); art_iterator_next(&it1); art_iterator_next(&it2); } else if (compare_result < 0) { @@ -24480,7 +23762,7 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, return; } art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -24496,7 +23778,7 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 2a: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; + leaf_t *leaf2 = (leaf_t *)it2.value; // We do the computation "in place" only when c1 is not a // shared container. Rationale: using a shared container @@ -24504,31 +23786,28 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, // copy and then doing the computation in place which is // likely less efficient than avoiding in place entirely and // always generating a new container. 
- uint8_t typecode = get_typecode(*leaf1); - container_t *container = get_container(r1, *leaf1); uint8_t typecode2; container_t *container2; - if (typecode == SHARED_CONTAINER_TYPE) { - container2 = container_and(container, typecode, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + container2 = container_and( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); } else { container2 = container_iand( - container, typecode, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); } - if (container2 != container) { - container_free(container, typecode); + if (container2 != leaf1->container) { + container_free(leaf1->container, leaf1->typecode); + leaf1->container = container2; + leaf1->typecode = typecode2; } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - art_iterator_erase(&it1, NULL); - remove_container(r1, *leaf1); + art_iterator_erase(&r1->art, &it1); + free_leaf(leaf1); } else { - if (container2 != container) { - replace_container(r1, leaf1, container2, typecode2); - } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -24539,11 +23818,10 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, if (!it2_present || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t leaf; - bool erased = art_iterator_erase(&it1, (art_val_t *)&leaf); - assert(erased); - container_free(get_container(r1, leaf), get_typecode(leaf)); - remove_container(r1, leaf); + leaf_t *leaf = (leaf_t *)art_iterator_erase(&r1->art, &it1); + assert(leaf != NULL); + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); } else if (compare_result > 0) { // Case 2c: it1 is after it2. 
art_iterator_lower_bound(&it2, it1.key); @@ -24554,8 +23832,8 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { bool intersect = false; - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -24565,11 +23843,10 @@ bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - intersect |= container_intersect( - get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2)); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + intersect |= container_intersect(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode); art_iterator_next(&it1); art_iterator_next(&it2); } else if (compare_result < 0) { @@ -24609,8 +23886,8 @@ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -24628,31 +23905,26 @@ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if 
(compare_result == 0) { // Case 3b: iterators at the same high key position. - leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = - container_or(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - leaf_t result_leaf = - add_container(result, result_container, result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); + result_leaf->container = container_or( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); art_iterator_next(&it1); art_iterator_next(&it2); } } if ((it1_present && !it2_present) || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t result_leaf = - copy_leaf_container(r1, result, (leaf_t)*it1.value); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. 
- leaf_t result_leaf = - copy_leaf_container(r2, result, (leaf_t)*it2.value); - art_insert(&result->art, it2.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + art_insert(&result->art, it2.key, (art_val_t *)result_leaf); art_iterator_next(&it2); } } @@ -24673,7 +23945,7 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, return; } art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -24692,23 +23964,22 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 3b: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t typecode1 = get_typecode(*leaf1); - container_t *container1 = get_container(r1, *leaf1); + leaf_t *leaf2 = (leaf_t *)it2.value; uint8_t typecode2; container_t *container2; - if (get_typecode(*leaf1) == SHARED_CONTAINER_TYPE) { - container2 = container_or(container1, typecode1, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + container2 = container_or(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode, + &typecode2); } else { - container2 = container_ior(container1, typecode1, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + container2 = container_ior( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); } - if (container2 != container1) { - container_free(container1, typecode1); - replace_container(r1, leaf1, container2, typecode2); + if (container2 != leaf1->container) { + container_free(leaf1->container, leaf1->typecode); + leaf1->container = container2; + leaf1->typecode = typecode2; } art_iterator_next(&it1); 
art_iterator_next(&it2); @@ -24719,9 +23990,9 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t result_leaf = - copy_leaf_container(r2, r1, (leaf_t)*it2.value); - art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + art_iterator_insert(&r1->art, &it1, it2.key, + (art_val_t *)result_leaf); art_iterator_next(&it2); } } @@ -24731,8 +24002,8 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -24750,20 +24021,19 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 3b: iterators at the same high key position. 
- leaf_t leaf1 = (leaf_t)*it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = - container_xor(get_container(r1, leaf1), get_typecode(leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - if (container_nonzero_cardinality(result_container, - result_typecode)) { - leaf_t result_leaf = add_container(result, result_container, - result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); + result_leaf->container = container_xor( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + if (container_nonzero_cardinality(result_leaf->container, + result_leaf->typecode)) { + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); } else { - container_free(result_container, result_typecode); + container_free(result_leaf->container, + result_leaf->typecode); + free_leaf(result_leaf); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -24771,15 +24041,13 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, } if ((it1_present && !it2_present) || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t result_leaf = - copy_leaf_container(r1, result, (leaf_t)*it1.value); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. 
- leaf_t result_leaf = - copy_leaf_container(r2, result, (leaf_t)*it2.value); - art_insert(&result->art, it2.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + art_insert(&result->art, it2.key, (art_val_t *)result_leaf); art_iterator_next(&it2); } } @@ -24798,7 +24066,7 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { assert(r1 != r2); art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -24817,15 +24085,15 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 3b: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t typecode1 = get_typecode(*leaf1); - container_t *container1 = get_container(r1, *leaf1); + leaf_t *leaf2 = (leaf_t *)it2.value; + container_t *container1 = leaf1->container; + uint8_t typecode1 = leaf1->typecode; uint8_t typecode2; container_t *container2; - if (typecode1 == SHARED_CONTAINER_TYPE) { - container2 = container_xor(container1, typecode1, - get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + container2 = container_xor( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); if (container2 != container1) { // We only free when doing container_xor, not // container_ixor, as ixor frees the original @@ -24834,19 +24102,17 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, } } else { container2 = container_ixor( - container1, typecode1, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); } + 
leaf1->container = container2; + leaf1->typecode = typecode2; if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - bool erased = art_iterator_erase(&it1, NULL); - assert(erased); - remove_container(r1, *leaf1); + art_iterator_erase(&r1->art, &it1); + free_leaf(leaf1); } else { - if (container2 != container1) { - replace_container(r1, leaf1, container2, typecode2); - } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -24859,13 +24125,13 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t result_leaf = - copy_leaf_container(r2, r1, (leaf_t)*it2.value); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); if (it1_present) { - art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); + art_iterator_insert(&r1->art, &it1, it2.key, + (art_val_t *)result_leaf); art_iterator_next(&it1); } else { - art_insert(&r1->art, it2.key, (art_val_t)result_leaf); + art_insert(&r1->art, it2.key, (art_val_t *)result_leaf); } art_iterator_next(&it2); } @@ -24876,8 +24142,8 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -24892,21 +24158,20 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2b: iterators at the same high key position. 
+ leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t result_typecode; - container_t *result_container = container_andnot( - get_container(r1, *leaf1), get_typecode(*leaf1), - get_container(r2, leaf2), get_typecode(leaf2), - &result_typecode); - - if (container_nonzero_cardinality(result_container, - result_typecode)) { - leaf_t result_leaf = add_container(result, result_container, - result_typecode); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *leaf2 = (leaf_t *)it2.value; + result_leaf->container = container_andnot( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + + if (container_nonzero_cardinality(result_leaf->container, + result_leaf->typecode)) { + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); } else { - container_free(result_container, result_typecode); + container_free(result_leaf->container, + result_leaf->typecode); + free_leaf(result_leaf); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -24914,9 +24179,8 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, } if (!it2_present || compare_result < 0) { // Cases 1 and 2a: it1 is the only iterator or is before it2. - leaf_t result_leaf = - copy_leaf_container(r1, result, (leaf_t)*it1.value); - art_insert(&result->art, it1.key, (art_val_t)result_leaf); + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); art_iterator_next(&it1); } else if (compare_result > 0) { // Case 2c: it1 is after it2. 
@@ -24936,7 +24200,7 @@ uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -24952,15 +24216,15 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 2b: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t leaf2 = (leaf_t)*it2.value; - uint8_t typecode1 = get_typecode(*leaf1); - container_t *container1 = get_container(r1, *leaf1); + leaf_t *leaf2 = (leaf_t *)it2.value; + container_t *container1 = leaf1->container; + uint8_t typecode1 = leaf1->typecode; uint8_t typecode2; container_t *container2; - if (typecode1 == SHARED_CONTAINER_TYPE) { + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { container2 = container_andnot( - container1, typecode1, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); if (container2 != container1) { // We only free when doing container_andnot, not // container_iandnot, as iandnot frees the original @@ -24969,19 +24233,19 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, } } else { container2 = container_iandnot( - container1, typecode1, get_container(r2, leaf2), - get_typecode(leaf2), &typecode2); + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); + } + if (container2 != container1) { + leaf1->container = container2; + leaf1->typecode = typecode2; } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - bool erased = art_iterator_erase(&it1, NULL); - assert(erased); - remove_container(r1, *leaf1); + 
art_iterator_erase(&r1->art, &it1); + free_leaf(leaf1); } else { - if (container2 != container1) { - replace_container(r1, leaf1, container2, typecode2); - } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -25000,39 +24264,38 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, } /** - * Flips the leaf at high48 in the range [min, max), adding the result to - * `r2`. If the high48 key is not found in `r1`, a new container is created. - */ -static void roaring64_flip_leaf(const roaring64_bitmap_t *r1, - roaring64_bitmap_t *r2, uint8_t high48[], - uint32_t min, uint32_t max) { - leaf_t *leaf1 = (leaf_t *)art_find(&r1->art, high48); - uint8_t typecode2; + * Flips the leaf at high48 in the range [min, max), returning a new leaf with a + * new container. If the high48 key is not found in the existing bitmap, a new + * container is created. Returns null if the negation results in an empty range. + */ +static leaf_t *roaring64_flip_leaf(const roaring64_bitmap_t *r, + uint8_t high48[], uint32_t min, + uint32_t max) { + leaf_t *leaf1 = (leaf_t *)art_find(&r->art, high48); container_t *container2; + uint8_t typecode2; if (leaf1 == NULL) { // No container at this key, create a full container. container2 = container_range_of_ones(min, max, &typecode2); } else if (min == 0 && max > 0xFFFF) { // Flip whole container. - container2 = container_not(get_container(r1, *leaf1), - get_typecode(*leaf1), &typecode2); + container2 = + container_not(leaf1->container, leaf1->typecode, &typecode2); } else { // Partially flip a container. 
- container2 = - container_not_range(get_container(r1, *leaf1), get_typecode(*leaf1), - min, max, &typecode2); + container2 = container_not_range(leaf1->container, leaf1->typecode, min, + max, &typecode2); } if (container_nonzero_cardinality(container2, typecode2)) { - leaf_t leaf2 = add_container(r2, container2, typecode2); - art_insert(&r2->art, high48, (art_val_t)leaf2); - } else { - container_free(container2, typecode2); + return create_leaf(container2, typecode2); } + container_free(container2, typecode2); + return NULL; } /** - * Flips the leaf at high48 in the range [min, max). If the high48 key is - * not found in the bitmap, a new container is created. Deletes the leaf and + * Flips the leaf at high48 in the range [min, max). If the high48 key is not + * found in the bitmap, a new container is created. Deletes the leaf and * associated container if the negation results in an empty range. */ static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], @@ -25043,28 +24306,28 @@ static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], if (leaf == NULL) { // No container at this key, insert a full container. container2 = container_range_of_ones(min, max, &typecode2); - leaf_t new_leaf = add_container(r, container2, typecode2); - art_insert(&r->art, high48, (art_val_t)new_leaf); + art_insert(&r->art, high48, + (art_val_t *)create_leaf(container2, typecode2)); return; } if (min == 0 && max > 0xFFFF) { // Flip whole container. - container2 = container_inot(get_container(r, *leaf), - get_typecode(*leaf), &typecode2); + container2 = + container_inot(leaf->container, leaf->typecode, &typecode2); } else { // Partially flip a container. 
- container2 = container_inot_range( - get_container(r, *leaf), get_typecode(*leaf), min, max, &typecode2); + container2 = container_inot_range(leaf->container, leaf->typecode, min, + max, &typecode2); } - if (container_nonzero_cardinality(container2, typecode2)) { - replace_container(r, leaf, container2, typecode2); - } else { - bool erased = art_erase(&r->art, high48, NULL); - assert(erased); - container_free(container2, typecode2); - remove_container(r, *leaf); + leaf->container = container2; + leaf->typecode = typecode2; + + if (!container_nonzero_cardinality(leaf->container, leaf->typecode)) { + art_erase(&r->art, high48); + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); } } @@ -25089,21 +24352,20 @@ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; roaring64_bitmap_t *r2 = roaring64_bitmap_create(); - art_iterator_t it = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r1->art, /*first=*/true); // Copy the containers before min unchanged. while (it.value != NULL && compare_high48(it.key, min_high48_key) < 0) { - leaf_t leaf1 = (leaf_t)*it.value; - uint8_t typecode2 = get_typecode(leaf1); + leaf_t *leaf1 = (leaf_t *)it.value; + uint8_t typecode2 = leaf1->typecode; container_t *container2 = get_copy_of_container( - get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); - leaf_t leaf2 = add_container(r2, container2, typecode2); - art_insert(&r2->art, it.key, (art_val_t)leaf2); + leaf1->container, &typecode2, /*copy_on_write=*/false); + art_insert(&r2->art, it.key, + (art_val_t *)create_leaf(container2, typecode2)); art_iterator_next(&it); } - // Flip the range (including non-existent containers!) between min and - // max. + // Flip the range (including non-existent containers!) between min and max. 
for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; high48_bits++) { uint8_t current_high48_key[ART_KEY_BYTES]; @@ -25118,19 +24380,22 @@ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, max_container = max_low16 + 1; // Exclusive. } - roaring64_flip_leaf(r1, r2, current_high48_key, min_container, - max_container); + leaf_t *leaf = roaring64_flip_leaf(r1, current_high48_key, + min_container, max_container); + if (leaf != NULL) { + art_insert(&r2->art, current_high48_key, (art_val_t *)leaf); + } } // Copy the containers after max unchanged. - it = art_upper_bound((art_t *)&r1->art, max_high48_key); + it = art_upper_bound(&r1->art, max_high48_key); while (it.value != NULL) { - leaf_t leaf1 = (leaf_t)*it.value; - uint8_t typecode2 = get_typecode(leaf1); + leaf_t *leaf1 = (leaf_t *)it.value; + uint8_t typecode2 = leaf1->typecode; container_t *container2 = get_copy_of_container( - get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); - leaf_t leaf2 = add_container(r2, container2, typecode2); - art_insert(&r2->art, it.key, (art_val_t)leaf2); + leaf1->container, &typecode2, /*copy_on_write=*/false); + art_insert(&r2->art, it.key, + (art_val_t *)create_leaf(container2, typecode2)); art_iterator_next(&it); } @@ -25155,8 +24420,7 @@ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; - // Flip the range (including non-existent containers!) between min and - // max. + // Flip the range (including non-existent containers!) between min and max. for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; high48_bits++) { uint8_t current_high48_key[ART_KEY_BYTES]; @@ -25178,7 +24442,7 @@ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, // Returns the number of distinct high 32-bit entries in the bitmap. 
static inline uint64_t count_high32(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t high32_count = 0; uint32_t prev_high32 = 0; while (it.value != NULL) { @@ -25207,7 +24471,7 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { uint64_t high32_count; size += sizeof(high32_count); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint32_t prev_high32 = 0; roaring_bitmap_t *bitmap32 = NULL; @@ -25216,8 +24480,7 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); if (bitmap32 == NULL || prev_high32 != current_high32) { if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the - // bucket. + // Write as uint32 the most significant 32 bits of the bucket. 
size += sizeof(prev_high32); // Write the 32-bit Roaring bitmaps representing the least @@ -25239,10 +24502,10 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { prev_high32 = current_high32; } - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high32 >> 16), get_container(r, leaf), - get_typecode(leaf)); + (uint16_t)(current_high32 >> 16), leaf->container, + leaf->typecode); art_iterator_next(&it); } @@ -25273,7 +24536,7 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, memcpy(buf, &high32_count, sizeof(high32_count)); buf += sizeof(high32_count); - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint32_t prev_high32 = 0; roaring_bitmap_t *bitmap32 = NULL; @@ -25283,8 +24546,7 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, uint32_t current_high32 = (uint32_t)(current_high48 >> 32); if (bitmap32 == NULL || prev_high32 != current_high32) { if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the - // bucket. + // Write as uint32 the most significant 32 bits of the bucket. 
memcpy(buf, &prev_high32, sizeof(prev_high32)); buf += sizeof(prev_high32); @@ -25307,10 +24569,10 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, prev_high32 = current_high32; } - leaf_t leaf = (leaf_t)*it.value; + leaf_t *leaf = (leaf_t *)it.value; ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high48 >> 16), get_container(r, leaf), - get_typecode(leaf)); + (uint16_t)(current_high48 >> 16), leaf->container, + leaf->typecode); art_iterator_next(&it); } @@ -25361,8 +24623,8 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, buf += sizeof(high32); read_bytes += sizeof(high32); - // Read the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. + // Read the 32-bit Roaring bitmaps representing the least significant + // bits of a set of elements. size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( buf, maxbytes - read_bytes); if (bitmap32_size == 0) { @@ -25417,8 +24679,8 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( } previous_high32 = high32; - // Read the 32-bit Roaring bitmaps representing the least - // significant bits of a set of elements. + // Read the 32-bit Roaring bitmaps representing the least significant + // bits of a set of elements. size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( buf, maxbytes - read_bytes); if (bitmap32_size == 0) { @@ -25460,364 +24722,16 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( return r; } -// Returns an "element count" for the given container. This has a different -// meaning for each container type, but the purpose is the minimal information -// required to serialize the container metadata. 
-static inline uint32_t container_get_element_count(const container_t *c, - uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - return ((bitset_container_t *)c)->cardinality; - } - case ARRAY_CONTAINER_TYPE: { - return ((array_container_t *)c)->cardinality; - } - case RUN_CONTAINER_TYPE: { - return ((run_container_t *)c)->n_runs; - } - default: { - assert(false); - roaring_unreachable; - return 0; - } - } -} - -static inline size_t container_get_frozen_size(const container_t *c, - uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - } - case ARRAY_CONTAINER_TYPE: { - return container_get_element_count(c, typecode) * sizeof(uint16_t); - } - case RUN_CONTAINER_TYPE: { - return container_get_element_count(c, typecode) * sizeof(rle16_t); - } - default: { - assert(false); - roaring_unreachable; - return 0; - } - } -} - -uint64_t align_size(uint64_t size, uint64_t alignment) { - return (size + alignment - 1) & ~(alignment - 1); -} - -size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r) { - if (!is_shrunken(r)) { - return 0; - } - // Flags. - uint64_t size = sizeof(r->flags); - // Container count. - size += sizeof(r->capacity); - // Container element counts. - size += r->capacity * sizeof(uint16_t); - // Total container sizes. - size += 3 * sizeof(uint64_t); - // ART (8 byte aligned). - size = align_size(size, 8); - size += art_size_in_bytes(&r->art); - - uint64_t total_sizes[4] = - CROARING_ZERO_INITIALIZER; // Indexed by typecode. - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - total_sizes[typecode] += - container_get_frozen_size(get_container(r, leaf), typecode); - art_iterator_next(&it); - } - // Containers (aligned). 
- size = align_size(size, CROARING_BITSET_ALIGNMENT); - size += total_sizes[BITSET_CONTAINER_TYPE]; - size = align_size(size, alignof(rle16_t)); - size += total_sizes[ARRAY_CONTAINER_TYPE]; - size = align_size(size, alignof(uint16_t)); - size += total_sizes[RUN_CONTAINER_TYPE]; - // Padding to make overall size a multiple of required alignment. - size = align_size(size, CROARING_BITSET_ALIGNMENT); - return size; -} - -static inline void container_frozen_serialize(const container_t *container, - uint8_t typecode, - uint64_t **bitsets, - uint16_t **arrays, - rle16_t **runs) { - size_t size = container_get_frozen_size(container, typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - bitset_container_t *bitset = (bitset_container_t *)container; - memcpy(*bitsets, bitset->words, size); - *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; - break; - } - case ARRAY_CONTAINER_TYPE: { - array_container_t *array = (array_container_t *)container; - memcpy(*arrays, array->array, size); - *arrays += container_get_element_count(container, typecode); - break; - } - case RUN_CONTAINER_TYPE: { - run_container_t *run = (run_container_t *)container; - memcpy(*runs, run->runs, size); - *runs += container_get_element_count(container, typecode); - break; - } - default: { - assert(false); - roaring_unreachable; - } - } -} - -static inline char *pad_align(char *buf, const char *initial_buf, - size_t alignment) { - uint64_t buf_size = buf - initial_buf; - uint64_t pad = align_size(buf_size, alignment) - buf_size; - memset(buf, 0, pad); - return buf + pad; -} - -size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, - char *buf) { - if (buf == NULL) { - return 0; - } - if (!is_shrunken(r)) { - return 0; - } - const char *initial_buf = buf; - - // Flags. - memcpy(buf, &r->flags, sizeof(r->flags)); - buf += sizeof(r->flags); - - // Container count. - memcpy(buf, &r->capacity, sizeof(r->capacity)); - buf += sizeof(r->capacity); - - // Container element counts. 
- uint64_t total_sizes[4] = - CROARING_ZERO_INITIALIZER; // Indexed by typecode. - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - container_t *container = get_container(r, leaf); - - uint32_t elem_count = container_get_element_count(container, typecode); - uint16_t compressed_elem_count = (uint16_t)(elem_count - 1); - memcpy(buf, &compressed_elem_count, sizeof(compressed_elem_count)); - buf += sizeof(compressed_elem_count); - - total_sizes[typecode] += container_get_frozen_size(container, typecode); - art_iterator_next(&it); - } - - // Total container sizes. - memcpy(buf, &(total_sizes[BITSET_CONTAINER_TYPE]), sizeof(uint64_t)); - buf += sizeof(uint64_t); - memcpy(buf, &(total_sizes[RUN_CONTAINER_TYPE]), sizeof(uint64_t)); - buf += sizeof(uint64_t); - memcpy(buf, &(total_sizes[ARRAY_CONTAINER_TYPE]), sizeof(uint64_t)); - buf += sizeof(uint64_t); - - // ART. - buf = pad_align(buf, initial_buf, 8); - buf += art_serialize(&r->art, buf); - - // Containers (aligned). - // Runs before arrays as run elements are larger than array elements and - // smaller than bitset elements. 
- buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); - uint64_t *bitsets = (uint64_t *)buf; - buf += total_sizes[BITSET_CONTAINER_TYPE]; - buf = pad_align(buf, initial_buf, alignof(rle16_t)); - rle16_t *runs = (rle16_t *)buf; - buf += total_sizes[RUN_CONTAINER_TYPE]; - buf = pad_align(buf, initial_buf, alignof(uint16_t)); - uint16_t *arrays = (uint16_t *)buf; - buf += total_sizes[ARRAY_CONTAINER_TYPE]; - - it = art_init_iterator((art_t *)&r->art, /*first=*/true); - while (it.value != NULL) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - container_t *container = get_container(r, leaf); - container_frozen_serialize(container, typecode, &bitsets, &arrays, - &runs); - art_iterator_next(&it); - } - - // Padding to make overall size a multiple of required alignment. - buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); - - return buf - initial_buf; -} - -static container_t *container_frozen_view(uint8_t typecode, uint32_t elem_count, - const uint64_t **bitsets, - const uint16_t **arrays, - const rle16_t **runs) { - switch (typecode) { - case BITSET_CONTAINER_TYPE: { - bitset_container_t *c = (bitset_container_t *)roaring_malloc( - sizeof(bitset_container_t)); - c->cardinality = elem_count; - c->words = (uint64_t *)*bitsets; - *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; - return (container_t *)c; - } - case ARRAY_CONTAINER_TYPE: { - array_container_t *c = - (array_container_t *)roaring_malloc(sizeof(array_container_t)); - c->cardinality = elem_count; - c->capacity = elem_count; - c->array = (uint16_t *)*arrays; - *arrays += elem_count; - return (container_t *)c; - } - case RUN_CONTAINER_TYPE: { - run_container_t *c = - (run_container_t *)roaring_malloc(sizeof(run_container_t)); - c->n_runs = elem_count; - c->capacity = elem_count; - c->runs = (rle16_t *)*runs; - *runs += elem_count; - return (container_t *)c; - } - default: { - assert(false); - roaring_unreachable; - return NULL; - } - } -} - -roaring64_bitmap_t 
*roaring64_bitmap_frozen_view(const char *buf, - size_t maxbytes) { - if (buf == NULL) { - return NULL; - } - - roaring64_bitmap_t *r = roaring64_bitmap_create(); - - // Flags. - if (maxbytes < sizeof(r->flags)) { - roaring64_bitmap_free(r); - return NULL; - } - memcpy(&r->flags, buf, sizeof(r->flags)); - buf += sizeof(r->flags); - maxbytes -= sizeof(r->flags); - r->flags |= ROARING_FLAG_FROZEN; - - // Container count. - if (maxbytes < sizeof(r->capacity)) { - roaring64_bitmap_free(r); - return NULL; - } - memcpy(&r->capacity, buf, sizeof(r->capacity)); - buf += sizeof(r->capacity); - maxbytes -= sizeof(r->capacity); - - r->containers = - (container_t *)roaring_malloc(r->capacity * sizeof(container_t *)); - - // Container element counts. - if (maxbytes < r->capacity * sizeof(uint16_t)) { - roaring64_bitmap_free(r); - return NULL; - } - const char *elem_counts = buf; - buf += r->capacity * sizeof(uint16_t); - maxbytes -= r->capacity * sizeof(uint16_t); - - // Total container sizes. - uint64_t total_sizes[4]; - if (maxbytes < sizeof(uint64_t) * 3) { - roaring64_bitmap_free(r); - return NULL; - } - memcpy(&(total_sizes[BITSET_CONTAINER_TYPE]), buf, sizeof(uint64_t)); - buf += sizeof(uint64_t); - maxbytes -= sizeof(uint64_t); - memcpy(&(total_sizes[RUN_CONTAINER_TYPE]), buf, sizeof(uint64_t)); - buf += sizeof(uint64_t); - maxbytes -= sizeof(uint64_t); - memcpy(&(total_sizes[ARRAY_CONTAINER_TYPE]), buf, sizeof(uint64_t)); - buf += sizeof(uint64_t); - maxbytes -= sizeof(uint64_t); - - // ART (8 byte aligned). - buf = CROARING_ALIGN_BUF(buf, 8); - size_t art_size = art_frozen_view(buf, maxbytes, &r->art); - if (art_size == 0) { - roaring64_bitmap_free(r); - return NULL; - } - buf += art_size; - maxbytes -= art_size; - - // Containers (aligned). 
- const char *before_containers = buf; - buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); - const uint64_t *bitsets = (const uint64_t *)buf; - buf += total_sizes[BITSET_CONTAINER_TYPE]; - buf = CROARING_ALIGN_BUF(buf, alignof(rle16_t)); - const rle16_t *runs = (const rle16_t *)buf; - buf += total_sizes[RUN_CONTAINER_TYPE]; - buf = CROARING_ALIGN_BUF(buf, alignof(uint16_t)); - const uint16_t *arrays = (const uint16_t *)buf; - buf += total_sizes[ARRAY_CONTAINER_TYPE]; - if (maxbytes < (uint64_t)(buf - before_containers)) { - roaring64_bitmap_free(r); - return NULL; - } - maxbytes -= buf - before_containers; - - // Deserialize in ART iteration order. - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); - for (size_t i = 0; it.value != NULL; ++i) { - leaf_t leaf = (leaf_t)*it.value; - uint8_t typecode = get_typecode(leaf); - - uint16_t compressed_elem_count; - memcpy(&compressed_elem_count, elem_counts + (i * sizeof(uint16_t)), - sizeof(compressed_elem_count)); - uint32_t elem_count = (uint32_t)(compressed_elem_count) + 1; - - // The container index is unrelated to the iteration order. - uint64_t index = get_index(leaf); - r->containers[index] = container_frozen_view(typecode, elem_count, - &bitsets, &arrays, &runs); - - art_iterator_next(&it); - } - - // Padding to make overall size a multiple of required alignment. 
- buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); - - return r; -} - bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, roaring_iterator64 iterator, void *ptr) { - art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { uint64_t high48 = combine_key(it.key, 0); uint64_t high32 = high48 & 0xFFFFFFFF00000000ULL; uint32_t low32 = high48; - leaf_t leaf = (leaf_t)*it.value; - if (!container_iterate64(get_container(r, leaf), get_typecode(leaf), - low32, iterator, high32, ptr)) { + leaf_t *leaf = (leaf_t *)it.value; + if (!container_iterate64(leaf->container, leaf->typecode, low32, + iterator, high32, ptr)) { return false; } art_iterator_next(&it); @@ -25877,12 +24791,12 @@ bool roaring64_iterator_advance(roaring64_iterator_t *it) { if (it->saturated_forward) { return (it->has_value = false); } - roaring64_iterator_init_at(it->r, it, /*first=*/true); + roaring64_iterator_init_at(it->parent, it, /*first=*/true); return it->has_value; } - leaf_t leaf = (leaf_t)*it->art_it.value; + leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_next(get_container(it->r, leaf), get_typecode(leaf), + if (container_iterator_next(leaf->container, leaf->typecode, &it->container_it, &low16)) { it->value = it->high48 | low16; return (it->has_value = true); @@ -25900,12 +24814,12 @@ bool roaring64_iterator_previous(roaring64_iterator_t *it) { // Saturated backward. 
return (it->has_value = false); } - roaring64_iterator_init_at(it->r, it, /*first=*/false); + roaring64_iterator_init_at(it->parent, it, /*first=*/false); return it->has_value; } - leaf_t leaf = (leaf_t)*it->art_it.value; + leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_prev(get_container(it->r, leaf), get_typecode(leaf), + if (container_iterator_prev(leaf->container, leaf->typecode, &it->container_it, &low16)) { it->value = it->high48 | low16; return (it->has_value = true); @@ -25923,8 +24837,8 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, uint16_t val_low16 = split_key(val, val_high48); if (!it->has_value || it->high48 != (val & 0xFFFFFFFFFFFF0000)) { // The ART iterator is before or after the high48 bits of `val` (or - // beyond the ART altogether), so we need to move to a leaf with a - // key equal or greater. + // beyond the ART altogether), so we need to move to a leaf with a key + // equal or greater. if (!art_iterator_lower_bound(&it->art_it, val_high48)) { // Only smaller keys found. it->saturated_forward = true; @@ -25935,13 +24849,13 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, } if (it->high48 == (val & 0xFFFFFFFFFFFF0000)) { - // We're at equal high bits, check if a suitable value can be found - // in this container. - leaf_t leaf = (leaf_t)*it->art_it.value; + // We're at equal high bits, check if a suitable value can be found in + // this container. 
+ leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_lower_bound( - get_container(it->r, leaf), get_typecode(leaf), - &it->container_it, &low16, val_low16)) { + if (container_iterator_lower_bound(leaf->container, leaf->typecode, + &it->container_it, &low16, + val_low16)) { it->value = it->high48 | low16; return (it->has_value = true); } @@ -25952,8 +24866,8 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, } } - // We're at a leaf with high bits greater than `val`, so the first entry - // in this container is our result. + // We're at a leaf with high bits greater than `val`, so the first entry in + // this container is our result. return roaring64_iterator_init_at_leaf_first(it); } @@ -25962,15 +24876,15 @@ uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, uint64_t consumed = 0; while (it->has_value && consumed < count) { uint32_t container_consumed; - leaf_t leaf = (leaf_t)*it->art_it.value; + leaf_t *leaf = (leaf_t *)it->art_it.value; uint16_t low16 = (uint16_t)it->value; uint32_t container_count = UINT32_MAX; if (count - consumed < (uint64_t)UINT32_MAX) { container_count = count - consumed; } bool has_value = container_iterator_read_into_uint64( - get_container(it->r, leaf), get_typecode(leaf), &it->container_it, - it->high48, buf, container_count, &container_consumed, &low16); + leaf->container, leaf->typecode, &it->container_it, it->high48, buf, + container_count, &container_consumed, &low16); consumed += container_consumed; buf += container_consumed; if (has_value) { diff --git a/pyroaring/roaring.h b/pyroaring/roaring.h index 9965870..7caeb1e 100644 --- a/pyroaring/roaring.h +++ b/pyroaring/roaring.h @@ -1,5 +1,5 @@ // !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! -// Created by amalgamation.sh on 2025-02-28T15:35:21Z +// Created by amalgamation.sh on 2025-02-26T22:28:04Z /* * The CRoaring project is under a dual license (Apache/MIT). 
@@ -59,11 +59,11 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "4.3.0" +#define ROARING_VERSION "4.2.3" enum { ROARING_VERSION_MAJOR = 4, - ROARING_VERSION_MINOR = 3, - ROARING_VERSION_REVISION = 0 + ROARING_VERSION_MINOR = 2, + ROARING_VERSION_REVISION = 3 }; #endif // ROARING_INCLUDE_ROARING_VERSION // clang-format on/* end file include/roaring/roaring_version.h */ @@ -119,6 +119,20 @@ enum { #define CROARING_REGULAR_VISUAL_STUDIO 0 #endif +#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) +#undef _POSIX_C_SOURCE +#endif + +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) + +#ifdef __illumos__ +#ifndef __EXTENSIONS__ +#define __EXTENSIONS__ +#endif // __EXTENSIONS__ +#endif + #include #include #include // will provide posix_memalign with _POSIX_C_SOURCE as defined above @@ -2365,7 +2379,7 @@ namespace api { #endif typedef struct roaring64_bitmap_s roaring64_bitmap_t; -typedef uint64_t roaring64_leaf_t; +typedef struct roaring64_leaf_s roaring64_leaf_t; typedef struct roaring64_iterator_s roaring64_iterator_t; /** @@ -2660,12 +2674,6 @@ uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r); */ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r); -/** - * Shrinks internal arrays to eliminate any unused capacity. Returns the number - * of bytes freed. - */ -size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r); - /** * (For advanced users.) * Collect statistics about the bitmap @@ -2918,53 +2926,6 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); -/** - * Returns the number of bytes required to serialize this bitmap in a "frozen" - * format. 
This is not compatible with any other serialization formats. - * - * `roaring64_bitmap_shrink_to_fit()` must be called before this method. - */ -size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r); - -/** - * Serializes the bitmap in a "frozen" format. The given buffer must be at least - * `roaring64_bitmap_frozen_size_in_bytes()` in size. Returns the number of - * bytes used for serialization. - * - * `roaring64_bitmap_shrink_to_fit()` must be called before this method. - * - * The frozen format is optimized for speed of (de)serialization, as well as - * allowing the user to create a bitmap based on a memory mapped file, which is - * possible because the format mimics the memory layout of the bitmap. - * - * Because the format mimics the memory layout of the bitmap, the format is not - * fixed across releases of Roaring Bitmaps, and may change in future releases. - * - * This function is endian-sensitive. If you have a big-endian system (e.g., a - * mainframe IBM s390x), the data format is going to be big-endian and not - * compatible with little-endian systems. - */ -size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, - char *buf); - -/** - * Creates a readonly bitmap that is a view of the given buffer. The buffer - * must be created with `roaring64_bitmap_frozen_serialize()`, and must be - * aligned by 64 bytes. - * - * Returns NULL if deserialization fails. - * - * The returned bitmap must only be used in a readonly manner. The bitmap must - * be freed using `roaring64_bitmap_free()` as normal. The backing buffer must - * only be freed after the bitmap. - * - * This function is endian-sensitive. If you have a big-endian system (e.g., a - * mainframe IBM s390x), the data format is going to be big-endian and not - * compatible with little-endian systems. - */ -roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, - size_t maxbytes); - /** * Iterate over the bitmap elements. 
The function `iterator` is called once for * all the values with `ptr` (can be NULL) as the second parameter of each call.