From dbbf65d4a8f585d7b2cdb4a6fe39b60a61c604f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 2 Sep 2017 23:46:56 -0400 Subject: [PATCH 01/91] Basic implementation This is a completely unoptimize first draft --- compile.sh | 21 +++ src/pairs.c | 126 ++++++++++++++++ src/rill.c | 346 +++++++++++++++++++++++++++++++++++++++++++ src/rill.h | 86 +++++++++++ src/store.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/store.h | 48 ++++++ src/utils.h | 51 +++++++ 7 files changed, 1089 insertions(+) create mode 100755 compile.sh create mode 100644 src/pairs.c create mode 100644 src/rill.c create mode 100644 src/rill.h create mode 100644 src/store.c create mode 100644 src/store.h create mode 100644 src/utils.h diff --git a/compile.sh b/compile.sh new file mode 100755 index 0000000..498f528 --- /dev/null +++ b/compile.sh @@ -0,0 +1,21 @@ +#! /usr/bin/env bash + +set -o errexit -o nounset -o pipefail +set -o xtrace + +declare -a SRC +SRC=(pairs store rill) + +CFLAGS="-g -O3 -march=native -pipe -std=gnu11 -D_GNU_SOURCE" +CFLAGS="$CFLAGS -Werror -Wall -Wextra" +CFLAGS="$CFLAGS -Wundef -Wcast-align -Wwrite-strings -Wunreachable-code -Wformat=2" +CFLAGS="$CFLAGS -Wswitch-enum -Wswitch-default -Winit-self -Wno-strict-aliasing" +CFLAGS="$CFLAGS -fno-strict-aliasing" + +OBJ="" +for src in "${SRC[@]}"; do + gcc -c -o "$src.o" "src/$src.c" $CFLAGS + OBJ="$OBJ $src.o" +done +ar rcs librill.a $OBJ + diff --git a/src/pairs.c b/src/pairs.c new file mode 100644 index 0000000..bf6683b --- /dev/null +++ b/src/pairs.c @@ -0,0 +1,126 @@ +/* pairs.c + Rémi Attab (remi.attab@gmail.com), 02 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" + +#include +#include +#include + + +// ----------------------------------------------------------------------------- +// pairs +// ----------------------------------------------------------------------------- + +void rill_pairs_free(struct rill_pairs *pairs) +{ + free(pairs->data); +} + 
+static bool resize(struct rill_pairs *pairs, size_t len) +{ + if (len <= pairs->cap) return true; + + size_t cap = pairs->cap ? pairs->cap : 1; + while (cap < len) cap *= 2; + + void *ret = realloc(pairs->data, cap); + if (!ret) return false; + + pairs->data = ret; + pairs->cap = cap; + + return true; +} + +bool rill_pairs_reset(struct rill_pairs *pairs, size_t cap) +{ + pairs->len = 0; + return resize(pairs, cap); +} + +bool rill_pairs_push(struct rill_pairs *pairs, rill_key_t key, rill_val_t val) +{ + if (!resize(pairs, pairs->len + 1)) return false; + + pairs->data[pairs->len] = (struct rill_kv) { .key = key, .val = val }; + pairs->len++; + + return true; +} + +static int kv_cmp(const void *lhs, const void *rhs) +{ + return rill_kv_cmp(lhs, rhs); +} + +void rill_pairs_compact(struct rill_pairs *pairs) +{ + if (pairs->len <= 1) return; + qsort(pairs->data, pairs->len, sizeof(*pairs->data), &kv_cmp); + + size_t j = 0; + for (size_t i = 1; i < pairs->len; ++i) { + if (!rill_kv_cmp(&pairs->data[i], &pairs->data[j])) continue; + ++j; + if (j != i) pairs->data[j] = pairs->data[i]; + } + + assert(j + 1 <= pairs->len); + pairs->len = j + 1; +} + +bool rill_pairs_scan_key( + const struct rill_pairs *pairs, + const rill_key_t *keys, size_t len, + struct rill_pairs *out) +{ + for (size_t i = 0; i < pairs->len; ++i) { + struct rill_kv *kv = &pairs->data[i]; + + for (size_t j = 0; j < len; ++j) { + if (kv->key != keys[j]) continue; + if (!rill_pairs_push(out, kv->key, kv->val)) return false; + } + } + + return true; +} + +bool rill_pairs_scan_val( + const struct rill_pairs *pairs, + const rill_val_t *vals, size_t len, + struct rill_pairs *out) +{ + for (size_t i = 0; i < pairs->len; ++i) { + struct rill_kv *kv = &pairs->data[i]; + + for (size_t j = 0; j < len; ++j) { + if (kv->val != vals[j]) continue; + if (!rill_pairs_push(out, kv->key, kv->val)) return false; + } + } + + return true; +} + +void rill_pairs_print(const struct rill_pairs *pairs) +{ + const rill_key_t no_key 
= -1ULL; + rill_key_t key = no_key; + + for (size_t i = 0; i < pairs->len; ++i) { + struct rill_kv *kv = &pairs->data[i]; + + if (kv->key == key) fprintf(stderr, "%lu, ", kv->val); + else { + if (key != no_key) fprintf(stderr, "]\n"); + fprintf(stderr, "%p: [ %lu", (void *) kv->key, kv->val); + key = kv->key; + } + } + + fprintf(stderr, "]\n"); +} diff --git a/src/rill.c b/src/rill.c new file mode 100644 index 0000000..af59af5 --- /dev/null +++ b/src/rill.c @@ -0,0 +1,346 @@ +/* rill.c + Rémi Attab (remi.attab@gmail.com), 03 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" +#include "store.h" +#include "utils.h" + +#include +#include +#include +#include +#include +#include +#include + + +// ----------------------------------------------------------------------------- +// config +// ----------------------------------------------------------------------------- + +enum { hours = 24, days = 30, months = 15}; +enum +{ + quant_hour = 60 * 60, + quant_day = hours * quant_hour, + quant_month = days * quant_day, +}; + + +// ----------------------------------------------------------------------------- +// rill +// ----------------------------------------------------------------------------- + +struct rill +{ + const char *dir; + rill_ts_t ts; + + lock_t lock; + struct rill_pairs *acc; + struct rill_pairs *dump; + + struct rill_store *hourly[hours]; + struct rill_store *daily[days]; + struct rill_store *monthly[months]; +}; + +static bool load_store(struct rill *db, const char *file) +{ + struct rill_store *store = rill_store_open(file); + if (!store) goto fail_open; + + rill_ts_t ts = rill_store_ts(store); + size_t quant = rill_store_quant(store); + + struct rill_store **bucket = NULL; + switch (quant) { + case quant_hour: bucket = &db->hourly[(ts / quant_hour) % hours]; break; + case quant_day: bucket = &db->daily[(ts / quant_hour) % days]; break; + case quant_month: bucket = &db->monthly[(ts / quant_month) % months]; break; + default: + 
fail("unknown quant '%lu' for '%s'", quant, file); + goto fail_quant; + } + + if (*bucket) { + fail("file '%s' is a duplicate for quant '%lu' at timestamp %lu'", + file, quant, ts); + goto fail_dup; + } + + *bucket = store; + + return true; + + fail_dup: + fail_quant: + rill_store_close(store); + fail_open: + return false; +} + +struct rill * rill_open(const char *dir) +{ + struct rill *db = calloc(1, sizeof(*db)); + if (!db) { + fail("unable to allocate memory for '%s'", dir); + goto fail_alloc_struct; + } + + db->dir = strndup(dir, NAME_MAX); + if (!db->dir) { + fail("unable to allocate memory for '%s'", dir); + goto fail_alloc_dir; + } + + db->acc = calloc(1, sizeof(*db->acc)); + if (!db->acc) { + fail("unable to allocate memory for '%s'", dir); + goto fail_alloc_acc; + } + + db->dump = calloc(1, sizeof(*db->dump)); + if (!db->dump) { + fail("unable to allocate memory for '%s'", dir); + goto fail_alloc_dump; + } + + if (!rill_pairs_reset(db->acc, 1 *1000 * 1000)) { + fail("unable to allocate pairs for '%s'", dir); + goto fail_pairs; + } + + if (!rill_pairs_reset(db->dump, 1 *1000 * 1000)) { + fail("unable to allocate pairs for '%s'", dir); + goto fail_pairs; + } + + if (mkdir(dir, 0775) == -1 && errno != EEXIST) { + fail_errno("unable to open create dir '%s'", dir); + goto fail_mkdir; + } + + DIR *dir_handle = opendir(dir); + if (!dir_handle) { + fail_errno("unable to open dir '%s'", dir); + goto fail_dir; + } + + struct dirent stream, *result; + while (true) { + if (readdir_r(dir_handle, &stream, &result) == -1) { + fail_errno("unable to read dir '%s'", dir); + goto fail_readdir; + } + else if (!result) break; + + (void) load_store(db, result->d_name); + } + + closedir(dir_handle); + + return db; + + fail_readdir: + closedir(dir_handle); + fail_dir: + fail_mkdir: + fail_pairs: + rill_pairs_free(db->dump); + free(db->dump); + fail_alloc_dump: + rill_pairs_free(db->acc); + free(db->acc); + fail_alloc_acc: + free((char *) db->dir); + fail_alloc_dir: + free(db); + 
fail_alloc_struct: + return NULL; +} + +void rill_close(struct rill *db) +{ + for (size_t i = 0; i < days; ++i) + rill_store_close(db->daily[i]); + + for (size_t i = 0; i < months; ++i) + rill_store_close(db->monthly[i]); + + rill_pairs_free(db->acc); + rill_pairs_free(db->dump); + + free((char *) db->dir); + free(db->dump); + free(db->acc); + free(db); +} + + +// ----------------------------------------------------------------------------- +// ingest +// ----------------------------------------------------------------------------- + +bool rill_ingest(struct rill *db, rill_ts_t now, rill_key_t key, rill_val_t val) +{ + (void) now; + + bool ret; + { + lock(&db->lock); + + ret = rill_pairs_push(db->acc, key, val); + + unlock(&db->lock); + } + + return ret; +} + + +// ----------------------------------------------------------------------------- +// rotate +// ----------------------------------------------------------------------------- +// \todo since we're deleting data, should be reviewed for robustness. +// +// \todo if we have a gap in ingestion, it's possible that we don't expire some +// data or that we hit one of the asserts. Need to improve the mechanism a +// bit. 
+ + +static bool rotate_monthly( + struct rill *db, + struct rill_store **store, + rill_ts_t ts, + struct rill_store **list, size_t len) +{ + char file[NAME_MAX]; + snprintf(file, sizeof(file), "%s/%lu.rill", db->dir, ts / quant_month); + + if (*store) (void) rill_store_rm(*store); + if (!rill_store_merge(file, ts, quant_day, list, len)) return false; + if (!(*store = rill_store_open(file))) return false; + + for (size_t i = 0; i < len; ++i) { + (void) rill_store_rm(list[i]); + list[i] = NULL; + } + + return true; +} + +static bool rotate_daily( + struct rill *db, + struct rill_store **store, + rill_ts_t ts, + struct rill_store **list, size_t len) +{ + char file[NAME_MAX]; + snprintf(file, sizeof(file), "%s/%lu-%lu.rill", db->dir, + ts / quant_month, + (ts / quant_day) % days); + + assert(!*store); + if (!rill_store_merge(file, ts, quant_day, list, len)) return false; + if (!(*store = rill_store_open(file))) return false; + + for (size_t i = 0; i < len; ++i) { + (void) rill_store_rm(list[i]); + list[i] = NULL; + } + + return true; +} + +static bool rotate_hourly(struct rill *db, struct rill_store **store, rill_ts_t ts) +{ + char file[NAME_MAX]; + snprintf(file, sizeof(file), "%s/%lu-%lu-%lu.rill", db->dir, + ts / quant_month, + (ts / quant_day) % days, + (ts / quant_hour) % hours); + + { + lock(&db->lock); + + struct rill_pairs *tmp = db->acc; + db->acc = db->dump; + db->dump = tmp; + + unlock(&db->lock); + } + + assert(!*store); + if (!rill_store_write(file, ts, quant_hour, db->dump)) return false; + if (!(*store = rill_store_open(file))) return false; + + rill_pairs_reset(db->dump, db->dump->cap); + + return true; +} + +bool rill_rotate(struct rill *db, rill_ts_t now) +{ + if (now / quant_month != db->ts / quant_month) { + size_t quant = db->ts / quant_month; + if (!rotate_monthly(db, &db->monthly[quant % days], db->ts, db->daily, days)) { + fail("unable to complete monthly rotation '%lu'", quant); + return false; + } + } + + if (now / quant_day != db->ts / 
quant_day) { + size_t quant = db->ts / quant_day; + if (!rotate_daily(db, &db->daily[quant % days], db->ts, db->hourly, hours)) { + fail("unable to complete daily rotation '%lu'", quant); + return false; + } + } + + if (now / quant_hour != db->ts / quant_hour) { + size_t quant = db->ts / quant_hour; + if (!rotate_hourly(db, &db->hourly[(now / quant_hour) % hours], db->ts)) { + fail("unable to complete hourly rotation '%lu'", quant); + return false; + } + } + + db->ts = now; + return true; +} + + +// ----------------------------------------------------------------------------- +// query +// ----------------------------------------------------------------------------- + +void rill_query_key(struct rill *db, rill_key_t *keys, size_t len, struct rill_pairs *out) +{ + for (size_t i = 0; i < hours; ++i) + rill_store_scan_key(db->hourly[i], keys, len, out); + + for (size_t i = 0; i < days; ++i) + rill_store_scan_key(db->daily[i], keys, len, out); + + for (size_t i = 0; i < months; ++i) + rill_store_scan_key(db->monthly[i], keys, len, out); + + rill_pairs_compact(out); +} + +void rill_query_val(struct rill *db, rill_val_t *vals, size_t len, struct rill_pairs *out) +{ + for (size_t i = 0; i < hours; ++i) + rill_store_scan_val(db->hourly[i], vals, len, out); + + for (size_t i = 0; i < days; ++i) + rill_store_scan_val(db->daily[i], vals, len, out); + + for (size_t i = 0; i < months; ++i) + rill_store_scan_val(db->monthly[i], vals, len, out); + + rill_pairs_compact(out); +} diff --git a/src/rill.h b/src/rill.h new file mode 100644 index 0000000..94a0069 --- /dev/null +++ b/src/rill.h @@ -0,0 +1,86 @@ +/* rill.h + Rémi Attab (remi.attab@gmail.com), 03 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#pragma once + +#include +#include +#include + +// ----------------------------------------------------------------------------- +// types +// ----------------------------------------------------------------------------- + +typedef uint64_t rill_ts_t; +typedef uint64_t 
rill_key_t; +typedef uint64_t rill_val_t; + + +// ----------------------------------------------------------------------------- +// kv +// ----------------------------------------------------------------------------- + +#define rill_packed __attribute__((__packed__)) + +struct rill_packed rill_kv +{ + rill_key_t key; + rill_val_t val; +}; + +static inline int rill_kv_cmp(const struct rill_kv *lhs, const struct rill_kv *rhs) +{ + if (lhs->key < rhs->key) return -1; + if (lhs->key > rhs->key) return +1; + + if (lhs->val < rhs->val) return -1; + if (lhs->val > rhs->val) return +1; + + return 0; +} + + +// ----------------------------------------------------------------------------- +// pairs +// ----------------------------------------------------------------------------- + +struct rill_pairs +{ + size_t len, cap; + struct rill_kv *data; +}; + +void rill_pairs_free(struct rill_pairs *pairs); +bool rill_pairs_reset(struct rill_pairs *pairs, size_t cap); +bool rill_pairs_push(struct rill_pairs *pairs, rill_key_t key, rill_val_t val); +void rill_pairs_compact(struct rill_pairs *pairs); + +bool rill_pairs_scan_key( + const struct rill_pairs *pairs, + const rill_key_t *keys, size_t len, + struct rill_pairs *out); + +bool rill_pairs_scan_val( + const struct rill_pairs *pairs, + const rill_val_t *vals, size_t len, + struct rill_pairs *out); + +void rill_pairs_print(const struct rill_pairs *pairs); + + +// ----------------------------------------------------------------------------- +// rill +// ----------------------------------------------------------------------------- + +struct rill; + +struct rill * rill_open(const char *dir); +void rill_close(struct rill *db); + +bool rill_ingest(struct rill *db, rill_ts_t now, rill_key_t key, rill_val_t val); +bool rill_rotate(struct rill *db, rill_ts_t now); + +void rill_query_key(struct rill *db, rill_key_t *keys, size_t len, struct rill_pairs *out); +void rill_query_val(struct rill *db, rill_val_t *vals, size_t len, struct 
rill_pairs *out); diff --git a/src/store.c b/src/store.c new file mode 100644 index 0000000..b7e3910 --- /dev/null +++ b/src/store.c @@ -0,0 +1,411 @@ +/* store.c + Rémi Attab (remi.attab@gmail.com), 02 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "store.h" +#include "rill.h" +#include "utils.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +// ----------------------------------------------------------------------------- +// utils +// ----------------------------------------------------------------------------- + +static const size_t page_len = 1UL << 10; + +static inline size_t to_vma_len(size_t len) +{ + if (!(len % page_len)) return len; + return (len & ~(page_len - 1)) + page_len; +} + + +// ----------------------------------------------------------------------------- +// store +// ----------------------------------------------------------------------------- + +static const uint32_t version = 1; +static const uint32_t magic = 0x4C4C4952; + +struct rill_packed header +{ + uint32_t magic; + uint32_t version; + + uint64_t pairs; + uint64_t ts; + uint64_t quant; +}; + +struct rill_store +{ + int fd; + const char *file; + + void *vma; + size_t vma_len; + + struct header *head; + struct rill_kv *data; +}; + + +// ----------------------------------------------------------------------------- +// reader +// ----------------------------------------------------------------------------- + +struct rill_store *rill_store_open(const char *file) +{ + struct rill_store *store = calloc(1, sizeof(*store)); + if (!store) { + fail("unable to allocate memory for '%s'", file); + goto fail_alloc_struct; + } + + store->file = strndup(file, NAME_MAX); + if (!store->file) { + fail("unable to allocate memory for '%s'", file); + goto fail_alloc_file; + } + + struct stat stat_ret = {0}; + if (stat(file, &stat_ret) == -1) { + fail_errno("unable to stat '%s'", file); + goto fail_stat; + } + + size_t len = 
stat_ret.st_size; + if (len < sizeof(struct header)) { + fail("invalid size for '%s'", file); + goto fail_size; + } + + store->vma_len = to_vma_len(len); + + store->fd = open(file, O_RDONLY); + if (store->fd == -1) { + fail_errno("unable to open '%s'", file); + goto fail_open; + } + + store->vma = mmap(0, store->vma_len, PROT_READ, MAP_PRIVATE | MAP_HUGETLB, store->fd, 0); + if (store->vma == MAP_FAILED) { + fail_errno("unable to mmap '%s'", file); + goto fail_mmap; + } + + store->head = store->vma; + store->data = (void *) ((uintptr_t) store->vma + sizeof(struct header)); + + if (store->head->magic != magic) { + fail("invalid magic '0x%x' for '%s'", store->head->magic, file); + goto fail_magic; + } + + if (store->head->version != version) { + fail("unknown version '%du' for '%s'", store->head->version, file); + goto fail_version; + } + + size_t expected = sizeof(struct header) + sizeof(struct rill_kv) * store->head->pairs; + if (expected != len) { + fail("invalid file size '%lu != %lu' for '%s'", len, expected, file); + goto fail_len; + } + + return store; + + fail_len: + fail_version: + fail_magic: + munmap(store->vma, store->vma_len); + fail_mmap: + close(store->fd); + fail_open: + fail_size: + fail_stat: + free((char *) store->file); + fail_alloc_file: + free(store); + fail_alloc_struct: + return NULL; +} + +void rill_store_close(struct rill_store *store) +{ + munmap(store->vma, store->vma_len); + close(store->fd); + free((char *) store->file); + free(store); +} + +bool rill_store_rm(struct rill_store *store) +{ + if (!unlink(store->file)) { + fail_errno("unable to unlink '%s'", store->file); + return false; + } + + rill_store_close(store); + return true; +} + + +// ----------------------------------------------------------------------------- +// writer +// ----------------------------------------------------------------------------- + +static bool writer_open( + struct rill_store *store, + const char *file, size_t cap, + rill_ts_t ts, size_t quant) +{ + 
store->file = file; + + store->fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0640); + if (store->fd == -1) { + fail_errno("unable to open '%s'", file); + goto fail_open; + } + + size_t len = sizeof(struct header) + sizeof(struct rill_kv) * cap; + if (ftruncate(store->fd, len) == -1) { + fail_errno("unable to resize '%s'", file); + goto fail_truncate; + } + + store->vma_len = to_vma_len(len); + int vma_prot = PROT_WRITE | PROT_READ; + int vma_flags = MAP_PRIVATE | MAP_HUGETLB; + store->vma = mmap(0, store->vma_len, vma_prot, vma_flags, store->fd, 0); + if (store->vma == MAP_FAILED) { + fail_errno("unable to mmap '%s'", file); + goto fail_mmap; + } + + store->head = store->vma; + store->data = (void *) ((uintptr_t) store->vma + sizeof(struct header)); + + *store->head = (struct header) { + .magic = magic, + .version = version, + .ts = ts, + .quant = quant, + }; + + return true; + + munmap(store->vma, store->vma_len); + fail_mmap: + fail_truncate: + close(store->fd); + fail_open: + return false; +} + + +static void writer_close(struct rill_store *store, size_t pairs) +{ + store->head->pairs = pairs; + + munmap(store->vma, store->vma_len); + + size_t len = sizeof(struct header) + sizeof(struct rill_kv) * pairs; + if (ftruncate(store->fd, len) == -1) + fail_errno("unable to resize '%s'", store->file); + + if (fdatasync(store->fd) == -1) + fail_errno("unable to fsync '%s'", store->file); + + close(store->fd); +} + +bool rill_store_write( + const char *file, + rill_ts_t ts, size_t quant, + struct rill_pairs *pairs) +{ + rill_pairs_compact(pairs); + + struct rill_store store = {0}; + if (!writer_open(&store, file, pairs->len, ts, quant)) { + fail("unable to create '%s'", file); + goto fail_open; + } + + for (size_t i = 0; i < pairs->len; ++i) { + store.data[i].key = pairs->data[i].key; + store.data[i].val = pairs->data[i].val; + } + + writer_close(&store, pairs->len); + return true; + + fail_open: + return false; +} + +bool rill_store_merge( + const char *file, + rill_ts_t 
ts, size_t quant, + struct rill_store **list, size_t len) +{ + assert(len > 1); + + size_t cap = 0; + struct { struct rill_kv *it, *end; } its[len]; + + for (size_t i = 0; i < len; ++i) { + cap += list[i]->head->pairs; + its[i].it = list[i]->data; + its[i].it = list[i]->data + list[i]->head->pairs; + } + + struct rill_store store = {0}; + if (!writer_open(&store, file, cap, ts, quant)) { + fail("unable to create '%s'", file); + goto fail_open; + } + + size_t pairs = 0; + struct rill_kv *current = store.data; + + while (true) { + size_t smallest = 0; + for (size_t i = 0; i < len; ++i) { + if (its[i].it != its[i].end) break; + smallest++; + } + if (smallest == len) break; + + for (size_t i = smallest + 1; i < len; ++i) { + if (its[i].it == its[i].end) continue; + if (rill_kv_cmp(its[i].it, its[smallest].it) < 0) + smallest = i; + } + + if (rill_kv_cmp(current, its[smallest].it) < 0) { + pairs++; + current++; + assert(pairs != cap); + + *current = *its[smallest].it; + } + + its[smallest].it++; + } + + writer_close(&store, pairs); + return true; + + fail_open: + return false; +} + + +// ----------------------------------------------------------------------------- +// scan +// ----------------------------------------------------------------------------- + +const char * rill_store_file(struct rill_store *store) +{ + return store->file; +} + +rill_ts_t rill_store_ts(struct rill_store *store) +{ + return store->head->ts; +} + +size_t rill_store_quant(struct rill_store *store) +{ + return store->head->quant; +} + +static inline void vma_will_need(struct rill_store *store) +{ + if (madvise(store->vma, store->vma_len, MADV_WILLNEED) == -1) + fail("unable to madvise '%s'", store->file); +} + +static inline void vma_dont_need(struct rill_store *store) +{ + if (madvise(store->vma, store->vma_len, MADV_DONTNEED) == -1) + fail("unable to madvise '%s'", store->file); +} + +bool rill_store_scan_key( + struct rill_store *store, + const rill_key_t *keys, size_t len, + struct 
rill_pairs *out) +{ + vma_will_need(store); + + for (size_t i = 0; i < store->head->pairs; ++i) { + struct rill_kv *kv = &store->data[i]; + + for (size_t j = 0; j < len; ++j) { + if (kv->key != keys[j]) continue; + if (!rill_pairs_push(out, kv->key, kv->val)) return false; + } + } + + vma_dont_need(store); + return true; +} + +bool rill_store_scan_val( + struct rill_store *store, + const rill_val_t *vals, size_t len, + struct rill_pairs *out) +{ + vma_will_need(store); + + for (size_t i = 0; i < store->head->pairs; ++i) { + struct rill_kv *kv = &store->data[i]; + + for (size_t j = 0; j < len; ++j) { + if (kv->val != vals[j]) continue; + if (!rill_pairs_push(out, kv->key, kv->val)) return false; + } + } + + vma_dont_need(store); + return true; +} + +void rill_store_print(struct rill_store *store) +{ + vma_will_need(store); + + const rill_key_t no_key = -1ULL; + rill_key_t key = no_key; + + for (size_t i = 0; i < store->head->pairs; ++i) { + struct rill_kv *kv = &store->data[i]; + + if (kv->key == key) fprintf(stderr, "%lu, ", kv->val); + else { + if (key != no_key) fprintf(stderr, "]\n"); + fprintf(stderr, "%p: [ %lu", (void *) kv->key, kv->val); + key = kv->key; + } + } + + fprintf(stderr, "]\n"); + + vma_dont_need(store); +} diff --git a/src/store.h b/src/store.h new file mode 100644 index 0000000..a8b25ff --- /dev/null +++ b/src/store.h @@ -0,0 +1,48 @@ +/* store.h + Rémi Attab (remi.attab@gmail.com), 30 Aug 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#pragma once + +#include "rill.h" + +#include + + +// ----------------------------------------------------------------------------- +// store +// ----------------------------------------------------------------------------- + +struct rill_pairs; +struct rill_store; + +struct rill_store *rill_store_open(const char *file); +void rill_store_close(struct rill_store *store); + +bool rill_store_write( + const char *file, + rill_ts_t ts, size_t quant, + struct rill_pairs *pairs); + +bool rill_store_merge( + 
const char *file, + rill_ts_t ts, size_t quant, + struct rill_store **list, size_t len); + +bool rill_store_rm(struct rill_store *store); + +const char * rill_store_file(struct rill_store *store); +rill_ts_t rill_store_ts(struct rill_store *store); +size_t rill_store_quant(struct rill_store *store); + +bool rill_store_scan_key( + struct rill_store *store, + const rill_key_t *keys, size_t len, + struct rill_pairs *out); +bool rill_store_scan_val( + struct rill_store *store, + const rill_val_t *vals, size_t len, + struct rill_pairs *out); + +void rill_store_print(struct rill_store *store); diff --git a/src/utils.h b/src/utils.h new file mode 100644 index 0000000..411f60d --- /dev/null +++ b/src/utils.h @@ -0,0 +1,51 @@ +/* utils.h + Rémi Attab (remi.attab@gmail.com), 04 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#pragma once + + +#include +#include +#include +#include + + +// ----------------------------------------------------------------------------- +// err +// ----------------------------------------------------------------------------- + + +#define fail(fmt, ...) \ + fprintf(stderr, "[fail] "fmt"\n", __VA_ARGS__) + +#define fail_errno(fmt, ...) 
\ + fprintf(stderr, "[fail] "fmt"(%d): %s\n", __VA_ARGS__, errno, strerror(errno)) + + +// ----------------------------------------------------------------------------- +// lock +// ----------------------------------------------------------------------------- + +typedef atomic_size_t lock_t; + +static inline void lock(atomic_size_t *l) +{ + bool ret = false; + uint64_t old; + + do { + old = atomic_load_explicit(l, memory_order_relaxed); + if (old) continue; + + ret = atomic_compare_exchange_weak_explicit(l, &old, 1, + memory_order_acquire, memory_order_relaxed); + } while (!ret); +} + + +static inline void unlock(atomic_size_t *l) +{ + atomic_store_explicit(l, 0, memory_order_release); +} From 0bb349518ac1fd83fcf0ac9a234656039bf16522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 4 Sep 2017 14:36:26 -0400 Subject: [PATCH 02/91] Add dirt simple bench --- .gitignore | 2 ++ compile.sh | 8 +++++--- src/bench.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/pairs.c | 5 +++-- src/rill.c | 44 +++++++++++++++++++++++++-------------- src/store.c | 59 +++++++++++++++++++++++++---------------------------- 6 files changed, 125 insertions(+), 51 deletions(-) create mode 100644 src/bench.c diff --git a/.gitignore b/.gitignore index 6d5206b..93f41c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +build + # Prerequisites *.d diff --git a/compile.sh b/compile.sh index 498f528..5f49719 100755 --- a/compile.sh +++ b/compile.sh @@ -1,7 +1,8 @@ #! 
/usr/bin/env bash -set -o errexit -o nounset -o pipefail -set -o xtrace +set -o errexit -o nounset -o pipefail -o xtrace + +: ${PREFIX:="."} declare -a SRC SRC=(pairs store rill) @@ -14,8 +15,9 @@ CFLAGS="$CFLAGS -fno-strict-aliasing" OBJ="" for src in "${SRC[@]}"; do - gcc -c -o "$src.o" "src/$src.c" $CFLAGS + gcc -c -o "$src.o" "${PREFIX}/src/$src.c" $CFLAGS OBJ="$OBJ $src.o" done ar rcs librill.a $OBJ +gcc -o bench "${PREFIX}/src/bench.c" librill.a $CFLAGS diff --git a/src/bench.c b/src/bench.c new file mode 100644 index 0000000..14e4d7b --- /dev/null +++ b/src/bench.c @@ -0,0 +1,58 @@ +/* bench.c + Rémi Attab (remi.attab@gmail.com), 04 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" + +#include +#include +#include + +#include +#include +#include + +void rm(const char *path) +{ + DIR *dir = opendir(path); + if (!dir) return; + + struct dirent stream, *entry; + while (true) { + if (readdir_r(dir, &stream, &entry) == -1) abort(); + else if (!entry) break; + else if (entry->d_type != DT_REG) continue; + + char file[NAME_MAX]; + snprintf(file, sizeof(file), "%s/%s", path, entry->d_name); + unlink(file); + } + + closedir(dir); + rmdir(path); +} + +int main(int argc, char **argv) +{ + (void) argc, (void) argv; + rm("db"); + + struct rill *db = rill_open("db"); + if (!db) return 1; + + enum { max = 1000 * 1000 * 1000 }; + + for (size_t i = 0; i < max; ++i) { + if (!rill_ingest(db, i / 1000, i, i)) return 1; + + if (i % (30 * 60 * 1000) == 0) { + if (!rill_rotate(db, i / 1000)) return 1; + } + } + + if (!rill_rotate(db, max + 60 * 60)) return 1; + rill_close(db); + + return 0; +} diff --git a/src/pairs.c b/src/pairs.c index bf6683b..f1e98f3 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -17,16 +17,17 @@ void rill_pairs_free(struct rill_pairs *pairs) { free(pairs->data); + free(pairs); } static bool resize(struct rill_pairs *pairs, size_t len) { if (len <= pairs->cap) return true; - size_t cap = pairs->cap ? 
pairs->cap : 1; + size_t cap = pairs->cap ? pairs->cap : 16; while (cap < len) cap *= 2; - void *ret = realloc(pairs->data, cap); + void *ret = realloc(pairs->data, cap * sizeof(*pairs->data)); if (!ret) return false; pairs->data = ret; diff --git a/src/rill.c b/src/rill.c index af59af5..2c04489 100644 --- a/src/rill.c +++ b/src/rill.c @@ -20,11 +20,11 @@ // config // ----------------------------------------------------------------------------- -enum { hours = 24, days = 30, months = 15}; +enum { hours = 24, days = 30, months = 13}; enum { quant_hour = 60 * 60, - quant_day = hours * quant_hour, + quant_day = hours * quant_hour, quant_month = days * quant_day, }; @@ -129,15 +129,18 @@ struct rill * rill_open(const char *dir) goto fail_dir; } - struct dirent stream, *result; + struct dirent it, *entry; while (true) { - if (readdir_r(dir_handle, &stream, &result) == -1) { + if (readdir_r(dir_handle, &it, &entry) == -1) { fail_errno("unable to read dir '%s'", dir); goto fail_readdir; } - else if (!result) break; + else if (!entry) break; + else if (entry->d_type != DT_REG) continue; - (void) load_store(db, result->d_name); + char file[NAME_MAX]; + snprintf(file, sizeof(file), "%s/%s", db->dir, entry->d_name); + (void) load_store(db, file); } closedir(dir_handle); @@ -164,11 +167,17 @@ struct rill * rill_open(const char *dir) void rill_close(struct rill *db) { - for (size_t i = 0; i < days; ++i) - rill_store_close(db->daily[i]); + for (size_t i = 0; i < hours; ++i) { + if (db->hourly[i]) rill_store_close(db->hourly[i]); + } - for (size_t i = 0; i < months; ++i) - rill_store_close(db->monthly[i]); + for (size_t i = 0; i < days; ++i) { + if (db->daily[i]) rill_store_close(db->daily[i]); + } + + for (size_t i = 0; i < months; ++i) { + if (db->monthly[i]) rill_store_close(db->monthly[i]); + } rill_pairs_free(db->acc); rill_pairs_free(db->dump); @@ -218,13 +227,17 @@ static bool rotate_monthly( struct rill_store **list, size_t len) { char file[NAME_MAX]; - snprintf(file, 
sizeof(file), "%s/%lu.rill", db->dir, ts / quant_month); + snprintf(file, sizeof(file), "%s/%06lu.rill", db->dir, ts / quant_month); - if (*store) (void) rill_store_rm(*store); + if (*store) { + (void) rill_store_rm(*store); + *store = NULL; + } if (!rill_store_merge(file, ts, quant_day, list, len)) return false; if (!(*store = rill_store_open(file))) return false; for (size_t i = 0; i < len; ++i) { + if (!list[i]) continue; (void) rill_store_rm(list[i]); list[i] = NULL; } @@ -239,7 +252,7 @@ static bool rotate_daily( struct rill_store **list, size_t len) { char file[NAME_MAX]; - snprintf(file, sizeof(file), "%s/%lu-%lu.rill", db->dir, + snprintf(file, sizeof(file), "%s/%06lu-%02lu.rill", db->dir, ts / quant_month, (ts / quant_day) % days); @@ -248,6 +261,7 @@ static bool rotate_daily( if (!(*store = rill_store_open(file))) return false; for (size_t i = 0; i < len; ++i) { + if (!list[i]) continue; (void) rill_store_rm(list[i]); list[i] = NULL; } @@ -258,7 +272,7 @@ static bool rotate_daily( static bool rotate_hourly(struct rill *db, struct rill_store **store, rill_ts_t ts) { char file[NAME_MAX]; - snprintf(file, sizeof(file), "%s/%lu-%lu-%lu.rill", db->dir, + snprintf(file, sizeof(file), "%s/%06lu-%02lu-%02lu.rill", db->dir, ts / quant_month, (ts / quant_day) % days, (ts / quant_hour) % hours); @@ -286,7 +300,7 @@ bool rill_rotate(struct rill *db, rill_ts_t now) { if (now / quant_month != db->ts / quant_month) { size_t quant = db->ts / quant_month; - if (!rotate_monthly(db, &db->monthly[quant % days], db->ts, db->daily, days)) { + if (!rotate_monthly(db, &db->monthly[quant % months], db->ts, db->daily, days)) { fail("unable to complete monthly rotation '%lu'", quant); return false; } diff --git a/src/store.c b/src/store.c index b7e3910..f14a61a 100644 --- a/src/store.c +++ b/src/store.c @@ -24,7 +24,7 @@ // utils // ----------------------------------------------------------------------------- -static const size_t page_len = 1UL << 10; +static const size_t page_len 
= 4096; static inline size_t to_vma_len(size_t len) { @@ -54,7 +54,7 @@ struct rill_store { int fd; const char *file; - + void *vma; size_t vma_len; @@ -101,9 +101,9 @@ struct rill_store *rill_store_open(const char *file) goto fail_open; } - store->vma = mmap(0, store->vma_len, PROT_READ, MAP_PRIVATE | MAP_HUGETLB, store->fd, 0); + store->vma = mmap(NULL, store->vma_len, PROT_READ, MAP_SHARED, store->fd, 0); if (store->vma == MAP_FAILED) { - fail_errno("unable to mmap '%s'", file); + fail_errno("[reader] unable to mmap '%s'", file); goto fail_mmap; } @@ -154,7 +154,7 @@ void rill_store_close(struct rill_store *store) bool rill_store_rm(struct rill_store *store) { - if (!unlink(store->file)) { + if (unlink(store->file) == -1) { fail_errno("unable to unlink '%s'", store->file); return false; } @@ -188,9 +188,7 @@ static bool writer_open( } store->vma_len = to_vma_len(len); - int vma_prot = PROT_WRITE | PROT_READ; - int vma_flags = MAP_PRIVATE | MAP_HUGETLB; - store->vma = mmap(0, store->vma_len, vma_prot, vma_flags, store->fd, 0); + store->vma = mmap(NULL, store->vma_len, PROT_WRITE | PROT_READ, MAP_SHARED, store->fd, 0); if (store->vma == MAP_FAILED) { fail_errno("unable to mmap '%s'", file); goto fail_mmap; @@ -261,17 +259,21 @@ bool rill_store_write( bool rill_store_merge( const char *file, rill_ts_t ts, size_t quant, - struct rill_store **list, size_t len) + struct rill_store **list, size_t list_len) { - assert(len > 1); + assert(list_len > 1); size_t cap = 0; - struct { struct rill_kv *it, *end; } its[len]; + struct { struct rill_kv *it, *end; } its[list_len]; + + size_t it_len = 0; + for (size_t i = 0; i < list_len; ++i) { + if (!list[i]) continue; - for (size_t i = 0; i < len; ++i) { cap += list[i]->head->pairs; - its[i].it = list[i]->data; - its[i].it = list[i]->data + list[i]->head->pairs; + its[it_len].it = list[i]->data; + its[it_len].end = list[i]->data + list[i]->head->pairs; + it_len++; } struct rill_store store = {0}; @@ -283,29 +285,24 @@ bool 
rill_store_merge( size_t pairs = 0; struct rill_kv *current = store.data; - while (true) { - size_t smallest = 0; - for (size_t i = 0; i < len; ++i) { - if (its[i].it != its[i].end) break; - smallest++; + while (it_len > 0) { + size_t target = 0; + for (size_t i = 1; i < it_len; ++i) { + if (rill_kv_cmp(its[i].it, its[target].it) < 0) + target = i; } - if (smallest == len) break; - for (size_t i = smallest + 1; i < len; ++i) { - if (its[i].it == its[i].end) continue; - if (rill_kv_cmp(its[i].it, its[smallest].it) < 0) - smallest = i; - } - - if (rill_kv_cmp(current, its[smallest].it) < 0) { + if (rill_kv_cmp(current, its[target].it) < 0) { pairs++; current++; - assert(pairs != cap); - - *current = *its[smallest].it; + *current = *its[target].it; } - its[smallest].it++; + its[target].it++; + if (its[target].it == its[target].end) { + memmove(its + target, its + target + 1, (it_len - target - 1) * sizeof(its[0])); + it_len--; + } } writer_close(&store, pairs); From e37e27ec62d734ba1ed9fd767f59e487238d29cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Thu, 7 Sep 2017 17:30:11 -0400 Subject: [PATCH 03/91] Add dirt simple query test --- compile.sh | 4 +++- src/dump.c | 46 +++++++++++++++++++++++++++++++++++++++++ src/{bench.c => load.c} | 0 src/pairs.c | 4 ++-- src/query.c | 42 +++++++++++++++++++++++++++++++++++++ src/rill.c | 26 ++++++++++++++++------- src/store.c | 15 +++++++++++--- src/store.h | 1 + 8 files changed, 125 insertions(+), 13 deletions(-) create mode 100644 src/dump.c rename src/{bench.c => load.c} (100%) create mode 100644 src/query.c diff --git a/compile.sh b/compile.sh index 5f49719..b2be13a 100755 --- a/compile.sh +++ b/compile.sh @@ -20,4 +20,6 @@ for src in "${SRC[@]}"; do done ar rcs librill.a $OBJ -gcc -o bench "${PREFIX}/src/bench.c" librill.a $CFLAGS +gcc -o rill_load "${PREFIX}/src/load.c" librill.a $CFLAGS +gcc -o rill_query "${PREFIX}/src/query.c" librill.a $CFLAGS +gcc -o rill_dump "${PREFIX}/src/dump.c" librill.a 
$CFLAGS diff --git a/src/dump.c b/src/dump.c new file mode 100644 index 0000000..8644dc6 --- /dev/null +++ b/src/dump.c @@ -0,0 +1,46 @@ +/* dump.c + Rémi Attab (remi.attab@gmail.com), 07 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" +#include "store.h" + +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + (void) argc, (void) argv; + + + const char *file = NULL; + bool header_only = false; + + switch (argc) { + case 3: + if (strcmp(argv[1], "-h") != 0) { + printf("unknown arg '%s'", argv[1]); + return 1; + } + header_only = true; + case 2: + file = argv[argc - 1]; + break; + default: + printf("you done goofed mate\n"); + return 1; + } + + struct rill_store *store = rill_store_open(file); + if (!store) return 1; + + rill_store_print_head(store); + if (!header_only) rill_store_print(store); + + rill_store_close(store); + + return 0; +} diff --git a/src/bench.c b/src/load.c similarity index 100% rename from src/bench.c rename to src/load.c diff --git a/src/pairs.c b/src/pairs.c index f1e98f3..5ddbeea 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -115,7 +115,7 @@ void rill_pairs_print(const struct rill_pairs *pairs) for (size_t i = 0; i < pairs->len; ++i) { struct rill_kv *kv = &pairs->data[i]; - if (kv->key == key) fprintf(stderr, "%lu, ", kv->val); + if (kv->key == key) fprintf(stderr, ", %lu", kv->val); else { if (key != no_key) fprintf(stderr, "]\n"); fprintf(stderr, "%p: [ %lu", (void *) kv->key, kv->val); @@ -123,5 +123,5 @@ void rill_pairs_print(const struct rill_pairs *pairs) } } - fprintf(stderr, "]\n"); + fprintf(stderr, " ]\n"); } diff --git a/src/query.c b/src/query.c new file mode 100644 index 0000000..4baef17 --- /dev/null +++ b/src/query.c @@ -0,0 +1,42 @@ +/* bench.c + Rémi Attab (remi.attab@gmail.com), 04 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" + +#include +#include +#include + +int main(int argc, char **argv) +{ + (void) argc, (void) argv; + struct 
rill *db = rill_open("db"); + if (!db) return 1; + + enum { n = 100 }; + + { + rill_key_t keys[100]; + for (size_t i = 0; i < 100; ++i) keys[i] = i; + + struct rill_pairs out = {0}; + rill_query_key(db, keys, n, &out); + + rill_pairs_print(&out); + } + + { + rill_val_t vals[100]; + for (size_t i = 0; i < 100; ++i) vals[i] = i; + + struct rill_pairs out = {0}; + rill_query_val(db, vals, n, &out); + + rill_pairs_print(&out); + } + + rill_close(db); + return 0; +} diff --git a/src/rill.c b/src/rill.c index 2c04489..ad8bdef 100644 --- a/src/rill.c +++ b/src/rill.c @@ -233,7 +233,7 @@ static bool rotate_monthly( (void) rill_store_rm(*store); *store = NULL; } - if (!rill_store_merge(file, ts, quant_day, list, len)) return false; + if (!rill_store_merge(file, ts, quant_month, list, len)) return false; if (!(*store = rill_store_open(file))) return false; for (size_t i = 0; i < len; ++i) { @@ -333,28 +333,40 @@ bool rill_rotate(struct rill *db, rill_ts_t now) void rill_query_key(struct rill *db, rill_key_t *keys, size_t len, struct rill_pairs *out) { - for (size_t i = 0; i < hours; ++i) + for (size_t i = 0; i < hours; ++i) { + if (!db->hourly[i]) continue; rill_store_scan_key(db->hourly[i], keys, len, out); + } - for (size_t i = 0; i < days; ++i) + for (size_t i = 0; i < days; ++i) { + if (!db->daily[i]) continue; rill_store_scan_key(db->daily[i], keys, len, out); + } - for (size_t i = 0; i < months; ++i) + for (size_t i = 0; i < months; ++i) { + if (!db->monthly[i]) continue; rill_store_scan_key(db->monthly[i], keys, len, out); + } rill_pairs_compact(out); } void rill_query_val(struct rill *db, rill_val_t *vals, size_t len, struct rill_pairs *out) { - for (size_t i = 0; i < hours; ++i) + for (size_t i = 0; i < hours; ++i) { + if (!db->hourly[i]) continue; rill_store_scan_val(db->hourly[i], vals, len, out); + } - for (size_t i = 0; i < days; ++i) + for (size_t i = 0; i < days; ++i) { + if (!db->daily[i]) continue; rill_store_scan_val(db->daily[i], vals, len, out); + } - for 
(size_t i = 0; i < months; ++i) + for (size_t i = 0; i < months; ++i) { + if (!db->monthly[i]) continue; rill_store_scan_val(db->monthly[i], vals, len, out); + } rill_pairs_compact(out); } diff --git a/src/store.c b/src/store.c index f14a61a..fd1244c 100644 --- a/src/store.c +++ b/src/store.c @@ -174,7 +174,7 @@ static bool writer_open( rill_ts_t ts, size_t quant) { store->file = file; - + store->fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0640); if (store->fd == -1) { fail_errno("unable to open '%s'", file); @@ -384,6 +384,15 @@ bool rill_store_scan_val( return true; } +void rill_store_print_head(struct rill_store *store) +{ + fprintf(stderr, "magic: 0x%x\n", store->head->magic); + fprintf(stderr, "version: %u\n", store->head->version); + fprintf(stderr, "pairs: %lu\n", store->head->pairs); + fprintf(stderr, "ts: %lu\n", store->head->ts); + fprintf(stderr, "quant: %lu\n", store->head->quant); +} + void rill_store_print(struct rill_store *store) { vma_will_need(store); @@ -394,7 +403,7 @@ void rill_store_print(struct rill_store *store) for (size_t i = 0; i < store->head->pairs; ++i) { struct rill_kv *kv = &store->data[i]; - if (kv->key == key) fprintf(stderr, "%lu, ", kv->val); + if (kv->key == key) fprintf(stderr, ", %lu", kv->val); else { if (key != no_key) fprintf(stderr, "]\n"); fprintf(stderr, "%p: [ %lu", (void *) kv->key, kv->val); @@ -402,7 +411,7 @@ void rill_store_print(struct rill_store *store) } } - fprintf(stderr, "]\n"); + fprintf(stderr, " ]\n"); vma_dont_need(store); } diff --git a/src/store.h b/src/store.h index a8b25ff..bc73f25 100644 --- a/src/store.h +++ b/src/store.h @@ -45,4 +45,5 @@ bool rill_store_scan_val( const rill_val_t *vals, size_t len, struct rill_pairs *out); +void rill_store_print_head(struct rill_store *store); void rill_store_print(struct rill_store *store); From b0b6645089fb92d9f87d7372b78bc0e5348925f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 9 Sep 2017 06:24:49 -0400 Subject: [PATCH 04/91] Move to 
single public header --- src/dump.c | 1 - src/rill.c | 1 - src/rill.h | 39 +++++++++++++++++++++++++++++++++++++++ src/store.c | 1 - src/store.h | 49 ------------------------------------------------- 5 files changed, 39 insertions(+), 52 deletions(-) delete mode 100644 src/store.h diff --git a/src/dump.c b/src/dump.c index 8644dc6..9257262 100644 --- a/src/dump.c +++ b/src/dump.c @@ -4,7 +4,6 @@ */ #include "rill.h" -#include "store.h" #include #include diff --git a/src/rill.c b/src/rill.c index ad8bdef..73f0a30 100644 --- a/src/rill.c +++ b/src/rill.c @@ -4,7 +4,6 @@ */ #include "rill.h" -#include "store.h" #include "utils.h" #include diff --git a/src/rill.h b/src/rill.h index 94a0069..fa656de 100644 --- a/src/rill.h +++ b/src/rill.h @@ -70,6 +70,45 @@ bool rill_pairs_scan_val( void rill_pairs_print(const struct rill_pairs *pairs); + +// ----------------------------------------------------------------------------- +// store +// ----------------------------------------------------------------------------- + +struct rill_store; + +struct rill_store *rill_store_open(const char *file); +void rill_store_close(struct rill_store *store); + +bool rill_store_write( + const char *file, + rill_ts_t ts, size_t quant, + struct rill_pairs *pairs); + +bool rill_store_merge( + const char *file, + rill_ts_t ts, size_t quant, + struct rill_store **list, size_t len); + +bool rill_store_rm(struct rill_store *store); + +const char * rill_store_file(struct rill_store *store); +rill_ts_t rill_store_ts(struct rill_store *store); +size_t rill_store_quant(struct rill_store *store); + +bool rill_store_scan_key( + struct rill_store *store, + const rill_key_t *keys, size_t len, + struct rill_pairs *out); +bool rill_store_scan_val( + struct rill_store *store, + const rill_val_t *vals, size_t len, + struct rill_pairs *out); + +void rill_store_print_head(struct rill_store *store); +void rill_store_print(struct rill_store *store); + + // 
----------------------------------------------------------------------------- // rill // ----------------------------------------------------------------------------- diff --git a/src/store.c b/src/store.c index fd1244c..a862205 100644 --- a/src/store.c +++ b/src/store.c @@ -3,7 +3,6 @@ FreeBSD-style copyright and disclaimer apply */ -#include "store.h" #include "rill.h" #include "utils.h" diff --git a/src/store.h b/src/store.h deleted file mode 100644 index bc73f25..0000000 --- a/src/store.h +++ /dev/null @@ -1,49 +0,0 @@ -/* store.h - Rémi Attab (remi.attab@gmail.com), 30 Aug 2017 - FreeBSD-style copyright and disclaimer apply -*/ - -#pragma once - -#include "rill.h" - -#include - - -// ----------------------------------------------------------------------------- -// store -// ----------------------------------------------------------------------------- - -struct rill_pairs; -struct rill_store; - -struct rill_store *rill_store_open(const char *file); -void rill_store_close(struct rill_store *store); - -bool rill_store_write( - const char *file, - rill_ts_t ts, size_t quant, - struct rill_pairs *pairs); - -bool rill_store_merge( - const char *file, - rill_ts_t ts, size_t quant, - struct rill_store **list, size_t len); - -bool rill_store_rm(struct rill_store *store); - -const char * rill_store_file(struct rill_store *store); -rill_ts_t rill_store_ts(struct rill_store *store); -size_t rill_store_quant(struct rill_store *store); - -bool rill_store_scan_key( - struct rill_store *store, - const rill_key_t *keys, size_t len, - struct rill_pairs *out); -bool rill_store_scan_val( - struct rill_store *store, - const rill_val_t *vals, size_t len, - struct rill_pairs *out); - -void rill_store_print_head(struct rill_store *store); -void rill_store_print(struct rill_store *store); From 06e6f6412bd7ac6a55563339312baaefd4d280c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 9 Sep 2017 06:27:57 -0400 Subject: [PATCH 05/91] Switch NAME_MAX to PATH_MAX --- 
src/load.c | 2 +- src/rill.c | 10 +++++----- src/store.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/load.c b/src/load.c index 14e4d7b..1965655 100644 --- a/src/load.c +++ b/src/load.c @@ -24,7 +24,7 @@ void rm(const char *path) else if (!entry) break; else if (entry->d_type != DT_REG) continue; - char file[NAME_MAX]; + char file[PATH_MAX]; snprintf(file, sizeof(file), "%s/%s", path, entry->d_name); unlink(file); } diff --git a/src/rill.c b/src/rill.c index 73f0a30..8b7c977 100644 --- a/src/rill.c +++ b/src/rill.c @@ -89,7 +89,7 @@ struct rill * rill_open(const char *dir) goto fail_alloc_struct; } - db->dir = strndup(dir, NAME_MAX); + db->dir = strndup(dir, PATH_MAX); if (!db->dir) { fail("unable to allocate memory for '%s'", dir); goto fail_alloc_dir; @@ -137,7 +137,7 @@ struct rill * rill_open(const char *dir) else if (!entry) break; else if (entry->d_type != DT_REG) continue; - char file[NAME_MAX]; + char file[PATH_MAX]; snprintf(file, sizeof(file), "%s/%s", db->dir, entry->d_name); (void) load_store(db, file); } @@ -225,7 +225,7 @@ static bool rotate_monthly( rill_ts_t ts, struct rill_store **list, size_t len) { - char file[NAME_MAX]; + char file[PATH_MAX]; snprintf(file, sizeof(file), "%s/%06lu.rill", db->dir, ts / quant_month); if (*store) { @@ -250,7 +250,7 @@ static bool rotate_daily( rill_ts_t ts, struct rill_store **list, size_t len) { - char file[NAME_MAX]; + char file[PATH_MAX]; snprintf(file, sizeof(file), "%s/%06lu-%02lu.rill", db->dir, ts / quant_month, (ts / quant_day) % days); @@ -270,7 +270,7 @@ static bool rotate_daily( static bool rotate_hourly(struct rill *db, struct rill_store **store, rill_ts_t ts) { - char file[NAME_MAX]; + char file[PATH_MAX]; snprintf(file, sizeof(file), "%s/%06lu-%02lu-%02lu.rill", db->dir, ts / quant_month, (ts / quant_day) % days, diff --git a/src/store.c b/src/store.c index a862205..a23a984 100644 --- a/src/store.c +++ b/src/store.c @@ -74,7 +74,7 @@ struct rill_store *rill_store_open(const 
char *file) goto fail_alloc_struct; } - store->file = strndup(file, NAME_MAX); + store->file = strndup(file, PATH_MAX); if (!store->file) { fail("unable to allocate memory for '%s'", file); goto fail_alloc_file; From cdc1b3a6177d9c7aa7c0c8e62b78024c3f8f2815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 11 Sep 2017 07:10:05 -0400 Subject: [PATCH 06/91] Improve load generator --- compile.sh | 2 +- src/load.c | 32 +++++++++++++++++++++++++------ src/rng.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/rng.h | 28 +++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 7 deletions(-) create mode 100644 src/rng.c create mode 100644 src/rng.h diff --git a/compile.sh b/compile.sh index b2be13a..972b6d0 100755 --- a/compile.sh +++ b/compile.sh @@ -5,7 +5,7 @@ set -o errexit -o nounset -o pipefail -o xtrace : ${PREFIX:="."} declare -a SRC -SRC=(pairs store rill) +SRC=(rng pairs store rill) CFLAGS="-g -O3 -march=native -pipe -std=gnu11 -D_GNU_SOURCE" CFLAGS="$CFLAGS -Werror -Wall -Wextra" diff --git a/src/load.c b/src/load.c index 1965655..b6cc301 100644 --- a/src/load.c +++ b/src/load.c @@ -4,6 +4,7 @@ */ #include "rill.h" +#include "rng.h" #include #include @@ -33,6 +34,12 @@ void rm(const char *path) rmdir(path); } +uint64_t rng_gen_val(struct rng *rng, uint64_t min, uint64_t range) +{ + uint64_t max = rng_gen_range(rng, 0, range) + 1; + return rng_gen_range(rng, min, min + max) + 1; +} + int main(int argc, char **argv) { (void) argc, (void) argv; @@ -41,17 +48,30 @@ int main(int argc, char **argv) struct rill *db = rill_open("db"); if (!db) return 1; - enum { max = 1000 * 1000 * 1000 }; + enum { + keys_per_sec = 10 * 1000, + seconds = 3 * 31 * 24 * 60 * 60, - for (size_t i = 0; i < max; ++i) { - if (!rill_ingest(db, i / 1000, i, i)) return 1; + keys_range = 1 * 1000 * 1000 * 1000, + vals_range = 1 * 1000, + vals_per_key = 4, + }; - if (i % (30 * 60 * 1000) == 0) { - if (!rill_rotate(db, i / 1000)) return 1; + struct rng 
rng = rng_make(0); + for (size_t ts = 0; ts < seconds; ++ts) { + for (size_t i = 0; i < keys_per_sec; ++i) { + uint64_t key = rng_gen_val(&rng, ts, keys_range); + + for (size_t j = 0; j < vals_per_key; ++j) { + uint64_t val = rng_gen_val(&rng, ts, vals_range); + if (!rill_ingest(db, ts, key, val)) return 1; + } } + + if (!rill_rotate(db, ts)) return 1; } - if (!rill_rotate(db, max + 60 * 60)) return 1; + if (!rill_rotate(db, seconds + 60 * 60)) return 1; rill_close(db); return 0; diff --git a/src/rng.c b/src/rng.c new file mode 100644 index 0000000..03ede5d --- /dev/null +++ b/src/rng.c @@ -0,0 +1,56 @@ +/* rng.c + Rémi Attab (remi.attab@gmail.com), 25 Feb 2016 + FreeBSD-style copyright and disclaimer apply + + Xorshift random number generator for testing and statsd sampling + + See George Marsaglia (2003). Xorshift RNGs. DOI: 10.18637/jss.v008.i14 + http://www.jstatsoft.org/article/view/v008i14 + (section 4, function xor128) + + Current implementation is the xorshift64* variant which has better + statistical properties. +*/ + +#include "rng.h" + +#include + + +// ----------------------------------------------------------------------------- +// init +// ----------------------------------------------------------------------------- + +struct rng rng_make(uint64_t seed) +{ + // We xor the seed with a randomly chosen number to avoid ending up with a 0 + // state which would be bad. 
+ struct rng rng = { .x = seed ^ UINT64_C(0xedef335f00e170b3) }; + assert(rng.x); + return rng; +} + + + +// ----------------------------------------------------------------------------- +// gen +// ----------------------------------------------------------------------------- + +uint64_t rng_gen(struct rng *rng) +{ + rng->x ^= rng->x >> 12; + rng->x ^= rng->x << 25; + rng->x ^= rng->x >> 27; + return rng->x * UINT64_C(2685821657736338717); +} + +uint64_t rng_gen_range(struct rng *rng, uint64_t min, uint64_t max) +{ + assert(max - min != 0); + return rng_gen(rng) % (max - min) + min; +} + +bool rng_gen_prob(struct rng *rng, double prob) +{ + return rng_gen(rng) <= (uint64_t) (prob * rng_max()); +} diff --git a/src/rng.h b/src/rng.h new file mode 100644 index 0000000..a2364ac --- /dev/null +++ b/src/rng.h @@ -0,0 +1,28 @@ +/* rng.h + Rémi Attab (remi.attab@gmail.com), 25 Feb 2016 + FreeBSD-style copyright and disclaimer apply +*/ + +#pragma once + +#include +#include + +// ----------------------------------------------------------------------------- +// rng +// ----------------------------------------------------------------------------- + +struct rng { uint64_t x; }; +struct rng rng_make(uint64_t seed); + +inline uint64_t rng_max() { return (uint64_t) -1UL; } + + +// ----------------------------------------------------------------------------- +// gen +// ----------------------------------------------------------------------------- + +uint64_t rng_gen(struct rng *rng); +uint64_t rng_gen_range(struct rng *rng, uint64_t min, uint64_t max); + +bool rng_gen_prob(struct rng *rng, double prob); From 737c99bfd61b2499d2d9a6c60005e7f5a0f05a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 9 Sep 2017 08:13:26 -0400 Subject: [PATCH 07/91] Compress store files --- compile.sh | 7 +- src/coder.c | 299 ++++++++++++++++++++++++++++++++++++++++++++++ src/htable.c | 142 ++++++++++++++++++++++ src/htable.h | 40 +++++++ src/load.c | 10 +- src/pairs.c | 1 - 
src/rill.c | 35 ++++-- src/rill.h | 11 +- src/store.c | 260 +++++++++++++++++++++++++++++----------- src/utils.h | 22 +++- test/coder_test.c | 224 ++++++++++++++++++++++++++++++++++ test/test.h | 40 +++++++ 12 files changed, 991 insertions(+), 100 deletions(-) create mode 100644 src/coder.c create mode 100644 src/htable.c create mode 100644 src/htable.h create mode 100644 test/coder_test.c create mode 100644 test/test.h diff --git a/compile.sh b/compile.sh index 972b6d0..8efe9ed 100755 --- a/compile.sh +++ b/compile.sh @@ -5,9 +5,11 @@ set -o errexit -o nounset -o pipefail -o xtrace : ${PREFIX:="."} declare -a SRC -SRC=(rng pairs store rill) +SRC=(htable rng pairs store rill) CFLAGS="-g -O3 -march=native -pipe -std=gnu11 -D_GNU_SOURCE" +CFLAGS="$CFLAGS -I${PREFIX}/src" + CFLAGS="$CFLAGS -Werror -Wall -Wextra" CFLAGS="$CFLAGS -Wundef -Wcast-align -Wwrite-strings -Wunreachable-code -Wformat=2" CFLAGS="$CFLAGS -Wswitch-enum -Wswitch-default -Winit-self -Wno-strict-aliasing" @@ -23,3 +25,6 @@ ar rcs librill.a $OBJ gcc -o rill_load "${PREFIX}/src/load.c" librill.a $CFLAGS gcc -o rill_query "${PREFIX}/src/query.c" librill.a $CFLAGS gcc -o rill_dump "${PREFIX}/src/dump.c" librill.a $CFLAGS + +gcc -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS +./test_coder diff --git a/src/coder.c b/src/coder.c new file mode 100644 index 0000000..e1f5bc5 --- /dev/null +++ b/src/coder.c @@ -0,0 +1,299 @@ +/* coder.c + Rémi Attab (remi.attab@gmail.com), 10 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +// ----------------------------------------------------------------------------- +// leb128 +// ----------------------------------------------------------------------------- + +static inline uint8_t *leb128_encode(uint8_t *it, uint64_t val) +{ + static const size_t shift = 7; + static const uint64_t more_mask = 1UL << shift; + static const uint64_t body_mask = (1UL << shift) - 1; + + do { + *it = val & body_mask; + *it |= (val >>= shift) ? 
more_mask : 0; + it++; + } while (val); + + return it; +} + +static inline bool leb128_decode(uint8_t **it, uint8_t *end, uint64_t *val) +{ + static const size_t shift = 7; + static const uint64_t more_mask = 1UL << shift; + static const uint64_t body_mask = (1UL << shift) - 1; + + if (*it == end) return it; + + uint8_t data; + size_t pos = 0; + *val = 0; + + do { + data = **it; (*it)++; + *val |= (data & body_mask) << pos; + pos += shift; + } while ((data & more_mask) && *it != end); + + return !(data & more_mask); +} + + +// ----------------------------------------------------------------------------- +// vals +// ----------------------------------------------------------------------------- + +struct rill_packed vals +{ + uint64_t len; + uint64_t data[]; +}; + +typedef struct htable vals_rev_t; + +static rill_val_t vals_itov(struct vals *vals, size_t index) +{ + assert(index <= vals->len); + return vals->data[index - 1]; +} + +static size_t vals_vtoi(vals_rev_t *rev, rill_val_t val) +{ + if (!val) return 0; // \todo giant hack for coder_finish + + struct htable_ret ret = htable_get(rev, val); + assert(ret.ok); + return ret.value; +} + +static void vals_rev_make(struct vals *vals, vals_rev_t *rev) +{ + htable_reset(rev); + htable_reserve(rev, vals->len); + + for (size_t index = 1; index <= vals->len; ++index) { + struct htable_ret ret = htable_put(rev, vals->data[index-1], index); + assert(ret.ok); + } +} + +static int val_cmp(const void *l, const void *r) +{ + rill_val_t lhs = *((rill_val_t *) l); + rill_val_t rhs = *((rill_val_t *) r); + + if (lhs < rhs) return -1; + if (lhs > rhs) return 1; + return 0; +} + +static void vals_compact(struct vals *vals) +{ + assert(vals->len); + qsort(vals->data, vals->len, sizeof(vals->data[0]), &val_cmp); + + size_t j = 0; + for (size_t i = 1; i < vals->len; ++i) { + if (vals->data[j] == vals->data[i]) continue; + vals->data[++j] = vals->data[i]; + } + + assert(j + 1 <= vals->len); + vals->len = j + 1; +} + +static struct vals 
*vals_from_pairs(struct rill_pairs *pairs) +{ + struct vals *vals = + calloc(1, sizeof(*vals) + sizeof(vals->data[0]) * pairs->len); + if (!vals) return NULL; + + vals->len = pairs->len; + for (size_t i = 0; i < pairs->len; ++i) + vals->data[i] = pairs->data[i].val; + + vals_compact(vals); + return vals; +} + +static struct vals *vals_merge(struct vals *vals, struct vals *merge) +{ + if (!vals) { + size_t len = sizeof(*vals) + sizeof(vals->data[0]) * merge->len; + vals = calloc(1, len); + memcpy(vals, merge, len); + return vals; + } + + vals = realloc(vals, + sizeof(*vals) + sizeof(vals->data[0]) * (vals->len + merge->len)); + if (!vals) return NULL; + + memcpy( vals->data + vals->len, + merge->data, + sizeof(merge->data[0]) * merge->len); + vals->len += merge->len; + + vals_compact(vals); + return vals; +} + + +// ----------------------------------------------------------------------------- +// coder +// ----------------------------------------------------------------------------- + +static const size_t coder_max_val_len = sizeof(rill_val_t) + 2 + 1; + +struct coder +{ + struct vals *vals; + vals_rev_t rev; + + rill_key_t key; + uint8_t *it; + uint8_t *end; + + size_t keys; + size_t pairs; +}; + +// ----------------------------------------------------------------------------- +// encode +// ----------------------------------------------------------------------------- + +static inline bool coder_write_sep(struct coder *coder) +{ + if (rill_unlikely(coder->it + 1 > coder->end)) return false; + + *coder->it = 0; + coder->it++; + + return true; +} + +static inline bool coder_write_key(struct coder *coder, rill_key_t key) +{ + if (rill_unlikely(coder->it + sizeof(key) > coder->end)) return false; + + memcpy(coder->it, &key, sizeof(key)); + coder->it += sizeof(key); + + return true; +} + +static inline bool coder_write_val(struct coder *coder, rill_val_t val) +{ + val = vals_vtoi(&coder->rev, val); + + uint8_t buffer[coder_max_val_len]; + size_t len = 
leb128_encode(buffer, val) - buffer; + + if (rill_unlikely(coder->it + len > coder->end)) return false; + + memcpy(coder->it, buffer, len); + coder->it += len; + + return true; +} + +static bool coder_encode(struct coder *coder, const struct rill_kv *kv) +{ + if (coder->key != kv->key) { + if (rill_likely(coder->key)) { + if (!coder_write_sep(coder)) return false; + } + + coder->key = kv->key; + if (!coder_write_key(coder, kv->key)) return false; + coder->keys++; + } + + if (!coder_write_val(coder, kv->val)) return false; + + coder->pairs++; + return true; +} + +static bool coder_finish(struct coder *coder) +{ + if (!coder_write_sep(coder)) return false; + if (!coder_write_key(coder, 0)) return false; + + htable_reset(&coder->rev); + return true; +} + +static struct coder make_encoder(struct vals *vals, uint8_t *it, uint8_t *end) +{ + struct coder coder = { + .vals = vals, + .it = it, + .end = end, + }; + + vals_rev_make(coder.vals, &coder.rev); + return coder; +} + + +// ----------------------------------------------------------------------------- +// decode +// ----------------------------------------------------------------------------- + +static inline bool coder_read_key(struct coder *coder, rill_key_t *key) +{ + if (rill_unlikely(coder->it + sizeof(*key) > coder->end)) { + fail("unable to decode key: %p + %lu = %p > %p'\n", + (void *) coder->it, sizeof(*key), + (void *) (coder->it + sizeof(*key)), + (void *) coder->end); + return false; + } + + memcpy(key, coder->it, sizeof(*key)); + coder->it += sizeof(*key); + + return true; +} + +static inline bool coder_read_val(struct coder *coder, rill_val_t *val) +{ + if (!leb128_decode(&coder->it, coder->end, val)) { + fail("unable to decode value at '%p-%p'\n", + (void *) coder->it, (void *) coder->end); + return false; + } + + if (*val) *val = vals_itov(coder->vals, *val); + return true; +} + +static bool coder_decode(struct coder *coder, struct rill_kv *kv) +{ + if (rill_likely(coder->key)) { + kv->key = 
coder->key; + if (!coder_read_val(coder, &kv->val)) return false; + if (kv->val) return true; + } + + if (!coder_read_key(coder, &coder->key)) return false; + kv->key = coder->key; + if (!kv->key) return true; // eof + + return coder_read_val(coder, &kv->val); +} + +static struct coder make_decoder(struct vals *vals, uint8_t *it, uint8_t *end) +{ + return (struct coder) { + .vals = vals, + .it = it, + .end = end, + }; +} diff --git a/src/htable.c b/src/htable.c new file mode 100644 index 0000000..961167b --- /dev/null +++ b/src/htable.c @@ -0,0 +1,142 @@ +/* htable.c + Rémi Attab (remi.attab@gmail.com), 10 Mar 2016 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "htable.h" + +#include +#include +#include + +// ----------------------------------------------------------------------------- +// config +// ----------------------------------------------------------------------------- + +enum { probe_window = 8 }; + + +// ----------------------------------------------------------------------------- +// hash +// ----------------------------------------------------------------------------- + +// FNV-1a hash implementation: http://isthe.com/chongo/tech/comp/fnv/ +inline uint64_t hash_key(uint64_t key) +{ + const uint8_t *data = (uint8_t *) &key; + + uint64_t hash = 0xcbf29ce484222325; + for (size_t i = 0; i < sizeof(key); ++i) + hash = (hash ^ data[i]) * 0x100000001b3; + + assert(hash); // \todo Can't be 0 + return hash; +} + + +// ----------------------------------------------------------------------------- +// htable +// ----------------------------------------------------------------------------- + +void htable_reset(struct htable *ht) +{ + free(ht->table); + *ht = (struct htable) {0}; +} + +static bool table_put( + struct htable_bucket *table, size_t cap, + uint64_t key, uint64_t value) +{ + assert(key); + uint64_t hash = hash_key(key); + + for (size_t i = 0; i < probe_window; ++i) { + struct htable_bucket *bucket = &table[(hash + i) % cap]; + if 
(bucket->key) continue; + + bucket->key = key; + bucket->value = value; + return true; + } + + return false; +} + +static void htable_resize(struct htable *ht, size_t cap) +{ + if (cap <= ht->cap) return; + + size_t new_cap = ht->cap ? ht->cap : 1; + while (new_cap < cap) new_cap *= 2; + + struct htable_bucket *new_table = calloc(new_cap, sizeof(*new_table)); + for (size_t i = 0; i < ht->cap; ++i) { + struct htable_bucket *bucket = &ht->table[i]; + if (!bucket->key) continue; + + if (!table_put(new_table, new_cap, bucket->key, bucket->value)) { + free(new_table); + htable_resize(ht, new_cap * 2); + return; + } + } + + free(ht->table); + ht->cap = new_cap; + ht->table = new_table; +} + +void htable_reserve(struct htable *ht, size_t items) +{ + htable_resize(ht, items * 4); +} + + +// ----------------------------------------------------------------------------- +// ops +// ----------------------------------------------------------------------------- + +struct htable_ret htable_get(struct htable *ht, uint64_t key) +{ + assert(key); + + uint64_t hash = hash_key(key); + htable_resize(ht, probe_window); + + for (size_t i = 0; i < probe_window; ++i) { + struct htable_bucket *bucket = &ht->table[(hash + i) % ht->cap]; + + if (!bucket->key) continue; + if (bucket->key != key) continue; + + return (struct htable_ret) { .ok = true, .value = bucket->value }; + } + + return (struct htable_ret) { .ok = false }; +} + +struct htable_ret htable_put(struct htable *ht, uint64_t key, uint64_t value) +{ + assert(key); + + uint64_t hash = hash_key(key); + htable_resize(ht, probe_window); + + for (size_t i = 0; i < probe_window; ++i) { + struct htable_bucket *bucket = &ht->table[(hash + i) % ht->cap]; + + if (bucket->key) { + if (bucket->key != key) continue; + return (struct htable_ret) { .ok = false, .value = bucket->value }; + } + + ht->len++; + bucket->key = key; + bucket->value = value; + return (struct htable_ret) { .ok = true }; + } + + htable_resize(ht, ht->cap * 2); + return 
htable_put(ht, key, value); +} diff --git a/src/htable.h b/src/htable.h new file mode 100644 index 0000000..02ecadb --- /dev/null +++ b/src/htable.h @@ -0,0 +1,40 @@ +/* htable.h + Rémi Attab (remi.attab@gmail.com), 10 Mar 2016 + FreeBSD-style copyright and disclaimer apply +*/ + +#pragma once + +#include +#include +#include + + +// ----------------------------------------------------------------------------- +// struct +// ----------------------------------------------------------------------------- + +struct htable_bucket +{ + uint64_t key; + uint64_t value; +}; + +struct htable +{ + size_t len; + size_t cap; + struct htable_bucket *table; +}; + +struct htable_ret +{ + bool ok; + uint64_t value; +}; + + +void htable_reset(struct htable *); +void htable_reserve(struct htable *, size_t items); +struct htable_ret htable_get(struct htable *, uint64_t key); +struct htable_ret htable_put(struct htable *, uint64_t key, uint64_t value); diff --git a/src/load.c b/src/load.c index b6cc301..d15f644 100644 --- a/src/load.c +++ b/src/load.c @@ -49,22 +49,22 @@ int main(int argc, char **argv) if (!db) return 1; enum { - keys_per_sec = 10 * 1000, + keys_per_sec = 1 * 1000, seconds = 3 * 31 * 24 * 60 * 60, keys_range = 1 * 1000 * 1000 * 1000, - vals_range = 1 * 1000, + vals_range = 10 * 1000, vals_per_key = 4, }; struct rng rng = rng_make(0); for (size_t ts = 0; ts < seconds; ++ts) { for (size_t i = 0; i < keys_per_sec; ++i) { - uint64_t key = rng_gen_val(&rng, ts, keys_range); + uint64_t key = rng_gen_val(&rng, 0, keys_range); for (size_t j = 0; j < vals_per_key; ++j) { - uint64_t val = rng_gen_val(&rng, ts, vals_range); - if (!rill_ingest(db, ts, key, val)) return 1; + uint64_t val = rng_gen_val(&rng, 0, vals_range); + if (!rill_ingest(db, key, val)) return 1; } } diff --git a/src/pairs.c b/src/pairs.c index 5ddbeea..5470df4 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -17,7 +17,6 @@ void rill_pairs_free(struct rill_pairs *pairs) { free(pairs->data); - free(pairs); } static 
bool resize(struct rill_pairs *pairs, size_t len) diff --git a/src/rill.c b/src/rill.c index 8b7c977..4fe4069 100644 --- a/src/rill.c +++ b/src/rill.c @@ -192,9 +192,17 @@ void rill_close(struct rill *db) // ingest // ----------------------------------------------------------------------------- -bool rill_ingest(struct rill *db, rill_ts_t now, rill_key_t key, rill_val_t val) +bool rill_ingest(struct rill *db, rill_key_t key, rill_val_t val) { - (void) now; + if (!key) { + fail("invalid nil key '%lu'", key); + return false; + } + + if (!val) { + fail("invalid nil value '%lu'", val); + return false; + } bool ret; { @@ -287,20 +295,21 @@ static bool rotate_hourly(struct rill *db, struct rill_store **store, rill_ts_t } assert(!*store); - if (!rill_store_write(file, ts, quant_hour, db->dump)) return false; - if (!(*store = rill_store_open(file))) return false; + if (db->dump->len) { + if (!rill_store_write(file, ts, quant_hour, db->dump)) return false; + if (!(*store = rill_store_open(file))) return false; + } rill_pairs_reset(db->dump, db->dump->cap); - return true; } bool rill_rotate(struct rill *db, rill_ts_t now) { - if (now / quant_month != db->ts / quant_month) { - size_t quant = db->ts / quant_month; - if (!rotate_monthly(db, &db->monthly[quant % months], db->ts, db->daily, days)) { - fail("unable to complete monthly rotation '%lu'", quant); + if (now / quant_hour != db->ts / quant_hour) { + size_t quant = db->ts / quant_hour; + if (!rotate_hourly(db, &db->hourly[(now / quant_hour) % hours], db->ts)) { + fail("unable to complete hourly rotation '%lu'", quant); return false; } } @@ -313,10 +322,10 @@ bool rill_rotate(struct rill *db, rill_ts_t now) } } - if (now / quant_hour != db->ts / quant_hour) { - size_t quant = db->ts / quant_hour; - if (!rotate_hourly(db, &db->hourly[(now / quant_hour) % hours], db->ts)) { - fail("unable to complete hourly rotation '%lu'", quant); + if (now / quant_month != db->ts / quant_month) { + size_t quant = db->ts / quant_month; + if 
(!rotate_monthly(db, &db->monthly[quant % months], db->ts, db->daily, days)) { + fail("unable to complete monthly rotation '%lu'", quant); return false; } } diff --git a/src/rill.h b/src/rill.h index fa656de..83b02a4 100644 --- a/src/rill.h +++ b/src/rill.h @@ -22,14 +22,17 @@ typedef uint64_t rill_val_t; // kv // ----------------------------------------------------------------------------- -#define rill_packed __attribute__((__packed__)) - -struct rill_packed rill_kv +struct rill_kv { rill_key_t key; rill_val_t val; }; +static inline bool rill_kv_nil(const struct rill_kv *kv) +{ + return !kv->key && !kv->val; +} + static inline int rill_kv_cmp(const struct rill_kv *lhs, const struct rill_kv *rhs) { if (lhs->key < rhs->key) return -1; @@ -118,7 +121,7 @@ struct rill; struct rill * rill_open(const char *dir); void rill_close(struct rill *db); -bool rill_ingest(struct rill *db, rill_ts_t now, rill_key_t key, rill_val_t val); +bool rill_ingest(struct rill *db, rill_key_t key, rill_val_t val); bool rill_rotate(struct rill *db, rill_ts_t now); void rill_query_key(struct rill *db, rill_key_t *keys, size_t len, struct rill_pairs *out); diff --git a/src/store.c b/src/store.c index a23a984..559bcd7 100644 --- a/src/store.c +++ b/src/store.c @@ -5,6 +5,7 @@ #include "rill.h" #include "utils.h" +#include "htable.h" #include #include @@ -20,23 +21,17 @@ // ----------------------------------------------------------------------------- -// utils +// impl // ----------------------------------------------------------------------------- -static const size_t page_len = 4096; - -static inline size_t to_vma_len(size_t len) -{ - if (!(len % page_len)) return len; - return (len & ~(page_len - 1)) + page_len; -} +#include "coder.c" // ----------------------------------------------------------------------------- // store // ----------------------------------------------------------------------------- -static const uint32_t version = 1; +static const uint32_t version = 2; static const 
uint32_t magic = 0x4C4C4952; struct rill_packed header @@ -44,9 +39,14 @@ struct rill_packed header uint32_t magic; uint32_t version; - uint64_t pairs; uint64_t ts; uint64_t quant; + + uint64_t keys; + uint64_t pairs; + + uint64_t vals_off; + uint64_t data_off; }; struct rill_store @@ -58,10 +58,56 @@ struct rill_store size_t vma_len; struct header *head; - struct rill_kv *data; + struct vals *vals; + uint8_t *data; + uint8_t *end; }; +// ----------------------------------------------------------------------------- +// coder +// ----------------------------------------------------------------------------- + +static struct coder store_encoder(struct rill_store *store) +{ + return make_encoder( + store->vals, + store->vma + store->head->data_off, + store->vma + store->vma_len); +} + +static struct coder store_decoder(struct rill_store *store) +{ + return make_decoder( + store->vals, + store->vma + store->head->data_off, + store->vma + store->vma_len); +} + + +// ----------------------------------------------------------------------------- +// vma +// ----------------------------------------------------------------------------- + +static inline size_t to_vma_len(size_t len) +{ + if (!(len % page_len)) return len; + return (len & ~(page_len - 1)) + page_len; +} + +static inline void vma_will_need(struct rill_store *store) +{ + if (madvise(store->vma, store->vma_len, MADV_WILLNEED) == -1) + fail("unable to madvise '%s'", store->file); +} + +static inline void vma_dont_need(struct rill_store *store) +{ + if (madvise(store->vma, store->vma_len, MADV_DONTNEED) == -1) + fail("unable to madvise '%s'", store->file); +} + + // ----------------------------------------------------------------------------- // reader // ----------------------------------------------------------------------------- @@ -107,7 +153,9 @@ struct rill_store *rill_store_open(const char *file) } store->head = store->vma; - store->data = (void *) ((uintptr_t) store->vma + sizeof(struct header)); + 
store->vals = (void *) ((uintptr_t) store->vma + store->head->vals_off); + store->data = (void *) ((uintptr_t) store->vma + store->head->data_off); + store->end = (void *) ((uintptr_t) store->vma + store->vma_len); if (store->head->magic != magic) { fail("invalid magic '0x%x' for '%s'", store->head->magic, file); @@ -119,15 +167,8 @@ struct rill_store *rill_store_open(const char *file) goto fail_version; } - size_t expected = sizeof(struct header) + sizeof(struct rill_kv) * store->head->pairs; - if (expected != len) { - fail("invalid file size '%lu != %lu' for '%s'", len, expected, file); - goto fail_len; - } - return store; - fail_len: fail_version: fail_magic: munmap(store->vma, store->vma_len); @@ -180,7 +221,7 @@ static bool writer_open( goto fail_open; } - size_t len = sizeof(struct header) + sizeof(struct rill_kv) * cap; + size_t len = sizeof(struct header) + cap; if (ftruncate(store->fd, len) == -1) { fail_errno("unable to resize '%s'", file); goto fail_truncate; @@ -194,13 +235,15 @@ static bool writer_open( } store->head = store->vma; - store->data = (void *) ((uintptr_t) store->vma + sizeof(struct header)); + store->vals = (void *) ((uintptr_t) store->vma + sizeof(struct header)); + store->end = (void *) ((uintptr_t) store->vma + store->vma_len); *store->head = (struct header) { .magic = magic, .version = version, .ts = ts, .quant = quant, + .vals_off = sizeof(struct header), }; return true; @@ -213,45 +256,78 @@ static bool writer_open( return false; } - -static void writer_close(struct rill_store *store, size_t pairs) +static void writer_close(struct rill_store *store, size_t len) { - store->head->pairs = pairs; - munmap(store->vma, store->vma_len); - size_t len = sizeof(struct header) + sizeof(struct rill_kv) * pairs; - if (ftruncate(store->fd, len) == -1) - fail_errno("unable to resize '%s'", store->file); + if (len) { + if (ftruncate(store->fd, len) == -1) + fail_errno("unable to resize '%s'", store->file); - if (fdatasync(store->fd) == -1) - 
fail_errno("unable to fsync '%s'", store->file); + if (fdatasync(store->fd) == -1) + fail_errno("unable to fsync '%s'", store->file); + } + else if (unlink(store->file) == -1) + fail_errno("unable to unlink '%s'", store->file); close(store->fd); } +static struct coder writer_begin( + struct rill_store *store, const struct vals *vals) +{ + size_t len = sizeof(*vals) + sizeof(vals->data[0]) * vals->len; + assert(store->head->vals_off + len < store->vma_len); + + memcpy(store->vals, vals, len); + + store->head->data_off = store->head->vals_off + len; + store->data = (void *) ((uintptr_t) store->vma + store->head->data_off); + + return store_encoder(store); +} + bool rill_store_write( const char *file, rill_ts_t ts, size_t quant, struct rill_pairs *pairs) { rill_pairs_compact(pairs); + if (!pairs->len) return true; + + struct vals *vals = vals_from_pairs(pairs); + if (!vals) goto fail_vals; + + size_t cap = + sizeof(struct vals) + (sizeof(vals->data[0]) * vals->len) + + (sizeof(rill_key_t) * (pairs->len + 1)) + + (coder_max_val_len * (pairs->len + 1)); struct rill_store store = {0}; - if (!writer_open(&store, file, pairs->len, ts, quant)) { + if (!writer_open(&store, file, cap, ts, quant)) { fail("unable to create '%s'", file); goto fail_open; } + struct coder coder = writer_begin(&store, vals); + for (size_t i = 0; i < pairs->len; ++i) { - store.data[i].key = pairs->data[i].key; - store.data[i].val = pairs->data[i].val; + if (!coder_encode(&coder, &pairs->data[i])) goto fail_encode; } + if (!coder_finish(&coder)) goto fail_encode; - writer_close(&store, pairs->len); + store.head->keys = coder.keys; + store.head->pairs = coder.pairs; + + writer_close(&store, (uintptr_t) coder.end - (uintptr_t) store.vma); + free(vals); return true; + fail_encode: + writer_close(&store, 0); fail_open: + free(vals); + fail_vals: return false; } @@ -263,15 +339,21 @@ bool rill_store_merge( assert(list_len > 1); size_t cap = 0; - struct { struct rill_kv *it, *end; } its[list_len]; + struct 
vals *vals = NULL; + struct it { + struct rill_kv kv; + struct coder decoder; + } its[list_len]; size_t it_len = 0; for (size_t i = 0; i < list_len; ++i) { if (!list[i]) continue; + vma_will_need(list[i]); + + if (!(vals = vals_merge(vals, list[i]->vals))) goto fail_vals; + its[it_len].decoder = store_decoder(list[i]); - cap += list[i]->head->pairs; - its[it_len].it = list[i]->data; - its[it_len].end = list[i]->data + list[i]->head->pairs; + cap += list[i]->vma_len; it_len++; } @@ -281,33 +363,53 @@ bool rill_store_merge( goto fail_open; } - size_t pairs = 0; - struct rill_kv *current = store.data; + struct coder encoder = writer_begin(&store, vals); + + for (size_t i = 0; i < it_len; ++i) { + if (!(coder_decode(&its[i].decoder, &its[i].kv))) goto fail_coder; + } + + struct rill_kv prev = {0}; while (it_len > 0) { size_t target = 0; for (size_t i = 1; i < it_len; ++i) { - if (rill_kv_cmp(its[i].it, its[target].it) < 0) + if (rill_kv_cmp(&its[i].kv, &its[target].kv) < 0) target = i; } - if (rill_kv_cmp(current, its[target].it) < 0) { - pairs++; - current++; - *current = *its[target].it; + struct it *it = &its[target]; + if (rill_likely(rill_kv_nil(&prev) || rill_kv_cmp(&prev, &it->kv) < 0)) { + prev = it->kv; + if (!coder_encode(&encoder, &it->kv)) goto fail_coder; } - its[target].it++; - if (its[target].it == its[target].end) { - memmove(its + target, its + target + 1, (it_len - target - 1) * sizeof(its[0])); + if (!coder_decode(&it->decoder, &it->kv)) goto fail_coder; + if (rill_unlikely(rill_kv_nil(&it->kv))) { + memmove(its + target, + its + target + 1, + (it_len - target - 1) * sizeof(its[0])); it_len--; } } - writer_close(&store, pairs); + store.head->keys = encoder.keys; + store.head->pairs = encoder.pairs; + + if (!coder_finish(&encoder)) goto fail_coder; + writer_close(&store, (uintptr_t) encoder.end - (uintptr_t) store.vma); + + for (size_t i = 0; i < list_len; ++i) { + if (list[i]) vma_dont_need(list[i]); + } + return true; + fail_coder: + 
writer_close(&store, 0); fail_open: + free(vals); + fail_vals: return false; } @@ -331,18 +433,6 @@ size_t rill_store_quant(struct rill_store *store) return store->head->quant; } -static inline void vma_will_need(struct rill_store *store) -{ - if (madvise(store->vma, store->vma_len, MADV_WILLNEED) == -1) - fail("unable to madvise '%s'", store->file); -} - -static inline void vma_dont_need(struct rill_store *store) -{ - if (madvise(store->vma, store->vma_len, MADV_DONTNEED) == -1) - fail("unable to madvise '%s'", store->file); -} - bool rill_store_scan_key( struct rill_store *store, const rill_key_t *keys, size_t len, @@ -350,17 +440,25 @@ bool rill_store_scan_key( { vma_will_need(store); - for (size_t i = 0; i < store->head->pairs; ++i) { - struct rill_kv *kv = &store->data[i]; + struct rill_kv kv = {0}; + struct coder coder = store_decoder(store); + + while (true) { + if (!coder_decode(&coder, &kv)) goto fail; + if (rill_kv_nil(&kv)) break; for (size_t j = 0; j < len; ++j) { - if (kv->key != keys[j]) continue; - if (!rill_pairs_push(out, kv->key, kv->val)) return false; + if (kv.key != keys[j]) continue; + if (!rill_pairs_push(out, kv.key, kv.val)) goto fail; } } vma_dont_need(store); return true; + + fail: + vma_dont_need(store); + return false; } bool rill_store_scan_val( @@ -370,47 +468,63 @@ bool rill_store_scan_val( { vma_will_need(store); + struct rill_kv kv = {0}; + struct coder coder = store_decoder(store); + for (size_t i = 0; i < store->head->pairs; ++i) { - struct rill_kv *kv = &store->data[i]; + if (!coder_decode(&coder, &kv)) goto fail; + if (rill_kv_nil(&kv)) break; for (size_t j = 0; j < len; ++j) { - if (kv->val != vals[j]) continue; - if (!rill_pairs_push(out, kv->key, kv->val)) return false; + if (kv.val != vals[j]) continue; + if (!rill_pairs_push(out, kv.key, kv.val)) goto fail; } } vma_dont_need(store); return true; + + fail: + vma_dont_need(store); + return false; } void rill_store_print_head(struct rill_store *store) { + fprintf(stderr, 
"%s\n", store->file); fprintf(stderr, "magic: 0x%x\n", store->head->magic); fprintf(stderr, "version: %u\n", store->head->version); - fprintf(stderr, "pairs: %lu\n", store->head->pairs); fprintf(stderr, "ts: %lu\n", store->head->ts); fprintf(stderr, "quant: %lu\n", store->head->quant); + fprintf(stderr, "keys: %lu\n", store->head->keys); + fprintf(stderr, "vals: %lu\n", store->vals->len); + fprintf(stderr, "pairs: %lu\n", store->head->pairs); } void rill_store_print(struct rill_store *store) { vma_will_need(store); + struct rill_kv kv = {0}; + struct coder coder = store_decoder(store); + const rill_key_t no_key = -1ULL; rill_key_t key = no_key; for (size_t i = 0; i < store->head->pairs; ++i) { - struct rill_kv *kv = &store->data[i]; + if (!coder_decode(&coder, &kv)) goto fail; + if (rill_kv_nil(&kv)) break; - if (kv->key == key) fprintf(stderr, ", %lu", kv->val); + if (kv.key == key) fprintf(stderr, ", %lu", kv.val); else { if (key != no_key) fprintf(stderr, "]\n"); - fprintf(stderr, "%p: [ %lu", (void *) kv->key, kv->val); - key = kv->key; + fprintf(stderr, "%p: [ %lu", (void *) kv.key, kv.val); + key = kv.key; } } fprintf(stderr, " ]\n"); + fail: vma_dont_need(store); } diff --git a/src/utils.h b/src/utils.h index 411f60d..d2d8bdb 100644 --- a/src/utils.h +++ b/src/utils.h @@ -6,24 +6,40 @@ #pragma once +#include #include +#include #include -#include #include // ----------------------------------------------------------------------------- -// err +// attributes +// ----------------------------------------------------------------------------- + +#define rill_packed __attribute__((__packed__)) +#define rill_likely(x) __builtin_expect(x, 1) +#define rill_unlikely(x) __builtin_expect(x, 0) + + // ----------------------------------------------------------------------------- +// misc +// ----------------------------------------------------------------------------- + +enum { page_len_s = 4096 }; +static const size_t page_len = page_len_s; +// 
----------------------------------------------------------------------------- +// err +// ----------------------------------------------------------------------------- + #define fail(fmt, ...) \ fprintf(stderr, "[fail] "fmt"\n", __VA_ARGS__) #define fail_errno(fmt, ...) \ fprintf(stderr, "[fail] "fmt"(%d): %s\n", __VA_ARGS__, errno, strerror(errno)) - // ----------------------------------------------------------------------------- // lock // ----------------------------------------------------------------------------- diff --git a/test/coder_test.c b/test/coder_test.c new file mode 100644 index 0000000..1c21cf0 --- /dev/null +++ b/test/coder_test.c @@ -0,0 +1,224 @@ +/* coder_test.c + Rémi Attab (remi.attab@gmail.com), 11 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "test.h" + +#include "coder.c" + + +// ----------------------------------------------------------------------------- +// leb128 +// ----------------------------------------------------------------------------- + +static void check_leb128(uint64_t val) +{ + uint8_t data[10] = {0}; + + { + uint8_t *it = leb128_encode(data, val); + size_t len = (uintptr_t) it - (uintptr_t) data; + if (val < (1UL << 7)) assert(len == 1); + else if (val < (1UL << 14)) assert(len == 2); + else if (val < (1UL << 21)) assert(len == 3); + else if (val < (1UL << 28)) assert(len == 4); + else if (val < (1UL << 35)) assert(len == 5); + else if (val < (1UL << 42)) assert(len == 6); + else if (val < (1UL << 49)) assert(len == 7); + else if (val < (1UL << 56)) assert(len == 8); + else if (val < (1UL << 63)) assert(len == 9); + else assert(len == 10); + } + + { + uint8_t *it = data; + uint64_t result = 0; + assert(leb128_decode(&it, it + sizeof(data), &result)); + assert(val == result); + } +} + +bool test_leb128(void) +{ + check_leb128(0); + for (size_t i = 0; i < 64; ++i) check_leb128(1UL << i); + + struct rng rng = rng_make(0); + for (size_t i = 0; i < 64; ++i) { + for (size_t j = 0; j < 100; ++j) + 
check_leb128(rng_gen_range(&rng, 0, 1UL << i)); + } + + return true; +} + + +// ----------------------------------------------------------------------------- +// vals +// ----------------------------------------------------------------------------- + +#define make_vals(...) \ + ({ \ + rill_val_t vals[] = { __VA_ARGS__ }; \ + make_vals_impl(vals, sizeof(vals) / sizeof(vals[0])); \ + }) + +static struct vals *make_vals_impl(rill_val_t *list, size_t len) +{ + struct vals *vals = calloc(1, sizeof(struct vals) + sizeof(list[0]) * len); + + vals->len = len; + for (size_t i = 0; i < len; ++i) vals->data[i] = list[i]; + + vals_compact(vals); + return vals; +} + +static void check_vals(struct rill_pairs pairs, struct vals *exp) +{ + struct vals *vals = vals_from_pairs(&pairs); + + assert(vals->len == exp->len); + for (size_t i = 0; i < exp->len; ++i) + assert(vals->data[i] == exp->data[i]); + + vals_rev_t rev = {0}; + vals_rev_make(vals, &rev); + + for (size_t i = 0; i < exp->len; ++i) { + size_t index = vals_vtoi(&rev, exp->data[i]); + assert(vals_itov(vals, index) == exp->data[i]); + } + + free(vals); + free(exp); +} + +static void check_vals_merge(struct vals *a, struct vals *b, struct vals *exp) +{ + struct vals *result = vals_merge(a, b); + + assert(result->len == exp->len); + for (size_t i = 0; i < exp->len; ++i) + assert(result->data[i] == exp->data[i]); + + free(a); + free(b); +} + +bool test_vals(void) +{ + check_vals(make_pair(kv(1, 10)), make_vals(10)); + + check_vals(make_pair(kv(1, 10), kv(1, 10)), make_vals(10)); + check_vals(make_pair(kv(1, 10), kv(2, 10)), make_vals(10)); + + check_vals(make_pair(kv(1, 10), kv(1, 20)), make_vals(10, 20)); + check_vals(make_pair(kv(1, 10), kv(2, 20)), make_vals(10, 20)); + + check_vals(make_pair(kv(2, 20), kv(1, 10)), make_vals(10, 20)); + check_vals(make_pair(kv(1, 20), kv(1, 10)), make_vals(10, 20)); + + check_vals_merge(make_vals(10), make_vals(10), make_vals(10)); + check_vals_merge(make_vals(10), make_vals(20), 
make_vals(10, 20)); + + check_vals_merge(make_vals(10, 20), make_vals(20), make_vals(10, 20)); + check_vals_merge(make_vals(10, 20), make_vals(20, 30), make_vals(10, 20, 30)); + + return true; +} + + +// ----------------------------------------------------------------------------- +// coder +// ----------------------------------------------------------------------------- + +void check_coder(struct rill_pairs pairs) +{ + rill_pairs_compact(&pairs); + + size_t cap = (pairs.len + 1) * (sizeof(pairs.data[0]) + 3); + uint8_t *buffer = calloc(1, cap); + struct vals *vals = vals_from_pairs(&pairs); + + size_t len = 0; + { + struct coder coder = make_encoder(vals, buffer, buffer + cap); + for (size_t i = 0; i < pairs.len; ++i) + assert(coder_encode(&coder, &pairs.data[i])); + assert(coder_finish(&coder)); + + len = coder.it - buffer; + assert(len <= cap); + } + + /* printf("buffer: start=%p, len=%lu\n", (void *) buffer, len); */ + /* for (size_t i = 0; i < cap;) { */ + /* printf("%6p: ", (void *) i); */ + /* for (size_t j = 0; j < 16 && i < cap; ++i, ++j) { */ + /* if (j % 2 == 0) printf(" "); */ + /* printf("%02x", buffer[i]); */ + /* } */ + /* printf("\n"); */ + /* } */ + + { + struct coder coder = make_decoder(vals, buffer, buffer + len); + + struct rill_kv kv = {0}; + for (size_t i = 0; i < pairs.len; ++i) { + assert(coder_decode(&coder, &kv)); + assert(rill_kv_cmp(&kv, &pairs.data[i]) == 0); + } + + assert(coder_decode(&coder, &kv)); + assert(rill_kv_nil(&kv)); + } + + free(vals); +} + + +bool test_coder(void) +{ + check_coder(make_pair(kv(1, 10))); + check_coder(make_pair(kv(1, 10), kv(1, 20))); + check_coder(make_pair(kv(1, 10), kv(2, 20))); + check_coder(make_pair(kv(1, 10), kv(1, 20), kv(2, 30))); + check_coder(make_pair(kv(1, 10), kv(1, 20), kv(2, 10))); + + struct rng rng = rng_make(0); + for (size_t iterations = 0; iterations < 100; ++iterations) { + + struct rill_pairs pairs = {0}; + for (size_t i = 0; i < 1000; ++i) { + uint64_t key = rng_gen_range(&rng, 1, 
500); + uint64_t val = rng_gen_range(&rng, 1, 100); + rill_pairs_push(&pairs, key, val); + } + + check_coder(pairs); + rill_pairs_free(&pairs); + } + + return true; +} + + + +// ----------------------------------------------------------------------------- +// main +// ----------------------------------------------------------------------------- + +int main(int argc, char **argv) +{ + (void) argc, (void) argv; + bool ret = true; + + ret = ret && test_leb128(); + ret = ret && test_vals(); + ret = ret && test_coder(); + + return ret ? 0 : 1; +} diff --git a/test/test.h b/test/test.h new file mode 100644 index 0000000..e5712e1 --- /dev/null +++ b/test/test.h @@ -0,0 +1,40 @@ +/* test.h + Rémi Attab (remi.attab@gmail.com), 11 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#pragma once + +#include "rill.h" +#include "utils.h" +#include "htable.h" +#include "rng.h" + +#include +#include +#include +#include + + +// ----------------------------------------------------------------------------- +// utils +// ----------------------------------------------------------------------------- + +struct rill_kv kv(rill_key_t key, rill_val_t val) +{ + return (struct rill_kv) { .key = key, .val = val }; +} + +#define make_pair(...) 
\ + ({ \ + struct rill_kv kvs[] = { __VA_ARGS__ }; \ + make_pair_impl(kvs, sizeof(kvs) / sizeof(kvs[0])); \ + }) + +struct rill_pairs make_pair_impl(const struct rill_kv *kv, size_t len) +{ + struct rill_pairs pairs = {0}; + for (size_t i = 0; i < len; ++i) + rill_pairs_push(&pairs, kv[i].key, kv[i].val); + return pairs; +} From fbb1a1e26dd6b027321c6ced51949dd45e0bc652 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Wed, 13 Sep 2017 16:33:05 -0400 Subject: [PATCH 08/91] Changed stuff --- compile.sh | 4 +- src/load.c | 2 +- src/pairs.c | 76 ++++++++++++++++++++++++------------- src/rill.c | 96 +++++++++++++++++++++++++++++------------------ src/rill.h | 33 ++++++++++------ src/store.c | 29 +++++++++----- test/coder_test.c | 47 ++++++++++++----------- test/rill_test.c | 75 ++++++++++++++++++++++++++++++++++++ test/test.h | 51 +++++++++++++++++++++++-- 9 files changed, 299 insertions(+), 114 deletions(-) create mode 100644 test/rill_test.c diff --git a/compile.sh b/compile.sh index 8efe9ed..75b25d9 100755 --- a/compile.sh +++ b/compile.sh @@ -26,5 +26,5 @@ gcc -o rill_load "${PREFIX}/src/load.c" librill.a $CFLAGS gcc -o rill_query "${PREFIX}/src/query.c" librill.a $CFLAGS gcc -o rill_dump "${PREFIX}/src/dump.c" librill.a $CFLAGS -gcc -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS -./test_coder +gcc -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder +gcc -o test_rill "${PREFIX}/test/rill_test.c" librill.a $CFLAGS && ./test_rill diff --git a/src/load.c b/src/load.c index d15f644..990a14a 100644 --- a/src/load.c +++ b/src/load.c @@ -49,7 +49,7 @@ int main(int argc, char **argv) if (!db) return 1; enum { - keys_per_sec = 1 * 1000, + keys_per_sec = 200, seconds = 3 * 31 * 24 * 60 * 60, keys_range = 1 * 1000 * 1000 * 1000, diff --git a/src/pairs.c b/src/pairs.c index 5470df4..6299465 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -4,6 +4,7 @@ */ #include "rill.h" +#include "utils.h" #include #include @@ -14,41 
+15,52 @@ // pairs // ----------------------------------------------------------------------------- -void rill_pairs_free(struct rill_pairs *pairs) +static size_t adjust_cap(size_t cap, size_t len) { - free(pairs->data); + while (len > cap) cap *= 2; + return cap; } -static bool resize(struct rill_pairs *pairs, size_t len) +struct rill_pairs *rill_pairs_new(size_t cap) { - if (len <= pairs->cap) return true; - - size_t cap = pairs->cap ? pairs->cap : 16; - while (cap < len) cap *= 2; + cap = adjust_cap(1, cap); - void *ret = realloc(pairs->data, cap * sizeof(*pairs->data)); - if (!ret) return false; + struct rill_pairs *pairs = + calloc(1, sizeof(*pairs) + cap * sizeof(pairs->data[0])); + if (!pairs) return NULL; - pairs->data = ret; pairs->cap = cap; + return pairs; +} + - return true; +void rill_pairs_free(struct rill_pairs *pairs) +{ + free(pairs); } -bool rill_pairs_reset(struct rill_pairs *pairs, size_t cap) + +void rill_pairs_clear(struct rill_pairs *pairs) { pairs->len = 0; - return resize(pairs, cap); } -bool rill_pairs_push(struct rill_pairs *pairs, rill_key_t key, rill_val_t val) +struct rill_pairs *rill_pairs_push( + struct rill_pairs *pairs, rill_key_t key, rill_val_t val) { - if (!resize(pairs, pairs->len + 1)) return false; + if (rill_unlikely(pairs->len + 1 > pairs->cap)) { + size_t cap = adjust_cap(pairs->cap, pairs->len + 1); + + pairs = realloc(pairs, sizeof(*pairs) + cap * sizeof(pairs->data[0])); + if (!pairs) return NULL; + + pairs->cap = cap; + } pairs->data[pairs->len] = (struct rill_kv) { .key = key, .val = val }; pairs->len++; - return true; + return pairs; } static int kv_cmp(const void *lhs, const void *rhs) @@ -72,38 +84,46 @@ void rill_pairs_compact(struct rill_pairs *pairs) pairs->len = j + 1; } -bool rill_pairs_scan_key( +struct rill_pairs *rill_pairs_scan_key( const struct rill_pairs *pairs, const rill_key_t *keys, size_t len, struct rill_pairs *out) { + struct rill_pairs *result = out; + for (size_t i = 0; i < pairs->len; ++i) { - 
struct rill_kv *kv = &pairs->data[i]; + const struct rill_kv *kv = &pairs->data[i]; for (size_t j = 0; j < len; ++j) { if (kv->key != keys[j]) continue; - if (!rill_pairs_push(out, kv->key, kv->val)) return false; + + result = rill_pairs_push(result, kv->key, kv->val); + if (!result) return NULL; } } - return true; + return result; } -bool rill_pairs_scan_val( +struct rill_pairs *rill_pairs_scan_val( const struct rill_pairs *pairs, const rill_val_t *vals, size_t len, struct rill_pairs *out) { + struct rill_pairs *result = out; + for (size_t i = 0; i < pairs->len; ++i) { - struct rill_kv *kv = &pairs->data[i]; + const struct rill_kv *kv = &pairs->data[i]; for (size_t j = 0; j < len; ++j) { if (kv->val != vals[j]) continue; - if (!rill_pairs_push(out, kv->key, kv->val)) return false; + + result = rill_pairs_push(result, kv->key, kv->val); + if (!result) return NULL; } } - return true; + return result; } void rill_pairs_print(const struct rill_pairs *pairs) @@ -111,16 +131,18 @@ void rill_pairs_print(const struct rill_pairs *pairs) const rill_key_t no_key = -1ULL; rill_key_t key = no_key; + printf("pairs(%p, %lu, %lu):\n", (void *) pairs, pairs->len, pairs->cap); + for (size_t i = 0; i < pairs->len; ++i) { - struct rill_kv *kv = &pairs->data[i]; + const struct rill_kv *kv = &pairs->data[i]; if (kv->key == key) fprintf(stderr, ", %lu", kv->val); else { if (key != no_key) fprintf(stderr, "]\n"); - fprintf(stderr, "%p: [ %lu", (void *) kv->key, kv->val); + fprintf(stderr, " %p: [ %lu", (void *) kv->key, kv->val); key = kv->key; } } - fprintf(stderr, " ]\n"); + if (pairs->len) fprintf(stderr, " ]\n"); } diff --git a/src/rill.c b/src/rill.c index 4fe4069..d6b98a1 100644 --- a/src/rill.c +++ b/src/rill.c @@ -15,6 +15,14 @@ #include +// ----------------------------------------------------------------------------- +// kv +// ----------------------------------------------------------------------------- + +extern inline bool rill_kv_nil(const struct rill_kv *); +extern inline 
int rill_kv_cmp(const struct rill_kv *, const struct rill_kv *); + + // ----------------------------------------------------------------------------- // config // ----------------------------------------------------------------------------- @@ -95,26 +103,14 @@ struct rill * rill_open(const char *dir) goto fail_alloc_dir; } - db->acc = calloc(1, sizeof(*db->acc)); - if (!db->acc) { - fail("unable to allocate memory for '%s'", dir); - goto fail_alloc_acc; - } - - db->dump = calloc(1, sizeof(*db->dump)); - if (!db->dump) { - fail("unable to allocate memory for '%s'", dir); - goto fail_alloc_dump; - } - - if (!rill_pairs_reset(db->acc, 1 *1000 * 1000)) { + if (!(db->acc = rill_pairs_new(1 * 1000 * 1000))) { fail("unable to allocate pairs for '%s'", dir); - goto fail_pairs; + goto fail_alloc_acc; } - if (!rill_pairs_reset(db->dump, 1 *1000 * 1000)) { + if (!(db->dump = rill_pairs_new(1 * 1000 * 1000))) { fail("unable to allocate pairs for '%s'", dir); - goto fail_pairs; + goto fail_alloc_dump; } if (mkdir(dir, 0775) == -1 && errno != EEXIST) { @@ -150,12 +146,9 @@ struct rill * rill_open(const char *dir) closedir(dir_handle); fail_dir: fail_mkdir: - fail_pairs: rill_pairs_free(db->dump); - free(db->dump); fail_alloc_dump: rill_pairs_free(db->acc); - free(db->acc); fail_alloc_acc: free((char *) db->dir); fail_alloc_dir: @@ -178,12 +171,10 @@ void rill_close(struct rill *db) if (db->monthly[i]) rill_store_close(db->monthly[i]); } - rill_pairs_free(db->acc); - rill_pairs_free(db->dump); free((char *) db->dir); - free(db->dump); - free(db->acc); + rill_pairs_free(db->acc); + rill_pairs_free(db->dump); free(db); } @@ -204,16 +195,17 @@ bool rill_ingest(struct rill *db, rill_key_t key, rill_val_t val) return false; } - bool ret; + struct rill_pairs *result; { lock(&db->lock); - ret = rill_pairs_push(db->acc, key, val); + result = rill_pairs_push(db->acc, key, val); unlock(&db->lock); } - return ret; + if (result) db->acc = result; + return result != NULL; } @@ -240,6 +232,11 
@@ static bool rotate_monthly( (void) rill_store_rm(*store); *store = NULL; } + + bool all_null = true; + for (size_t i = 0; i < len; ++i) all_null = all_null && !list[i]; + if (all_null) return true; + if (!rill_store_merge(file, ts, quant_month, list, len)) return false; if (!(*store = rill_store_open(file))) return false; @@ -264,6 +261,11 @@ static bool rotate_daily( (ts / quant_day) % days); assert(!*store); + + bool all_null = true; + for (size_t i = 0; i < len; ++i) all_null = all_null && !list[i]; + if (all_null) return true; + if (!rill_store_merge(file, ts, quant_day, list, len)) return false; if (!(*store = rill_store_open(file))) return false; @@ -300,7 +302,7 @@ static bool rotate_hourly(struct rill *db, struct rill_store **store, rill_ts_t if (!(*store = rill_store_open(file))) return false; } - rill_pairs_reset(db->dump, db->dump->cap); + rill_pairs_clear(db->dump); return true; } @@ -339,42 +341,62 @@ bool rill_rotate(struct rill *db, rill_ts_t now) // query // ----------------------------------------------------------------------------- -void rill_query_key(struct rill *db, rill_key_t *keys, size_t len, struct rill_pairs *out) +struct rill_pairs *rill_query_key( + struct rill *db, + const rill_key_t *keys, size_t len, + struct rill_pairs *out) { + struct rill_pairs *result = out; + if (!len) return result; + for (size_t i = 0; i < hours; ++i) { if (!db->hourly[i]) continue; - rill_store_scan_key(db->hourly[i], keys, len, out); + result = rill_store_scan_key(db->hourly[i], keys, len, result); + if (!result) return NULL; } for (size_t i = 0; i < days; ++i) { if (!db->daily[i]) continue; - rill_store_scan_key(db->daily[i], keys, len, out); + result = rill_store_scan_key(db->daily[i], keys, len, result); + if (!result) return NULL; } for (size_t i = 0; i < months; ++i) { if (!db->monthly[i]) continue; - rill_store_scan_key(db->monthly[i], keys, len, out); + result = rill_store_scan_key(db->monthly[i], keys, len, result); + if (!result) return NULL; } - 
rill_pairs_compact(out); + rill_pairs_compact(result); + return result; } -void rill_query_val(struct rill *db, rill_val_t *vals, size_t len, struct rill_pairs *out) +struct rill_pairs *rill_query_val( + struct rill *db, + const rill_val_t *vals, size_t len, + struct rill_pairs *out) { + struct rill_pairs *result = out; + if (!len) return result; + for (size_t i = 0; i < hours; ++i) { if (!db->hourly[i]) continue; - rill_store_scan_val(db->hourly[i], vals, len, out); + result = rill_store_scan_val(db->hourly[i], vals, len, result); + if (!result) return result; } for (size_t i = 0; i < days; ++i) { if (!db->daily[i]) continue; - rill_store_scan_val(db->daily[i], vals, len, out); + result = rill_store_scan_val(db->daily[i], vals, len, result); + if (!result) return result; } for (size_t i = 0; i < months; ++i) { if (!db->monthly[i]) continue; - rill_store_scan_val(db->monthly[i], vals, len, out); + result = rill_store_scan_val(db->monthly[i], vals, len, result); + if (!result) return result; } - rill_pairs_compact(out); + rill_pairs_compact(result); + return result; } diff --git a/src/rill.h b/src/rill.h index 83b02a4..1294bbc 100644 --- a/src/rill.h +++ b/src/rill.h @@ -28,12 +28,12 @@ struct rill_kv rill_val_t val; }; -static inline bool rill_kv_nil(const struct rill_kv *kv) +inline bool rill_kv_nil(const struct rill_kv *kv) { return !kv->key && !kv->val; } -static inline int rill_kv_cmp(const struct rill_kv *lhs, const struct rill_kv *rhs) +inline int rill_kv_cmp(const struct rill_kv *lhs, const struct rill_kv *rhs) { if (lhs->key < rhs->key) return -1; if (lhs->key > rhs->key) return +1; @@ -52,20 +52,24 @@ static inline int rill_kv_cmp(const struct rill_kv *lhs, const struct rill_kv *r struct rill_pairs { size_t len, cap; - struct rill_kv *data; + struct rill_kv data[]; }; +struct rill_pairs *rill_pairs_new(size_t cap); void rill_pairs_free(struct rill_pairs *pairs); -bool rill_pairs_reset(struct rill_pairs *pairs, size_t cap); -bool rill_pairs_push(struct 
rill_pairs *pairs, rill_key_t key, rill_val_t val); +void rill_pairs_clear(struct rill_pairs *pairs); + +struct rill_pairs *rill_pairs_push( + struct rill_pairs *pairs, rill_key_t key, rill_val_t val); + void rill_pairs_compact(struct rill_pairs *pairs); -bool rill_pairs_scan_key( +struct rill_pairs *rill_pairs_scan_key( const struct rill_pairs *pairs, const rill_key_t *keys, size_t len, struct rill_pairs *out); -bool rill_pairs_scan_val( +struct rill_pairs *rill_pairs_scan_val( const struct rill_pairs *pairs, const rill_val_t *vals, size_t len, struct rill_pairs *out); @@ -99,11 +103,11 @@ const char * rill_store_file(struct rill_store *store); rill_ts_t rill_store_ts(struct rill_store *store); size_t rill_store_quant(struct rill_store *store); -bool rill_store_scan_key( +struct rill_pairs *rill_store_scan_key( struct rill_store *store, const rill_key_t *keys, size_t len, struct rill_pairs *out); -bool rill_store_scan_val( +struct rill_pairs *rill_store_scan_val( struct rill_store *store, const rill_val_t *vals, size_t len, struct rill_pairs *out); @@ -124,5 +128,12 @@ void rill_close(struct rill *db); bool rill_ingest(struct rill *db, rill_key_t key, rill_val_t val); bool rill_rotate(struct rill *db, rill_ts_t now); -void rill_query_key(struct rill *db, rill_key_t *keys, size_t len, struct rill_pairs *out); -void rill_query_val(struct rill *db, rill_val_t *vals, size_t len, struct rill_pairs *out); +struct rill_pairs *rill_query_key( + struct rill *db, + const rill_key_t *keys, size_t len, + struct rill_pairs *out); + +struct rill_pairs *rill_query_val( + struct rill *db, + const rill_val_t *vals, size_t len, + struct rill_pairs *out); diff --git a/src/store.c b/src/store.c index 559bcd7..8de17f0 100644 --- a/src/store.c +++ b/src/store.c @@ -319,7 +319,7 @@ bool rill_store_write( store.head->keys = coder.keys; store.head->pairs = coder.pairs; - writer_close(&store, (uintptr_t) coder.end - (uintptr_t) store.vma); + writer_close(&store, (uintptr_t) coder.it - 
(uintptr_t) store.vma); free(vals); return true; @@ -357,6 +357,8 @@ bool rill_store_merge( it_len++; } + assert(it_len); + struct rill_store store = {0}; if (!writer_open(&store, file, cap, ts, quant)) { fail("unable to create '%s'", file); @@ -397,12 +399,13 @@ bool rill_store_merge( store.head->pairs = encoder.pairs; if (!coder_finish(&encoder)) goto fail_coder; - writer_close(&store, (uintptr_t) encoder.end - (uintptr_t) store.vma); + writer_close(&store, (uintptr_t) encoder.it - (uintptr_t) store.vma); for (size_t i = 0; i < list_len; ++i) { if (list[i]) vma_dont_need(list[i]); } + free(vals); return true; fail_coder: @@ -433,7 +436,7 @@ size_t rill_store_quant(struct rill_store *store) return store->head->quant; } -bool rill_store_scan_key( +struct rill_pairs *rill_store_scan_key( struct rill_store *store, const rill_key_t *keys, size_t len, struct rill_pairs *out) @@ -441,6 +444,7 @@ bool rill_store_scan_key( vma_will_need(store); struct rill_kv kv = {0}; + struct rill_pairs *result = out; struct coder coder = store_decoder(store); while (true) { @@ -449,19 +453,21 @@ bool rill_store_scan_key( for (size_t j = 0; j < len; ++j) { if (kv.key != keys[j]) continue; - if (!rill_pairs_push(out, kv.key, kv.val)) goto fail; + + result = rill_pairs_push(result, kv.key, kv.val); + if (!result) goto fail; // unwind through fail: so vma_dont_need() still runs } } vma_dont_need(store); - return true; + return result; fail: vma_dont_need(store); - return false; + return NULL; } -bool rill_store_scan_val( +struct rill_pairs *rill_store_scan_val( struct rill_store *store, const rill_val_t *vals, size_t len, struct rill_pairs *out) @@ -469,6 +475,7 @@ bool rill_store_scan_val( vma_will_need(store); struct rill_kv kv = {0}; + struct rill_pairs *result = out; struct coder coder = store_decoder(store); for (size_t i = 0; i < store->head->pairs; ++i) { @@ -477,16 +484,18 @@ bool rill_store_scan_val( for (size_t j = 0; j < len; ++j) { if (kv.val != vals[j]) continue; - if (!rill_pairs_push(out, kv.key, kv.val)) goto fail; + + 
result = rill_pairs_push(result, kv.key, kv.val); + if (!result) goto fail; // unwind through fail: so vma_dont_need() still runs } } vma_dont_need(store); - return true; + return result; fail: vma_dont_need(store); - return false; + return NULL; } void rill_store_print_head(struct rill_store *store) diff --git a/test/coder_test.c b/test/coder_test.c index 1c21cf0..4319f75 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -75,9 +75,9 @@ static struct vals *make_vals_impl(rill_val_t *list, size_t len) return vals; } -static void check_vals(struct rill_pairs pairs, struct vals *exp) +static void check_vals(struct rill_pairs *pairs, struct vals *exp) { - struct vals *vals = vals_from_pairs(&pairs); + struct vals *vals = vals_from_pairs(pairs); assert(vals->len == exp->len); for (size_t i = 0; i < exp->len; ++i) @@ -93,6 +93,7 @@ static void check_vals(struct rill_pairs pairs, struct vals *exp) free(vals); free(exp); + free(pairs); } static void check_vals_merge(struct vals *a, struct vals *b, struct vals *exp) @@ -134,42 +135,44 @@ bool test_vals(void) // coder // ----------------------------------------------------------------------------- -void check_coder(struct rill_pairs pairs) +void check_coder(struct rill_pairs *pairs) { - rill_pairs_compact(&pairs); + rill_pairs_compact(pairs); - size_t cap = (pairs.len + 1) * (sizeof(pairs.data[0]) + 3); + size_t cap = (pairs->len + 1) * (sizeof(pairs->data[0]) + 3); uint8_t *buffer = calloc(1, cap); - struct vals *vals = vals_from_pairs(&pairs); + struct vals *vals = vals_from_pairs(pairs); size_t len = 0; { struct coder coder = make_encoder(vals, buffer, buffer + cap); - for (size_t i = 0; i < pairs.len; ++i) - assert(coder_encode(&coder, &pairs.data[i])); + for (size_t i = 0; i < pairs->len; ++i) + assert(coder_encode(&coder, &pairs->data[i])); assert(coder_finish(&coder)); len = coder.it - buffer; assert(len <= cap); } - /* printf("buffer: start=%p, len=%lu\n", (void *) buffer, len); */ - /* for (size_t i = 0; i < cap;) { */ - /* printf("%6p: ", (void *) i); */ - 
/* for (size_t j = 0; j < 16 && i < cap; ++i, ++j) { */ - /* if (j % 2 == 0) printf(" "); */ - /* printf("%02x", buffer[i]); */ - /* } */ - /* printf("\n"); */ - /* } */ + if (false) { // hex dump for debuging + printf("buffer: start=%p, len=%lu\n", (void *) buffer, len); + for (size_t i = 0; i < cap;) { + printf("%6p: ", (void *) i); + for (size_t j = 0; j < 16 && i < cap; ++i, ++j) { + if (j % 2 == 0) printf(" "); + printf("%02x", buffer[i]); + } + printf("\n"); + } + } { struct coder coder = make_decoder(vals, buffer, buffer + len); struct rill_kv kv = {0}; - for (size_t i = 0; i < pairs.len; ++i) { + for (size_t i = 0; i < pairs->len; ++i) { assert(coder_decode(&coder, &kv)); - assert(rill_kv_cmp(&kv, &pairs.data[i]) == 0); + assert(rill_kv_cmp(&kv, &pairs->data[i]) == 0); } assert(coder_decode(&coder, &kv)); @@ -177,6 +180,7 @@ void check_coder(struct rill_pairs pairs) } free(vals); + free(pairs); } @@ -191,15 +195,14 @@ bool test_coder(void) struct rng rng = rng_make(0); for (size_t iterations = 0; iterations < 100; ++iterations) { - struct rill_pairs pairs = {0}; + struct rill_pairs *pairs = rill_pairs_new(1000); for (size_t i = 0; i < 1000; ++i) { uint64_t key = rng_gen_range(&rng, 1, 500); uint64_t val = rng_gen_range(&rng, 1, 100); - rill_pairs_push(&pairs, key, val); + pairs = rill_pairs_push(pairs, key, val); } check_coder(pairs); - rill_pairs_free(&pairs); } return true; diff --git a/test/rill_test.c b/test/rill_test.c new file mode 100644 index 0000000..7d2f1e8 --- /dev/null +++ b/test/rill_test.c @@ -0,0 +1,75 @@ +/* rill_test.c + Rémi Attab (remi.attab@gmail.com), 13 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "test.h" + + +// ----------------------------------------------------------------------------- +// rotate +// ----------------------------------------------------------------------------- + +bool test_rotate(void) +{ + const char *dir = "test.rotate.db"; + + rm(dir); + struct rill *db = rill_open(dir); + + const 
uint64_t key = 1; + + for (rill_ts_t ts = 0; ts < 13 * month; ts += 1 * hour) { + rill_ingest(db, key, ts + 1); + rill_rotate(db, ts); + } + rill_rotate(db, 13 * month); + + { + struct rill_pairs *pairs = rill_query_key(db, &key, 1, rill_pairs_new(1)); + + size_t i = 0; + for (rill_ts_t ts = 0; ts < 13 * month; ts += 1 * hour) { + assert(pairs->data[i].key == key); + assert(pairs->data[i].val == ts + 1); + ++i; + } + + rill_pairs_free(pairs); + } + + // \todo this is bad and doesn't properly expire things. + for (size_t i = 1; i <= 6; ++i) + rill_rotate(db, (13 + i) * month); + + { + struct rill_pairs *pairs = rill_query_key(db, &key, 1, rill_pairs_new(1)); + + for (size_t i = 0; i < pairs->len; ++i) { + assert(pairs->data[i].key == key); + assert(pairs->data[i].val >= (5 * month) + 1); + } + + rill_pairs_free(pairs); + } + + rill_close(db); + rm(dir); + + return true; +} + + +// ----------------------------------------------------------------------------- +// main +// ----------------------------------------------------------------------------- + +int main(int argc, char **argv) +{ + (void) argc, (void) argv; + bool ret = true; + + ret = ret && test_rotate(); + + return ret ? 
0 : 1; +} diff --git a/test/test.h b/test/test.h index e5712e1..04d5249 100644 --- a/test/test.h +++ b/test/test.h @@ -15,9 +15,27 @@ #include #include +#include +#include +#include + // ----------------------------------------------------------------------------- -// utils +// time +// ----------------------------------------------------------------------------- + +enum +{ + sec = 1, + min = 60 * sec, + hour = 60 * min, + day = 24 * hour, + month = 31 * day, +}; + + +// ----------------------------------------------------------------------------- +// pairs // ----------------------------------------------------------------------------- struct rill_kv kv(rill_key_t key, rill_val_t val) @@ -31,10 +49,35 @@ struct rill_kv kv(rill_key_t key, rill_val_t val) make_pair_impl(kvs, sizeof(kvs) / sizeof(kvs[0])); \ }) -struct rill_pairs make_pair_impl(const struct rill_kv *kv, size_t len) +struct rill_pairs *make_pair_impl(const struct rill_kv *kv, size_t len) { - struct rill_pairs pairs = {0}; + struct rill_pairs *pairs = rill_pairs_new(len); for (size_t i = 0; i < len; ++i) - rill_pairs_push(&pairs, kv[i].key, kv[i].val); + pairs = rill_pairs_push(pairs, kv[i].key, kv[i].val); return pairs; } + + +// ----------------------------------------------------------------------------- +// rm +// ----------------------------------------------------------------------------- + +void rm(const char *path) +{ + DIR *dir = opendir(path); + if (!dir) return; + + struct dirent stream, *entry; + while (true) { + if (readdir_r(dir, &stream, &entry) == -1) abort(); + else if (!entry) break; + else if (entry->d_type != DT_REG) continue; + + char file[PATH_MAX]; + snprintf(file, sizeof(file), "%s/%s", path, entry->d_name); + unlink(file); + } + + closedir(dir); + rmdir(path); +} From e241731faae2f666ce8bc704b78e1e699f92d5e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 15 Sep 2017 10:44:04 -0400 Subject: [PATCH 09/91] Fix hourly rotation collision --- src/rill.c | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rill.c b/src/rill.c index d6b98a1..ae6d233 100644 --- a/src/rill.c +++ b/src/rill.c @@ -310,7 +310,7 @@ bool rill_rotate(struct rill *db, rill_ts_t now) { if (now / quant_hour != db->ts / quant_hour) { size_t quant = db->ts / quant_hour; - if (!rotate_hourly(db, &db->hourly[(now / quant_hour) % hours], db->ts)) { + if (!rotate_hourly(db, &db->hourly[quant % hours], db->ts)) { fail("unable to complete hourly rotation '%lu'", quant); return false; } From bc86a1daead608a1ec91b859ef5a2443c98f3a09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 16 Sep 2017 21:53:03 -0400 Subject: [PATCH 10/91] Add acc and rotate --- compile.sh | 5 +- src/acc.c | 213 +++++++++++++++ src/pairs.c | 8 + src/query.c | 123 +++++++-- src/rill.c | 402 ---------------------------- src/rill.h | 41 ++- src/{dump.c => rill_dump.c} | 0 src/{load.c => rill_load.c} | 20 +- src/rotate.c | 164 ++++++++++++ src/store.c | 14 +- src/utils.h | 28 ++ test/{rill_test.c => rotate_test.c} | 27 +- test/test.h | 14 - 13 files changed, 578 insertions(+), 481 deletions(-) create mode 100644 src/acc.c delete mode 100644 src/rill.c rename src/{dump.c => rill_dump.c} (100%) rename src/{load.c => rill_load.c} (78%) create mode 100644 src/rotate.c rename test/{rill_test.c => rotate_test.c} (65%) diff --git a/compile.sh b/compile.sh index 75b25d9..77e9715 100755 --- a/compile.sh +++ b/compile.sh @@ -5,7 +5,7 @@ set -o errexit -o nounset -o pipefail -o xtrace : ${PREFIX:="."} declare -a SRC -SRC=(htable rng pairs store rill) +SRC=(htable rng pairs store acc rotate query) CFLAGS="-g -O3 -march=native -pipe -std=gnu11 -D_GNU_SOURCE" CFLAGS="$CFLAGS -I${PREFIX}/src" @@ -23,8 +23,7 @@ done ar rcs librill.a $OBJ gcc -o rill_load "${PREFIX}/src/load.c" librill.a $CFLAGS -gcc -o rill_query "${PREFIX}/src/query.c" librill.a $CFLAGS gcc -o rill_dump "${PREFIX}/src/dump.c" librill.a $CFLAGS gcc -o test_coder "${PREFIX}/test/coder_test.c" 
librill.a $CFLAGS && ./test_coder -gcc -o test_rill "${PREFIX}/test/rill_test.c" librill.a $CFLAGS && ./test_rill +gcc -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS && ./test_rotate diff --git a/src/acc.c b/src/acc.c new file mode 100644 index 0000000..93ba236 --- /dev/null +++ b/src/acc.c @@ -0,0 +1,217 @@ +/* acc.c + Rémi Attab (remi.attab@gmail.com), 16 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" +#include "utils.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +// ----------------------------------------------------------------------------- +// acc +// ----------------------------------------------------------------------------- + +static const uint32_t version = 1; +static const uint32_t magic = 0x43434152; + +struct rill_packed header +{ + uint32_t magic; + uint32_t version; + + uint64_t len; + + atomic_size_t read; + atomic_size_t write; +}; + +struct rill_packed kv +{ + uint64_t key, val; +}; + +struct rill_acc +{ + int fd; + const char *dir; + + void *vma; + size_t vma_len; + + struct header *head; + struct kv *data; +}; + +enum { min_cap = 32 }; + +// Maps the "acc" ring file found in dir, creating the directory and the file when +// they are missing. With cap == rill_acc_read_only a missing file is never +// created. Returns NULL on any failure. +struct rill_acc *rill_acc_open(const char *dir, size_t cap) +{ + if (cap != rill_acc_read_only && cap < min_cap) cap = min_cap; + + struct rill_acc *acc = calloc(1, sizeof(*acc)); + if (!acc) { + fail("unable to allocate memory for '%s'", dir); + goto fail_alloc_struct; + } + + acc->dir = strndup(dir, PATH_MAX); + if (!acc->dir) { + fail("unable to allocate memory for '%s'", dir); + goto fail_alloc_dir; + } + + if (mkdir(dir, 0775) == -1 && errno != EEXIST) { + fail_errno("unable to open create dir '%s'", dir); + goto fail_mkdir; + } + + char file[PATH_MAX]; + snprintf(file, sizeof(file), "%s/acc", dir); + + bool create = false; + struct stat stat_ret = {0}; + if (stat(file, &stat_ret) == -1) { + if (errno != ENOENT) { + fail_errno("unable to stat '%s'", file); + goto fail_stat; + } + + 
if (cap == rill_acc_read_only) goto fail_stat; // read-only: report the missing file through the cleanup path + + create = true; + acc->fd = open(file, O_RDWR | O_CREAT | O_EXCL | O_NOATIME, 0644); + } + else acc->fd = open(file, O_RDWR); + + if (acc->fd == -1) { + fail_errno("unable to create '%s'", file); + goto fail_open; + } + + if (create) { + acc->vma_len = to_vma_len(sizeof(*acc->head) + cap * sizeof(*acc->data)); + if (ftruncate(acc->fd, acc->vma_len) == -1) { + fail_errno("unable to ftruncate '%s' to len '%lu'", file, acc->vma_len); + goto fail_truncate; + } + } + else acc->vma_len = (size_t) stat_ret.st_size; // existing ring: map its on-disk size, whatever cap says + + acc->vma = mmap(NULL, acc->vma_len, PROT_READ | PROT_WRITE, MAP_SHARED, acc->fd, 0); + if (acc->vma == MAP_FAILED) { + fail_errno("unable to mmap '%s' of len '%lu'", file, acc->vma_len); + goto fail_mmap; + } + + acc->head = acc->vma; + acc->data = (void *) (acc->head + 1); + + if (create) { + acc->head->magic = magic; + acc->head->version = version; + acc->head->len = cap; + } + else { + if (acc->head->magic != magic) { + fail("invalid magic '0x%x' for '%s'", acc->head->magic, file); + goto fail_magic; + } + + if (acc->head->version != version) { + fail("unknown version '%u' for '%s'", acc->head->version, file); + goto fail_version; + } + } + + return acc; + + fail_version: + fail_magic: + munmap(acc->vma, acc->vma_len); + fail_mmap: + fail_truncate: + close(acc->fd); + fail_open: + fail_stat: + fail_mkdir: + free((char *) acc->dir); + fail_alloc_dir: + free(acc); + fail_alloc_struct: + return NULL; +} + +// Unmaps the ring, closes its descriptor and frees the handle. +void rill_acc_close(struct rill_acc *acc) +{ + munmap(acc->vma, acc->vma_len); + close(acc->fd); + free((char *) acc->dir); + free(acc); +} + +// Writes the pair into slot (head->read % head->len) and then publishes it by +// storing head->read + 1 with release ordering; head->write is not consulted. +void rill_acc_ingest(struct rill_acc *acc, rill_key_t key, rill_val_t val) +{ + size_t read = atomic_load_explicit(&acc->head->read, memory_order_relaxed); + size_t index = read % acc->head->len; + + acc->data[index].key = key; + acc->data[index].val = val; + + atomic_store_explicit(&acc->head->read, read + 1, memory_order_release); +} + +// Copies the entries between the head->write and head->read cursors into a store +// written to `file`, then advances head->write. Returns true when there was +// nothing to copy; returns false, leaving head->write as is, on failure. +bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) +{ 
+ size_t start = atomic_load_explicit(&acc->head->write, memory_order_acquire); + size_t end = atomic_load_explicit(&acc->head->read, memory_order_acquire); + if (start == end) return true; // nothing to flush, so nothing is allocated + assert(start < end); + + if (end - start >= acc->head->len) { + printf("acc lost '%lu' events\n", (end - start) - acc->head->len); + size_t leeway = min_cap / 2; // to avoid contention between reader and writer + start = end - acc->head->len + leeway; + } + + struct rill_pairs *pairs = rill_pairs_new(acc->head->len); + if (!pairs) { + fail("unable to allocate pairs for len '%lu'", acc->head->len); + return false; + } + + for (size_t i = start; i < end; ++i) { + size_t index = i % acc->head->len; + + struct rill_pairs *ret = rill_pairs_push(pairs, acc->data[index].key, acc->data[index].val); + assert(ret == pairs); + } + + bool ok = rill_store_write(file, now, 0, pairs); + if (ok) atomic_store_explicit(&acc->head->write, end, memory_order_release); + else fail("unable to write acc file '%s'", file); + + rill_pairs_free(pairs); // owned by this function whether or not the write succeeded + return ok; +} diff --git a/src/pairs.c b/src/pairs.c index 6299465..3b84f9e 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -11,6 +11,14 @@ #include +// ----------------------------------------------------------------------------- +// kv +// ----------------------------------------------------------------------------- + +extern inline bool rill_kv_nil(const struct rill_kv *); +extern inline int rill_kv_cmp(const struct rill_kv *, const struct rill_kv *); + + // ----------------------------------------------------------------------------- // pairs // ----------------------------------------------------------------------------- diff --git a/src/query.c b/src/query.c index 4baef17..27ca1c0 100644 --- a/src/query.c +++ b/src/query.c @@ -1,42 +1,119 @@ -/* bench.c - Rémi Attab (remi.attab@gmail.com), 04 Sep 2017 +/* rill.c + Rémi Attab (remi.attab@gmail.com), 03 Sep 2017 FreeBSD-style copyright and disclaimer 
apply */ #include "rill.h" +#include "utils.h" -#include -#include #include +#include +#include +#include +#include +#include +#include + + + +// ----------------------------------------------------------------------------- +// rill +// ----------------------------------------------------------------------------- + +struct rill_query +{ + const char *dir; -int main(int argc, char **argv) + size_t len; + struct rill_store *list[1024]; +}; + +// Opens every regular file in dir other than "acc" as a store, skipping the ones rill_store_open rejects; NULL on failure. +struct rill_query * rill_query_open(const char *dir) { - (void) argc, (void) argv; - struct rill *db = rill_open("db"); - if (!db) return 1; + struct rill_query *query = calloc(1, sizeof(*query)); + if (!query) { + fail("unable to allocate memory for '%s'", dir); + goto fail_alloc_struct; + } - enum { n = 100 }; + query->dir = strndup(dir, PATH_MAX); + if (!query->dir) { + fail("unable to allocate memory for '%s'", dir); + goto fail_alloc_dir; + } - { - rill_key_t keys[100]; - for (size_t i = 0; i < 100; ++i) keys[i] = i; + DIR *dir_handle = opendir(dir); + if (!dir_handle) { + fail_errno("unable to open dir '%s'", dir); + goto fail_dir; + } - struct rill_pairs out = {0}; - rill_query_key(db, keys, n, &out); + struct dirent *entry; + while ((entry = readdir(dir_handle))) { + if (entry->d_type != DT_REG) continue; + if (!strcmp(entry->d_name, "acc")) continue; - rill_pairs_print(&out); + char file[PATH_MAX]; + snprintf(file, sizeof(file), "%s/%s", query->dir, entry->d_name); + if (query->len == sizeof(query->list) / sizeof(query->list[0])) { fail("too many stores in '%s'", dir); break; } // list[] cannot grow: stop instead of writing past it + query->list[query->len] = rill_store_open(file); + if (!query->list[query->len]) continue; + + query->len++; } - { - rill_val_t vals[100]; - for (size_t i = 0; i < 100; ++i) vals[i] = i; + closedir(dir_handle); + + return query; - struct rill_pairs out = {0}; - rill_query_val(db, vals, n, &out); + fail_dir: + free((char *) query->dir); + fail_alloc_dir: + free(query); + fail_alloc_struct: + return NULL; +} + +void rill_query_close(struct rill_query *query) +{ + for (size_t i = 0; i < query->len; ++i) + 
rill_store_close(query->list[i]); + + free((char *) query->dir); + free(query); +} + +struct rill_pairs *rill_query_key( + struct rill_query *query, + const rill_key_t *keys, size_t len, + struct rill_pairs *out) +{ + if (!len) return out; + + struct rill_pairs *result = out; + for (size_t i = 0; i < query->len; ++i) { + result = rill_store_scan_key(query->list[i], keys, len, result); + if (!result) return NULL; + } + + rill_pairs_compact(result); + return result; +} + +struct rill_pairs *rill_query_val( + struct rill_query *query, + const rill_val_t *vals, size_t len, + struct rill_pairs *out) +{ + if (!len) return out; - rill_pairs_print(&out); + struct rill_pairs *result = out; + for (size_t i = 0; i < query->len; ++i) { + result = rill_store_scan_val(query->list[i], vals, len, result); + if (!result) return result; } - rill_close(db); - return 0; + rill_pairs_compact(result); + return result; } diff --git a/src/rill.c b/src/rill.c deleted file mode 100644 index ae6d233..0000000 --- a/src/rill.c +++ /dev/null @@ -1,402 +0,0 @@ -/* rill.c - Rémi Attab (remi.attab@gmail.com), 03 Sep 2017 - FreeBSD-style copyright and disclaimer apply -*/ - -#include "rill.h" -#include "utils.h" - -#include -#include -#include -#include -#include -#include -#include - - -// ----------------------------------------------------------------------------- -// kv -// ----------------------------------------------------------------------------- - -extern inline bool rill_kv_nil(const struct rill_kv *); -extern inline int rill_kv_cmp(const struct rill_kv *, const struct rill_kv *); - - -// ----------------------------------------------------------------------------- -// config -// ----------------------------------------------------------------------------- - -enum { hours = 24, days = 30, months = 13}; -enum -{ - quant_hour = 60 * 60, - quant_day = hours * quant_hour, - quant_month = days * quant_day, -}; - - -// 
----------------------------------------------------------------------------- -// rill -// ----------------------------------------------------------------------------- - -struct rill -{ - const char *dir; - rill_ts_t ts; - - lock_t lock; - struct rill_pairs *acc; - struct rill_pairs *dump; - - struct rill_store *hourly[hours]; - struct rill_store *daily[days]; - struct rill_store *monthly[months]; -}; - -static bool load_store(struct rill *db, const char *file) -{ - struct rill_store *store = rill_store_open(file); - if (!store) goto fail_open; - - rill_ts_t ts = rill_store_ts(store); - size_t quant = rill_store_quant(store); - - struct rill_store **bucket = NULL; - switch (quant) { - case quant_hour: bucket = &db->hourly[(ts / quant_hour) % hours]; break; - case quant_day: bucket = &db->daily[(ts / quant_hour) % days]; break; - case quant_month: bucket = &db->monthly[(ts / quant_month) % months]; break; - default: - fail("unknown quant '%lu' for '%s'", quant, file); - goto fail_quant; - } - - if (*bucket) { - fail("file '%s' is a duplicate for quant '%lu' at timestamp %lu'", - file, quant, ts); - goto fail_dup; - } - - *bucket = store; - - return true; - - fail_dup: - fail_quant: - rill_store_close(store); - fail_open: - return false; -} - -struct rill * rill_open(const char *dir) -{ - struct rill *db = calloc(1, sizeof(*db)); - if (!db) { - fail("unable to allocate memory for '%s'", dir); - goto fail_alloc_struct; - } - - db->dir = strndup(dir, PATH_MAX); - if (!db->dir) { - fail("unable to allocate memory for '%s'", dir); - goto fail_alloc_dir; - } - - if (!(db->acc = rill_pairs_new(1 * 1000 * 1000))) { - fail("unable to allocate pairs for '%s'", dir); - goto fail_alloc_acc; - } - - if (!(db->dump = rill_pairs_new(1 * 1000 * 1000))) { - fail("unable to allocate pairs for '%s'", dir); - goto fail_alloc_dump; - } - - if (mkdir(dir, 0775) == -1 && errno != EEXIST) { - fail_errno("unable to open create dir '%s'", dir); - goto fail_mkdir; - } - - DIR *dir_handle = 
opendir(dir); - if (!dir_handle) { - fail_errno("unable to open dir '%s'", dir); - goto fail_dir; - } - - struct dirent it, *entry; - while (true) { - if (readdir_r(dir_handle, &it, &entry) == -1) { - fail_errno("unable to read dir '%s'", dir); - goto fail_readdir; - } - else if (!entry) break; - else if (entry->d_type != DT_REG) continue; - - char file[PATH_MAX]; - snprintf(file, sizeof(file), "%s/%s", db->dir, entry->d_name); - (void) load_store(db, file); - } - - closedir(dir_handle); - - return db; - - fail_readdir: - closedir(dir_handle); - fail_dir: - fail_mkdir: - rill_pairs_free(db->dump); - fail_alloc_dump: - rill_pairs_free(db->acc); - fail_alloc_acc: - free((char *) db->dir); - fail_alloc_dir: - free(db); - fail_alloc_struct: - return NULL; -} - -void rill_close(struct rill *db) -{ - for (size_t i = 0; i < hours; ++i) { - if (db->hourly[i]) rill_store_close(db->hourly[i]); - } - - for (size_t i = 0; i < days; ++i) { - if (db->daily[i]) rill_store_close(db->daily[i]); - } - - for (size_t i = 0; i < months; ++i) { - if (db->monthly[i]) rill_store_close(db->monthly[i]); - } - - - free((char *) db->dir); - rill_pairs_free(db->acc); - rill_pairs_free(db->dump); - free(db); -} - - -// ----------------------------------------------------------------------------- -// ingest -// ----------------------------------------------------------------------------- - -bool rill_ingest(struct rill *db, rill_key_t key, rill_val_t val) -{ - if (!key) { - fail("invalid nil key '%lu'", key); - return false; - } - - if (!val) { - fail("invalid nil value '%lu'", val); - return false; - } - - struct rill_pairs *result; - { - lock(&db->lock); - - result = rill_pairs_push(db->acc, key, val); - - unlock(&db->lock); - } - - if (result) db->acc = result; - return result != NULL; -} - - -// ----------------------------------------------------------------------------- -// rotate -// ----------------------------------------------------------------------------- -// \todo since we're 
deleting data, should be reviewed for robustness. -// -// \todo if we have a gap in ingestion, it's possible that we don't expire some -// data or that we hit one of the asserts. Need to improve the mechanism a -// bit. - - -static bool rotate_monthly( - struct rill *db, - struct rill_store **store, - rill_ts_t ts, - struct rill_store **list, size_t len) -{ - char file[PATH_MAX]; - snprintf(file, sizeof(file), "%s/%06lu.rill", db->dir, ts / quant_month); - - if (*store) { - (void) rill_store_rm(*store); - *store = NULL; - } - - bool all_null = true; - for (size_t i = 0; i < len; ++i) all_null = all_null && !list[i]; - if (all_null) return true; - - if (!rill_store_merge(file, ts, quant_month, list, len)) return false; - if (!(*store = rill_store_open(file))) return false; - - for (size_t i = 0; i < len; ++i) { - if (!list[i]) continue; - (void) rill_store_rm(list[i]); - list[i] = NULL; - } - - return true; -} - -static bool rotate_daily( - struct rill *db, - struct rill_store **store, - rill_ts_t ts, - struct rill_store **list, size_t len) -{ - char file[PATH_MAX]; - snprintf(file, sizeof(file), "%s/%06lu-%02lu.rill", db->dir, - ts / quant_month, - (ts / quant_day) % days); - - assert(!*store); - - bool all_null = true; - for (size_t i = 0; i < len; ++i) all_null = all_null && !list[i]; - if (all_null) return true; - - if (!rill_store_merge(file, ts, quant_day, list, len)) return false; - if (!(*store = rill_store_open(file))) return false; - - for (size_t i = 0; i < len; ++i) { - if (!list[i]) continue; - (void) rill_store_rm(list[i]); - list[i] = NULL; - } - - return true; -} - -static bool rotate_hourly(struct rill *db, struct rill_store **store, rill_ts_t ts) -{ - char file[PATH_MAX]; - snprintf(file, sizeof(file), "%s/%06lu-%02lu-%02lu.rill", db->dir, - ts / quant_month, - (ts / quant_day) % days, - (ts / quant_hour) % hours); - - { - lock(&db->lock); - - struct rill_pairs *tmp = db->acc; - db->acc = db->dump; - db->dump = tmp; - - unlock(&db->lock); - } - - 
assert(!*store); - if (db->dump->len) { - if (!rill_store_write(file, ts, quant_hour, db->dump)) return false; - if (!(*store = rill_store_open(file))) return false; - } - - rill_pairs_clear(db->dump); - return true; -} - -bool rill_rotate(struct rill *db, rill_ts_t now) -{ - if (now / quant_hour != db->ts / quant_hour) { - size_t quant = db->ts / quant_hour; - if (!rotate_hourly(db, &db->hourly[quant % hours], db->ts)) { - fail("unable to complete hourly rotation '%lu'", quant); - return false; - } - } - - if (now / quant_day != db->ts / quant_day) { - size_t quant = db->ts / quant_day; - if (!rotate_daily(db, &db->daily[quant % days], db->ts, db->hourly, hours)) { - fail("unable to complete daily rotation '%lu'", quant); - return false; - } - } - - if (now / quant_month != db->ts / quant_month) { - size_t quant = db->ts / quant_month; - if (!rotate_monthly(db, &db->monthly[quant % months], db->ts, db->daily, days)) { - fail("unable to complete monthly rotation '%lu'", quant); - return false; - } - } - - db->ts = now; - return true; -} - - -// ----------------------------------------------------------------------------- -// query -// ----------------------------------------------------------------------------- - -struct rill_pairs *rill_query_key( - struct rill *db, - const rill_key_t *keys, size_t len, - struct rill_pairs *out) -{ - struct rill_pairs *result = out; - if (!len) return result; - - for (size_t i = 0; i < hours; ++i) { - if (!db->hourly[i]) continue; - result = rill_store_scan_key(db->hourly[i], keys, len, result); - if (!result) return NULL; - } - - for (size_t i = 0; i < days; ++i) { - if (!db->daily[i]) continue; - result = rill_store_scan_key(db->daily[i], keys, len, result); - if (!result) return NULL; - } - - for (size_t i = 0; i < months; ++i) { - if (!db->monthly[i]) continue; - result = rill_store_scan_key(db->monthly[i], keys, len, result); - if (!result) return NULL; - } - - rill_pairs_compact(result); - return result; -} - -struct 
rill_pairs *rill_query_val( - struct rill *db, - const rill_val_t *vals, size_t len, - struct rill_pairs *out) -{ - struct rill_pairs *result = out; - if (!len) return result; - - for (size_t i = 0; i < hours; ++i) { - if (!db->hourly[i]) continue; - result = rill_store_scan_val(db->hourly[i], vals, len, result); - if (!result) return result; - } - - for (size_t i = 0; i < days; ++i) { - if (!db->daily[i]) continue; - result = rill_store_scan_val(db->daily[i], vals, len, result); - if (!result) return result; - } - - for (size_t i = 0; i < months; ++i) { - if (!db->monthly[i]) continue; - result = rill_store_scan_val(db->monthly[i], vals, len, result); - if (!result) return result; - } - - rill_pairs_compact(result); - return result; -} diff --git a/src/rill.h b/src/rill.h index 1294bbc..52b9acb 100644 --- a/src/rill.h +++ b/src/rill.h @@ -99,9 +99,9 @@ bool rill_store_merge( bool rill_store_rm(struct rill_store *store); -const char * rill_store_file(struct rill_store *store); -rill_ts_t rill_store_ts(struct rill_store *store); -size_t rill_store_quant(struct rill_store *store); +const char * rill_store_file(const struct rill_store *store); +rill_ts_t rill_store_ts(const struct rill_store *store); +size_t rill_store_quant(const struct rill_store *store); struct rill_pairs *rill_store_scan_key( struct rill_store *store, @@ -117,23 +117,42 @@ void rill_store_print(struct rill_store *store); // ----------------------------------------------------------------------------- -// rill +// acc // ----------------------------------------------------------------------------- -struct rill; +struct rill_acc; -struct rill * rill_open(const char *dir); -void rill_close(struct rill *db); +enum { rill_acc_read_only = 0 }; -bool rill_ingest(struct rill *db, rill_key_t key, rill_val_t val); -bool rill_rotate(struct rill *db, rill_ts_t now); +struct rill_acc *rill_acc_open(const char *dir, size_t cap); +void rill_acc_close(struct rill_acc *acc); + +void rill_acc_ingest(struct rill_acc 
*acc, rill_key_t key, rill_val_t val); +bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now); + + +// ----------------------------------------------------------------------------- +// rotate +// ----------------------------------------------------------------------------- + +bool rill_rotate(const char *dir, rill_ts_t now); + + +// ----------------------------------------------------------------------------- +// query +// ----------------------------------------------------------------------------- + +struct rill_query; + +struct rill_query * rill_query_open(const char *dir); +void rill_query_close(struct rill_query *db); struct rill_pairs *rill_query_key( - struct rill *db, + struct rill_query *query, const rill_key_t *keys, size_t len, struct rill_pairs *out); struct rill_pairs *rill_query_val( - struct rill *db, + struct rill_query *query, const rill_val_t *vals, size_t len, struct rill_pairs *out); diff --git a/src/dump.c b/src/rill_dump.c similarity index 100% rename from src/dump.c rename to src/rill_dump.c diff --git a/src/load.c b/src/rill_load.c similarity index 78% rename from src/load.c rename to src/rill_load.c index 990a14a..b63e539 100644 --- a/src/load.c +++ b/src/rill_load.c @@ -5,6 +5,7 @@ #include "rill.h" #include "rng.h" +#include "utils.h" #include #include @@ -45,18 +46,19 @@ int main(int argc, char **argv) (void) argc, (void) argv; rm("db"); - struct rill *db = rill_open("db"); - if (!db) return 1; - enum { keys_per_sec = 200, - seconds = 3 * 31 * 24 * 60 * 60, + seconds = 3 * month, + rotation_rate = 10 * min, keys_range = 1 * 1000 * 1000 * 1000, vals_range = 10 * 1000, vals_per_key = 4, }; + struct rill_acc *acc = rill_acc_open("db", keys_per_sec * rotation_rate); + if (!acc) return 1; + struct rng rng = rng_make(0); for (size_t ts = 0; ts < seconds; ++ts) { for (size_t i = 0; i < keys_per_sec; ++i) { @@ -64,15 +66,17 @@ int main(int argc, char **argv) for (size_t j = 0; j < vals_per_key; ++j) { uint64_t val = 
rng_gen_val(&rng, 0, vals_range); - if (!rill_ingest(db, key, val)) return 1; + rill_acc_ingest(acc, key, val); } } - if (!rill_rotate(db, ts)) return 1; + if (ts % rotation_rate == 0) { + if (!rill_rotate("db", ts)) return 0; + } } - if (!rill_rotate(db, seconds + 60 * 60)) return 1; - rill_close(db); + rill_acc_close(acc); + if (!rill_rotate("db", seconds + 60 * 60)) return 1; return 0; } diff --git a/src/rotate.c b/src/rotate.c new file mode 100644 index 0000000..9f45a8f --- /dev/null +++ b/src/rotate.c @@ -0,0 +1,164 @@ +/* rotate.c + Rémi Attab (remi.attab@gmail.com), 16 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" +#include "utils.h" + +#include +#include + +#include +#include +#include + +// ----------------------------------------------------------------------------- +// rotate +// ----------------------------------------------------------------------------- + +static void rotate_acc(const char *dir, rill_ts_t now) +{ + struct rill_acc *acc = rill_acc_open(dir, rill_acc_read_only); + if (!acc) return; + + char file[PATH_MAX]; + snprintf(file, sizeof(file), "%s/%010lu.rill", dir, now); + printf("rotate: writing acc to '%s' with timestamp '%lu'\n", file, now); + + (void) rill_acc_write(acc, file, now); + rill_acc_close(acc); +} + +static size_t load_dir(const char *dir, struct rill_store **list, size_t cap) +{ + DIR *dir_handle = opendir(dir); + if (!dir_handle) return 0; + + size_t len = 0; + struct dirent *entry = NULL; + while ((entry = readdir(dir_handle))) { + if (entry->d_type != DT_REG) continue; + if (!strcmp(entry->d_name, "acc")) continue; + + char file[PATH_MAX]; + snprintf(file, sizeof(file), "%s/%s", dir, entry->d_name); + + list[len] = rill_store_open(file); + if (!list[len]) continue; + + len++; + if (len == cap) { + fail("rotate: too many files to rotate in '%s'", dir); + break; + } + } + + closedir(dir_handle); + return len; +} + +static int store_cmp(const void *l, const void *r) +{ + const struct 
rill_store *const *lhs = l; + const struct rill_store *const *rhs = r; + + if (rill_store_ts(*lhs) < rill_store_ts(*rhs)) return +1; + if (rill_store_ts(*lhs) > rill_store_ts(*rhs)) return -1; + return 0; +} + +static size_t expire(struct rill_store **list, size_t len, rill_ts_t now) +{ + if (now < expiration) return len; // mostly for tests. + + size_t i = 0; + for (; i < len; ++i) { + if (rill_store_ts(list[i]) < (now - expiration)) break; + } + + size_t end = i; + for (; i < len; ++i) { + printf("rotate: expiring '%s' with timestamp '%lu < %lu'\n", + rill_store_file(list[i]), rill_store_ts(list[i]), (now - expiration)); + rill_store_rm(list[i]); + list[i] = NULL; + } + return end; +} + +static bool merge( + struct rill_store **list, size_t len, const char *dir, rill_ts_t now) +{ + rill_ts_t earliest = rill_store_ts(list[0]); + + rill_ts_t quant = 0; + if (earliest / hour != now / hour) quant = hour; + if (earliest / day != now / day) quant = day; + if (earliest / month != now / month) quant = month; + + printf("rotate: now=%lu, earliest=%lu, quant=%lu\n", now, earliest, quant); + + if (!quant) return true; + + rill_ts_t oldest = ((now / quant) - 1) * quant; + size_t merge_end = 0; + for (; merge_end < len; ++merge_end) { + if (rill_store_ts(list[merge_end]) < oldest) break; + + printf("rotate: merging '%s' with timestamp '%lu >= %lu'\n", + rill_store_file(list[merge_end]), + rill_store_ts(list[merge_end]), + oldest); + } + if (merge_end <= 1) return true; + + rill_ts_t ts = oldest + quant - 1; + + char file[PATH_MAX]; + if (quant == hour) + snprintf(file, sizeof(file), "%s/%05lu-%02lu-%02lu.rill", + dir, ts / month, (ts / day) % days, (ts / hour) % hours); + else if (quant == day) + snprintf(file, sizeof(file), "%s/%05lu-%02lu.rill", + dir, ts / month, (ts / day) % days); + else if (quant == month) + snprintf(file, sizeof(file), "%s/%05lu.rill", dir, ts / month); + + + printf("rotate: merging to '%s' with timestamp '%lu'\n", file, ts); + + if 
(!rill_store_merge(file, ts, quant, list, merge_end)) return false; + + for (size_t i = 0; i < merge_end; ++i) { + printf("rotate: deleting '%s'\n", rill_store_file(list[i])); + rill_store_rm(list[i]); + list[i] = NULL; + } + + return true; +} + + +bool rill_rotate(const char *dir, rill_ts_t now) +{ + printf("rotate: rotating '%s' at timestamp '%lu'\n", dir, now); + + enum { cap = 1024 }; + struct rill_store *list[cap]; + size_t len = load_dir(dir, list, cap); + qsort(list, len, sizeof(list[0]), store_cmp); + + // We don't want the latest file in the merge list. + rotate_acc(dir, now); + + len = expire(list, len, now); + if (!len) return true; + + bool ret = merge(list, len, dir, now); + for (size_t i = 0; i < len; ++i) { + if (list[i]) rill_store_close(list[i]); + } + + return ret; +} diff --git a/src/store.c b/src/store.c index 8de17f0..615a45b 100644 --- a/src/store.c +++ b/src/store.c @@ -89,12 +89,6 @@ static struct coder store_decoder(struct rill_store *store) // vma // ----------------------------------------------------------------------------- -static inline size_t to_vma_len(size_t len) -{ - if (!(len % page_len)) return len; - return (len & ~(page_len - 1)) + page_len; -} - static inline void vma_will_need(struct rill_store *store) { if (madvise(store->vma, store->vma_len, MADV_WILLNEED) == -1) @@ -148,7 +142,7 @@ struct rill_store *rill_store_open(const char *file) store->vma = mmap(NULL, store->vma_len, PROT_READ, MAP_SHARED, store->fd, 0); if (store->vma == MAP_FAILED) { - fail_errno("[reader] unable to mmap '%s'", file); + fail_errno("unable to mmap '%s' of len '%lu'", file, store->vma_len); goto fail_mmap; } @@ -421,17 +415,17 @@ bool rill_store_merge( // scan // ----------------------------------------------------------------------------- -const char * rill_store_file(struct rill_store *store) +const char * rill_store_file(const struct rill_store *store) { return store->file; } -rill_ts_t rill_store_ts(struct rill_store *store) +rill_ts_t 
rill_store_ts(const struct rill_store *store) { return store->head->ts; } -size_t rill_store_quant(struct rill_store *store) +size_t rill_store_quant(const struct rill_store *store) { return store->head->quant; } diff --git a/src/utils.h b/src/utils.h index d2d8bdb..b4ea3e1 100644 --- a/src/utils.h +++ b/src/utils.h @@ -26,6 +26,23 @@ // misc // ----------------------------------------------------------------------------- +enum +{ + hours = 24, + days = 31, + months = 13, +}; + +enum +{ + min = 60, + hour = 60 * min, + day = hours * hour, + month = days * day, + expiration = months * month, +}; + + enum { page_len_s = 4096 }; static const size_t page_len = page_len_s; @@ -65,3 +82,14 @@ static inline void unlock(atomic_size_t *l) { atomic_store_explicit(l, 0, memory_order_release); } + + +// ----------------------------------------------------------------------------- +// vma +// ----------------------------------------------------------------------------- + +static inline size_t to_vma_len(size_t len) +{ + if (!(len % page_len)) return len; + return (len & ~(page_len - 1)) + page_len; +} diff --git a/test/rill_test.c b/test/rotate_test.c similarity index 65% rename from test/rill_test.c rename to test/rotate_test.c index 7d2f1e8..dbf1fb1 100644 --- a/test/rill_test.c +++ b/test/rotate_test.c @@ -15,18 +15,25 @@ bool test_rotate(void) const char *dir = "test.rotate.db"; rm(dir); - struct rill *db = rill_open(dir); const uint64_t key = 1; - for (rill_ts_t ts = 0; ts < 13 * month; ts += 1 * hour) { - rill_ingest(db, key, ts + 1); - rill_rotate(db, ts); + { + struct rill_acc *acc = rill_acc_open(dir, 1); + + for (rill_ts_t ts = 0; ts < 13 * month; ts += 1 * hour) { + rill_acc_ingest(acc, key, ts + 1); + rill_rotate(dir, ts); + } + + rill_acc_close(acc); + rill_rotate(dir, 13 * month); } - rill_rotate(db, 13 * month); { - struct rill_pairs *pairs = rill_query_key(db, &key, 1, rill_pairs_new(1)); + struct rill_query *query = rill_query_open(dir); + struct rill_pairs 
*pairs = rill_query_key(query, &key, 1, rill_pairs_new(1)); + rill_query_close(query); size_t i = 0; for (rill_ts_t ts = 0; ts < 13 * month; ts += 1 * hour) { @@ -38,12 +45,13 @@ bool test_rotate(void) rill_pairs_free(pairs); } - // \todo this is bad and doesn't properly expire things. for (size_t i = 1; i <= 6; ++i) - rill_rotate(db, (13 + i) * month); + rill_rotate(dir, (13 + i) * month); { - struct rill_pairs *pairs = rill_query_key(db, &key, 1, rill_pairs_new(1)); + struct rill_query *query = rill_query_open(dir); + struct rill_pairs *pairs = rill_query_key(query, &key, 1, rill_pairs_new(1)); + rill_query_close(query); for (size_t i = 0; i < pairs->len; ++i) { assert(pairs->data[i].key == key); @@ -53,7 +61,6 @@ bool test_rotate(void) rill_pairs_free(pairs); } - rill_close(db); rm(dir); return true; diff --git a/test/test.h b/test/test.h index 04d5249..66ef039 100644 --- a/test/test.h +++ b/test/test.h @@ -20,20 +20,6 @@ #include -// ----------------------------------------------------------------------------- -// time -// ----------------------------------------------------------------------------- - -enum -{ - sec = 1, - min = 60 * sec, - hour = 60 * min, - day = 24 * hour, - month = 31 * day, -}; - - // ----------------------------------------------------------------------------- // pairs // ----------------------------------------------------------------------------- From a9cdc0948f3413078fc34d29d8f06c0d1736cf1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sun, 17 Sep 2017 13:33:25 -0400 Subject: [PATCH 11/91] Fixed a bunch of stuff --- compile.sh | 19 +++-- src/acc.c | 56 ++++++++++---- src/pairs.c | 2 + src/query.c | 30 +------- src/rill.h | 7 ++ src/rill_load.c | 4 +- src/rotate.c | 180 +++++++++++++++++++++++---------------------- src/utils.c | 43 +++++++++++ src/utils.h | 27 ------- test/rotate_test.c | 5 +- 10 files changed, 207 insertions(+), 166 deletions(-) create mode 100644 src/utils.c diff --git a/compile.sh b/compile.sh 
index 77e9715..8646e59 100755 --- a/compile.sh +++ b/compile.sh @@ -5,14 +5,21 @@ set -o errexit -o nounset -o pipefail -o xtrace : ${PREFIX:="."} declare -a SRC -SRC=(htable rng pairs store acc rotate query) +SRC=(htable rng utils pairs store acc rotate query) CFLAGS="-g -O3 -march=native -pipe -std=gnu11 -D_GNU_SOURCE" CFLAGS="$CFLAGS -I${PREFIX}/src" CFLAGS="$CFLAGS -Werror -Wall -Wextra" -CFLAGS="$CFLAGS -Wundef -Wcast-align -Wwrite-strings -Wunreachable-code -Wformat=2" -CFLAGS="$CFLAGS -Wswitch-enum -Wswitch-default -Winit-self -Wno-strict-aliasing" +CFLAGS="$CFLAGS -Wundef" +CFLAGS="$CFLAGS -Wcast-align" +CFLAGS="$CFLAGS -Wwrite-strings" +CFLAGS="$CFLAGS -Wunreachable-code" +CFLAGS="$CFLAGS -Wformat=2" +CFLAGS="$CFLAGS -Wswitch-enum" +CFLAGS="$CFLAGS -Wswitch-default" +CFLAGS="$CFLAGS -Winit-self" +CFLAGS="$CFLAGS -Wno-strict-aliasing" CFLAGS="$CFLAGS -fno-strict-aliasing" OBJ="" @@ -22,8 +29,8 @@ for src in "${SRC[@]}"; do done ar rcs librill.a $OBJ -gcc -o rill_load "${PREFIX}/src/load.c" librill.a $CFLAGS -gcc -o rill_dump "${PREFIX}/src/dump.c" librill.a $CFLAGS +gcc -o rill_load "${PREFIX}/src/rill_load.c" librill.a $CFLAGS +gcc -o rill_dump "${PREFIX}/src/rill_dump.c" librill.a $CFLAGS gcc -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder -gcc -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS && ./test_rotate +gcc -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS diff --git a/src/acc.c b/src/acc.c index 93ba236..0b31085 100644 --- a/src/acc.c +++ b/src/acc.c @@ -61,6 +61,10 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) { if (cap != rill_acc_read_only && cap < min_cap) cap = min_cap; + // Add enough leeway to avoid contention between the reader and the writer. + // some might say this is an excessive amount of leeway but I don't care. 
+ cap *= 2; + struct rill_acc *acc = calloc(1, sizeof(*acc)); if (!acc) { fail("unable to allocate memory for '%s'", dir); @@ -101,13 +105,22 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) goto fail_open; } - acc->vma_len = to_vma_len(sizeof(*acc->head) + cap * sizeof(*acc->data)); if (create) { + acc->vma_len = to_vma_len(sizeof(*acc->head) + cap * sizeof(*acc->data)); if (ftruncate(acc->fd, acc->vma_len) == -1) { fail_errno("unable to ftruncate '%s' to len '%lu'", file, acc->vma_len); goto fail_truncate; } } + else { + size_t len = stat_ret.st_size; + if (len < sizeof(struct header)) { + fail("invalid size for '%s'", file); + goto fail_size; + } + + acc->vma_len = to_vma_len(len); + } acc->vma = mmap(NULL, acc->vma_len, PROT_READ | PROT_WRITE, MAP_SHARED, acc->fd, 0); if (acc->vma == MAP_FAILED) { @@ -141,6 +154,7 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) fail_magic: munmap(acc->vma, acc->vma_len); fail_mmap: + fail_size: fail_truncate: close(acc->fd); fail_open: @@ -163,13 +177,16 @@ void rill_acc_close(struct rill_acc *acc) void rill_acc_ingest(struct rill_acc *acc, rill_key_t key, rill_val_t val) { - size_t read = atomic_load_explicit(&acc->head->read, memory_order_relaxed); - size_t index = read % acc->head->len; + assert(key && val); + + size_t write = atomic_load_explicit(&acc->head->write, memory_order_relaxed); + size_t index = write % acc->head->len; + struct kv *kv = &acc->data[index]; - acc->data[index].key = key; - acc->data[index].val = val; + kv->key = key; + kv->val = val; - atomic_store_explicit(&acc->head->read, read + 1, memory_order_release); + atomic_store_explicit(&acc->head->write, write + 1, memory_order_release); } bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) @@ -180,22 +197,26 @@ bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) return false; } - size_t start = atomic_load_explicit(&acc->head->write, memory_order_acquire); - size_t end = 
atomic_load_explicit(&acc->head->read, memory_order_acquire); - if (start == end) return true; + size_t start = atomic_load_explicit(&acc->head->read, memory_order_acquire); + size_t end = atomic_load_explicit(&acc->head->write, memory_order_acquire); + if (start == end) goto done; assert(start < end); - if (end - start >= acc->head->len) { - printf("acc lost '%lu' events\n", (end - start) - acc->head->len); - size_t leeway = min_cap / 2; // to avoid contentention between reader and writer - start = end - acc->head->len + leeway; + if (end - start > acc->head->len) { + printf("acc lost '%lu' events: read=%lu, write=%lu, cap=%lu\n", + (end - start) - acc->head->len, start, end, acc->head->len); + start = end - acc->head->len; } struct rill_pairs *ret = NULL; for (size_t i = start; i < end; ++i) { size_t index = i % acc->head->len; + struct kv *kv = &acc->data[index]; - ret = rill_pairs_push(pairs, acc->data[index].key, acc->data[index].val); + /* printf("read: [%lu] %lu/%lu -> %p{%lu, %lu}\n", */ + /* i, index, acc->head->len, (void *) kv, kv->key, kv->val); */ + + ret = rill_pairs_push(pairs, kv->key, kv->val); assert(ret == pairs); } @@ -204,10 +225,13 @@ bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) goto fail_write; } - atomic_store_explicit(&acc->head->write, end, memory_order_release); + atomic_store_explicit(&acc->head->read, end, memory_order_release); + + done: + rill_pairs_free(pairs); return true; fail_write: - free(pairs); + rill_pairs_free(pairs); return false; } diff --git a/src/pairs.c b/src/pairs.c index 3b84f9e..8840718 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -56,6 +56,8 @@ void rill_pairs_clear(struct rill_pairs *pairs) struct rill_pairs *rill_pairs_push( struct rill_pairs *pairs, rill_key_t key, rill_val_t val) { + assert(key && val); + if (rill_unlikely(pairs->len + 1 > pairs->cap)) { size_t cap = adjust_cap(pairs->cap, pairs->len + 1); diff --git a/src/query.c b/src/query.c index 27ca1c0..9821284 100644 --- 
a/src/query.c +++ b/src/query.c @@ -8,12 +8,11 @@ #include #include -#include -#include + #include #include #include - +#include // ----------------------------------------------------------------------------- @@ -42,32 +41,11 @@ struct rill_query * rill_query_open(const char *dir) goto fail_alloc_dir; } - DIR *dir_handle = opendir(dir); - if (!dir_handle) { - fail_errno("unable to open dir '%s'", dir); - goto fail_dir; - } - - struct dirent *entry; - while ((entry = readdir(dir_handle))) { - if (entry->d_type != DT_REG) continue; - if (!strcmp(entry->d_name, "acc")) continue; - - char file[PATH_MAX]; - snprintf(file, sizeof(file), "%s/%s", query->dir, entry->d_name); - - query->list[query->len] = rill_store_open(file); - if (!query->list[query->len]) continue; - - query->len++; - } - - closedir(dir_handle); + size_t cap = sizeof(query->list) / sizeof(query->list[0]); + query->len = rill_scan_dir(query->dir, query->list, cap); return query; - closedir(dir_handle); - fail_dir: free((char *) query->dir); fail_alloc_dir: free(query); diff --git a/src/rill.h b/src/rill.h index 52b9acb..2d3f690 100644 --- a/src/rill.h +++ b/src/rill.h @@ -156,3 +156,10 @@ struct rill_pairs *rill_query_val( struct rill_query *query, const rill_val_t *vals, size_t len, struct rill_pairs *out); + + +// ----------------------------------------------------------------------------- +// misc +// ----------------------------------------------------------------------------- + +size_t rill_scan_dir(const char *dir, struct rill_store **list, size_t cap); diff --git a/src/rill_load.c b/src/rill_load.c index b63e539..64ec1d2 100644 --- a/src/rill_load.c +++ b/src/rill_load.c @@ -54,9 +54,11 @@ int main(int argc, char **argv) keys_range = 1 * 1000 * 1000 * 1000, vals_range = 10 * 1000, vals_per_key = 4, + + acc_cap = keys_per_sec * vals_per_key * rotation_rate, }; - struct rill_acc *acc = rill_acc_open("db", keys_per_sec * rotation_rate); + struct rill_acc *acc = rill_acc_open("db", acc_cap); if 
(!acc) return 1; struct rng rng = rng_make(0); diff --git a/src/rotate.c b/src/rotate.c index 9f45a8f..f1964d4 100644 --- a/src/rotate.c +++ b/src/rotate.c @@ -8,9 +8,9 @@ #include #include +#include #include -#include #include // ----------------------------------------------------------------------------- @@ -24,96 +24,41 @@ static void rotate_acc(const char *dir, rill_ts_t now) char file[PATH_MAX]; snprintf(file, sizeof(file), "%s/%010lu.rill", dir, now); - printf("rotate: writing acc to '%s' with timestamp '%lu'\n", file, now); (void) rill_acc_write(acc, file, now); rill_acc_close(acc); } -static size_t load_dir(const char *dir, struct rill_store **list, size_t cap) -{ - DIR *dir_handle = opendir(dir); - if (!dir_handle) return 0; - - size_t len = 0; - struct dirent *entry = NULL; - while ((entry = readdir(dir_handle))) { - if (entry->d_type != DT_REG) continue; - if (!strcmp(entry->d_name, "acc")) continue; - - char file[PATH_MAX]; - snprintf(file, sizeof(file), "%s/%s", dir, entry->d_name); - - list[len] = rill_store_open(file); - if (!list[len]) continue; - - len++; - if (len == cap) { - fail("rotate: too many files to rotate in '%s'", dir); - break; - } - } - - closedir(dir_handle); - return len; -} - -static int store_cmp(const void *l, const void *r) -{ - const struct rill_store *const *lhs = l; - const struct rill_store *const *rhs = r; - - if (rill_store_ts(*lhs) < rill_store_ts(*rhs)) return +1; - if (rill_store_ts(*lhs) > rill_store_ts(*rhs)) return -1; - return 0; -} - -static size_t expire(struct rill_store **list, size_t len, rill_ts_t now) +static ssize_t expire(rill_ts_t now, struct rill_store **list, ssize_t len) { + if (len < 0) return len; if (now < expiration) return len; // mostly for tests. 
size_t i = 0; - for (; i < len; ++i) { + for (; i < (size_t) len; ++i) { if (rill_store_ts(list[i]) < (now - expiration)) break; } size_t end = i; - for (; i < len; ++i) { - printf("rotate: expiring '%s' with timestamp '%lu < %lu'\n", - rill_store_file(list[i]), rill_store_ts(list[i]), (now - expiration)); + for (; i < (size_t) len; ++i) { rill_store_rm(list[i]); list[i] = NULL; } + return end; } -static bool merge( - struct rill_store **list, size_t len, const char *dir, rill_ts_t now) +static struct rill_store *merge( + const char *dir, + rill_ts_t ts, rill_ts_t quant, + struct rill_store **list, size_t len) { - rill_ts_t earliest = rill_store_ts(list[0]); - - rill_ts_t quant = 0; - if (earliest / hour != now / hour) quant = hour; - if (earliest / day != now / day) quant = day; - if (earliest / month != now / month) quant = month; - - printf("rotate: now=%lu, earliest=%lu, quant=%lu\n", now, earliest, quant); - - if (!quant) return true; - - rill_ts_t oldest = ((now / quant) - 1) * quant; - size_t merge_end = 0; - for (; merge_end < len; ++merge_end) { - if (rill_store_ts(list[merge_end]) < oldest) break; - - printf("rotate: merging '%s' with timestamp '%lu >= %lu'\n", - rill_store_file(list[merge_end]), - rill_store_ts(list[merge_end]), - oldest); + assert(len > 0); + if (len == 1) { + struct rill_store *result = list[0]; + list[0] = NULL; + return result; } - if (merge_end <= 1) return true; - - rill_ts_t ts = oldest + quant - 1; char file[PATH_MAX]; if (quant == hour) @@ -125,40 +70,99 @@ static bool merge( else if (quant == month) snprintf(file, sizeof(file), "%s/%05lu.rill", dir, ts / month); + if (!rill_store_merge(file, ts, quant, list, len)) return NULL; - printf("rotate: merging to '%s' with timestamp '%lu'\n", file, ts); - - if (!rill_store_merge(file, ts, quant, list, merge_end)) return false; - - for (size_t i = 0; i < merge_end; ++i) { - printf("rotate: deleting '%s'\n", rill_store_file(list[i])); + for (size_t i = 0; i < len; ++i) { 
rill_store_rm(list[i]); list[i] = NULL; } - return true; + return rill_store_open(file); } +static ssize_t merge_quant( + const char *dir, + rill_ts_t now, rill_ts_t quant, + struct rill_store **list, ssize_t len) +{ + if (len <= 1) return len; + + size_t out_len = 0; + struct rill_store *out[(size_t) len]; + + size_t start = 0; + rill_ts_t current_quant = rill_store_ts(list[0]) / quant; + + for (size_t i = 0; i < (size_t) len; i++) { + size_t end = i + 1; + assert(i >= start); + assert(end > start); + + size_t next_ts = i + 1 != (size_t) len ? rill_store_ts(list[i + 1]) : -1UL; + if (next_ts / quant == current_quant) continue; + + rill_ts_t earliest_ts = rill_store_ts(list[start]); + if (earliest_ts / quant != now / quant) { + struct rill_store *store = merge(dir, earliest_ts, quant, list + start, end - start); + if (!store) goto fail; + out[out_len++] = store; + } + + // if a file is in the quant represented by now then we don't want to + // merge it as we're still filling in this quant. Additionally, if it's + // in our current quant then it will also be in all bigger quants so we + // can just forget these files for the rest of the rotation. 
+ else { + for (size_t j = start; j < end; ++j) { + rill_store_close(list[j]); + list[j] = NULL; + } + } + + current_quant = next_ts / quant; + start = i + 1; + } + + for (size_t i = 0; i < (size_t) len; ++i) assert(!list[i]); + memcpy(list, out, out_len * sizeof(out[0])); + return out_len; + + fail: + for (size_t i = 0; i < out_len; ++i) + rill_store_close(out[i]); + + return -1; +} + +static int store_cmp(const void *l, const void *r) +{ + const struct rill_store *const *lhs = l; + const struct rill_store *const *rhs = r; + + // earliest (biggest) to oldest (smallest) + if (rill_store_ts(*lhs) < rill_store_ts(*rhs)) return +1; + if (rill_store_ts(*lhs) > rill_store_ts(*rhs)) return -1; + return 0; +} bool rill_rotate(const char *dir, rill_ts_t now) { - printf("rotate: rotating '%s' at timestamp '%lu'\n", dir, now); + rotate_acc(dir, now); enum { cap = 1024 }; struct rill_store *list[cap]; - size_t len = load_dir(dir, list, cap); - qsort(list, len, sizeof(list[0]), store_cmp); + size_t list_len = rill_scan_dir(dir, list, cap); + qsort(list, list_len, sizeof(list[0]), store_cmp); - // We don't want the latest file in the merge list. 
- rotate_acc(dir, now); - - len = expire(list, len, now); - if (!len) return true; + ssize_t len = list_len; + len = expire(now, list, len); + len = merge_quant(dir, now, hour, list, len); + len = merge_quant(dir, now, day, list, len); + len = merge_quant(dir, now, month, list, len); - bool ret = merge(list, len, dir, now); - for (size_t i = 0; i < len; ++i) { + for (size_t i = 0; i < list_len; ++i) { if (list[i]) rill_store_close(list[i]); } - return ret; + return len >= 0; } diff --git a/src/utils.c b/src/utils.c new file mode 100644 index 0000000..2cdad75 --- /dev/null +++ b/src/utils.c @@ -0,0 +1,43 @@ +/* utils.c + Rémi Attab (remi.attab@gmail.com), 17 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" +#include "utils.h" + +#include + + +// ----------------------------------------------------------------------------- +// scan_dir +// ----------------------------------------------------------------------------- + +size_t rill_scan_dir(const char *dir, struct rill_store **list, size_t cap) +{ + DIR *dir_handle = opendir(dir); + if (!dir_handle) return 0; + + size_t len = 0; + struct dirent *entry = NULL; + while ((entry = readdir(dir_handle))) { + if (entry->d_type != DT_REG) continue; + if (!strcmp(entry->d_name, "acc")) continue; + + char file[PATH_MAX]; + snprintf(file, sizeof(file), "%s/%s", dir, entry->d_name); + + list[len] = rill_store_open(file); + if (!list[len]) continue; + + len++; + if (len == cap) { + fail("rotate: too many files to rotate in '%s'", dir); + break; + } + } + + closedir(dir_handle); + return len; +} + diff --git a/src/utils.h b/src/utils.h index b4ea3e1..469c471 100644 --- a/src/utils.h +++ b/src/utils.h @@ -10,7 +10,6 @@ #include #include #include -#include // ----------------------------------------------------------------------------- @@ -57,32 +56,6 @@ static const size_t page_len = page_len_s; #define fail_errno(fmt, ...) 
\ fprintf(stderr, "[fail] "fmt"(%d): %s\n", __VA_ARGS__, errno, strerror(errno)) -// ----------------------------------------------------------------------------- -// lock -// ----------------------------------------------------------------------------- - -typedef atomic_size_t lock_t; - -static inline void lock(atomic_size_t *l) -{ - bool ret = false; - uint64_t old; - - do { - old = atomic_load_explicit(l, memory_order_relaxed); - if (old) continue; - - ret = atomic_compare_exchange_weak_explicit(l, &old, 1, - memory_order_acquire, memory_order_relaxed); - } while (!ret); -} - - -static inline void unlock(atomic_size_t *l) -{ - atomic_store_explicit(l, 0, memory_order_release); -} - // ----------------------------------------------------------------------------- // vma diff --git a/test/rotate_test.c b/test/rotate_test.c index dbf1fb1..4901b24 100644 --- a/test/rotate_test.c +++ b/test/rotate_test.c @@ -17,11 +17,12 @@ bool test_rotate(void) rm(dir); const uint64_t key = 1; + enum { step = 10 * min }; { struct rill_acc *acc = rill_acc_open(dir, 1); - for (rill_ts_t ts = 0; ts < 13 * month; ts += 1 * hour) { + for (rill_ts_t ts = 0; ts < 13 * month; ts += step) { rill_acc_ingest(acc, key, ts + 1); rill_rotate(dir, ts); } @@ -36,7 +37,7 @@ bool test_rotate(void) rill_query_close(query); size_t i = 0; - for (rill_ts_t ts = 0; ts < 13 * month; ts += 1 * hour) { + for (rill_ts_t ts = 0; ts < 13 * month; ts += step) { assert(pairs->data[i].key == key); assert(pairs->data[i].val == ts + 1); ++i; From 08ecfc2824f4e7076221d21e71011a957532022f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sun, 17 Sep 2017 15:09:45 -0400 Subject: [PATCH 12/91] Add weekly rotations --- src/rill_load.c | 4 ++-- src/rotate.c | 36 +++++++++++++++++++++++------------- src/utils.h | 25 +++++++++++++------------ test/rotate_test.c | 12 ++++++------ 4 files changed, 44 insertions(+), 33 deletions(-) diff --git a/src/rill_load.c b/src/rill_load.c index 64ec1d2..39d6c30 100644 
--- a/src/rill_load.c +++ b/src/rill_load.c @@ -48,8 +48,8 @@ int main(int argc, char **argv) enum { keys_per_sec = 200, - seconds = 3 * month, - rotation_rate = 10 * min, + seconds = 3 * month_secs, + rotation_rate = 10 * min_secs, keys_range = 1 * 1000 * 1000 * 1000, vals_range = 10 * 1000, diff --git a/src/rotate.c b/src/rotate.c index f1964d4..3c28724 100644 --- a/src/rotate.c +++ b/src/rotate.c @@ -13,6 +13,7 @@ #include #include + // ----------------------------------------------------------------------------- // rotate // ----------------------------------------------------------------------------- @@ -32,11 +33,11 @@ static void rotate_acc(const char *dir, rill_ts_t now) static ssize_t expire(rill_ts_t now, struct rill_store **list, ssize_t len) { if (len < 0) return len; - if (now < expiration) return len; // mostly for tests. + if (now < expire_secs) return len; // mostly for tests. size_t i = 0; for (; i < (size_t) len; ++i) { - if (rill_store_ts(list[i]) < (now - expiration)) break; + if (rill_store_ts(list[i]) < (now - expire_secs)) break; } size_t end = i; @@ -60,15 +61,23 @@ static struct rill_store *merge( return result; } + rill_ts_t month = ts / month_secs; + rill_ts_t week = (ts / week_secs) % weeks_in_month; + rill_ts_t day = (ts / day_secs) % days_in_week; + rill_ts_t hour = (ts / hour_secs) % hours_in_day; + char file[PATH_MAX]; - if (quant == hour) - snprintf(file, sizeof(file), "%s/%05lu-%02lu-%02lu.rill", - dir, ts / month, (ts / day) % days, (ts / hour) % hours); - else if (quant == day) - snprintf(file, sizeof(file), "%s/%05lu-%02lu.rill", - dir, ts / month, (ts / day) % days); - else if (quant == month) - snprintf(file, sizeof(file), "%s/%05lu.rill", dir, ts / month); + if (quant == hour_secs) + snprintf(file, sizeof(file), "%s/%05lu-%02lu-%02lu-%02lu.rill", + dir, month, week, day, hour); + else if (quant == day_secs) + snprintf(file, sizeof(file), "%s/%05lu-%02lu-%02lu.rill", dir, month, week, day); + else if (quant == week_secs) + 
snprintf(file, sizeof(file), "%s/%05lu-%02lu.rill", dir, month, week); + else if (quant == month_secs) + snprintf(file, sizeof(file), "%s/%05lu.rill", dir, month); + else assert(false); + if (!rill_store_merge(file, ts, quant, list, len)) return NULL; @@ -156,9 +165,10 @@ bool rill_rotate(const char *dir, rill_ts_t now) ssize_t len = list_len; len = expire(now, list, len); - len = merge_quant(dir, now, hour, list, len); - len = merge_quant(dir, now, day, list, len); - len = merge_quant(dir, now, month, list, len); + len = merge_quant(dir, now, hour_secs, list, len); + len = merge_quant(dir, now, day_secs, list, len); + len = merge_quant(dir, now, week_secs, list, len); + len = merge_quant(dir, now, month_secs, list, len); for (size_t i = 0; i < list_len; ++i) { if (list[i]) rill_store_close(list[i]); diff --git a/src/utils.h b/src/utils.h index 469c471..1a7fb51 100644 --- a/src/utils.h +++ b/src/utils.h @@ -27,18 +27,19 @@ enum { - hours = 24, - days = 31, - months = 13, -}; - -enum -{ - min = 60, - hour = 60 * min, - day = hours * hour, - month = days * day, - expiration = months * month, + mins_in_hour = 60, + hours_in_day = 24, + days_in_week = 8, // more closely approximates a month + weeks_in_month = 4, + months_in_expire = 13, + + sec_secs = 1, + min_secs = 60 * sec_secs, + hour_secs = mins_in_hour * min_secs, + day_secs = hours_in_day * hour_secs, + week_secs = days_in_week * day_secs, + month_secs = weeks_in_month * week_secs, + expire_secs = months_in_expire * month_secs, }; diff --git a/test/rotate_test.c b/test/rotate_test.c index 4901b24..6758073 100644 --- a/test/rotate_test.c +++ b/test/rotate_test.c @@ -17,18 +17,18 @@ bool test_rotate(void) rm(dir); const uint64_t key = 1; - enum { step = 10 * min }; + enum { step = 10 * min_secs }; { struct rill_acc *acc = rill_acc_open(dir, 1); - for (rill_ts_t ts = 0; ts < 13 * month; ts += step) { + for (rill_ts_t ts = 0; ts < 13 * month_secs; ts += step) { rill_acc_ingest(acc, key, ts + 1); rill_rotate(dir, 
ts); } rill_acc_close(acc); - rill_rotate(dir, 13 * month); + rill_rotate(dir, 13 * month_secs); } { @@ -37,7 +37,7 @@ bool test_rotate(void) rill_query_close(query); size_t i = 0; - for (rill_ts_t ts = 0; ts < 13 * month; ts += step) { + for (rill_ts_t ts = 0; ts < 13 * month_secs; ts += step) { assert(pairs->data[i].key == key); assert(pairs->data[i].val == ts + 1); ++i; @@ -47,7 +47,7 @@ bool test_rotate(void) } for (size_t i = 1; i <= 6; ++i) - rill_rotate(dir, (13 + i) * month); + rill_rotate(dir, (13 + i) * month_secs); { struct rill_query *query = rill_query_open(dir); @@ -56,7 +56,7 @@ bool test_rotate(void) for (size_t i = 0; i < pairs->len; ++i) { assert(pairs->data[i].key == key); - assert(pairs->data[i].val >= (5 * month) + 1); + assert(pairs->data[i].val >= (5 * month_secs) + 1); } rill_pairs_free(pairs); From ca14319653e0d2979d2b27e7ec348086e96b6625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sun, 17 Sep 2017 19:40:56 -0400 Subject: [PATCH 13/91] Print to stdout --- src/pairs.c | 8 ++++---- src/store.c | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/pairs.c b/src/pairs.c index 8840718..1b3b0d5 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -146,13 +146,13 @@ void rill_pairs_print(const struct rill_pairs *pairs) for (size_t i = 0; i < pairs->len; ++i) { const struct rill_kv *kv = &pairs->data[i]; - if (kv->key == key) fprintf(stderr, ", %lu", kv->val); + if (kv->key == key) printf(", %lu", kv->val); else { - if (key != no_key) fprintf(stderr, "]\n"); - fprintf(stderr, " %p: [ %lu", (void *) kv->key, kv->val); + if (key != no_key) printf("]\n"); + printf(" %p: [ %lu", (void *) kv->key, kv->val); key = kv->key; } } - if (pairs->len) fprintf(stderr, " ]\n"); + if (pairs->len) printf(" ]\n"); } diff --git a/src/store.c b/src/store.c index 615a45b..f1c663d 100644 --- a/src/store.c +++ b/src/store.c @@ -494,14 +494,14 @@ struct rill_pairs *rill_store_scan_val( void 
rill_store_print_head(struct rill_store *store) { - fprintf(stderr, "%s\n", store->file); - fprintf(stderr, "magic: 0x%x\n", store->head->magic); - fprintf(stderr, "version: %u\n", store->head->version); - fprintf(stderr, "ts: %lu\n", store->head->ts); - fprintf(stderr, "quant: %lu\n", store->head->quant); - fprintf(stderr, "keys: %lu\n", store->head->keys); - fprintf(stderr, "vals: %lu\n", store->vals->len); - fprintf(stderr, "pairs: %lu\n", store->head->pairs); + printf("%s\n", store->file); + printf("magic: 0x%x\n", store->head->magic); + printf("version: %u\n", store->head->version); + printf("ts: %lu\n", store->head->ts); + printf("quant: %lu\n", store->head->quant); + printf("keys: %lu\n", store->head->keys); + printf("vals: %lu\n", store->vals->len); + printf("pairs: %lu\n", store->head->pairs); } void rill_store_print(struct rill_store *store) @@ -518,15 +518,15 @@ void rill_store_print(struct rill_store *store) if (!coder_decode(&coder, &kv)) goto fail; if (rill_kv_nil(&kv)) break; - if (kv.key == key) fprintf(stderr, ", %lu", kv.val); + if (kv.key == key) printf(", %lu", kv.val); else { - if (key != no_key) fprintf(stderr, "]\n"); - fprintf(stderr, "%p: [ %lu", (void *) kv.key, kv.val); + if (key != no_key) printf("]\n"); + printf("%p: [ %lu", (void *) kv.key, kv.val); key = kv.key; } } - fprintf(stderr, " ]\n"); + printf(" ]\n"); fail: vma_dont_need(store); From 25c4f47452566ebe708c3436a8a54d051b40cc40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 18 Sep 2017 09:41:20 -0400 Subject: [PATCH 14/91] Add some reserved fields in the store header Index off being the primary future use case for one of these reserved fields. Also has the effect of properly aligning things. 
--- src/store.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/store.c b/src/store.c index f1c663d..0be1eb5 100644 --- a/src/store.c +++ b/src/store.c @@ -31,7 +31,7 @@ // store // ----------------------------------------------------------------------------- -static const uint32_t version = 2; +static const uint32_t version = 3; static const uint32_t magic = 0x4C4C4952; struct rill_packed header @@ -47,6 +47,8 @@ struct rill_packed header uint64_t vals_off; uint64_t data_off; + + uint64_t reserved[5]; // for future use }; struct rill_store From 7b654343e578a2a16475257298fd901da48712f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 18 Sep 2017 10:11:00 -0400 Subject: [PATCH 15/91] Make query functions const --- src/query.c | 4 ++-- src/rill.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/query.c b/src/query.c index 9821284..6c87494 100644 --- a/src/query.c +++ b/src/query.c @@ -63,7 +63,7 @@ void rill_query_close(struct rill_query *query) } struct rill_pairs *rill_query_key( - struct rill_query *query, + const struct rill_query *query, const rill_key_t *keys, size_t len, struct rill_pairs *out) { @@ -80,7 +80,7 @@ struct rill_pairs *rill_query_key( } struct rill_pairs *rill_query_val( - struct rill_query *query, + const struct rill_query *query, const rill_val_t *vals, size_t len, struct rill_pairs *out) { diff --git a/src/rill.h b/src/rill.h index 2d3f690..7933ffd 100644 --- a/src/rill.h +++ b/src/rill.h @@ -148,12 +148,12 @@ struct rill_query * rill_query_open(const char *dir); void rill_query_close(struct rill_query *db); struct rill_pairs *rill_query_key( - struct rill_query *query, + const struct rill_query *query, const rill_key_t *keys, size_t len, struct rill_pairs *out); struct rill_pairs *rill_query_val( - struct rill_query *query, + const struct rill_query *query, const rill_val_t *vals, size_t len, struct rill_pairs *out); From 34356bbea4198487bb4ea479c8a25f7736a3b342 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Wed, 20 Sep 2017 18:16:53 -0400 Subject: [PATCH 16/91] Avoid dirent.d_type in rill_scan_dir --- src/utils.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/utils.c b/src/utils.c index 2cdad75..76c17d5 100644 --- a/src/utils.c +++ b/src/utils.c @@ -13,16 +13,30 @@ // scan_dir // ----------------------------------------------------------------------------- +bool is_rill_file(const char *name) +{ + static const char ext[] = ".rill"; + + size_t len = strnlen(name, NAME_MAX); + if (len < sizeof(ext)) return false; + + return !strcmp(name + (len - sizeof(ext) + 1), ext); +} + size_t rill_scan_dir(const char *dir, struct rill_store **list, size_t cap) { DIR *dir_handle = opendir(dir); - if (!dir_handle) return 0; + if (!dir_handle) { + if (errno == ENOENT) return 0; + fail_errno("unable to open dir '%s'", dir); + return 0; + } size_t len = 0; struct dirent *entry = NULL; while ((entry = readdir(dir_handle))) { - if (entry->d_type != DT_REG) continue; - if (!strcmp(entry->d_name, "acc")) continue; + // I found the one filesystem that doesn't support dirent->d_type... 
+ if (!is_rill_file(entry->d_name)) continue; char file[PATH_MAX]; snprintf(file, sizeof(file), "%s/%s", dir, entry->d_name); @@ -40,4 +54,3 @@ size_t rill_scan_dir(const char *dir, struct rill_store **list, size_t cap) closedir(dir_handle); return len; } - From 3e56ab63b026badd051915e514383d1bb16ac8d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Wed, 20 Sep 2017 18:17:25 -0400 Subject: [PATCH 17/91] Add rill_rotate util --- compile.sh | 1 + src/rill_dump.c | 3 --- src/rill_rotate.c | 24 ++++++++++++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 src/rill_rotate.c diff --git a/compile.sh b/compile.sh index 8646e59..407007d 100755 --- a/compile.sh +++ b/compile.sh @@ -31,6 +31,7 @@ ar rcs librill.a $OBJ gcc -o rill_load "${PREFIX}/src/rill_load.c" librill.a $CFLAGS gcc -o rill_dump "${PREFIX}/src/rill_dump.c" librill.a $CFLAGS +gcc -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS gcc -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder gcc -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS diff --git a/src/rill_dump.c b/src/rill_dump.c index 9257262..b0945e7 100644 --- a/src/rill_dump.c +++ b/src/rill_dump.c @@ -12,9 +12,6 @@ int main(int argc, char **argv) { - (void) argc, (void) argv; - - const char *file = NULL; bool header_only = false; diff --git a/src/rill_rotate.c b/src/rill_rotate.c new file mode 100644 index 0000000..9f55e03 --- /dev/null +++ b/src/rill_rotate.c @@ -0,0 +1,24 @@ +/* rill_rotate.c + Rémi Attab (remi.attab@gmail.com), 20 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" + +#include +#include + +int main(int argc, const char **argv) +{ + if (argc != 2) { + fprintf(stderr, "./rill_rotate "); + return 1; + } + + struct timespec ts; + (void) clock_gettime(CLOCK_REALTIME, &ts); + + printf("rotating '%s' at '%lu'\n", argv[1], ts.tv_sec); + return rill_rotate(argv[1], ts.tv_sec) ? 
0 : 1; +} + From 71b66ed8c7c10ba67c32468e58632ab5b51d5aed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 23 Sep 2017 11:10:55 -0400 Subject: [PATCH 18/91] Add iterator to store --- src/rill.h | 5 +++++ src/store.c | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/rill.h b/src/rill.h index 7933ffd..1a77ca0 100644 --- a/src/rill.h +++ b/src/rill.h @@ -83,6 +83,7 @@ void rill_pairs_print(const struct rill_pairs *pairs); // ----------------------------------------------------------------------------- struct rill_store; +struct rill_store_it; struct rill_store *rill_store_open(const char *file); void rill_store_close(struct rill_store *store); @@ -112,6 +113,10 @@ struct rill_pairs *rill_store_scan_val( const rill_val_t *vals, size_t len, struct rill_pairs *out); +struct rill_store_it *rill_store_begin(struct rill_store *store); +void rill_store_it_free(struct rill_store_it *it); +bool rill_store_it_next(struct rill_store_it *it, struct rill_kv *kv); + void rill_store_print_head(struct rill_store *store); void rill_store_print(struct rill_store *store); diff --git a/src/store.c b/src/store.c index 0be1eb5..2def867 100644 --- a/src/store.c +++ b/src/store.c @@ -494,6 +494,29 @@ struct rill_pairs *rill_store_scan_val( return NULL; } + +struct rill_store_it { struct coder decoder; }; + +struct rill_store_it *rill_store_begin(struct rill_store *store) +{ + struct rill_store_it *it = calloc(1, sizeof(*it)); + if (!it) return NULL; + + it->decoder = store_decoder(store); + return it; +} + +void rill_store_it_free(struct rill_store_it *it) +{ + free(it); +} + +bool rill_store_it_next(struct rill_store_it *it, struct rill_kv *kv) +{ + return coder_decode(&it->decoder, kv); +} + + void rill_store_print_head(struct rill_store *store) { printf("%s\n", store->file); From 5b4fabfffe843e23b3350130b773f4d43c235284 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 23 Sep 2017 11:11:40 -0400 Subject: [PATCH 
19/91] Add extra dump utility --- compile.sh | 1 + src/rill_historical_dump.c | 134 +++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 src/rill_historical_dump.c diff --git a/compile.sh b/compile.sh index 407007d..9a305cd 100755 --- a/compile.sh +++ b/compile.sh @@ -31,6 +31,7 @@ ar rcs librill.a $OBJ gcc -o rill_load "${PREFIX}/src/rill_load.c" librill.a $CFLAGS gcc -o rill_dump "${PREFIX}/src/rill_dump.c" librill.a $CFLAGS +gcc -o rill_historical_dump "${PREFIX}/src/rill_historical_dump.c" librill.a $CFLAGS gcc -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS gcc -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder diff --git a/src/rill_historical_dump.c b/src/rill_historical_dump.c new file mode 100644 index 0000000..9d8db4d --- /dev/null +++ b/src/rill_historical_dump.c @@ -0,0 +1,134 @@ +/* rill_historical_dump.c + Rémi Attab (remi.attab@gmail.com), 22 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + + +#include "rill.h" +#include "htable.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static const uint64_t val_mask = 1UL << 63; + +static void read_utf8(int fd, char *out, size_t to_read) +{ + size_t len = 0; + for (size_t i = 0; i < to_read; ++i) { + uint8_t c; + assert(read(fd, &c, sizeof(c)) > 0); + out[len++] = c; + + size_t extra = 0; + if (c >= 0xc0) extra++; + if (c >= 0xe0) extra++; + for (size_t j = 0; j < extra; ++j) { + assert(read(fd, &c, sizeof(c)) > 0); + out[len++] = c; + } + } + + out[len++] = 0; +} + +static void read_table(const char *file, struct htable *table) +{ + htable_reset(table); + + int fd = open(file, O_RDONLY); + if (fd == -1) { + fprintf(stderr, "file not there %s: %s", file, strerror(errno)); + abort(); + } + + ssize_t ret; + while (true) { + uint64_t len = 0; + assert(ret = read(fd, &len, sizeof(len)) >= 0); + if (!ret || !len) break; + + char *name = calloc(len * 4 + 1, sizeof(*name)); 
+ read_utf8(fd, name, len); + + uint64_t key = 0; + assert(read(fd, &key, sizeof(key)) > 0); + + uint64_t type = 0; + assert(read(fd, &type, sizeof(type)) > 0); + + assert ((key & val_mask) == 0); + if (type == 12) key |= val_mask; + else assert(type == 13); + + { + uint64_t val = key & ~val_mask; + size_t type = key & val_mask ? 1 : 0; + static const char *type_str[] = {"set", "rov"}; + printf("%s %lu -> %s\n", type_str[type], val, name); + } + + assert(htable_put(table, key, (uint64_t)name).ok); + } +} + +void dump_store(const char *file, struct htable *table) +{ + struct rill_store *store = rill_store_open(file); + assert(store); + + rill_store_print_head(store); + + rill_key_t current = 0; + struct rill_kv kv = {0}; + struct rill_store_it *it = rill_store_begin(store); + while (rill_store_it_next(it, &kv)) { + if (rill_kv_nil(&kv)) break; + + if (kv.key != current) { + current = kv.key; + printf("%p:\n", (void *) kv.key); + } + + uint64_t val = kv.val & ~val_mask; + size_t type = kv.val & val_mask ? 
1 : 0; + static const char *type_str[] = {"set", "rov"}; + printf(" %s %lu", type_str[type], val); + + struct htable_ret ret = htable_get(table, kv.val); + if (ret.ok) printf(" -> %s", (char *) ret.value); + printf("\n"); + } + + rill_store_close(store); +} + +int main(int argc, char **argv) +{ + const char *store_file = NULL; + const char *table_file = NULL; + + if (argc != 3) { + fprintf(stderr, + "invalid number of arguments\n" + " rill_historical_dump "); + return 1; + } + + table_file = argv[argc - 1]; + store_file = argv[argc - 2]; + + struct htable table = {0}; + read_table(table_file, &table); + dump_store(store_file, &table); + + return 0; +} From 503277943f51f7327cc39c457f002287ceb931c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sun, 1 Oct 2017 08:59:56 -0400 Subject: [PATCH 20/91] Add val dump functions to store --- src/rill.h | 4 +++ src/rill_historical_dump.c | 67 +++++++++++++++++++++++++++++--------- src/rill_load.c | 14 +++----- src/store.c | 14 ++++++++ 4 files changed, 73 insertions(+), 26 deletions(-) diff --git a/src/rill.h b/src/rill.h index 1a77ca0..dd42c0b 100644 --- a/src/rill.h +++ b/src/rill.h @@ -103,6 +103,7 @@ bool rill_store_rm(struct rill_store *store); const char * rill_store_file(const struct rill_store *store); rill_ts_t rill_store_ts(const struct rill_store *store); size_t rill_store_quant(const struct rill_store *store); +size_t rill_store_vals(const struct rill_store *store); struct rill_pairs *rill_store_scan_key( struct rill_store *store, @@ -113,6 +114,9 @@ struct rill_pairs *rill_store_scan_val( const rill_val_t *vals, size_t len, struct rill_pairs *out); +size_t rill_store_dump_vals( + const struct rill_store *store, rill_val_t *out, size_t cap); + struct rill_store_it *rill_store_begin(struct rill_store *store); void rill_store_it_free(struct rill_store_it *it); bool rill_store_it_next(struct rill_store_it *it, struct rill_kv *kv); diff --git a/src/rill_historical_dump.c b/src/rill_historical_dump.c 
index 9d8db4d..4d75380 100644 --- a/src/rill_historical_dump.c +++ b/src/rill_historical_dump.c @@ -20,7 +20,7 @@ static const uint64_t val_mask = 1UL << 63; -static void read_utf8(int fd, char *out, size_t to_read) +void read_utf8(int fd, char *out, size_t to_read) { size_t len = 0; for (size_t i = 0; i < to_read; ++i) { @@ -57,7 +57,8 @@ static void read_table(const char *file, struct htable *table) if (!ret || !len) break; char *name = calloc(len * 4 + 1, sizeof(*name)); - read_utf8(fd, name, len); + //read_utf8(fd, name, len); + assert(read(fd, name, len) > 0); uint64_t key = 0; assert(read(fd, &key, sizeof(key)) > 0); @@ -69,7 +70,7 @@ static void read_table(const char *file, struct htable *table) if (type == 12) key |= val_mask; else assert(type == 13); - { + if (false) { uint64_t val = key & ~val_mask; size_t type = key & val_mask ? 1 : 0; static const char *type_str[] = {"set", "rov"}; @@ -80,7 +81,33 @@ static void read_table(const char *file, struct htable *table) } } -void dump_store(const char *file, struct htable *table) +void print_val(rill_val_t val, struct htable *table) +{ + uint64_t id = val & ~val_mask; + size_t type = val & val_mask ? 
1 : 0; + static const char *type_str[] = {"set", "rov"}; + printf(" %s %lu", type_str[type], id); + + struct htable_ret ret = htable_get(table, val); + if (ret.ok) printf(" -> %s", (char *) ret.value); + printf("\n"); +} + +void dump_vals(const char *file, struct htable *table) +{ + struct rill_store *store = rill_store_open(file); + assert(store); + + size_t len = rill_store_vals(store); + rill_val_t *vals = calloc(1, len * sizeof(*vals)); + rill_store_dump_vals(store, vals, len); + + printf("values:\n"); + for (size_t i = 0; i < len; ++i) + print_val(vals[i], table); +} + +void dump_keys(const char *file, struct htable *table) { struct rill_store *store = rill_store_open(file); assert(store); @@ -98,14 +125,7 @@ void dump_store(const char *file, struct htable *table) printf("%p:\n", (void *) kv.key); } - uint64_t val = kv.val & ~val_mask; - size_t type = kv.val & val_mask ? 1 : 0; - static const char *type_str[] = {"set", "rov"}; - printf(" %s %lu", type_str[type], val); - - struct htable_ret ret = htable_get(table, kv.val); - if (ret.ok) printf(" -> %s", (char *) ret.value); - printf("\n"); + print_val(kv.val, table); } rill_store_close(store); @@ -115,20 +135,35 @@ int main(int argc, char **argv) { const char *store_file = NULL; const char *table_file = NULL; + bool vals_dump = false; + + if (argc == 4) { + if (strcmp(argv[1], "-v") != 0) { + printf("unknown arg '%s'", argv[1]); + return 1; + } + vals_dump = true; + } - if (argc != 3) { + if (argc == 3 || argc == 4) { + table_file = argv[argc - 1]; + store_file = argv[argc - 2]; + } + else { fprintf(stderr, "invalid number of arguments\n" " rill_historical_dump
"); return 1; } - table_file = argv[argc - 1]; - store_file = argv[argc - 2]; struct htable table = {0}; read_table(table_file, &table); - dump_store(store_file, &table); + + if (vals_dump) + dump_vals(store_file, &table); + else + dump_keys(store_file, &table); return 0; } diff --git a/src/rill_load.c b/src/rill_load.c index 39d6c30..d1185e4 100644 --- a/src/rill_load.c +++ b/src/rill_load.c @@ -35,12 +35,6 @@ void rm(const char *path) rmdir(path); } -uint64_t rng_gen_val(struct rng *rng, uint64_t min, uint64_t range) -{ - uint64_t max = rng_gen_range(rng, 0, range) + 1; - return rng_gen_range(rng, min, min + max) + 1; -} - int main(int argc, char **argv) { (void) argc, (void) argv; @@ -51,8 +45,8 @@ int main(int argc, char **argv) seconds = 3 * month_secs, rotation_rate = 10 * min_secs, - keys_range = 1 * 1000 * 1000 * 1000, - vals_range = 10 * 1000, + keys_range = 20 * 1000 * 1000, + vals_range = 100 * 1000, vals_per_key = 4, acc_cap = keys_per_sec * vals_per_key * rotation_rate, @@ -64,10 +58,10 @@ int main(int argc, char **argv) struct rng rng = rng_make(0); for (size_t ts = 0; ts < seconds; ++ts) { for (size_t i = 0; i < keys_per_sec; ++i) { - uint64_t key = rng_gen_val(&rng, 0, keys_range); + uint64_t key = rng_gen_range(&rng, 0, keys_range) + 1; for (size_t j = 0; j < vals_per_key; ++j) { - uint64_t val = rng_gen_val(&rng, 0, vals_range); + uint64_t val = rng_gen_range(&rng, 0, vals_range) + 1; rill_acc_ingest(acc, key, val); } } diff --git a/src/store.c b/src/store.c index 2def867..aee3cdd 100644 --- a/src/store.c +++ b/src/store.c @@ -432,6 +432,12 @@ size_t rill_store_quant(const struct rill_store *store) return store->head->quant; } +size_t rill_store_vals(const struct rill_store *store) +{ + return store->vals->len; +} + + struct rill_pairs *rill_store_scan_key( struct rill_store *store, const rill_key_t *keys, size_t len, @@ -494,6 +500,14 @@ struct rill_pairs *rill_store_scan_val( return NULL; } +size_t rill_store_dump_vals( + const struct rill_store 
*store, rill_val_t *out, size_t cap) +{ + size_t len = cap < store->vals->len ? cap : store->vals->len; + memcpy(out, store->vals->data, len * sizeof(*out)); + return len; +} + struct rill_store_it { struct coder decoder; }; From fb406810e03bc10a9e8d7bbfc9674fd7003cd154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sun, 1 Oct 2017 09:48:42 -0400 Subject: [PATCH 21/91] Add write stamp in store --- src/store.c | 43 ++++++++++++++++++++++++++++++++++++------- src/utils.h | 2 ++ 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/store.c b/src/store.c index aee3cdd..a5dd00d 100644 --- a/src/store.c +++ b/src/store.c @@ -31,8 +31,10 @@ // store // ----------------------------------------------------------------------------- -static const uint32_t version = 3; static const uint32_t magic = 0x4C4C4952; +static const uint64_t stamp = 0xFFFFFFFFFFFFFFFFUL; +static const uint32_t version = 4; +static const uint32_t supported_versions[] = { 3, 4 }; struct rill_packed header { @@ -48,7 +50,9 @@ struct rill_packed header uint64_t vals_off; uint64_t data_off; - uint64_t reserved[5]; // for future use + uint64_t reserved[4]; // for future use + + uint64_t stamp; }; struct rill_store @@ -108,6 +112,13 @@ static inline void vma_dont_need(struct rill_store *store) // reader // ----------------------------------------------------------------------------- +static bool is_supported_version(uint32_t version) +{ + for (size_t i = 0; i < array_len(supported_versions); ++i) + if (version == supported_versions[i]) return true; + return false; +} + struct rill_store *rill_store_open(const char *file) { struct rill_store *store = calloc(1, sizeof(*store)); @@ -158,15 +169,23 @@ struct rill_store *rill_store_open(const char *file) goto fail_magic; } - if (store->head->version != version) { - fail("unknown version '%du' for '%s'", store->head->version, file); + if (!is_supported_version(store->head->version)) { + fail("invalid version '%du' for '%s'", 
store->head->version, file); goto fail_version; } + if (store->head->version >= 4) { + if (store->head->stamp != stamp) { + fail("invalid stamp '%p' for '%s'", (void *) store->head->stamp, file); + goto fail_stamp; + } + } + return store; fail_version: fail_magic: + fail_stamp: munmap(store->vma, store->vma_len); fail_mmap: close(store->fd); @@ -254,18 +273,27 @@ static bool writer_open( static void writer_close(struct rill_store *store, size_t len) { - munmap(store->vma, store->vma_len); - if (len) { if (ftruncate(store->fd, len) == -1) fail_errno("unable to resize '%s'", store->file); if (fdatasync(store->fd) == -1) - fail_errno("unable to fsync '%s'", store->file); + fail_errno("unable to fdatasync data '%s'", store->file); + + // Indicate that the file has been fully written and is ready for + // use. An additional sync is required for the stamp to ensure that the + // data is... + // - ... properly persisted before we delete it (durability) + // - ... only persisted after all the data has been persisted (ordering) + store->head->stamp = stamp; + if (fdatasync(store->fd) == -1) + fail_errno("unable to fdatasync stamp '%s'", store->file); + } else if (unlink(store->file) == -1) fail_errno("unable to unlink '%s'", store->file); + munmap(store->vma, store->vma_len); close(store->fd); } @@ -536,6 +564,7 @@ void rill_store_print_head(struct rill_store *store) printf("%s\n", store->file); printf("magic: 0x%x\n", store->head->magic); printf("version: %u\n", store->head->version); + printf("stamp: %p\n", (void *) store->head->stamp); printf("ts: %lu\n", store->head->ts); printf("quant: %lu\n", store->head->quant); printf("keys: %lu\n", store->head->keys); diff --git a/src/utils.h b/src/utils.h index 1a7fb51..d001f3d 100644 --- a/src/utils.h +++ b/src/utils.h @@ -25,6 +25,8 @@ // misc // ----------------------------------------------------------------------------- +#define array_len(arr) (sizeof((arr)) / sizeof((arr)[0])) + enum { mins_in_hour = 60, From 
fdf244fc443916e9299d6ba3e870b2654638a7cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sun, 1 Oct 2017 10:23:18 -0400 Subject: [PATCH 22/91] Improve error handling --- src/acc.c | 24 ++++++------- src/coder.c | 4 +-- src/pairs.c | 10 ++++-- src/query.c | 4 +-- src/rill.h | 23 +++++++++++- src/rill_dump.c | 3 +- src/rill_historical_dump.c | 5 ++- src/rill_load.c | 6 ++-- src/rill_rotate.c | 7 ++-- src/store.c | 42 +++++++++++----------- src/utils.c | 72 ++++++++++++++++++++++++++++++++++++-- src/utils.h | 47 ++++++++++++++++--------- 12 files changed, 181 insertions(+), 66 deletions(-) diff --git a/src/acc.c b/src/acc.c index 0b31085..4ea7810 100644 --- a/src/acc.c +++ b/src/acc.c @@ -67,18 +67,18 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) struct rill_acc *acc = calloc(1, sizeof(*acc)); if (!acc) { - fail("unable to allocate memory for '%s'", dir); + rill_fail("unable to allocate memory for '%s'", dir); goto fail_alloc_struct; } acc->dir = strndup(dir, PATH_MAX); if (!acc->dir) { - fail("unable to allocate memory for '%s'", dir); + rill_fail("unable to allocate memory for '%s'", dir); goto fail_alloc_dir; } if (mkdir(dir, 0775) == -1 && errno != EEXIST) { - fail_errno("unable to open create dir '%s'", dir); + rill_fail_errno("unable to open create dir '%s'", dir); goto fail_mkdir; } @@ -89,7 +89,7 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) struct stat stat_ret = {0}; if (stat(file, &stat_ret) == -1) { if (errno != ENOENT) { - fail_errno("unable to stat '%s'", file); + rill_fail_errno("unable to stat '%s'", file); goto fail_stat; } @@ -101,21 +101,21 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) else acc->fd = open(file, O_RDWR); if (acc->fd == -1) { - fail_errno("unable to create '%s'", file); + rill_fail_errno("unable to create '%s'", file); goto fail_open; } if (create) { acc->vma_len = to_vma_len(sizeof(*acc->head) + cap * sizeof(*acc->data)); if (ftruncate(acc->fd, acc->vma_len) == -1) 
{ - fail_errno("unable to ftruncate '%s' to len '%lu'", file, acc->vma_len); + rill_fail_errno("unable to ftruncate '%s' to len '%lu'", file, acc->vma_len); goto fail_truncate; } } else { size_t len = stat_ret.st_size; if (len < sizeof(struct header)) { - fail("invalid size for '%s'", file); + rill_fail("invalid size for '%s'", file); goto fail_size; } @@ -124,7 +124,7 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) acc->vma = mmap(NULL, acc->vma_len, PROT_READ | PROT_WRITE, MAP_SHARED, acc->fd, 0); if (acc->vma == MAP_FAILED) { - fail_errno("unable to mmap '%s' of len '%lu'", file, acc->vma_len); + rill_fail_errno("unable to mmap '%s' of len '%lu'", file, acc->vma_len); goto fail_mmap; } @@ -138,12 +138,12 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) } else { if (acc->head->magic != magic) { - fail("invalid magic '0x%x' for '%s'", acc->head->magic, file); + rill_fail("invalid magic '0x%x' for '%s'", acc->head->magic, file); goto fail_magic; } if (acc->head->version != version) { - fail("unknown version '%du' for '%s'", acc->head->version, file); + rill_fail("unknown version '%du' for '%s'", acc->head->version, file); goto fail_version; } } @@ -193,7 +193,7 @@ bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) { struct rill_pairs *pairs = rill_pairs_new(acc->head->len); if (!pairs) { - fail("unable to allocate pairs for len '%lu'", acc->head->len); + rill_fail("unable to allocate pairs for len '%lu'", acc->head->len); return false; } @@ -221,7 +221,7 @@ bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) } if (!rill_store_write(file, now, 0, pairs)) { - fail("unable to write acc file '%s'", file); + rill_fail("unable to write acc file '%s'", file); goto fail_write; } diff --git a/src/coder.c b/src/coder.c index e1f5bc5..03804de 100644 --- a/src/coder.c +++ b/src/coder.c @@ -249,7 +249,7 @@ static struct coder make_encoder(struct vals *vals, uint8_t *it, uint8_t *end) static inline bool 
coder_read_key(struct coder *coder, rill_key_t *key) { if (rill_unlikely(coder->it + sizeof(*key) > coder->end)) { - fail("unable to decode key: %p + %lu = %p > %p'\n", + rill_fail("unable to decode key: %p + %lu = %p > %p'\n", (void *) coder->it, sizeof(*key), (void *) (coder->it + sizeof(*key)), (void *) coder->end); @@ -265,7 +265,7 @@ static inline bool coder_read_key(struct coder *coder, rill_key_t *key) static inline bool coder_read_val(struct coder *coder, rill_val_t *val) { if (!leb128_decode(&coder->it, coder->end, val)) { - fail("unable to decode value at '%p-%p'\n", + rill_fail("unable to decode value at '%p-%p'\n", (void *) coder->it, (void *) coder->end); return false; } diff --git a/src/pairs.c b/src/pairs.c index 1b3b0d5..304a221 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -35,7 +35,10 @@ struct rill_pairs *rill_pairs_new(size_t cap) struct rill_pairs *pairs = calloc(1, sizeof(*pairs) + cap * sizeof(pairs->data[0])); - if (!pairs) return NULL; + if (!pairs) { + rill_fail("unable to alloc pairs: cap=%lu", cap); + return NULL; + } pairs->cap = cap; return pairs; @@ -62,7 +65,10 @@ struct rill_pairs *rill_pairs_push( size_t cap = adjust_cap(pairs->cap, pairs->len + 1); pairs = realloc(pairs, sizeof(*pairs) + cap * sizeof(pairs->data[0])); - if (!pairs) return NULL; + if (!pairs) { + rill_fail("unable to realloc pairs: cap=%lu", cap); + return NULL; + } pairs->cap = cap; } diff --git a/src/query.c b/src/query.c index 6c87494..dc42d58 100644 --- a/src/query.c +++ b/src/query.c @@ -31,13 +31,13 @@ struct rill_query * rill_query_open(const char *dir) { struct rill_query *query = calloc(1, sizeof(*query)); if (!query) { - fail("unable to allocate memory for '%s'", dir); + rill_fail("unable to allocate memory for '%s'", dir); goto fail_alloc_struct; } query->dir = strndup(dir, PATH_MAX); if (!query->dir) { - fail("unable to allocate memory for '%s'", dir); + rill_fail("unable to allocate memory for '%s'", dir); goto fail_alloc_dir; } diff --git a/src/rill.h 
b/src/rill.h index dd42c0b..85622a9 100644 --- a/src/rill.h +++ b/src/rill.h @@ -9,6 +9,28 @@ #include #include + +// ----------------------------------------------------------------------------- +// error +// ----------------------------------------------------------------------------- + +enum { rill_err_msg_cap = 1024 }; + +struct rill_error +{ + const char *file; + int line; + + int errno_; // errno can be a macro hence the underscore. + char msg[rill_err_msg_cap]; +}; + +extern __thread struct rill_error rill_errno; + +void rill_perror(struct rill_error *err); +size_t rill_strerror(struct rill_error *err, char *dest, size_t len); + + // ----------------------------------------------------------------------------- // types // ----------------------------------------------------------------------------- @@ -77,7 +99,6 @@ struct rill_pairs *rill_pairs_scan_val( void rill_pairs_print(const struct rill_pairs *pairs); - // ----------------------------------------------------------------------------- // store // ----------------------------------------------------------------------------- diff --git a/src/rill_dump.c b/src/rill_dump.c index b0945e7..f38ec3c 100644 --- a/src/rill_dump.c +++ b/src/rill_dump.c @@ -4,6 +4,7 @@ */ #include "rill.h" +#include "utils.h" #include #include @@ -31,7 +32,7 @@ int main(int argc, char **argv) } struct rill_store *store = rill_store_open(file); - if (!store) return 1; + if (!store) rill_exit(1); rill_store_print_head(store); if (!header_only) rill_store_print(store); diff --git a/src/rill_historical_dump.c b/src/rill_historical_dump.c index 4d75380..ab333de 100644 --- a/src/rill_historical_dump.c +++ b/src/rill_historical_dump.c @@ -5,6 +5,7 @@ #include "rill.h" +#include "utils.h" #include "htable.h" #include @@ -105,12 +106,14 @@ void dump_vals(const char *file, struct htable *table) printf("values:\n"); for (size_t i = 0; i < len; ++i) print_val(vals[i], table); + + free(vals); } void dump_keys(const char *file, struct htable 
*table) { struct rill_store *store = rill_store_open(file); - assert(store); + if (!store) rill_exit(1); rill_store_print_head(store); diff --git a/src/rill_load.c b/src/rill_load.c index d1185e4..5b364ad 100644 --- a/src/rill_load.c +++ b/src/rill_load.c @@ -53,7 +53,7 @@ int main(int argc, char **argv) }; struct rill_acc *acc = rill_acc_open("db", acc_cap); - if (!acc) return 1; + if (!acc) rill_abort(); struct rng rng = rng_make(0); for (size_t ts = 0; ts < seconds; ++ts) { @@ -67,12 +67,12 @@ int main(int argc, char **argv) } if (ts % rotation_rate == 0) { - if (!rill_rotate("db", ts)) return 0; + if (!rill_rotate("db", ts)) rill_abort(); } } rill_acc_close(acc); - if (!rill_rotate("db", seconds + 60 * 60)) return 1; + if (!rill_rotate("db", seconds + 60 * 60)) rill_abort(); return 0; } diff --git a/src/rill_rotate.c b/src/rill_rotate.c index 9f55e03..bd2f5db 100644 --- a/src/rill_rotate.c +++ b/src/rill_rotate.c @@ -4,6 +4,7 @@ */ #include "rill.h" +#include "utils.h" #include #include @@ -11,7 +12,7 @@ int main(int argc, const char **argv) { if (argc != 2) { - fprintf(stderr, "./rill_rotate "); + fprintf(stderr, "./rill_rotate \n"); return 1; } @@ -19,6 +20,8 @@ int main(int argc, const char **argv) (void) clock_gettime(CLOCK_REALTIME, &ts); printf("rotating '%s' at '%lu'\n", argv[1], ts.tv_sec); - return rill_rotate(argv[1], ts.tv_sec) ? 
0 : 1; + if (!rill_rotate(argv[1], ts.tv_sec)) rill_exit(1); + + return 0; } diff --git a/src/store.c b/src/store.c index a5dd00d..97424b6 100644 --- a/src/store.c +++ b/src/store.c @@ -98,13 +98,13 @@ static struct coder store_decoder(struct rill_store *store) static inline void vma_will_need(struct rill_store *store) { if (madvise(store->vma, store->vma_len, MADV_WILLNEED) == -1) - fail("unable to madvise '%s'", store->file); + rill_fail("unable to madvise '%s'", store->file); } static inline void vma_dont_need(struct rill_store *store) { if (madvise(store->vma, store->vma_len, MADV_DONTNEED) == -1) - fail("unable to madvise '%s'", store->file); + rill_fail("unable to madvise '%s'", store->file); } @@ -123,25 +123,25 @@ struct rill_store *rill_store_open(const char *file) { struct rill_store *store = calloc(1, sizeof(*store)); if (!store) { - fail("unable to allocate memory for '%s'", file); + rill_fail("unable to allocate memory for '%s'", file); goto fail_alloc_struct; } store->file = strndup(file, PATH_MAX); if (!store->file) { - fail("unable to allocate memory for '%s'", file); + rill_fail("unable to allocate memory for '%s'", file); goto fail_alloc_file; } struct stat stat_ret = {0}; if (stat(file, &stat_ret) == -1) { - fail_errno("unable to stat '%s'", file); + rill_fail_errno("unable to stat '%s'", file); goto fail_stat; } size_t len = stat_ret.st_size; if (len < sizeof(struct header)) { - fail("invalid size for '%s'", file); + rill_fail("invalid size for '%s'", file); goto fail_size; } @@ -149,13 +149,13 @@ struct rill_store *rill_store_open(const char *file) store->fd = open(file, O_RDONLY); if (store->fd == -1) { - fail_errno("unable to open '%s'", file); + rill_fail_errno("unable to open '%s'", file); goto fail_open; } store->vma = mmap(NULL, store->vma_len, PROT_READ, MAP_SHARED, store->fd, 0); if (store->vma == MAP_FAILED) { - fail_errno("unable to mmap '%s' of len '%lu'", file, store->vma_len); + rill_fail_errno("unable to mmap '%s' of len '%lu'", 
file, store->vma_len); goto fail_mmap; } @@ -165,18 +165,18 @@ struct rill_store *rill_store_open(const char *file) store->end = (void *) ((uintptr_t) store->vma + store->vma_len); if (store->head->magic != magic) { - fail("invalid magic '0x%x' for '%s'", store->head->magic, file); + rill_fail("invalid magic '0x%x' for '%s'", store->head->magic, file); goto fail_magic; } if (!is_supported_version(store->head->version)) { - fail("invalid version '%du' for '%s'", store->head->version, file); + rill_fail("invalid version '%du' for '%s'", store->head->version, file); goto fail_version; } if (store->head->version >= 4) { if (store->head->stamp != stamp) { - fail("invalid stamp '%p' for '%s'", (void *) store->head->stamp, file); + rill_fail("invalid stamp '%p' for '%s'", (void *) store->head->stamp, file); goto fail_stamp; } } @@ -210,7 +210,7 @@ void rill_store_close(struct rill_store *store) bool rill_store_rm(struct rill_store *store) { if (unlink(store->file) == -1) { - fail_errno("unable to unlink '%s'", store->file); + rill_fail_errno("unable to unlink '%s'", store->file); return false; } @@ -232,20 +232,20 @@ static bool writer_open( store->fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0640); if (store->fd == -1) { - fail_errno("unable to open '%s'", file); + rill_fail_errno("unable to open '%s'", file); goto fail_open; } size_t len = sizeof(struct header) + cap; if (ftruncate(store->fd, len) == -1) { - fail_errno("unable to resize '%s'", file); + rill_fail_errno("unable to resize '%s'", file); goto fail_truncate; } store->vma_len = to_vma_len(len); store->vma = mmap(NULL, store->vma_len, PROT_WRITE | PROT_READ, MAP_SHARED, store->fd, 0); if (store->vma == MAP_FAILED) { - fail_errno("unable to mmap '%s'", file); + rill_fail_errno("unable to mmap '%s'", file); goto fail_mmap; } @@ -275,10 +275,10 @@ static void writer_close(struct rill_store *store, size_t len) { if (len) { if (ftruncate(store->fd, len) == -1) - fail_errno("unable to resize '%s'", store->file); + 
rill_fail_errno("unable to resize '%s'", store->file); if (fdatasync(store->fd) == -1) - fail_errno("unable to fdatasync data '%s'", store->file); + rill_fail_errno("unable to fdatasync data '%s'", store->file); // Indicate that the file has been fully written and is ready for // use. An additional sync is required for the stamp to ensure that the @@ -287,11 +287,11 @@ static void writer_close(struct rill_store *store, size_t len) // - ... only persisted after all the data has been persisted (ordering) store->head->stamp = stamp; if (fdatasync(store->fd) == -1) - fail_errno("unable to fdatasync stamp '%s'", store->file); + rill_fail_errno("unable to fdatasync stamp '%s'", store->file); } else if (unlink(store->file) == -1) - fail_errno("unable to unlink '%s'", store->file); + rill_fail_errno("unable to unlink '%s'", store->file); munmap(store->vma, store->vma_len); close(store->fd); @@ -329,7 +329,7 @@ bool rill_store_write( struct rill_store store = {0}; if (!writer_open(&store, file, cap, ts, quant)) { - fail("unable to create '%s'", file); + rill_fail("unable to create '%s'", file); goto fail_open; } @@ -385,7 +385,7 @@ bool rill_store_merge( struct rill_store store = {0}; if (!writer_open(&store, file, cap, ts, quant)) { - fail("unable to create '%s'", file); + rill_fail("unable to create '%s'", file); goto fail_open; } diff --git a/src/utils.c b/src/utils.c index 76c17d5..222f044 100644 --- a/src/utils.c +++ b/src/utils.c @@ -6,14 +6,80 @@ #include "rill.h" #include "utils.h" +#include +#include +#include #include +#include + + +// ----------------------------------------------------------------------------- +// error +// ----------------------------------------------------------------------------- + +__thread struct rill_error rill_errno = { 0 }; + +void rill_abort() +{ + rill_perror(&rill_errno); + abort(); +} + +void rill_exit(int code) +{ + rill_perror(&rill_errno); + exit(code); +} + +size_t rill_strerror(struct rill_error *err, char *dest, size_t len) +{ 
+ if (!err->errno_) { + return snprintf(dest, len, "%s:%d: %s\n", + err->file, err->line, err->msg); + } + else { + return snprintf(dest, len, "%s:%d: %s - %s(%d)\n", + err->file, err->line, err->msg, + strerror(err->errno_), err->errno_); + } +} + +void rill_perror(struct rill_error *err) +{ + char buf[128 + rill_err_msg_cap]; + size_t len = rill_strerror(err, buf, sizeof(buf)); + + if (write(2, buf, len) == -1) + fprintf(stderr, "rill_perror failed: %s", strerror(errno)); +} + + +void rill_vfail(const char *file, int line, const char *fmt, ...) +{ + rill_errno = (struct rill_error) { .errno_ = 0, .file = file, .line = line }; + + va_list args; + va_start(args, fmt); + (void) vsnprintf(rill_errno.msg, rill_err_msg_cap, fmt, args); + va_end(args); +} + +void rill_vfail_errno(const char *file, int line, const char *fmt, ...) +{ + rill_errno = (struct rill_error) { .errno_ = errno, .file = file, .line = line }; + + va_list args; + va_start(args, fmt); + (void) vsnprintf(rill_errno.msg, rill_err_msg_cap, fmt, args); + va_end(args); +} // ----------------------------------------------------------------------------- // scan_dir // ----------------------------------------------------------------------------- -bool is_rill_file(const char *name) +static bool is_rill_file(const char *name) { static const char ext[] = ".rill"; @@ -28,7 +94,7 @@ size_t rill_scan_dir(const char *dir, struct rill_store **list, size_t cap) DIR *dir_handle = opendir(dir); if (!dir_handle) { if (errno == ENOENT) return 0; - fail_errno("unable to open dir '%s'", dir); + rill_fail_errno("unable to open dir '%s'", dir); return 0; } @@ -46,7 +112,7 @@ size_t rill_scan_dir(const char *dir, struct rill_store **list, size_t cap) len++; if (len == cap) { - fail("rotate: too many files to rotate in '%s'", dir); + rill_fail("rotate: too many files to rotate in '%s'", dir); break; } } diff --git a/src/utils.h b/src/utils.h index d001f3d..c3be9f3 100644 --- a/src/utils.h +++ b/src/utils.h @@ -13,10 +13,12 @@ 
// ----------------------------------------------------------------------------- -// attributes +// compiler // ----------------------------------------------------------------------------- #define rill_packed __attribute__((__packed__)) +#define rill_noreturn __attribute__((noreturn)) +#define rill_printf(x,y) __attribute__((format(printf, x, y))) #define rill_likely(x) __builtin_expect(x, 1) #define rill_unlikely(x) __builtin_expect(x, 0) @@ -27,6 +29,31 @@ #define array_len(arr) (sizeof((arr)) / sizeof((arr)[0])) + +// ----------------------------------------------------------------------------- +// err +// ----------------------------------------------------------------------------- + +void rill_abort() rill_noreturn; +void rill_exit(int code) rill_noreturn; + +void rill_vfail(const char *file, int line, const char *fmt, ...) + rill_printf(3, 4); + +void rill_vfail_errno(const char *file, int line, const char *fmt, ...) + rill_printf(3, 4); + +#define rill_fail(...) \ + rill_vfail(__FILE__, __LINE__, __VA_ARGS__) + +#define rill_fail_errno(...) \ + rill_vfail_errno(__FILE__, __LINE__, __VA_ARGS__) + + +// ----------------------------------------------------------------------------- +// time +// ----------------------------------------------------------------------------- + enum { mins_in_hour = 60, @@ -45,25 +72,13 @@ enum }; -enum { page_len_s = 4096 }; -static const size_t page_len = page_len_s; - - -// ----------------------------------------------------------------------------- -// err -// ----------------------------------------------------------------------------- - -#define fail(fmt, ...) \ - fprintf(stderr, "[fail] "fmt"\n", __VA_ARGS__) - -#define fail_errno(fmt, ...) 
\ - fprintf(stderr, "[fail] "fmt"(%d): %s\n", __VA_ARGS__, errno, strerror(errno)) - - // ----------------------------------------------------------------------------- // vma // ----------------------------------------------------------------------------- +enum { page_len_s = 4096 }; +static const size_t page_len = page_len_s; + static inline size_t to_vma_len(size_t len) { if (!(len % page_len)) return len; From 2035a7c4aae53583da4e56e19eb6feaa73d35393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sun, 24 Sep 2017 09:52:03 -0400 Subject: [PATCH 23/91] Add index to store file --- src/coder.c | 206 +++++++++++++-------------------------------- src/index.c | 106 +++++++++++++++++++++++ src/query.c | 21 ++++- src/rill.h | 11 ++- src/rill_load.c | 2 +- src/store.c | 146 ++++++++++++++++++++++---------- src/vals.c | 108 ++++++++++++++++++++++++ test/coder_test.c | 30 ++++++- test/rotate_test.c | 4 +- 9 files changed, 428 insertions(+), 206 deletions(-) create mode 100644 src/index.c create mode 100644 src/vals.c diff --git a/src/coder.c b/src/coder.c index 03804de..c9897d6 100644 --- a/src/coder.c +++ b/src/coder.c @@ -44,130 +44,38 @@ static inline bool leb128_decode(uint8_t **it, uint8_t *end, uint64_t *val) } + // ----------------------------------------------------------------------------- -// vals +// encode // ----------------------------------------------------------------------------- -struct rill_packed vals -{ - uint64_t len; - uint64_t data[]; -}; - -typedef struct htable vals_rev_t; - -static rill_val_t vals_itov(struct vals *vals, size_t index) -{ - assert(index <= vals->len); - return vals->data[index - 1]; -} +static const size_t coder_max_val_len = sizeof(rill_val_t) + 2 + 1; -static size_t vals_vtoi(vals_rev_t *rev, rill_val_t val) +struct encoder { - if (!val) return 0; // \todo giant hack for coder_finish + uint8_t *it, *start, *end; - struct htable_ret ret = htable_get(rev, val); - assert(ret.ok); - return ret.value; -} + size_t 
keys; + rill_key_t key; -static void vals_rev_make(struct vals *vals, vals_rev_t *rev) -{ - htable_reset(rev); - htable_reserve(rev, vals->len); + vals_rev_t rev; + struct indexer *indexer; - for (size_t index = 1; index <= vals->len; ++index) { - struct htable_ret ret = htable_put(rev, vals->data[index-1], index); - assert(ret.ok); - } -} + size_t pairs; +}; -static int val_cmp(const void *l, const void *r) +static size_t coder_cap(size_t pairs) { - rill_val_t lhs = *((rill_val_t *) l); - rill_val_t rhs = *((rill_val_t *) r); - - if (lhs < rhs) return -1; - if (lhs > rhs) return 1; - return 0; + return coder_max_val_len * (pairs + 1); } -static void vals_compact(struct vals *vals) +static uint64_t coder_off(struct encoder *coder) { - assert(vals->len); - qsort(vals->data, vals->len, sizeof(vals->data[0]), &val_cmp); - - size_t j = 0; - for (size_t i = 1; i < vals->len; ++i) { - if (vals->data[j] == vals->data[i]) continue; - vals->data[++j] = vals->data[i]; - } - - assert(j + 1 <= vals->len); - vals->len = j + 1; + return coder->it - coder->start; } -static struct vals *vals_from_pairs(struct rill_pairs *pairs) -{ - struct vals *vals = - calloc(1, sizeof(*vals) + sizeof(vals->data[0]) * pairs->len); - if (!vals) return NULL; - - vals->len = pairs->len; - for (size_t i = 0; i < pairs->len; ++i) - vals->data[i] = pairs->data[i].val; - vals_compact(vals); - return vals; -} - -static struct vals *vals_merge(struct vals *vals, struct vals *merge) -{ - if (!vals) { - size_t len = sizeof(*vals) + sizeof(vals->data[0]) * merge->len; - vals = calloc(1, len); - memcpy(vals, merge, len); - return vals; - } - - vals = realloc(vals, - sizeof(*vals) + sizeof(vals->data[0]) * (vals->len + merge->len)); - if (!vals) return NULL; - - memcpy( vals->data + vals->len, - merge->data, - sizeof(merge->data[0]) * merge->len); - vals->len += merge->len; - - vals_compact(vals); - return vals; -} - - -// ----------------------------------------------------------------------------- -// coder 
-// ----------------------------------------------------------------------------- - -static const size_t coder_max_val_len = sizeof(rill_val_t) + 2 + 1; - -struct coder -{ - struct vals *vals; - vals_rev_t rev; - - rill_key_t key; - uint8_t *it; - uint8_t *end; - - size_t keys; - size_t pairs; -}; - -// ----------------------------------------------------------------------------- -// encode -// ----------------------------------------------------------------------------- - -static inline bool coder_write_sep(struct coder *coder) +static inline bool coder_write_sep(struct encoder *coder) { if (rill_unlikely(coder->it + 1 > coder->end)) return false; @@ -177,17 +85,7 @@ static inline bool coder_write_sep(struct coder *coder) return true; } -static inline bool coder_write_key(struct coder *coder, rill_key_t key) -{ - if (rill_unlikely(coder->it + sizeof(key) > coder->end)) return false; - - memcpy(coder->it, &key, sizeof(key)); - coder->it += sizeof(key); - - return true; -} - -static inline bool coder_write_val(struct coder *coder, rill_val_t val) +static inline bool coder_write_val(struct encoder *coder, rill_val_t val) { val = vals_vtoi(&coder->rev, val); @@ -202,15 +100,15 @@ static inline bool coder_write_val(struct coder *coder, rill_val_t val) return true; } -static bool coder_encode(struct coder *coder, const struct rill_kv *kv) +static bool coder_encode(struct encoder *coder, const struct rill_kv *kv) { if (coder->key != kv->key) { if (rill_likely(coder->key)) { if (!coder_write_sep(coder)) return false; } + indexer_put(coder->indexer, kv->key, coder_off(coder)); coder->key = kv->key; - if (!coder_write_key(coder, kv->key)) return false; coder->keys++; } @@ -220,49 +118,44 @@ static bool coder_encode(struct coder *coder, const struct rill_kv *kv) return true; } -static bool coder_finish(struct coder *coder) +static bool coder_finish(struct encoder *coder) { if (!coder_write_sep(coder)) return false; - if (!coder_write_key(coder, 0)) return false; + if 
(!coder_write_sep(coder)) return false; htable_reset(&coder->rev); return true; } -static struct coder make_encoder(struct vals *vals, uint8_t *it, uint8_t *end) +static struct encoder make_encoder( + uint8_t *start, uint8_t *end, struct vals *vals, struct indexer *indexer) { - struct coder coder = { - .vals = vals, - .it = it, - .end = end, + struct encoder coder = { + .it = start, .start = start, .end = end, + .indexer = indexer, }; - vals_rev_make(coder.vals, &coder.rev); + vals_rev_make(vals, &coder.rev); return coder; } // ----------------------------------------------------------------------------- -// decode +// decoder // ----------------------------------------------------------------------------- -static inline bool coder_read_key(struct coder *coder, rill_key_t *key) +struct decoder { - if (rill_unlikely(coder->it + sizeof(*key) > coder->end)) { - rill_fail("unable to decode key: %p + %lu = %p > %p'\n", - (void *) coder->it, sizeof(*key), - (void *) (coder->it + sizeof(*key)), - (void *) coder->end); - return false; - } + uint8_t *it, *end; - memcpy(key, coder->it, sizeof(*key)); - coder->it += sizeof(*key); + size_t keys; + rill_key_t key; - return true; -} + struct vals *vals; + struct index *index; +}; -static inline bool coder_read_val(struct coder *coder, rill_val_t *val) +static inline bool coder_read_val(struct decoder *coder, rill_val_t *val) { if (!leb128_decode(&coder->it, coder->end, val)) { rill_fail("unable to decode value at '%p-%p'\n", @@ -274,7 +167,7 @@ static inline bool coder_read_val(struct coder *coder, rill_val_t *val) return true; } -static bool coder_decode(struct coder *coder, struct rill_kv *kv) +static bool coder_decode(struct decoder *coder, struct rill_kv *kv) { if (rill_likely(coder->key)) { kv->key = coder->key; @@ -282,18 +175,33 @@ static bool coder_decode(struct coder *coder, struct rill_kv *kv) if (kv->val) return true; } - if (!coder_read_key(coder, &coder->key)) return false; + coder->key = index_get(coder->index, 
coder->keys); + coder->keys++; + kv->key = coder->key; if (!kv->key) return true; // eof return coder_read_val(coder, &kv->val); } -static struct coder make_decoder(struct vals *vals, uint8_t *it, uint8_t *end) +static struct decoder make_decoder( + uint8_t *it, uint8_t *end, struct vals *vals, struct index *index) +{ + return (struct decoder) { + .it = it, .end = end, + .vals = vals, + .index = index, + }; +} + +static struct decoder make_decoder_at( + uint8_t *it, uint8_t *end, + struct vals *vals, struct index *index, size_t key_idx) { - return (struct coder) { + return (struct decoder) { + .it = it, .end = end, + .keys = key_idx, .vals = vals, - .it = it, - .end = end, + .index = index, }; } diff --git a/src/index.c b/src/index.c new file mode 100644 index 0000000..cfc0028 --- /dev/null +++ b/src/index.c @@ -0,0 +1,106 @@ +/* index.c + Rémi Attab (remi.attab@gmail.com), 24 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + + +// ----------------------------------------------------------------------------- +// config +// ----------------------------------------------------------------------------- + +struct rill_packed index_kv +{ + rill_key_t key; + uint64_t off; +}; + +struct rill_packed index +{ + uint64_t len; + uint64_t slope; + struct index_kv data[]; +}; + +// ----------------------------------------------------------------------------- +// indexer +// ----------------------------------------------------------------------------- + +struct indexer +{ + size_t len, cap; + struct index_kv kvs[]; +}; + +static size_t indexer_cap(size_t pairs) +{ + return sizeof(struct index) + pairs * sizeof(struct index_kv); +} + + +static struct indexer *indexer_alloc(size_t cap) +{ + assert(cap); + + struct indexer *indexer = calloc(1, sizeof(*indexer) + cap * sizeof(indexer->kvs[0])); + if (!indexer) return NULL; + + indexer->cap = cap; + return indexer; +} + +static void indexer_free(struct indexer *indexer) +{ + free(indexer); +} + +static void 
indexer_put(struct indexer *indexer, rill_key_t key, uint64_t off) +{ + indexer->kvs[indexer->len] = (struct index_kv) { .key = key, .off = off }; + indexer->len++; + + assert(indexer->len <= indexer->cap); +} + +static size_t indexer_write(struct indexer *indexer, struct index *index) +{ + index->len = indexer->len; + + uint64_t min = indexer->kvs[0].key; + uint64_t max = indexer->kvs[indexer->len - 1].key; + index->slope = (max - min) / indexer->len; + if (!index->slope) index->slope = 1; + + size_t len = indexer->len * sizeof(indexer->kvs[0]); + memcpy(index->data, indexer->kvs, len); + + return sizeof(*index) + len; +} + +// One pass interpolation search. We assume that the keys are hashes and +// therefore uniformly distributed. So a single jump should get us close enough +// to our goal. +static bool index_find( + struct index *index, rill_key_t key, size_t *key_idx, uint64_t *off) +{ + size_t i = (key - index->data[0].key) / index->slope; + if (i >= index->len) i = index->len - 1; + + while (i && key < index->data[i].key) i--; + + for (; i < index->len; ++i) { + struct index_kv *kv = &index->data[i]; + if (key > kv->key) break; + if (key != kv->key) continue; + + *key_idx = i; + *off = kv->off; + return true; + } + + return false; +} + +static rill_key_t index_get(struct index *index, size_t i) +{ + return i < index->len ? 
index->data[i].key : 0; +} diff --git a/src/query.c b/src/query.c index dc42d58..6c57b3e 100644 --- a/src/query.c +++ b/src/query.c @@ -63,6 +63,21 @@ void rill_query_close(struct rill_query *query) } struct rill_pairs *rill_query_key( + const struct rill_query *query, rill_key_t key, struct rill_pairs *out) +{ + if (!key) return out; + + struct rill_pairs *result = out; + for (size_t i = 0; i < query->len; ++i) { + result = rill_store_query_key(query->list[i], key, result); + if (!result) return NULL; + } + + rill_pairs_compact(result); + return result; +} + +struct rill_pairs *rill_query_keys( const struct rill_query *query, const rill_key_t *keys, size_t len, struct rill_pairs *out) @@ -71,7 +86,7 @@ struct rill_pairs *rill_query_key( struct rill_pairs *result = out; for (size_t i = 0; i < query->len; ++i) { - result = rill_store_scan_key(query->list[i], keys, len, result); + result = rill_store_scan_keys(query->list[i], keys, len, result); if (!result) return NULL; } @@ -79,7 +94,7 @@ struct rill_pairs *rill_query_key( return result; } -struct rill_pairs *rill_query_val( +struct rill_pairs *rill_query_vals( const struct rill_query *query, const rill_val_t *vals, size_t len, struct rill_pairs *out) @@ -88,7 +103,7 @@ struct rill_pairs *rill_query_val( struct rill_pairs *result = out; for (size_t i = 0; i < query->len; ++i) { - result = rill_store_scan_val(query->list[i], vals, len, result); + result = rill_store_scan_vals(query->list[i], vals, len, result); if (!result) return result; } diff --git a/src/rill.h b/src/rill.h index 85622a9..ee03d0b 100644 --- a/src/rill.h +++ b/src/rill.h @@ -126,11 +126,13 @@ rill_ts_t rill_store_ts(const struct rill_store *store); size_t rill_store_quant(const struct rill_store *store); size_t rill_store_vals(const struct rill_store *store); -struct rill_pairs *rill_store_scan_key( +struct rill_pairs *rill_store_query_key( + struct rill_store *store, rill_key_t key, struct rill_pairs *out); +struct rill_pairs 
*rill_store_scan_keys( struct rill_store *store, const rill_key_t *keys, size_t len, struct rill_pairs *out); -struct rill_pairs *rill_store_scan_val( +struct rill_pairs *rill_store_scan_vals( struct rill_store *store, const rill_val_t *vals, size_t len, struct rill_pairs *out); @@ -178,11 +180,14 @@ struct rill_query * rill_query_open(const char *dir); void rill_query_close(struct rill_query *db); struct rill_pairs *rill_query_key( + const struct rill_query *query, rill_key_t key, struct rill_pairs *out); + +struct rill_pairs *rill_query_keys( const struct rill_query *query, const rill_key_t *keys, size_t len, struct rill_pairs *out); -struct rill_pairs *rill_query_val( +struct rill_pairs *rill_query_vals( const struct rill_query *query, const rill_val_t *vals, size_t len, struct rill_pairs *out); diff --git a/src/rill_load.c b/src/rill_load.c index 5b364ad..ea451cd 100644 --- a/src/rill_load.c +++ b/src/rill_load.c @@ -42,7 +42,7 @@ int main(int argc, char **argv) enum { keys_per_sec = 200, - seconds = 3 * month_secs, + seconds = 1 * month_secs, rotation_rate = 10 * min_secs, keys_range = 20 * 1000 * 1000, diff --git a/src/store.c b/src/store.c index 97424b6..1fe0f87 100644 --- a/src/store.c +++ b/src/store.c @@ -24,6 +24,8 @@ // impl // ----------------------------------------------------------------------------- +#include "vals.c" +#include "index.c" #include "coder.c" @@ -33,8 +35,8 @@ static const uint32_t magic = 0x4C4C4952; static const uint64_t stamp = 0xFFFFFFFFFFFFFFFFUL; -static const uint32_t version = 4; -static const uint32_t supported_versions[] = { 3, 4 }; +static const uint32_t version = 5; +static const uint32_t supported_versions[] = { 5 }; struct rill_packed header { @@ -49,8 +51,9 @@ struct rill_packed header uint64_t vals_off; uint64_t data_off; + uint64_t index_off; - uint64_t reserved[4]; // for future use + uint64_t reserved[3]; // for future use uint64_t stamp; }; @@ -66,6 +69,7 @@ struct rill_store struct header *head; struct vals *vals; 
uint8_t *data; + struct index *index; uint8_t *end; }; @@ -74,20 +78,30 @@ struct rill_store // coder // ----------------------------------------------------------------------------- -static struct coder store_encoder(struct rill_store *store) +static struct encoder store_encoder( + struct rill_store *store, struct indexer *indexer) { return make_encoder( - store->vals, store->vma + store->head->data_off, - store->vma + store->vma_len); + store->vma + store->vma_len, + store->vals, indexer); } -static struct coder store_decoder(struct rill_store *store) +static struct decoder store_decoder(struct rill_store *store) { return make_decoder( - store->vals, store->vma + store->head->data_off, - store->vma + store->vma_len); + store->vma + store->vma_len, + store->vals, store->index); +} + +static struct decoder store_decoder_at( + struct rill_store *store, size_t key_idx, uint64_t off) +{ + return make_decoder_at( + store->vma + store->head->data_off + off, + store->vma + store->vma_len, + store->vals, store->index, key_idx); } @@ -162,6 +176,7 @@ struct rill_store *rill_store_open(const char *file) store->head = store->vma; store->vals = (void *) ((uintptr_t) store->vma + store->head->vals_off); store->data = (void *) ((uintptr_t) store->vma + store->head->data_off); + store->index = (void *) ((uintptr_t) store->vma + store->head->index_off); store->end = (void *) ((uintptr_t) store->vma + store->vma_len); if (store->head->magic != magic) { @@ -271,9 +286,30 @@ static bool writer_open( return false; } -static void writer_close(struct rill_store *store, size_t len) +static struct encoder writer_begin( + struct rill_store *store, + const struct vals *vals, + struct indexer *indexer) +{ + size_t len = sizeof(*vals) + sizeof(vals->data[0]) * vals->len; + assert(store->head->vals_off + len < store->vma_len); + + memcpy(store->vals, vals, len); + + store->head->data_off = store->head->vals_off + len; + store->data = (void *) ((uintptr_t) store->vma + store->head->data_off); 
+ + return store_encoder(store, indexer); +} + +static void writer_close( + struct rill_store *store, struct indexer *indexer, size_t len) { if (len) { + store->head->index_off = len; + store->index = (void *) ((uintptr_t) store->vma + store->head->index_off); + len += indexer_write(indexer, store->index); + if (ftruncate(store->fd, len) == -1) rill_fail_errno("unable to resize '%s'", store->file); @@ -297,20 +333,6 @@ static void writer_close(struct rill_store *store, size_t len) close(store->fd); } -static struct coder writer_begin( - struct rill_store *store, const struct vals *vals) -{ - size_t len = sizeof(*vals) + sizeof(vals->data[0]) * vals->len; - assert(store->head->vals_off + len < store->vma_len); - - memcpy(store->vals, vals, len); - - store->head->data_off = store->head->vals_off + len; - store->data = (void *) ((uintptr_t) store->vma + store->head->data_off); - - return store_encoder(store); -} - bool rill_store_write( const char *file, rill_ts_t ts, size_t quant, @@ -322,10 +344,7 @@ bool rill_store_write( struct vals *vals = vals_from_pairs(pairs); if (!vals) goto fail_vals; - size_t cap = - sizeof(struct vals) + (sizeof(vals->data[0]) * vals->len) + - (sizeof(rill_key_t) * (pairs->len + 1)) + - (coder_max_val_len * (pairs->len + 1)); + size_t cap = vals_cap(vals) + coder_cap(pairs->len) + indexer_cap(pairs->len); struct rill_store store = {0}; if (!writer_open(&store, file, cap, ts, quant)) { @@ -333,7 +352,8 @@ bool rill_store_write( goto fail_open; } - struct coder coder = writer_begin(&store, vals); + struct indexer *indexer = indexer_alloc(pairs->len); + struct encoder coder = writer_begin(&store, vals, indexer); for (size_t i = 0; i < pairs->len; ++i) { if (!coder_encode(&coder, &pairs->data[i])) goto fail_encode; @@ -343,12 +363,15 @@ bool rill_store_write( store.head->keys = coder.keys; store.head->pairs = coder.pairs; - writer_close(&store, (uintptr_t) coder.it - (uintptr_t) store.vma); + writer_close(&store, indexer, store.head->data_off 
+ coder_off(&coder)); + + indexer_free(indexer); free(vals); return true; fail_encode: - writer_close(&store, 0); + writer_close(&store, indexer, 0); + indexer_free(indexer); fail_open: free(vals); fail_vals: @@ -362,11 +385,11 @@ bool rill_store_merge( { assert(list_len > 1); - size_t cap = 0; + size_t cap = 0, pairs = 0; struct vals *vals = NULL; struct it { struct rill_kv kv; - struct coder decoder; + struct decoder decoder; } its[list_len]; size_t it_len = 0; @@ -378,6 +401,7 @@ bool rill_store_merge( its[it_len].decoder = store_decoder(list[i]); cap += list[i]->vma_len; + pairs += list[i]->head->keys; it_len++; } @@ -389,7 +413,8 @@ bool rill_store_merge( goto fail_open; } - struct coder encoder = writer_begin(&store, vals); + struct indexer *indexer = indexer_alloc(pairs); + struct encoder encoder = writer_begin(&store, vals, indexer); for (size_t i = 0; i < it_len; ++i) { if (!(coder_decode(&its[i].decoder, &its[i].kv))) goto fail_coder; @@ -423,17 +448,19 @@ bool rill_store_merge( store.head->pairs = encoder.pairs; if (!coder_finish(&encoder)) goto fail_coder; - writer_close(&store, (uintptr_t) encoder.it - (uintptr_t) store.vma); + writer_close(&store, indexer, store.head->data_off + coder_off(&encoder)); for (size_t i = 0; i < list_len; ++i) { if (list[i]) vma_dont_need(list[i]); } + indexer_free(indexer); free(vals); return true; fail_coder: - writer_close(&store, 0); + writer_close(&store, indexer, 0); + indexer_free(indexer); fail_open: free(vals); fail_vals: @@ -466,7 +493,36 @@ size_t rill_store_vals(const struct rill_store *store) } -struct rill_pairs *rill_store_scan_key( +struct rill_pairs *rill_store_query_key( + struct rill_store *store, rill_key_t key, struct rill_pairs *out) +{ + struct rill_pairs *result = out; + + size_t key_idx = 0; + uint64_t off = 0; + if (!index_find(store->index, key, &key_idx, &off)) return result; + + struct rill_kv kv = {0}; + struct decoder coder = store_decoder_at(store, key_idx, off); + + while (true) { + if 
(!coder_decode(&coder, &kv)) goto fail; + if (rill_kv_nil(&kv)) break; + if (kv.key != key) break; + + result = rill_pairs_push(result, kv.key, kv.val); + if (!result) goto fail; + } + + return result; + + fail: + // \todo potentially leaking result + return NULL; +} + + +struct rill_pairs *rill_store_scan_keys( struct rill_store *store, const rill_key_t *keys, size_t len, struct rill_pairs *out) @@ -475,7 +531,7 @@ struct rill_pairs *rill_store_scan_key( struct rill_kv kv = {0}; struct rill_pairs *result = out; - struct coder coder = store_decoder(store); + struct decoder coder = store_decoder(store); while (true) { if (!coder_decode(&coder, &kv)) goto fail; @@ -485,7 +541,7 @@ struct rill_pairs *rill_store_scan_key( if (kv.key != keys[j]) continue; result = rill_pairs_push(result, kv.key, kv.val); - if (!result) return NULL; + if (!result) goto fail; } } @@ -493,11 +549,12 @@ struct rill_pairs *rill_store_scan_key( return result; fail: + // \todo potentially leaking result vma_dont_need(store); return NULL; } -struct rill_pairs *rill_store_scan_val( +struct rill_pairs *rill_store_scan_vals( struct rill_store *store, const rill_val_t *vals, size_t len, struct rill_pairs *out) @@ -506,7 +563,7 @@ struct rill_pairs *rill_store_scan_val( struct rill_kv kv = {0}; struct rill_pairs *result = out; - struct coder coder = store_decoder(store); + struct decoder coder = store_decoder(store); for (size_t i = 0; i < store->head->pairs; ++i) { if (!coder_decode(&coder, &kv)) goto fail; @@ -516,7 +573,7 @@ struct rill_pairs *rill_store_scan_val( if (kv.val != vals[j]) continue; result = rill_pairs_push(result, kv.key, kv.val); - if (!result) return NULL; + if (!result) goto fail; } } @@ -524,6 +581,7 @@ struct rill_pairs *rill_store_scan_val( return result; fail: + // \todo potentially leaking result vma_dont_need(store); return NULL; } @@ -537,7 +595,7 @@ size_t rill_store_dump_vals( } -struct rill_store_it { struct coder decoder; }; +struct rill_store_it { struct decoder 
decoder; }; struct rill_store_it *rill_store_begin(struct rill_store *store) { @@ -577,7 +635,7 @@ void rill_store_print(struct rill_store *store) vma_will_need(store); struct rill_kv kv = {0}; - struct coder coder = store_decoder(store); + struct decoder coder = store_decoder(store); const rill_key_t no_key = -1ULL; rill_key_t key = no_key; diff --git a/src/vals.c b/src/vals.c new file mode 100644 index 0000000..259e70d --- /dev/null +++ b/src/vals.c @@ -0,0 +1,108 @@ +/* vals.c + Rémi Attab (remi.attab@gmail.com), 01 Oct 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +// ----------------------------------------------------------------------------- +// vals +// ----------------------------------------------------------------------------- + +struct rill_packed vals +{ + uint64_t len; + uint64_t data[]; +}; + +typedef struct htable vals_rev_t; + +static size_t vals_cap(struct vals *vals) +{ + return sizeof(*vals) + vals->len * sizeof(vals->data[0]); +} + +static rill_val_t vals_itov(struct vals *vals, size_t index) +{ + assert(index <= vals->len); + return vals->data[index - 1]; +} + +static size_t vals_vtoi(vals_rev_t *rev, rill_val_t val) +{ + if (!val) return 0; // \todo giant hack for coder_finish + + struct htable_ret ret = htable_get(rev, val); + assert(ret.ok); + return ret.value; +} + +static void vals_rev_make(struct vals *vals, vals_rev_t *rev) +{ + htable_reset(rev); + htable_reserve(rev, vals->len); + + for (size_t index = 1; index <= vals->len; ++index) { + struct htable_ret ret = htable_put(rev, vals->data[index-1], index); + assert(ret.ok); + } +} + +static int val_cmp(const void *l, const void *r) +{ + rill_val_t lhs = *((rill_val_t *) l); + rill_val_t rhs = *((rill_val_t *) r); + + if (lhs < rhs) return -1; + if (lhs > rhs) return 1; + return 0; +} + +static void vals_compact(struct vals *vals) +{ + assert(vals->len); + qsort(vals->data, vals->len, sizeof(vals->data[0]), &val_cmp); + + size_t j = 0; + for (size_t i = 1; i < vals->len; 
++i) { + if (vals->data[j] == vals->data[i]) continue; + vals->data[++j] = vals->data[i]; + } + + assert(j + 1 <= vals->len); + vals->len = j + 1; +} + +static struct vals *vals_from_pairs(struct rill_pairs *pairs) +{ + struct vals *vals = + calloc(1, sizeof(*vals) + sizeof(vals->data[0]) * pairs->len); + if (!vals) return NULL; + + vals->len = pairs->len; + for (size_t i = 0; i < pairs->len; ++i) + vals->data[i] = pairs->data[i].val; + + vals_compact(vals); + return vals; +} + +static struct vals *vals_merge(struct vals *vals, struct vals *merge) +{ + if (!vals) { + size_t len = sizeof(*vals) + sizeof(vals->data[0]) * merge->len; + vals = calloc(1, len); + memcpy(vals, merge, len); + return vals; + } + + vals = realloc(vals, + sizeof(*vals) + sizeof(vals->data[0]) * (vals->len + merge->len)); + if (!vals) return NULL; + + memcpy( vals->data + vals->len, + merge->data, + sizeof(merge->data[0]) * merge->len); + vals->len += merge->len; + + vals_compact(vals); + return vals; +} diff --git a/test/coder_test.c b/test/coder_test.c index 4319f75..4b4b941 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -5,7 +5,7 @@ #include "test.h" -#include "coder.c" +#include "store.c" // ----------------------------------------------------------------------------- @@ -94,6 +94,7 @@ static void check_vals(struct rill_pairs *pairs, struct vals *exp) free(vals); free(exp); free(pairs); + htable_reset(&rev); } static void check_vals_merge(struct vals *a, struct vals *b, struct vals *exp) @@ -104,8 +105,9 @@ static void check_vals_merge(struct vals *a, struct vals *b, struct vals *exp) for (size_t i = 0; i < exp->len; ++i) assert(result->data[i] == exp->data[i]); - free(a); + free(result); free(b); + free(exp); } bool test_vals(void) @@ -142,16 +144,21 @@ void check_coder(struct rill_pairs *pairs) size_t cap = (pairs->len + 1) * (sizeof(pairs->data[0]) + 3); uint8_t *buffer = calloc(1, cap); struct vals *vals = vals_from_pairs(pairs); + struct index *index = calloc(1, 
sizeof(*index) + pairs->len * sizeof(index->data[0])); size_t len = 0; { - struct coder coder = make_encoder(vals, buffer, buffer + cap); + struct indexer *indexer = indexer_alloc(pairs->len); + struct encoder coder = make_encoder(buffer, buffer + cap, vals, indexer); for (size_t i = 0; i < pairs->len; ++i) assert(coder_encode(&coder, &pairs->data[i])); assert(coder_finish(&coder)); len = coder.it - buffer; assert(len <= cap); + + indexer_write(indexer, index); + indexer_free(indexer); } if (false) { // hex dump for debuging @@ -167,7 +174,7 @@ void check_coder(struct rill_pairs *pairs) } { - struct coder coder = make_decoder(vals, buffer, buffer + len); + struct decoder coder = make_decoder(buffer, buffer + len, vals, index); struct rill_kv kv = {0}; for (size_t i = 0; i < pairs->len; ++i) { @@ -179,6 +186,21 @@ void check_coder(struct rill_pairs *pairs) assert(rill_kv_nil(&kv)); } + for (size_t i = 0; i < pairs->len; ++i) { + size_t key_idx; uint64_t off; + assert(index_find(index, pairs->data[i].key, &key_idx, &off)); + struct decoder coder = make_decoder_at( + buffer + off, buffer + len, vals, index, key_idx); + + struct rill_kv kv = {0}; + do { + assert(coder_decode(&coder, &kv)); + assert(kv.key == pairs->data[i].key); + } while (kv.val != pairs->data[i].val); + } + + free(buffer); + free(index); free(vals); free(pairs); } diff --git a/test/rotate_test.c b/test/rotate_test.c index 6758073..e8bfe0a 100644 --- a/test/rotate_test.c +++ b/test/rotate_test.c @@ -33,7 +33,7 @@ bool test_rotate(void) { struct rill_query *query = rill_query_open(dir); - struct rill_pairs *pairs = rill_query_key(query, &key, 1, rill_pairs_new(1)); + struct rill_pairs *pairs = rill_query_keys(query, &key, 1, rill_pairs_new(1)); rill_query_close(query); size_t i = 0; @@ -51,7 +51,7 @@ bool test_rotate(void) { struct rill_query *query = rill_query_open(dir); - struct rill_pairs *pairs = rill_query_key(query, &key, 1, rill_pairs_new(1)); + struct rill_pairs *pairs = rill_query_keys(query, 
&key, 1, rill_pairs_new(1)); rill_query_close(query); for (size_t i = 0; i < pairs->len; ++i) { From a6298c8fe599c059515283a7df2c3ba3de51dacd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sun, 1 Oct 2017 20:26:05 -0400 Subject: [PATCH 24/91] Add rill_query utils --- compile.sh | 1 + src/rill_query.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 src/rill_query.c diff --git a/compile.sh b/compile.sh index 9a305cd..5f4f410 100755 --- a/compile.sh +++ b/compile.sh @@ -31,6 +31,7 @@ ar rcs librill.a $OBJ gcc -o rill_load "${PREFIX}/src/rill_load.c" librill.a $CFLAGS gcc -o rill_dump "${PREFIX}/src/rill_dump.c" librill.a $CFLAGS +gcc -o rill_query "${PREFIX}/src/rill_query.c" librill.a $CFLAGS gcc -o rill_historical_dump "${PREFIX}/src/rill_historical_dump.c" librill.a $CFLAGS gcc -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS diff --git a/src/rill_query.c b/src/rill_query.c new file mode 100644 index 0000000..907c0bf --- /dev/null +++ b/src/rill_query.c @@ -0,0 +1,78 @@ +/* rill_query.c + Rémi Attab (remi.attab@gmail.com), 01 Oct 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" +#include "utils.h" + +#include +#include + +#include +#include +#include + +bool is_file(const char *path) +{ + struct stat st = {0}; + stat(path, &st); + return S_ISREG(st.st_mode); +} + +void usage() +{ + fprintf(stderr, "rill_query [-k |-v ] \n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + rill_key_t key = 0; + rill_val_t val = 0; + + int opt = 0; + while ((opt = getopt(argc, argv, "k:v:")) != -1) { + switch (opt) { + case 'k': key = atoi(optarg); break; + case 'v': val = atoi(optarg); break; + default: usage(); exit(1); + } + } + + if (key && val) { usage(); } + if (!key && !val) { usage(); } + if (optind >= argc) { usage(); } + + const char *db = argv[optind]; + struct rill_pairs *pairs = rill_pairs_new(100); + + if (is_file(db)) { + struct rill_store 
*store = rill_store_open(db); + if (!store) rill_exit(1); + + if (key) pairs = rill_store_query_key(store, key, pairs); + else pairs = rill_store_scan_vals(store, &val, 1, pairs); + + rill_store_close(store); + } + else { + struct rill_query *query = rill_query_open(db); + if (!query) rill_exit(1); + + if (key) pairs = rill_query_key(query, key, pairs); + else pairs = rill_query_vals(query, &val, 1, pairs); + + rill_query_close(query); + } + + if (!pairs) rill_exit(1); + + for (size_t i = 0; i < pairs->len; ++i) { + if (key) printf("%lu\n", pairs->data[i].val); + else printf("%p\n", (void *) pairs->data[i].key); + } + + rill_pairs_free(pairs); + return 0; +} From a610346b2651aa1843b0f78849144eb9fd538d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 9 Oct 2017 08:58:01 -0400 Subject: [PATCH 25/91] Fix various potential leaks Mostly for error conditions --- src/acc.c | 25 +++++++++++-------------- src/coder.c | 5 ++++- src/index.c | 5 ++++- src/store.c | 10 ++++++++-- src/vals.c | 6 +++++- 5 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/acc.c b/src/acc.c index 4ea7810..3795294 100644 --- a/src/acc.c +++ b/src/acc.c @@ -93,7 +93,7 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) goto fail_stat; } - if (cap == rill_acc_read_only) return false; + if (cap == rill_acc_read_only) goto fail_read_only; create = true; acc->fd = open(file, O_RDWR | O_CREAT | O_EXCL | O_NOATIME, 0644); @@ -158,6 +158,7 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) fail_truncate: close(acc->fd); fail_open: + fail_read_only: fail_stat: fail_mkdir: free((char *) acc->dir); @@ -191,15 +192,9 @@ void rill_acc_ingest(struct rill_acc *acc, rill_key_t key, rill_val_t val) bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) { - struct rill_pairs *pairs = rill_pairs_new(acc->head->len); - if (!pairs) { - rill_fail("unable to allocate pairs for len '%lu'", acc->head->len); - return false; - } - size_t start 
= atomic_load_explicit(&acc->head->read, memory_order_acquire); size_t end = atomic_load_explicit(&acc->head->write, memory_order_acquire); - if (start == end) goto done; + if (start == end) return true; assert(start < end); if (end - start > acc->head->len) { @@ -208,15 +203,17 @@ bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) start = end - acc->head->len; } - struct rill_pairs *ret = NULL; + struct rill_pairs *pairs = rill_pairs_new(end - start); + if (!pairs) { + rill_fail("unable to allocate pairs for len '%lu'", acc->head->len); + goto fail_pairs_alloc; + } + for (size_t i = start; i < end; ++i) { size_t index = i % acc->head->len; struct kv *kv = &acc->data[index]; - /* printf("read: [%lu] %lu/%lu -> %p{%lu, %lu}\n", */ - /* i, index, acc->head->len, (void *) kv, kv->key, kv->val); */ - - ret = rill_pairs_push(pairs, kv->key, kv->val); + struct rill_pairs *ret = rill_pairs_push(pairs, kv->key, kv->val); assert(ret == pairs); } @@ -227,11 +224,11 @@ bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) atomic_store_explicit(&acc->head->read, end, memory_order_release); - done: rill_pairs_free(pairs); return true; fail_write: rill_pairs_free(pairs); + fail_pairs_alloc: return false; } diff --git a/src/coder.c b/src/coder.c index c9897d6..98e7e13 100644 --- a/src/coder.c +++ b/src/coder.c @@ -122,9 +122,12 @@ static bool coder_finish(struct encoder *coder) { if (!coder_write_sep(coder)) return false; if (!coder_write_sep(coder)) return false; + return true; +} +static void coder_close(struct encoder *coder) +{ htable_reset(&coder->rev); - return true; } static struct encoder make_encoder( diff --git a/src/index.c b/src/index.c index cfc0028..09a60e7 100644 --- a/src/index.c +++ b/src/index.c @@ -42,7 +42,10 @@ static struct indexer *indexer_alloc(size_t cap) assert(cap); struct indexer *indexer = calloc(1, sizeof(*indexer) + cap * sizeof(indexer->kvs[0])); - if (!indexer) return NULL; + if (!indexer) { + 
rill_fail("unable to allocate indexer: %lu", cap); + return NULL; + } indexer->cap = cap; return indexer; diff --git a/src/store.c b/src/store.c index 1fe0f87..6681b2a 100644 --- a/src/store.c +++ b/src/store.c @@ -365,11 +365,13 @@ bool rill_store_write( writer_close(&store, indexer, store.head->data_off + coder_off(&coder)); + coder_close(&coder); indexer_free(indexer); free(vals); return true; fail_encode: + coder_close(&coder); writer_close(&store, indexer, 0); indexer_free(indexer); fail_open: @@ -397,7 +399,9 @@ bool rill_store_merge( if (!list[i]) continue; vma_will_need(list[i]); - if (!(vals = vals_merge(vals, list[i]->vals))) goto fail_vals; + struct vals *ret = vals_merge(vals, list[i]->vals); + if (ret) { vals = ret; } else { goto fail_vals; } + its[it_len].decoder = store_decoder(list[i]); cap += list[i]->vma_len; @@ -454,16 +458,18 @@ bool rill_store_merge( if (list[i]) vma_dont_need(list[i]); } + coder_close(&encoder); indexer_free(indexer); free(vals); return true; fail_coder: + coder_close(&encoder); writer_close(&store, indexer, 0); indexer_free(indexer); fail_open: - free(vals); fail_vals: + free(vals); return false; } diff --git a/src/vals.c b/src/vals.c index 259e70d..edc99b6 100644 --- a/src/vals.c +++ b/src/vals.c @@ -96,7 +96,11 @@ static struct vals *vals_merge(struct vals *vals, struct vals *merge) vals = realloc(vals, sizeof(*vals) + sizeof(vals->data[0]) * (vals->len + merge->len)); - if (!vals) return NULL; + if (!vals) { + rill_fail("unable to allocate memory for vals: %lu + %lu", + vals->len, merge->len); + return NULL; + } memcpy( vals->data + vals->len, merge->data, From 63fbb402d319ddbf1c9368edc435b095f6a72d32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 10 Oct 2017 09:57:03 -0400 Subject: [PATCH 26/91] Add MAP_POPULATE to acc's mmap --- src/acc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/acc.c b/src/acc.c index 3795294..0a05292 100644 --- a/src/acc.c +++ b/src/acc.c @@ 
-122,7 +122,9 @@ struct rill_acc *rill_acc_open(const char *dir, size_t cap) acc->vma_len = to_vma_len(len); } - acc->vma = mmap(NULL, acc->vma_len, PROT_READ | PROT_WRITE, MAP_SHARED, acc->fd, 0); + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_SHARED | MAP_POPULATE; + acc->vma = mmap(NULL, acc->vma_len, prot, flags, acc->fd, 0); if (acc->vma == MAP_FAILED) { rill_fail_errno("unable to mmap '%s' of len '%lu'", file, acc->vma_len); goto fail_mmap; From 1313c735586114024b0d46a303c86d8dad8501d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 10 Oct 2017 16:56:13 -0400 Subject: [PATCH 27/91] Remove last use of readdir_r it's deprecated so yay! --- src/rill_load.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/rill_load.c b/src/rill_load.c index ea451cd..bc2f578 100644 --- a/src/rill_load.c +++ b/src/rill_load.c @@ -20,11 +20,9 @@ void rm(const char *path) DIR *dir = opendir(path); if (!dir) return; - struct dirent stream, *entry; - while (true) { - if (readdir_r(dir, &stream, &entry) == -1) abort(); - else if (!entry) break; - else if (entry->d_type != DT_REG) continue; + struct dirent *entry = NULL; + while ((entry = readdir(dir))) { + if (entry->d_type != DT_REG) continue; char file[PATH_MAX]; snprintf(file, sizeof(file), "%s/%s", path, entry->d_name); From 04504cd8232a71b88567039587e8fc601df6d603 Mon Sep 17 00:00:00 2001 From: Jing Peng Date: Tue, 10 Oct 2017 11:54:24 -0400 Subject: [PATCH 28/91] Fix interpolation search Fix the comparison after the initial interpolation jump so that we no longer miss the target key. Add unit tests for indexer functions. 
--- compile.sh | 1 + src/index.c | 2 +- test/indexer_test.c | 111 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 test/indexer_test.c diff --git a/compile.sh b/compile.sh index 5f4f410..a79dee0 100755 --- a/compile.sh +++ b/compile.sh @@ -36,4 +36,5 @@ gcc -o rill_historical_dump "${PREFIX}/src/rill_historical_dump.c" librill.a $CF gcc -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS gcc -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder +gcc -o test_indexer "${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && ./test_indexer gcc -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS diff --git a/src/index.c b/src/index.c index 09a60e7..64fa52e 100644 --- a/src/index.c +++ b/src/index.c @@ -92,7 +92,7 @@ static bool index_find( for (; i < index->len; ++i) { struct index_kv *kv = &index->data[i]; - if (key > kv->key) break; + if (key < kv->key) break; if (key != kv->key) continue; *key_idx = i; diff --git a/test/indexer_test.c b/test/indexer_test.c new file mode 100644 index 0000000..05026b4 --- /dev/null +++ b/test/indexer_test.c @@ -0,0 +1,111 @@ +#include "test.h" + +#include "index.c" + +enum { + CAP = 10 +}; + +bool test_indexer_build(void) +{ + struct indexer *indexer = indexer_alloc(CAP); + assert(indexer); + assert(indexer->len == 0); + assert(indexer->cap == CAP); + + rill_key_t data[CAP] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + for (size_t i = 0; i < CAP; i++) + indexer_put(indexer, data[i], i); + assert(indexer->len == indexer->cap); + + const size_t size = indexer_cap(CAP); + struct index *index = calloc(1, size); + assert(index); + + size_t n_written = indexer_write(indexer, index); + assert(n_written == size); + assert(index->slope == (data[CAP - 1] - data[0]) / CAP); + + indexer_free(indexer); + + for (size_t i = 0; i < index->len; i++) + assert(index_get(index, i) == data[i]); + assert(index_get(index, index->len) == 0); + + free(index); 
+ + return true; +} + +static struct index *make_index(rill_key_t *data, size_t n) +{ + struct indexer *indexer = indexer_alloc(n); + for (size_t i = 0; i < n; i++) + indexer_put(indexer, data[i], i); + + struct index *index = calloc(1, indexer_cap(n)); + indexer_write(indexer, index); + indexer_free(indexer); + + return index; +} + +#define index_from_keys(...) \ + ({ \ + rill_key_t keys[] = { __VA_ARGS__ }; \ + make_index(keys, sizeof(keys) / sizeof(keys[0])); \ + }) + +#define assert_found(index, ...) { \ + rill_key_t keys[] = { __VA_ARGS__ }; \ + size_t key_idx; \ + uint64_t val; \ + for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) { \ + assert(index_find(index, keys[i], &key_idx, &val)); \ + assert(key_idx == i); \ + assert(val == i); \ + } \ +} + +#define assert_not_found(index, ...) { \ + rill_key_t keys[] = { __VA_ARGS__ }; \ + size_t key_idx; \ + uint64_t val; \ + for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) \ + assert(!index_find(index, keys[i], &key_idx, &val)); \ +} + +bool test_indexer_lookup(void) +{ + struct index *index; + + index = index_from_keys(0, 3, 6, 9, 12, 15, 18, 21, 24, 27); + assert_found(index, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27); + assert_not_found(index, 1, 5, 8, 10, 14, 17, 20, 22, 25, 100); + free(index); + + index = index_from_keys(0, 3, 4, 5, 6, 7, 8, 9, 12, 27); + assert_found(index, 0, 3, 4, 5, 6, 7, 8, 9, 12, 27); + free(index); + + index = index_from_keys(0, 3, 12, 13, 14, 15, 16, 17, 18, 27); + assert_found(index, 0, 3, 12, 13, 14, 15, 16, 17, 18, 27); + free(index); + + return true; +} + +// ----------------------------------------------------------------------------- +// main +// ----------------------------------------------------------------------------- + +int main(int argc, char **argv) +{ + (void) argc, (void) argv; + bool ret = true; + + ret = ret && test_indexer_build(); + ret = ret && test_indexer_lookup(); + + return ret ? 
0 : 1; +} From 95ce749f2139f4f58dc97074c26a868e49b24c4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 13 Oct 2017 10:45:46 -0400 Subject: [PATCH 29/91] Make database globally readable This really should be configurable but this is just a short term fix --- src/store.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/store.c b/src/store.c index 6681b2a..22ce184 100644 --- a/src/store.c +++ b/src/store.c @@ -245,7 +245,7 @@ static bool writer_open( { store->file = file; - store->fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0640); + store->fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644); if (store->fd == -1) { rill_fail_errno("unable to open '%s'", file); goto fail_open; From b65abbc12c894e4b4e37476863bd6d1e2c78b1a9 Mon Sep 17 00:00:00 2001 From: Simon Symeonidis Date: Tue, 10 Oct 2017 17:27:36 -0400 Subject: [PATCH 30/91] Add more flexible compile options --- compile.sh | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/compile.sh b/compile.sh index a79dee0..6a521f7 100755 --- a/compile.sh +++ b/compile.sh @@ -6,6 +6,7 @@ set -o errexit -o nounset -o pipefail -o xtrace declare -a SRC SRC=(htable rng utils pairs store acc rotate query) +CC=${OTHERC:-gcc} CFLAGS="-g -O3 -march=native -pipe -std=gnu11 -D_GNU_SOURCE" CFLAGS="$CFLAGS -I${PREFIX}/src" @@ -21,20 +22,21 @@ CFLAGS="$CFLAGS -Wswitch-default" CFLAGS="$CFLAGS -Winit-self" CFLAGS="$CFLAGS -Wno-strict-aliasing" CFLAGS="$CFLAGS -fno-strict-aliasing" +CFLAGS="$CFLAGS -Wno-implicit-fallthrough" OBJ="" for src in "${SRC[@]}"; do - gcc -c -o "$src.o" "${PREFIX}/src/$src.c" $CFLAGS + $CC -c -o "$src.o" "${PREFIX}/src/$src.c" $CFLAGS OBJ="$OBJ $src.o" done ar rcs librill.a $OBJ -gcc -o rill_load "${PREFIX}/src/rill_load.c" librill.a $CFLAGS -gcc -o rill_dump "${PREFIX}/src/rill_dump.c" librill.a $CFLAGS -gcc -o rill_query "${PREFIX}/src/rill_query.c" librill.a $CFLAGS -gcc -o rill_historical_dump "${PREFIX}/src/rill_historical_dump.c" 
librill.a $CFLAGS -gcc -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS +$CC -o rill_load "${PREFIX}/src/rill_load.c" librill.a $CFLAGS +$CC -o rill_dump "${PREFIX}/src/rill_dump.c" librill.a $CFLAGS +$CC -o rill_query "${PREFIX}/src/rill_query.c" librill.a $CFLAGS +$CC -o rill_historical_dump "${PREFIX}/src/rill_historical_dump.c" librill.a $CFLAGS +$CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS -gcc -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder -gcc -o test_indexer "${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && ./test_indexer -gcc -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS +$CC -o test_indexer "${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && ./test_indexer +$CC -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder +$CC -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS From e3be08ec8b227f9df3def90c42b1f9fa94de1a23 Mon Sep 17 00:00:00 2001 From: Simon Symeonidis Date: Tue, 10 Oct 2017 17:28:04 -0400 Subject: [PATCH 31/91] Fix gcc-7 warnings --- src/rill_historical_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rill_historical_dump.c b/src/rill_historical_dump.c index ab333de..9038784 100644 --- a/src/rill_historical_dump.c +++ b/src/rill_historical_dump.c @@ -54,7 +54,7 @@ static void read_table(const char *file, struct htable *table) ssize_t ret; while (true) { uint64_t len = 0; - assert(ret = read(fd, &len, sizeof(len)) >= 0); + assert((ret = read(fd, &len, sizeof(len))) >= 0); if (!ret || !len) break; char *name = calloc(len * 4 + 1, sizeof(*name)); From d1eb2874776ca254f7d321a9865bc45bf1bf1e28 Mon Sep 17 00:00:00 2001 From: Simon Symeonidis Date: Wed, 11 Oct 2017 11:21:07 -0400 Subject: [PATCH 32/91] Remove use of readdir_r in tests --- test/test.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test.h b/test/test.h index 66ef039..34ee09d 100644 --- a/test/test.h +++ 
b/test/test.h @@ -53,9 +53,9 @@ void rm(const char *path) DIR *dir = opendir(path); if (!dir) return; - struct dirent stream, *entry; + struct dirent *entry; while (true) { - if (readdir_r(dir, &stream, &entry) == -1) abort(); + if ((entry = readdir(dir))) abort(); else if (!entry) break; else if (entry->d_type != DT_REG) continue; From a75c7c8a0e6aa74719c2059d32ed49f9b0a6344f Mon Sep 17 00:00:00 2001 From: Simon Symeonidis Date: Wed, 11 Oct 2017 18:14:01 -0400 Subject: [PATCH 33/91] Use set intersection to improve lookup speed --- src/query.c | 20 ++++++++++++++++++-- src/store.c | 12 ++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/query.c b/src/query.c index 6c57b3e..1e0b3a3 100644 --- a/src/query.c +++ b/src/query.c @@ -94,6 +94,15 @@ struct rill_pairs *rill_query_keys( return result; } +static int compare_rill_values(const void *v1, const void *v2) { + const rill_val_t rv1 = *(rill_val_t*)v1; + const rill_val_t rv2 = *(rill_val_t*)v2; + + if (rv1 > rv2) return 1; + if (rv1 < rv2) return -1; + return 0; +} + struct rill_pairs *rill_query_vals( const struct rill_query *query, const rill_val_t *vals, size_t len, @@ -101,12 +110,19 @@ struct rill_pairs *rill_query_vals( { if (!len) return out; + rill_val_t *sorted = malloc(sizeof(vals[0]) * len); + memcpy(sorted, vals, len); + qsort(sorted, len, sizeof(vals[0]), compare_rill_values); + struct rill_pairs *result = out; for (size_t i = 0; i < query->len; ++i) { - result = rill_store_scan_vals(query->list[i], vals, len, result); - if (!result) return result; + result = rill_store_scan_vals(query->list[i], sorted, len, result); + if (!result) goto cleanup; } rill_pairs_compact(result); + +cleanup: + free(sorted); return result; } diff --git a/src/store.c b/src/store.c index 22ce184..751d525 100644 --- a/src/store.c +++ b/src/store.c @@ -571,13 +571,21 @@ struct rill_pairs *rill_store_scan_vals( struct rill_pairs *result = out; struct decoder coder = store_decoder(store); + rill_key_t 
current = 0; + size_t ix = 0; + for (size_t i = 0; i < store->head->pairs; ++i) { if (!coder_decode(&coder, &kv)) goto fail; if (rill_kv_nil(&kv)) break; - for (size_t j = 0; j < len; ++j) { - if (kv.val != vals[j]) continue; + if (current != kv.key) { + ix = 0; + current = kv.key; + } + + while (ix < len && vals[ix] < kv.val) ++ix; + if (vals[ix] == kv.val) { result = rill_pairs_push(result, kv.key, kv.val); if (!result) goto fail; } From 19bf2172c4f218c489c8ac052896158900654a6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Thu, 19 Oct 2017 14:05:38 -0400 Subject: [PATCH 34/91] Fix memcpy in rill_query_vals --- src/query.c | 14 ++++++++++---- src/store.c | 14 +++++--------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/query.c b/src/query.c index 1e0b3a3..2f5470e 100644 --- a/src/query.c +++ b/src/query.c @@ -111,18 +111,24 @@ struct rill_pairs *rill_query_vals( if (!len) return out; rill_val_t *sorted = malloc(sizeof(vals[0]) * len); - memcpy(sorted, vals, len); + if (!sorted) goto fail_alloc; + + memcpy(sorted, vals, sizeof(vals[0]) * len); qsort(sorted, len, sizeof(vals[0]), compare_rill_values); struct rill_pairs *result = out; for (size_t i = 0; i < query->len; ++i) { result = rill_store_scan_vals(query->list[i], sorted, len, result); - if (!result) goto cleanup; + if (!result) goto fail_scan; } rill_pairs_compact(result); - -cleanup: free(sorted); return result; + + fail_scan: + free(sorted); + fail_alloc: + // \todo potentially leaking result + return NULL; } diff --git a/src/store.c b/src/store.c index 751d525..708cafb 100644 --- a/src/store.c +++ b/src/store.c @@ -571,21 +571,17 @@ struct rill_pairs *rill_store_scan_vals( struct rill_pairs *result = out; struct decoder coder = store_decoder(store); + size_t i = 0; rill_key_t current = 0; - size_t ix = 0; - for (size_t i = 0; i < store->head->pairs; ++i) { + while (true) { if (!coder_decode(&coder, &kv)) goto fail; if (rill_kv_nil(&kv)) break; - if (current != 
kv.key) { - ix = 0; - current = kv.key; - } - - while (ix < len && vals[ix] < kv.val) ++ix; + if (current != kv.key) { i = 0; current = kv.key; } + while (i < len && vals[i] < kv.val) ++i; - if (vals[ix] == kv.val) { + if (vals[i] == kv.val) { result = rill_pairs_push(result, kv.key, kv.val); if (!result) goto fail; } From 4636ca1174380495fd215805571d30bafe9800ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sun, 22 Oct 2017 10:22:59 -0400 Subject: [PATCH 35/91] Add store tests --- compile.sh | 1 + test/coder_test.c | 16 +-- test/store_test.c | 274 ++++++++++++++++++++++++++++++++++++++++++++++ test/test.h | 17 +++ 4 files changed, 295 insertions(+), 13 deletions(-) create mode 100644 test/store_test.c diff --git a/compile.sh b/compile.sh index 6a521f7..5928a4c 100755 --- a/compile.sh +++ b/compile.sh @@ -39,4 +39,5 @@ $CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS $CC -o test_indexer "${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && ./test_indexer $CC -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder +$CC -o test_store "${PREFIX}/test/store_test.c" librill.a $CFLAGS && ./test_store $CC -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS diff --git a/test/coder_test.c b/test/coder_test.c index 4b4b941..8cff48d 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -69,7 +69,7 @@ static struct vals *make_vals_impl(rill_val_t *list, size_t len) struct vals *vals = calloc(1, sizeof(struct vals) + sizeof(list[0]) * len); vals->len = len; - for (size_t i = 0; i < len; ++i) vals->data[i] = list[i]; + memcpy(vals->data, list, sizeof(list[0]) * len); vals_compact(vals); return vals; @@ -215,23 +215,13 @@ bool test_coder(void) check_coder(make_pair(kv(1, 10), kv(1, 20), kv(2, 10))); struct rng rng = rng_make(0); - for (size_t iterations = 0; iterations < 100; ++iterations) { - - struct rill_pairs *pairs = rill_pairs_new(1000); - for (size_t i = 0; i < 1000; ++i) { - uint64_t key = 
rng_gen_range(&rng, 1, 500); - uint64_t val = rng_gen_range(&rng, 1, 100); - pairs = rill_pairs_push(pairs, key, val); - } - - check_coder(pairs); - } + for (size_t iterations = 0; iterations < 100; ++iterations) + check_coder(make_rng_pairs(&rng)); return true; } - // ----------------------------------------------------------------------------- // main // ----------------------------------------------------------------------------- diff --git a/test/store_test.c b/test/store_test.c new file mode 100644 index 0000000..da234d9 --- /dev/null +++ b/test/store_test.c @@ -0,0 +1,274 @@ +/* coder_test.c + Rémi Attab (remi.attab@gmail.com), 11 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "test.h" + + +// ----------------------------------------------------------------------------- +// utils +// ----------------------------------------------------------------------------- + +static struct rill_store *make_store(const char *name, struct rill_pairs *pairs) +{ + unlink(name); + assert(rill_store_write(name, 0, 0, pairs)); + + struct rill_store *store = rill_store_open(name); + assert(store); + + return store; +} + +struct list { size_t len; uint64_t data[]; }; + +#define make_list(...) 
\ + ({ \ + uint64_t list[] = { __VA_ARGS__ }; \ + make_list_impl(list, sizeof(list) / sizeof(list[0])); \ + }) + +static struct list *make_list_impl(uint64_t *data, size_t len) +{ + struct list *list = calloc(1, sizeof(struct list) + sizeof(data[0]) * len); + + list->len = len; + memcpy(list->data, data, sizeof(data[0]) * len); + + return list; +} + +static struct list *make_rng_list(struct rng *rng, uint64_t max) +{ + struct list *list = calloc(1, sizeof(struct list) + sizeof(list->data[0]) * max); + + for (uint64_t val = 0; val < max; ++val) { + if (rng_gen(rng) > rng_max() / 2) continue; + + list->data[list->len] = val; + list->len++; + } + + return list; +} + + +// ----------------------------------------------------------------------------- +// query_key +// ----------------------------------------------------------------------------- + +static void check_query_key(struct rill_pairs *pairs) +{ + struct rill_store *store = make_store("test.store.query_key", pairs); + + struct rill_pairs *result = rill_pairs_new(128); + rill_pairs_compact(pairs); + + for (size_t i = 0; i < pairs->len;) { + rill_pairs_clear(result); + result = rill_store_query_key(store, pairs->data[i].key, result); + + assert(pairs->len - i >= result->len); + for (size_t j = 0; j < result->len; ++j, ++i) + assert(!rill_kv_cmp(&pairs->data[i], &result->data[j])); + } + + free(result); + free(store); + free(pairs); +} + +bool test_query_key(void) +{ + check_query_key(make_pair(kv(1, 10))); + check_query_key(make_pair(kv(1, 10), kv(2, 20))); + check_query_key(make_pair(kv(1, 10), kv(1, 20), kv(2, 20))); + check_query_key(make_pair(kv(1, 10), kv(1, 20), kv(1, 20), kv(1, 30))); + + struct rng rng = rng_make(0); + for (size_t iterations = 0; iterations < 10; ++iterations) + check_query_key(make_rng_pairs(&rng)); + + return true; +} + + +// ----------------------------------------------------------------------------- +// scan_keys +// 
----------------------------------------------------------------------------- + +static void check_scan_keys( + struct rill_store *store, struct rill_pairs *pairs, struct list *keys) +{ + struct rill_pairs *result = rill_pairs_new(128); + rill_pairs_compact(pairs); + + result = rill_store_scan_keys(store, keys->data, keys->len, result); + + struct rill_pairs *exp = rill_pairs_new(128); + for (size_t i = 0; i < pairs->len; ++i) { + for (size_t j = 0; j < keys->len; ++j) { + struct rill_kv *kv = &pairs->data[i]; + if (kv->key == keys->data[j]) exp = rill_pairs_push(exp, kv->key, kv->val); + } + } + + assert(exp->len == result->len); + for (size_t i = 0; i < exp->len; ++i) + assert(!rill_kv_cmp(&exp->data[i], &result->data[i])); + + free(exp); + free(result); + free(keys); +} + +bool test_scan_keys(void) +{ + static const char *name = "test.store.scan_keys"; + + { + struct rill_pairs *pairs = make_pair(kv(2, 10)); + struct rill_store *store = make_store(name, pairs); + + check_scan_keys(store, pairs, make_list(1)); + check_scan_keys(store, pairs, make_list(2)); + check_scan_keys(store, pairs, make_list(3)); + check_scan_keys(store, pairs, make_list(1, 2)); + check_scan_keys(store, pairs, make_list(2, 3)); + check_scan_keys(store, pairs, make_list(1, 3)); + + free(store); + free(pairs); + } + + + { + struct rill_pairs *pairs = make_pair(kv(2, 10), kv(3, 10), kv(3, 20), kv(4, 30)); + struct rill_store *store = make_store(name, pairs); + + check_scan_keys(store, pairs, make_list(1)); + check_scan_keys(store, pairs, make_list(3)); + check_scan_keys(store, pairs, make_list(5)); + check_scan_keys(store, pairs, make_list(1, 3)); + check_scan_keys(store, pairs, make_list(3, 5)); + check_scan_keys(store, pairs, make_list(2, 3)); + check_scan_keys(store, pairs, make_list(2, 3, 4)); + + free(store); + free(pairs); + } + + { + struct rng rng = rng_make(0); + struct rill_pairs *pairs = make_rng_pairs(&rng); + struct rill_store *store = make_store(name, pairs); + + for (size_t 
iterations = 0; iterations < 10; ++iterations) + check_scan_keys(store, pairs, make_rng_list(&rng, rng_range_key)); + + free(store); + free(pairs); + } + + return true; +} + + +// ----------------------------------------------------------------------------- +// scan_vals +// ----------------------------------------------------------------------------- + +static void check_scan_vals( + struct rill_store *store, struct rill_pairs *pairs, struct list *vals) +{ + struct rill_pairs *result = rill_pairs_new(128); + rill_pairs_compact(pairs); + + result = rill_store_scan_vals(store, vals->data, vals->len, result); + + struct rill_pairs *exp = rill_pairs_new(128); + for (size_t i = 0; i < pairs->len; ++i) { + for (size_t j = 0; j < vals->len; ++j) { + struct rill_kv *kv = &pairs->data[i]; + if (kv->val == vals->data[j]) exp = rill_pairs_push(exp, kv->key, kv->val); + } + } + + assert(exp->len == result->len); + for (size_t i = 0; i < exp->len; ++i) + assert(!rill_kv_cmp(&exp->data[i], &result->data[i])); + + free(exp); + free(result); + free(vals); +} + +bool test_scan_vals(void) +{ + static const char *name = "test.store.scan_vals"; + + { + struct rill_pairs *pairs = make_pair(kv(2, 20)); + struct rill_store *store = make_store(name, pairs); + + check_scan_vals(store, pairs, make_list(10)); + check_scan_vals(store, pairs, make_list(20)); + check_scan_vals(store, pairs, make_list(30)); + check_scan_vals(store, pairs, make_list(10, 20)); + check_scan_vals(store, pairs, make_list(20, 30)); + check_scan_vals(store, pairs, make_list(10, 30)); + + free(store); + free(pairs); + } + + + { + struct rill_pairs *pairs = make_pair(kv(2, 20), kv(3, 20), kv(3, 30), kv(4, 40)); + struct rill_store *store = make_store(name, pairs); + + check_scan_vals(store, pairs, make_list(10)); + check_scan_vals(store, pairs, make_list(20)); + check_scan_vals(store, pairs, make_list(30)); + check_scan_vals(store, pairs, make_list(50)); + check_scan_vals(store, pairs, make_list(10, 20)); + 
check_scan_vals(store, pairs, make_list(20, 40)); + check_scan_vals(store, pairs, make_list(20, 50)); + check_scan_vals(store, pairs, make_list(20, 30, 40)); + + free(store); + free(pairs); + } + + { + struct rng rng = rng_make(0); + struct rill_pairs *pairs = make_rng_pairs(&rng); + struct rill_store *store = make_store(name, pairs); + + for (size_t iterations = 0; iterations < 10; ++iterations) + check_scan_vals(store, pairs, make_rng_list(&rng, rng_range_val)); + + free(store); + free(pairs); + } + + return true; +} + + +// ----------------------------------------------------------------------------- +// main +// ----------------------------------------------------------------------------- + +int main(int argc, char **argv) +{ + (void) argc, (void) argv; + bool ret = true; + + ret = ret && test_query_key(); + ret = ret && test_scan_keys(); + ret = ret && test_scan_vals(); + + return ret ? 0 : 1; +} diff --git a/test/test.h b/test/test.h index 34ee09d..887cd5e 100644 --- a/test/test.h +++ b/test/test.h @@ -43,6 +43,23 @@ struct rill_pairs *make_pair_impl(const struct rill_kv *kv, size_t len) return pairs; } +enum { rng_range_key = 500, rng_range_val = 100 }; + +struct rill_pairs *make_rng_pairs(struct rng *rng) +{ + enum { len = 1000 }; + struct rill_pairs *pairs = rill_pairs_new(len); + + for (size_t i = 0; i < len; ++i) { + uint64_t key = rng_gen_range(rng, 1, rng_range_key); + uint64_t val = rng_gen_range(rng, 1, rng_range_val); + pairs = rill_pairs_push(pairs, key, val); + assert(pairs); + } + + return pairs; +} + // ----------------------------------------------------------------------------- // rm From 328d1e5bbfaa463968dbaba0f45c912f8f223d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 24 Oct 2017 11:21:56 -0400 Subject: [PATCH 36/91] Pre-check vals before store decode --- src/store.c | 2 ++ src/vals.c | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/store.c b/src/store.c index 708cafb..bf87608 100644 --- 
a/src/store.c +++ b/src/store.c @@ -565,6 +565,8 @@ struct rill_pairs *rill_store_scan_vals( const rill_val_t *vals, size_t len, struct rill_pairs *out) { + if (!vals_contains(store->vals, vals, len)) return out; + vma_will_need(store); struct rill_kv kv = {0}; diff --git a/src/vals.c b/src/vals.c index edc99b6..bb078d9 100644 --- a/src/vals.c +++ b/src/vals.c @@ -110,3 +110,14 @@ static struct vals *vals_merge(struct vals *vals, struct vals *merge) vals_compact(vals); return vals; } + +static bool vals_contains( + struct vals *vals, const rill_val_t *items, size_t len) +{ + for (size_t i = 0, j = 0; i < len; ++i) { + while (j < vals->len && vals->data[j] < items[i]) j++; + if (vals->data[j] == items[i]) return true; + } + + return false; +} From cf14210999a3713464815863a7828ecc4dd883b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Thu, 2 Nov 2017 16:24:03 -0400 Subject: [PATCH 37/91] Add all function to query --- src/pairs.c | 70 +++++++++++++---------------------------------------- src/query.c | 34 ++++++++++++++++++++++++++ src/rill.h | 15 ++++-------- src/store.c | 5 ++++ 4 files changed, 61 insertions(+), 63 deletions(-) diff --git a/src/pairs.c b/src/pairs.c index 304a221..db8acc0 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -56,22 +56,28 @@ void rill_pairs_clear(struct rill_pairs *pairs) pairs->len = 0; } +struct rill_pairs *rill_pairs_reserve(struct rill_pairs *pairs, size_t cap) +{ + if (rill_likely(cap <= pairs->cap)) return pairs; + cap = adjust_cap(pairs->cap, cap); + + pairs = realloc(pairs, sizeof(*pairs) + cap * sizeof(pairs->data[0])); + if (!pairs) { + rill_fail("unable to realloc pairs: cap=%lu", cap); + return NULL; + } + + pairs->cap = cap; + return pairs; +} + struct rill_pairs *rill_pairs_push( struct rill_pairs *pairs, rill_key_t key, rill_val_t val) { assert(key && val); - if (rill_unlikely(pairs->len + 1 > pairs->cap)) { - size_t cap = adjust_cap(pairs->cap, pairs->len + 1); - - pairs = realloc(pairs, sizeof(*pairs) + 
cap * sizeof(pairs->data[0])); - if (!pairs) { - rill_fail("unable to realloc pairs: cap=%lu", cap); - return NULL; - } - - pairs->cap = cap; - } + pairs = rill_pairs_reserve(pairs, pairs->len + 1); + if (!pairs) return NULL; pairs->data[pairs->len] = (struct rill_kv) { .key = key, .val = val }; pairs->len++; @@ -100,48 +106,6 @@ void rill_pairs_compact(struct rill_pairs *pairs) pairs->len = j + 1; } -struct rill_pairs *rill_pairs_scan_key( - const struct rill_pairs *pairs, - const rill_key_t *keys, size_t len, - struct rill_pairs *out) -{ - struct rill_pairs *result = out; - - for (size_t i = 0; i < pairs->len; ++i) { - const struct rill_kv *kv = &pairs->data[i]; - - for (size_t j = 0; j < len; ++j) { - if (kv->key != keys[j]) continue; - - result = rill_pairs_push(result, kv->key, kv->val); - if (!result) return NULL; - } - } - - return result; -} - -struct rill_pairs *rill_pairs_scan_val( - const struct rill_pairs *pairs, - const rill_val_t *vals, size_t len, - struct rill_pairs *out) -{ - struct rill_pairs *result = out; - - for (size_t i = 0; i < pairs->len; ++i) { - const struct rill_kv *kv = &pairs->data[i]; - - for (size_t j = 0; j < len; ++j) { - if (kv->val != vals[j]) continue; - - result = rill_pairs_push(result, kv->key, kv->val); - if (!result) return NULL; - } - } - - return result; -} - void rill_pairs_print(const struct rill_pairs *pairs) { const rill_key_t no_key = -1ULL; diff --git a/src/query.c b/src/query.c index 2f5470e..2e7373f 100644 --- a/src/query.c +++ b/src/query.c @@ -132,3 +132,37 @@ struct rill_pairs *rill_query_vals( // \todo potentially leaking result return NULL; } + +struct rill_pairs *rill_query_all(const struct rill_query *query) +{ + struct rill_pairs *result = rill_pairs_new(1); + for (size_t i = 0; i < query->len; ++i) { + + size_t pairs = rill_store_pairs(query->list[i]); + result = rill_pairs_reserve(result, result->len + pairs); + if (!result) goto fail_scan; + + struct rill_store_it *it = rill_store_begin(query->list[i]); 
+ if (!it) goto fail_scan; + + struct rill_kv kv; + while (true) { + if (!rill_store_it_next(it, &kv)) { + rill_store_it_free(it); + goto fail_scan; + } + if (rill_kv_nil(&kv)) break; + + result = rill_pairs_push(result, kv.key, kv.val); + } + rill_store_it_free(it); + } + + rill_pairs_compact(result); + return result; + + fail_scan: + free(result); + return NULL; + +} diff --git a/src/rill.h b/src/rill.h index ee03d0b..0c42dd4 100644 --- a/src/rill.h +++ b/src/rill.h @@ -81,21 +81,13 @@ struct rill_pairs *rill_pairs_new(size_t cap); void rill_pairs_free(struct rill_pairs *pairs); void rill_pairs_clear(struct rill_pairs *pairs); +struct rill_pairs *rill_pairs_reserve(struct rill_pairs *pairs, size_t cap); + struct rill_pairs *rill_pairs_push( struct rill_pairs *pairs, rill_key_t key, rill_val_t val); void rill_pairs_compact(struct rill_pairs *pairs); -struct rill_pairs *rill_pairs_scan_key( - const struct rill_pairs *pairs, - const rill_key_t *keys, size_t len, - struct rill_pairs *out); - -struct rill_pairs *rill_pairs_scan_val( - const struct rill_pairs *pairs, - const rill_val_t *vals, size_t len, - struct rill_pairs *out); - void rill_pairs_print(const struct rill_pairs *pairs); @@ -125,6 +117,7 @@ const char * rill_store_file(const struct rill_store *store); rill_ts_t rill_store_ts(const struct rill_store *store); size_t rill_store_quant(const struct rill_store *store); size_t rill_store_vals(const struct rill_store *store); +size_t rill_store_pairs(const struct rill_store *store); struct rill_pairs *rill_store_query_key( struct rill_store *store, rill_key_t key, struct rill_pairs *out); @@ -192,6 +185,8 @@ struct rill_pairs *rill_query_vals( const rill_val_t *vals, size_t len, struct rill_pairs *out); +struct rill_pairs *rill_query_all(const struct rill_query *query); + // ----------------------------------------------------------------------------- // misc diff --git a/src/store.c b/src/store.c index bf87608..b405367 100644 --- a/src/store.c +++ 
b/src/store.c @@ -498,6 +498,11 @@ size_t rill_store_vals(const struct rill_store *store) return store->vals->len; } +size_t rill_store_pairs(const struct rill_store *store) +{ + return store->head->pairs; +} + struct rill_pairs *rill_store_query_key( struct rill_store *store, rill_key_t key, struct rill_pairs *out) From 0dfb73a39c36f18e0beb49e7c503256bc03d56a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 3 Nov 2017 08:28:49 -0400 Subject: [PATCH 38/91] Add invert utility --- compile.sh | 1 + src/rill_invert.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 src/rill_invert.c diff --git a/compile.sh b/compile.sh index 5928a4c..9bb2626 100755 --- a/compile.sh +++ b/compile.sh @@ -36,6 +36,7 @@ $CC -o rill_dump "${PREFIX}/src/rill_dump.c" librill.a $CFLAGS $CC -o rill_query "${PREFIX}/src/rill_query.c" librill.a $CFLAGS $CC -o rill_historical_dump "${PREFIX}/src/rill_historical_dump.c" librill.a $CFLAGS $CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS +$CC -o rill_invert "${PREFIX}/src/rill_invert.c" librill.a $CFLAGS $CC -o test_indexer "${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && ./test_indexer $CC -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder diff --git a/src/rill_invert.c b/src/rill_invert.c new file mode 100644 index 0000000..03544d2 --- /dev/null +++ b/src/rill_invert.c @@ -0,0 +1,86 @@ +/* rill_invert.c + Rémi Attab (remi.attab@gmail.com), 02 Nov 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" +#include "utils.h" + +#include +#include +#include + +enum { shards = 10 }; + +static struct rill_pairs *make_pairs(struct rill_store *store) +{ + size_t cap = (rill_store_pairs(store) - 1) / shards + 1; + size_t len = sizeof(struct rill_pairs) + cap * sizeof(struct rill_kv); + len = (len / page_len + 1) * page_len; + + // glibc's malloc chokes on large allocation for whatever reason... 
+ struct rill_pairs *pairs = + mmap(0, len, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (pairs == MAP_FAILED) { + rill_fail_errno("unable mmap '%p' bytes", (void *) len); + rill_exit(1); + } + + pairs->cap = cap; + return pairs; +} + +int main(int argc, const char **argv) +{ + if (argc != 3) { + fprintf(stderr, "rill_invert \n"); + exit(1); + } + + fprintf(stderr, "opening '%s'...\n", argv[1]); + struct rill_store *src = rill_store_open(argv[1]); + if (!src) rill_exit(1); + + struct rill_pairs *pairs = make_pairs(src); + + size_t shard_i = 0; + struct rill_store *shard[shards] = {0}; + + rill_ts_t ts = rill_store_ts(src), quant = rill_store_quant(src); + + fprintf(stderr, "reading '%lu' pairs in shards of '%lu'...\n", + rill_store_pairs(src), pairs->cap); + + struct rill_kv kv; + struct rill_store_it *it = rill_store_begin(src); + + while (true) { + if (!rill_store_it_next(it, &kv)) rill_exit(1); + if (rill_kv_nil(&kv)) break; + + pairs = rill_pairs_push(pairs, kv.val, kv.key); + + if (pairs->len == pairs->cap) { + printf("writing shard '%lu'\n", shard_i); + + rill_pairs_compact(pairs); + + char path[256]; + snprintf(path, sizeof(path), "%s.%lu", argv[2], shard_i); + if (!rill_store_write(path, ts, quant, pairs)) rill_exit(1); + + shard[shard_i] = rill_store_open(path); + if (!shard[shard_i]) rill_exit(1); + + shard_i++; + rill_pairs_clear(pairs); + } + } + + fprintf(stderr, "merging...\n"); + rill_store_merge(argv[2], ts, quant, shard, shards); + + for (size_t i = 0; i < shards; ++i) rill_store_rm(shard[i]); + + return 0; +} From 7a980c57c39dacd9c69da47acdfbf79c99032f47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 3 Nov 2017 11:13:15 -0400 Subject: [PATCH 39/91] Switch to boring binary search --- src/index.c | 38 +++++++++++++++----------------------- test/indexer_test.c | 1 - 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/src/index.c b/src/index.c index 64fa52e..da92200 100644 --- a/src/index.c +++ 
b/src/index.c @@ -17,7 +17,7 @@ struct rill_packed index_kv struct rill_packed index { uint64_t len; - uint64_t slope; + uint64_t __unused; // kept for backwards compatibility struct index_kv data[]; }; @@ -68,39 +68,31 @@ static size_t indexer_write(struct indexer *indexer, struct index *index) { index->len = indexer->len; - uint64_t min = indexer->kvs[0].key; - uint64_t max = indexer->kvs[indexer->len - 1].key; - index->slope = (max - min) / indexer->len; - if (!index->slope) index->slope = 1; - size_t len = indexer->len * sizeof(indexer->kvs[0]); memcpy(index->data, indexer->kvs, len); return sizeof(*index) + len; } -// One pass interpolation search. We assume that the keys are hashes and -// therefore uniformly distributed. So a single jump should get us close enough -// to our goal. +// RIP fancy pants interpolation search :( static bool index_find( struct index *index, rill_key_t key, size_t *key_idx, uint64_t *off) { - size_t i = (key - index->data[0].key) / index->slope; - if (i >= index->len) i = index->len - 1; - - while (i && key < index->data[i].key) i--; - - for (; i < index->len; ++i) { - struct index_kv *kv = &index->data[i]; - if (key < kv->key) break; - if (key != kv->key) continue; - - *key_idx = i; - *off = kv->off; - return true; + size_t idx = 0; + size_t len = index->len; + struct index_kv *low = index->data; + + while (len > 1) { + size_t mid = len / 2; + if (key < low[mid].key) len = mid; + else { low += mid; len -= mid; idx += mid;} } - return false; + struct index_kv *kv = &index->data[idx]; + if (kv->key != key) return false; + *key_idx = idx; + *off = kv->off; + return true; } static rill_key_t index_get(struct index *index, size_t i) diff --git a/test/indexer_test.c b/test/indexer_test.c index 05026b4..db8c05f 100644 --- a/test/indexer_test.c +++ b/test/indexer_test.c @@ -24,7 +24,6 @@ bool test_indexer_build(void) size_t n_written = indexer_write(indexer, index); assert(n_written == size); - assert(index->slope == (data[CAP - 1] - 
data[0]) / CAP); indexer_free(indexer); From 49e2c688f1becf4aed63f39a0ae4bcad88d0764e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 3 Nov 2017 14:40:27 -0400 Subject: [PATCH 40/91] Improve invert utility --- src/rill_invert.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/rill_invert.c b/src/rill_invert.c index 03544d2..a08146c 100644 --- a/src/rill_invert.c +++ b/src/rill_invert.c @@ -54,19 +54,22 @@ int main(int argc, const char **argv) struct rill_kv kv; struct rill_store_it *it = rill_store_begin(src); - while (true) { - if (!rill_store_it_next(it, &kv)) rill_exit(1); - if (rill_kv_nil(&kv)) break; + printf("%lu: reading...\n", shard_i); - pairs = rill_pairs_push(pairs, kv.val, kv.key); + do { + if (!rill_store_it_next(it, &kv)) rill_exit(1); + if (!rill_kv_nil(&kv)) + pairs = rill_pairs_push(pairs, kv.val, kv.key); - if (pairs->len == pairs->cap) { - printf("writing shard '%lu'\n", shard_i); + if (pairs->len == pairs->cap || rill_kv_nil(&kv)) { + printf("%lu: compacting...\n", shard_i); rill_pairs_compact(pairs); char path[256]; snprintf(path, sizeof(path), "%s.%lu", argv[2], shard_i); + printf("%lu: writing '%s'...\n", shard_i, path); + if (!rill_store_write(path, ts, quant, pairs)) rill_exit(1); shard[shard_i] = rill_store_open(path); @@ -74,11 +77,13 @@ int main(int argc, const char **argv) shard_i++; rill_pairs_clear(pairs); + printf("%lu: reading...\n", shard_i); } - } - fprintf(stderr, "merging...\n"); - rill_store_merge(argv[2], ts, quant, shard, shards); + } while (!rill_kv_nil(&kv)); + + fprintf(stderr, "merging to '%s'...\n", argv[2]); + if (!rill_store_merge(argv[2], ts, quant, shard, shards)) rill_exit(1); for (size_t i = 0; i < shards; ++i) rill_store_rm(shard[i]); From 328d8f37d481cc3770b1f788b637c67cabff811e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 7 Nov 2017 11:15:03 -0500 Subject: [PATCH 41/91] Add missing encoder error messages --- 
src/coder.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/coder.c b/src/coder.c index 98e7e13..bb558c7 100644 --- a/src/coder.c +++ b/src/coder.c @@ -77,7 +77,11 @@ static uint64_t coder_off(struct encoder *coder) static inline bool coder_write_sep(struct encoder *coder) { - if (rill_unlikely(coder->it + 1 > coder->end)) return false; + if (rill_unlikely(coder->it + 1 > coder->end)) { + rill_fail("not enough space to write sep: %p + 1 > %p\n", + (void *) coder->it, (void *) coder->end); + return false; + } *coder->it = 0; coder->it++; @@ -92,7 +96,11 @@ static inline bool coder_write_val(struct encoder *coder, rill_val_t val) uint8_t buffer[coder_max_val_len]; size_t len = leb128_encode(buffer, val) - buffer; - if (rill_unlikely(coder->it + len > coder->end)) return false; + if (rill_unlikely(coder->it + len > coder->end)) { + rill_fail("not enough space to write val: %p + %lu > %p\n", + (void *) coder->it, len, (void *) coder->end); + return false; + } memcpy(coder->it, buffer, len); coder->it += len; From 71d69e11f4fbb8cb806606206983ca7b93648620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 7 Nov 2017 11:18:31 -0500 Subject: [PATCH 42/91] Fix cap calculation for store merge --- src/store.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/store.c b/src/store.c index b405367..d43ea30 100644 --- a/src/store.c +++ b/src/store.c @@ -387,7 +387,7 @@ bool rill_store_merge( { assert(list_len > 1); - size_t cap = 0, pairs = 0; + size_t pairs = 0; struct vals *vals = NULL; struct it { struct rill_kv kv; @@ -403,15 +403,13 @@ bool rill_store_merge( if (ret) { vals = ret; } else { goto fail_vals; } its[it_len].decoder = store_decoder(list[i]); - - cap += list[i]->vma_len; pairs += list[i]->head->keys; it_len++; } - assert(it_len); struct rill_store store = {0}; + size_t cap = vals_cap(vals) + coder_cap(pairs) + indexer_cap(pairs); if (!writer_open(&store, file, cap, ts, quant)) { 
rill_fail("unable to create '%s'", file); goto fail_open; From aad67165fc11c9c2469651f34c902a534f86e43c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 7 Nov 2017 11:22:57 -0500 Subject: [PATCH 43/91] Fix test rm function to no longer abort --- test/test.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test.h b/test/test.h index 887cd5e..fd82104 100644 --- a/test/test.h +++ b/test/test.h @@ -72,8 +72,7 @@ void rm(const char *path) struct dirent *entry; while (true) { - if ((entry = readdir(dir))) abort(); - else if (!entry) break; + if (!(entry = readdir(dir))) break; else if (entry->d_type != DT_REG) continue; char file[PATH_MAX]; From e14846d78a5f6b50b0a8733fd28fef96ed97a633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 7 Nov 2017 11:24:41 -0500 Subject: [PATCH 44/91] Bump expiration to 15 months --- src/utils.h | 2 +- test/rotate_test.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/utils.h b/src/utils.h index c3be9f3..2e15d85 100644 --- a/src/utils.h +++ b/src/utils.h @@ -60,7 +60,7 @@ enum hours_in_day = 24, days_in_week = 8, // more closely approximates a month weeks_in_month = 4, - months_in_expire = 13, + months_in_expire = 16, sec_secs = 1, min_secs = 60 * sec_secs, diff --git a/test/rotate_test.c b/test/rotate_test.c index e8bfe0a..3b52646 100644 --- a/test/rotate_test.c +++ b/test/rotate_test.c @@ -22,13 +22,13 @@ bool test_rotate(void) { struct rill_acc *acc = rill_acc_open(dir, 1); - for (rill_ts_t ts = 0; ts < 13 * month_secs; ts += step) { + for (rill_ts_t ts = 0; ts < expire_secs; ts += step) { rill_acc_ingest(acc, key, ts + 1); rill_rotate(dir, ts); } rill_acc_close(acc); - rill_rotate(dir, 13 * month_secs); + rill_rotate(dir, expire_secs); } { @@ -37,7 +37,7 @@ bool test_rotate(void) rill_query_close(query); size_t i = 0; - for (rill_ts_t ts = 0; ts < 13 * month_secs; ts += step) { + for (rill_ts_t ts = 0; ts < expire_secs; ts += step) { 
assert(pairs->data[i].key == key); assert(pairs->data[i].val == ts + 1); ++i; @@ -47,7 +47,7 @@ bool test_rotate(void) } for (size_t i = 1; i <= 6; ++i) - rill_rotate(dir, (13 + i) * month_secs); + rill_rotate(dir, (months_in_expire + i) * month_secs); { struct rill_query *query = rill_query_open(dir); From 09e7ca65506c6ecef62867968c6f8fa249b68ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 7 Nov 2017 13:56:33 -0500 Subject: [PATCH 45/91] Fix merge cap calculation (again) --- src/store.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/store.c b/src/store.c index d43ea30..ec8b7b1 100644 --- a/src/store.c +++ b/src/store.c @@ -403,7 +403,7 @@ bool rill_store_merge( if (ret) { vals = ret; } else { goto fail_vals; } its[it_len].decoder = store_decoder(list[i]); - pairs += list[i]->head->keys; + pairs += list[i]->head->pairs; it_len++; } assert(it_len); From 409d7d45b118cbc36c59261f629621e9de3fa7e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Wed, 8 Nov 2017 10:34:53 -0500 Subject: [PATCH 46/91] Dedup filenames when rotating --- src/rotate.c | 58 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/src/rotate.c b/src/rotate.c index 3c28724..53a3be9 100644 --- a/src/rotate.c +++ b/src/rotate.c @@ -11,7 +11,9 @@ #include #include +#include #include +#include // ----------------------------------------------------------------------------- @@ -49,36 +51,62 @@ static ssize_t expire(rill_ts_t now, struct rill_store **list, ssize_t len) return end; } -static struct rill_store *merge( - const char *dir, - rill_ts_t ts, rill_ts_t quant, - struct rill_store **list, size_t len) +static int file_exists(const char *file) { - assert(len > 0); - if (len == 1) { - struct rill_store *result = list[0]; - list[0] = NULL; - return result; - } + struct stat s; + if (!stat(file, &s)) return 1; + if (errno == ENOENT) return 0; + + rill_fail_errno("unable to 
stat '%s'", file); + return -1; +} + +static bool file_name( + const char *dir, rill_ts_t ts, rill_ts_t quant, char *out, size_t len) +{ rill_ts_t month = ts / month_secs; rill_ts_t week = (ts / week_secs) % weeks_in_month; rill_ts_t day = (ts / day_secs) % days_in_week; rill_ts_t hour = (ts / hour_secs) % hours_in_day; - char file[PATH_MAX]; + char base[NAME_MAX]; if (quant == hour_secs) - snprintf(file, sizeof(file), "%s/%05lu-%02lu-%02lu-%02lu.rill", + snprintf(base, sizeof(base), "%s/%05lu-%02lu-%02lu-%02lu.rill", dir, month, week, day, hour); else if (quant == day_secs) - snprintf(file, sizeof(file), "%s/%05lu-%02lu-%02lu.rill", dir, month, week, day); + snprintf(base, sizeof(base), "%s/%05lu-%02lu-%02lu.rill", dir, month, week, day); else if (quant == week_secs) - snprintf(file, sizeof(file), "%s/%05lu-%02lu.rill", dir, month, week); + snprintf(base, sizeof(base), "%s/%05lu-%02lu.rill", dir, month, week); else if (quant == month_secs) - snprintf(file, sizeof(file), "%s/%05lu.rill", dir, month); + snprintf(base, sizeof(base), "%s/%05lu.rill", dir, month); else assert(false); + strncpy(out, base, len < sizeof(base) ? 
len : sizeof(base)); + int ret; + size_t i = 0; + while ((ret = file_exists(out)) == 1) + snprintf(out, len, "%s.%lu", base, i++); + + if (ret == -1) return false; + return true; +} + +static struct rill_store *merge( + const char *dir, + rill_ts_t ts, rill_ts_t quant, + struct rill_store **list, size_t len) +{ + assert(len > 0); + if (len == 1) { + struct rill_store *result = list[0]; + list[0] = NULL; + return result; + } + + char file[PATH_MAX]; + if (!file_name(dir, ts, quant, file, sizeof(file))) return NULL; if (!rill_store_merge(file, ts, quant, list, len)) return NULL; for (size_t i = 0; i < len; ++i) { From e73f69dec3559594821e7cce6139cd10a241c932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 21 Nov 2017 11:50:20 -0500 Subject: [PATCH 47/91] Adjust capacity estimate based on number of values This should cut down the file size by half which should alleviate issues with mmap returning errors due to overly large VMAs. --- src/coder.c | 7 +++++-- src/store.c | 17 ++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/coder.c b/src/coder.c index bb558c7..04ea21b 100644 --- a/src/coder.c +++ b/src/coder.c @@ -64,9 +64,12 @@ struct encoder size_t pairs; }; -static size_t coder_cap(size_t pairs) +static size_t coder_cap(size_t vals, size_t pairs) { - return coder_max_val_len * (pairs + 1); + size_t bytes = 1; + while (vals >= 1UL << (bytes * 7)) bytes++; + + return bytes * (pairs + 1); } static uint64_t coder_off(struct encoder *coder) diff --git a/src/store.c b/src/store.c index ec8b7b1..175a001 100644 --- a/src/store.c +++ b/src/store.c @@ -240,7 +240,8 @@ bool rill_store_rm(struct rill_store *store) static bool writer_open( struct rill_store *store, - const char *file, size_t cap, + const char *file, + struct vals *vals, size_t pairs, rill_ts_t ts, size_t quant) { store->file = file; @@ -251,7 +252,12 @@ static bool writer_open( goto fail_open; } - size_t len = sizeof(struct header) + cap; + size_t len = + 
sizeof(struct header) + + vals_cap(vals) + + coder_cap(vals->len, pairs) + + indexer_cap(pairs); + if (ftruncate(store->fd, len) == -1) { rill_fail_errno("unable to resize '%s'", file); goto fail_truncate; @@ -344,10 +350,8 @@ bool rill_store_write( struct vals *vals = vals_from_pairs(pairs); if (!vals) goto fail_vals; - size_t cap = vals_cap(vals) + coder_cap(pairs->len) + indexer_cap(pairs->len); - struct rill_store store = {0}; - if (!writer_open(&store, file, cap, ts, quant)) { + if (!writer_open(&store, file, vals, pairs->len, ts, quant)) { rill_fail("unable to create '%s'", file); goto fail_open; } @@ -409,8 +413,7 @@ bool rill_store_merge( assert(it_len); struct rill_store store = {0}; - size_t cap = vals_cap(vals) + coder_cap(pairs) + indexer_cap(pairs); - if (!writer_open(&store, file, cap, ts, quant)) { + if (!writer_open(&store, file, vals, pairs, ts, quant)) { rill_fail("unable to create '%s'", file); goto fail_open; } From b248551e1bd2314a022c450847a11ea4f8eaeb83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 21 Nov 2017 16:54:28 -0500 Subject: [PATCH 48/91] Start of a README --- README.md | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..7da05d1 --- /dev/null +++ b/README.md @@ -0,0 +1,125 @@ +# Rill + +A one-off specialized database for a 2 column schema that focuses on compressed +storage and read-only mmap files. + +## Building + +```shell +$ mkdir build && cd build +$ PREFIX=.. ../compile.sh +``` + +Currently only used in the `rill-rs` project which means that no install target +is currently provided. 
Build artifacts are the following: +- `src/rill.h` +- `build/librill.a` + +## Design Space + +- A pair is composed of a `u64` key and a `u64` value +- Key cardinality is in the order of 1 million +- Value cardinality is in the order of 100 million +- Infrequent batch query of keys over entire dataset +- Batch queries must finish within 5 minutes +- Pair ingestion must happen in real-time (order of 100k/sec) +- Pair duplicates are very common +- Expire entire month of data older than 15 months +- Expect around 50 billion unique pairs in a single month +- Servers have around 250Gb of RAM and 2TB of SSD disk space + + +## Architecture + +Rill is split into the following major components: + +- `acc`: real-time data ingestion +- `store`: file storage format +- `rotation`: progressive merging and expiration of store files + +### Ingestion + + +### Storage + +Basic design philosophy: + +- Immutable +- Mutation through merge operation +- All pairs are sorted + + +#### Compression + +The main goal for storage is to fit the entire dataset on the disks of a single +server: + + 50B pairs * 15 months * 16bytes per pair = 12TB of disk space + +Given our 2TB of available disk space, we need to do some compression to store +everything. A general sketch of the compression is as follows: + +- Don't repeat keys +- Uniformize the namespace of the values +- Block encode (LEB128) the uniformized values + +Implementation basically begins by extracting all the unique values in the dataset, +sorting them and storing them in a table. Using this table, we can then encode +indexes into our table instead of the values themselves. This means our +compression is dependent on the cardinality of the value set and not so much the +values themselves. + +Encoding the pairs is a simple scheme of writing the key in full, followed by a +list of all the values associated with that key. The list of values is a +block-encoding (LEB128) of the indexes into the value table.
In other words, the +smaller the cardinality of the set the fewer bytes we'll use on average to write a +value. + +Empirically, we were able to compress a single month of data down to less than +100GB which means that our dataset now sits comfortably on our 2TB disks. + + +#### Index + +We must also be able to quickly query a single key and extract all the +associated values for that key. Our compression requirements puts a bound on the +size of our index. A general sketch of the index is as follows: + +- Don't duplicate keys +- Store the keys along with the offset of their value location in a table +- Search the table via tweaked binary search + +Implementation starts by building a table of all the keys and filling in their +offset as we encode the pairs. We also no longer store the keys with the pairs +as we can simply recover the key for a given list of values via its implicit +index in the file. The index table is stored as is at the end of the file. + +Searching is done via a tweaked binary search over the index table. Empirically +this has proven to be fast enough to meet our 5-minute batch query +requirements. Further optimizations are possible. We've also experimented with a +single pass interpolation search followed by a vectorized linear scan but +changes in the input data meant that the keys were no longer well distributed +which made the approach unusable. + + +#### Stamp + +Safe persistence is accomplished via a pseudo-2-phase commit scheme that uses a +stamp to mark the file as complete. Steps are as follows: + +- Write the entire file +- Flush to disk +- Write a magic stamp value in the header +- Flush to disk + +This guarantees that if the stamp is found at the beginning of the file then the +file has been completely written and persisted to disk. Note that rill relies on +the underlying file system to detect file corruption as no checksums are +computed or maintained.
+ +Note that rill files are frequently deleted after being merged, so the +stamping mechanism is critical to avoid deleting files that were not properly +merged. + + +### Rotation From 48a5c512f67a50f657889571a8aa3da7e0dc846b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Thu, 23 Nov 2017 16:29:20 -0500 Subject: [PATCH 49/91] Tweak the readme --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7da05d1..87ff5e9 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ Basic design philosophy: - Immutable - Mutation through merge operation - All pairs are sorted +- Memory mapped and queried directly. #### Compression @@ -85,7 +86,7 @@ We must also be able to quickly query a single key and extract all the associated values for that key. Our compression requirements puts a bound on the size of our index. A general sketch of the index is as follows: -- Don't duplicate keys +- Don't repeat keys - Store the keys along with the offset of their value location in a table - Search the table via tweaked binary search From 153fbd3c64b57afd0f3425ad3546a7f3df2f3cf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 21 Nov 2017 14:50:54 -0500 Subject: [PATCH 50/91] Add an ingest utility --- compile.sh | 3 +- src/rill_ingest.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 src/rill_ingest.c diff --git a/compile.sh b/compile.sh index 9bb2626..1d112ee 100755 --- a/compile.sh +++ b/compile.sh @@ -34,9 +34,10 @@ ar rcs librill.a $OBJ $CC -o rill_load "${PREFIX}/src/rill_load.c" librill.a $CFLAGS $CC -o rill_dump "${PREFIX}/src/rill_dump.c" librill.a $CFLAGS $CC -o rill_query "${PREFIX}/src/rill_query.c" librill.a $CFLAGS -$CC -o rill_historical_dump "${PREFIX}/src/rill_historical_dump.c" librill.a $CFLAGS $CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS $CC -o rill_invert
"${PREFIX}/src/rill_invert.c" librill.a $CFLAGS +$CC -o rill_ingest "${PREFIX}/src/rill_ingest.c" librill.a $CFLAGS +$CC -o rill_historical_dump "${PREFIX}/src/rill_historical_dump.c" librill.a $CFLAGS $CC -o test_indexer "${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && ./test_indexer $CC -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder diff --git a/src/rill_ingest.c b/src/rill_ingest.c new file mode 100644 index 0000000..91483c9 --- /dev/null +++ b/src/rill_ingest.c @@ -0,0 +1,138 @@ +/* rill_ingest.c + Rémi Attab (remi.attab@gmail.com), 21 Nov 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" +#include "utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +static inline uint64_t endian_btol(uint64_t x) +{ + return __builtin_bswap64(x); +} + +struct rill_store *load_file(const char *file, rill_ts_t ts, rill_ts_t quant) +{ + printf("loading: %s\n", file); + + struct stat st = {0}; + if (stat(file, &st) == -1) { + rill_fail_errno("unable to stat '%s'", file); + rill_exit(1); + } + + int fd = open(file, O_RDONLY); + if (fd == -1) { + rill_fail_errno("unable to open '%s'", file); + rill_exit(1); + } + + const int prot = PROT_READ | PROT_WRITE; + size_t len = to_vma_len(st.st_size); + + void *ptr = mmap(0, len + page_len, prot, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (ptr == MAP_FAILED) { + rill_fail_errno("unable to mmap anon '%p'", (void *) (len + page_len)); + rill_exit(1); + } + + const int flags = MAP_PRIVATE | MAP_FIXED | MAP_POPULATE; + void *data = mmap((uint8_t *)ptr + page_len, len, prot, flags, fd, 0); + if (data == MAP_FAILED) { + rill_fail_errno("unable to mmap fixed '%d'", fd); + rill_exit(1); + } + + struct rill_kv *it = data; + struct rill_kv *end = it + (st.st_size / sizeof(*it)); + for (; it < end; ++it) { + rill_key_t key = endian_btol(it->val); + rill_val_t val = endian_btol(it->key); + *it = (struct rill_kv) { .key = key, .val = val }; + } + + 
struct rill_pairs *pairs = ((struct rill_pairs *)data) - 1; + if (!pairs) rill_exit(1); + + pairs->cap = pairs->len = st.st_size / sizeof(pairs->data[0]); + rill_pairs_compact(pairs); + + char file_rill[PATH_MAX]; + snprintf(file_rill, sizeof(file_rill), "%s.rill", file); + + if (!rill_store_write(file_rill, ts, quant, pairs)) rill_exit(1); + munmap(ptr, page_len); + munmap(data, len); + + struct rill_store *store = rill_store_open(file_rill); + if (!store) rill_exit(1); + return store; +} + +void usage() +{ + fprintf(stderr, "rill_ingest -t -q -o \n"); + exit(1); +} + +int main(int argc, char **argv) +{ + rill_ts_t ts = 0; + rill_ts_t quant = 0; + char *output = NULL; + + int opt = 0; + while ((opt = getopt(argc, argv, "+t:q:o:")) != -1) { + switch (opt) { + case 't': ts = atol(optarg); break; + case 'q': quant = atol(optarg); break; + case 'o': output = optarg; break; + default: usage(); + } + } + + if (!ts || !quant || !output) usage(); + if (optind >= argc) usage(); + + struct rill_store *merge[64] = {0}; + + for (; optind < argc; optind++) { + struct rill_store *store = load_file(argv[optind], ts, quant); + for (size_t i = 0; i < 64; ++i) { + if (!merge[i]) { merge[i] = store; break; } + + printf("merging: %lu\n", i); + + char out[PATH_MAX]; + snprintf(out, sizeof(out), "%s.rill.%lu", argv[optind], i); + + struct rill_store *list[2] = { store, merge[i] }; + if (!rill_store_merge(out, ts, quant, list, 2)) rill_exit(1); + + store = rill_store_open(out); + if (!store) rill_exit(1); + + merge[i] = NULL; + rill_store_rm(list[0]); + rill_store_rm(list[1]); + + } + } + + if (!rill_store_merge(output, ts, quant, merge, 64)) rill_exit(1); + for (size_t i = 0; i < 64; ++i) { + if (!merge[i]) continue; + rill_store_rm(merge[i]); + } + + return 0; +} From 6b828b1dff253887895512eb442cfd99108c9e99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Tue, 21 Nov 2017 15:53:15 -0500 Subject: [PATCH 51/91] Improve dump output --- src/store.c | 8 +++----- 1 file 
changed, 3 insertions(+), 5 deletions(-) diff --git a/src/store.c b/src/store.c index 175a001..7a98107 100644 --- a/src/store.c +++ b/src/store.c @@ -637,10 +637,8 @@ bool rill_store_it_next(struct rill_store_it *it, struct rill_kv *kv) void rill_store_print_head(struct rill_store *store) { - printf("%s\n", store->file); - printf("magic: 0x%x\n", store->head->magic); + printf("file: %s\n", store->file); printf("version: %u\n", store->head->version); - printf("stamp: %p\n", (void *) store->head->stamp); printf("ts: %lu\n", store->head->ts); printf("quant: %lu\n", store->head->quant); printf("keys: %lu\n", store->head->keys); @@ -662,10 +660,10 @@ void rill_store_print(struct rill_store *store) if (!coder_decode(&coder, &kv)) goto fail; if (rill_kv_nil(&kv)) break; - if (kv.key == key) printf(", %lu", kv.val); + if (kv.key == key) printf(", %p", (void *) kv.val); else { if (key != no_key) printf("]\n"); - printf("%p: [ %lu", (void *) kv.key, kv.val); + printf("%p: [ %p", (void *) kv.key, (void *) kv.val); key = kv.key; } } From 0bc059bd30d37094f5b8ff77669f7f216b68af89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Thu, 23 Nov 2017 16:33:51 -0500 Subject: [PATCH 52/91] Add merge utility --- compile.sh | 1 + src/rill_merge.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 src/rill_merge.c diff --git a/compile.sh b/compile.sh index 1d112ee..a489058 100755 --- a/compile.sh +++ b/compile.sh @@ -37,6 +37,7 @@ $CC -o rill_query "${PREFIX}/src/rill_query.c" librill.a $CFLAGS $CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS $CC -o rill_invert "${PREFIX}/src/rill_invert.c" librill.a $CFLAGS $CC -o rill_ingest "${PREFIX}/src/rill_ingest.c" librill.a $CFLAGS +$CC -o rill_merge "${PREFIX}/src/rill_merge.c" librill.a $CFLAGS $CC -o rill_historical_dump "${PREFIX}/src/rill_historical_dump.c" librill.a $CFLAGS $CC -o test_indexer "${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && 
./test_indexer diff --git a/src/rill_merge.c b/src/rill_merge.c new file mode 100644 index 0000000..b4ac79a --- /dev/null +++ b/src/rill_merge.c @@ -0,0 +1,72 @@ +/* rill_merge.c + Rémi Attab (remi.attab@gmail.com), 23 Nov 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" +#include "utils.h" + +#include +#include +#include + + +void usage() +{ + fprintf(stderr, "rill_ingest -t -q -o \n"); + exit(1); +} + +int main(int argc, char **argv) +{ + rill_ts_t ts = 0; + rill_ts_t quant = 0; + char *output = NULL; + + int opt = 0; + while ((opt = getopt(argc, argv, "+t:q:o:")) != -1) { + switch (opt) { + case 't': ts = atol(optarg); break; + case 'q': quant = atol(optarg); break; + case 'o': output = optarg; break; + default: usage(); + } + } + + if (!ts || !quant || !output) usage(); + if (optind >= argc) usage(); + + struct rill_store *merge[64] = {0}; + + for (; optind < argc; optind++) { + struct rill_store *store = rill_store_open(argv[optind]); + if (!store) rill_exit(1); + + for (size_t i = 0; i < 64; ++i) { + if (!merge[i]) { merge[i] = store; break; } + + printf("merging: %lu\n", i); + + char out[PATH_MAX]; + snprintf(out, sizeof(out), "%s.rill.%lu", argv[optind], i); + + struct rill_store *list[2] = { store, merge[i] }; + if (!rill_store_merge(out, ts, quant, list, 2)) rill_exit(1); + + store = rill_store_open(out); + if (!store) rill_exit(1); + + merge[i] = NULL; + rill_store_rm(list[0]); + rill_store_rm(list[1]); + } + } + + if (!rill_store_merge(output, ts, quant, merge, 64)) rill_exit(1); + for (size_t i = 0; i < 64; ++i) { + if (!merge[i]) continue; + rill_store_rm(merge[i]); + } + + return 0; +} From 9c9d06bb923f672e7c8d2c0c42d156ec8fe123bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sun, 26 Nov 2017 01:11:37 -0500 Subject: [PATCH 53/91] Improve merge --- src/rill_merge.c | 39 ++++++++++----------------------------- src/store.c | 42 +++++++++++++++++++++++++----------------- 2 files changed, 35 insertions(+), 
46 deletions(-) diff --git a/src/rill_merge.c b/src/rill_merge.c index b4ac79a..3d38366 100644 --- a/src/rill_merge.c +++ b/src/rill_merge.c @@ -36,37 +36,18 @@ int main(int argc, char **argv) if (!ts || !quant || !output) usage(); if (optind >= argc) usage(); - struct rill_store *merge[64] = {0}; - - for (; optind < argc; optind++) { - struct rill_store *store = rill_store_open(argv[optind]); - if (!store) rill_exit(1); - - for (size_t i = 0; i < 64; ++i) { - if (!merge[i]) { merge[i] = store; break; } - - printf("merging: %lu\n", i); - - char out[PATH_MAX]; - snprintf(out, sizeof(out), "%s.rill.%lu", argv[optind], i); - - struct rill_store *list[2] = { store, merge[i] }; - if (!rill_store_merge(out, ts, quant, list, 2)) rill_exit(1); - - store = rill_store_open(out); - if (!store) rill_exit(1); - - merge[i] = NULL; - rill_store_rm(list[0]); - rill_store_rm(list[1]); - } + size_t len = argc - optind; + struct rill_store *stores[len]; + for (size_t i = 0; i < len; i++, optind++) { + stores[i] = rill_store_open(argv[optind]); + if (!stores[i]) rill_exit(1); } - if (!rill_store_merge(output, ts, quant, merge, 64)) rill_exit(1); - for (size_t i = 0; i < 64; ++i) { - if (!merge[i]) continue; - rill_store_rm(merge[i]); - } + if (!rill_store_merge(output, ts, quant, stores, len)) + rill_exit(1); + + for (size_t i = 0; i < len; ++i) + rill_store_rm(stores[i]); return 0; } diff --git a/src/store.c b/src/store.c index 7a98107..b337472 100644 --- a/src/store.c +++ b/src/store.c @@ -391,12 +391,12 @@ bool rill_store_merge( { assert(list_len > 1); + size_t keys = 0; size_t pairs = 0; struct vals *vals = NULL; - struct it { - struct rill_kv kv; - struct decoder decoder; - } its[list_len]; + + struct decoder decoders[list_len]; + struct rill_kv kvs[list_len]; size_t it_len = 0; for (size_t i = 0; i < list_len; ++i) { @@ -406,8 +406,9 @@ bool rill_store_merge( struct vals *ret = vals_merge(vals, list[i]->vals); if (ret) { vals = ret; } else { goto fail_vals; } - 
its[it_len].decoder = store_decoder(list[i]); + decoders[it_len] = store_decoder(list[i]); pairs += list[i]->head->pairs; + keys += list[i]->head->keys; it_len++; } assert(it_len); @@ -418,11 +419,13 @@ bool rill_store_merge( goto fail_open; } - struct indexer *indexer = indexer_alloc(pairs); + struct indexer *indexer = indexer_alloc(keys); + if (!indexer) goto fail_index; + struct encoder encoder = writer_begin(&store, vals, indexer); for (size_t i = 0; i < it_len; ++i) { - if (!(coder_decode(&its[i].decoder, &its[i].kv))) goto fail_coder; + if (!(coder_decode(&decoders[i], &kvs[i]))) goto fail_coder; } struct rill_kv prev = {0}; @@ -430,21 +433,25 @@ bool rill_store_merge( while (it_len > 0) { size_t target = 0; for (size_t i = 1; i < it_len; ++i) { - if (rill_kv_cmp(&its[i].kv, &its[target].kv) < 0) + if (rill_kv_cmp(&kvs[i], &kvs[target]) < 0) target = i; } - struct it *it = &its[target]; - if (rill_likely(rill_kv_nil(&prev) || rill_kv_cmp(&prev, &it->kv) < 0)) { - prev = it->kv; - if (!coder_encode(&encoder, &it->kv)) goto fail_coder; + struct rill_kv *kv = &kvs[target]; + struct decoder *decoder = &decoders[target]; + if (rill_likely(rill_kv_nil(&prev) || rill_kv_cmp(&prev, kv) < 0)) { + if (!coder_encode(&encoder, kv)) goto fail_coder; + prev = *kv; } - if (!coder_decode(&it->decoder, &it->kv)) goto fail_coder; - if (rill_unlikely(rill_kv_nil(&it->kv))) { - memmove(its + target, - its + target + 1, - (it_len - target - 1) * sizeof(its[0])); + if (!coder_decode(decoder, kv)) goto fail_coder; + if (rill_unlikely(rill_kv_nil(kv))) { + memmove(kvs + target, + kvs + target + 1, + (it_len - target - 1) * sizeof(kvs[0])); + memmove(decoders + target, + decoders + target + 1, + (it_len - target - 1) * sizeof(decoders[0])); it_len--; } } @@ -468,6 +475,7 @@ bool rill_store_merge( coder_close(&encoder); writer_close(&store, indexer, 0); indexer_free(indexer); + fail_index: fail_open: fail_vals: free(vals); From 5ec03fcbf5098eea422afb9101bdb63b6d5a5ca8 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 27 Nov 2017 14:58:49 -0500 Subject: [PATCH 54/91] Improve the rill_dump util --- compile.sh | 1 - src/rill.h | 7 +- src/rill_dump.c | 89 ++++++++++++++----- src/rill_historical_dump.c | 172 ------------------------------------- src/rill_query.c | 38 +++++++- src/store.c | 61 +++++-------- 6 files changed, 127 insertions(+), 241 deletions(-) delete mode 100644 src/rill_historical_dump.c diff --git a/compile.sh b/compile.sh index a489058..8027cf6 100755 --- a/compile.sh +++ b/compile.sh @@ -38,7 +38,6 @@ $CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS $CC -o rill_invert "${PREFIX}/src/rill_invert.c" librill.a $CFLAGS $CC -o rill_ingest "${PREFIX}/src/rill_ingest.c" librill.a $CFLAGS $CC -o rill_merge "${PREFIX}/src/rill_merge.c" librill.a $CFLAGS -$CC -o rill_historical_dump "${PREFIX}/src/rill_historical_dump.c" librill.a $CFLAGS $CC -o test_indexer "${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && ./test_indexer $CC -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder diff --git a/src/rill.h b/src/rill.h index 0c42dd4..920a0a4 100644 --- a/src/rill.h +++ b/src/rill.h @@ -114,8 +114,10 @@ bool rill_store_merge( bool rill_store_rm(struct rill_store *store); const char * rill_store_file(const struct rill_store *store); +unsigned rill_store_version(const struct rill_store *store); rill_ts_t rill_store_ts(const struct rill_store *store); size_t rill_store_quant(const struct rill_store *store); +size_t rill_store_keys(const struct rill_store *store); size_t rill_store_vals(const struct rill_store *store); size_t rill_store_pairs(const struct rill_store *store); @@ -130,6 +132,8 @@ struct rill_pairs *rill_store_scan_vals( const rill_val_t *vals, size_t len, struct rill_pairs *out); +size_t rill_store_dump_keys( + const struct rill_store *store, rill_val_t *out, size_t cap); size_t rill_store_dump_vals( const struct rill_store *store, rill_val_t *out, size_t cap); @@ 
-137,9 +141,6 @@ struct rill_store_it *rill_store_begin(struct rill_store *store); void rill_store_it_free(struct rill_store_it *it); bool rill_store_it_next(struct rill_store_it *it, struct rill_kv *kv); -void rill_store_print_head(struct rill_store *store); -void rill_store_print(struct rill_store *store); - // ----------------------------------------------------------------------------- // acc diff --git a/src/rill_dump.c b/src/rill_dump.c index f38ec3c..de9e462 100644 --- a/src/rill_dump.c +++ b/src/rill_dump.c @@ -6,38 +6,83 @@ #include "rill.h" #include "utils.h" -#include +#include #include -#include -#include +#include + +void usage() +{ + fprintf(stderr, "rill_dump [-h] [-k] [-v] [-p] \n"); + exit(1); +} int main(int argc, char **argv) { - const char *file = NULL; - bool header_only = false; - - switch (argc) { - case 3: - if (strcmp(argv[1], "-h") != 0) { - printf("unknown arg '%s'", argv[1]); - return 1; + bool header = false; + bool keys = false; + bool vals = false; + bool pairs = false; + + int opt = 0; + while ((opt = getopt(argc, argv, "+hkvp")) != -1) { + switch (opt) { + case 'h': header = true; break; + case 'k': keys = true; break; + case 'v': vals = true; break; + case 'p': pairs = true; break; + default: + fprintf(stderr, "unknown argument: %c\n", opt); + usage(); } - header_only = true; - case 2: - file = argv[argc - 1]; - break; - default: - printf("you done goofed mate\n"); - return 1; } - struct rill_store *store = rill_store_open(file); + if (!header && !keys && !vals && !pairs) usage(); + if (optind >= argc) usage(); + + struct rill_store *store = rill_store_open(argv[optind]); if (!store) rill_exit(1); - rill_store_print_head(store); - if (!header_only) rill_store_print(store); + if (header) { + printf("file: %s\n", rill_store_file(store)); + printf("version: %u\n", rill_store_version(store)); + printf("ts: %lu\n", rill_store_ts(store)); + printf("quant: %lu\n", rill_store_quant(store)); + printf("keys: %lu\n", rill_store_keys(store)); 
+ printf("vals: %lu\n", rill_store_vals(store)); + printf("pairs: %lu\n", rill_store_pairs(store)); + } - rill_store_close(store); + if (keys) { + size_t key_len = rill_store_keys(store); + rill_key_t *keys = calloc(key_len, sizeof(*keys)); + (void) rill_store_dump_keys(store, keys, key_len); + printf("keys:\n"); + for (size_t i = 0; i < key_len; ++i) + printf(" %p\n", (void *) keys[i]); + } + + if (vals) { + size_t val_len = rill_store_vals(store); + rill_val_t *vals = calloc(val_len, sizeof(*vals)); + (void) rill_store_dump_vals(store, vals, val_len); + + printf("vals:\n"); + for (size_t i = 0; i < val_len; ++i) + printf(" %p\n", (void *) vals[i]); + } + + if (pairs) { + struct rill_kv kv; + struct rill_store_it *it = rill_store_begin(store); + + printf("pairs:\n"); + while (rill_store_it_next(it, &kv)) + printf(" %p %p\n", (void *) kv.key, (void *) kv.val); + + rill_store_it_free(it); + } + + rill_store_close(store); return 0; } diff --git a/src/rill_historical_dump.c b/src/rill_historical_dump.c deleted file mode 100644 index 9038784..0000000 --- a/src/rill_historical_dump.c +++ /dev/null @@ -1,172 +0,0 @@ -/* rill_historical_dump.c - Rémi Attab (remi.attab@gmail.com), 22 Sep 2017 - FreeBSD-style copyright and disclaimer apply -*/ - - -#include "rill.h" -#include "utils.h" -#include "htable.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static const uint64_t val_mask = 1UL << 63; - -void read_utf8(int fd, char *out, size_t to_read) -{ - size_t len = 0; - for (size_t i = 0; i < to_read; ++i) { - uint8_t c; - assert(read(fd, &c, sizeof(c)) > 0); - out[len++] = c; - - size_t extra = 0; - if (c >= 0xc0) extra++; - if (c >= 0xe0) extra++; - for (size_t j = 0; j < extra; ++j) { - assert(read(fd, &c, sizeof(c)) > 0); - out[len++] = c; - } - } - - out[len++] = 0; -} - -static void read_table(const char *file, struct htable *table) -{ - htable_reset(table); - - int fd = open(file, O_RDONLY); - if (fd == -1) { - 
fprintf(stderr, "file not there %s: %s", file, strerror(errno)); - abort(); - } - - ssize_t ret; - while (true) { - uint64_t len = 0; - assert((ret = read(fd, &len, sizeof(len))) >= 0); - if (!ret || !len) break; - - char *name = calloc(len * 4 + 1, sizeof(*name)); - //read_utf8(fd, name, len); - assert(read(fd, name, len) > 0); - - uint64_t key = 0; - assert(read(fd, &key, sizeof(key)) > 0); - - uint64_t type = 0; - assert(read(fd, &type, sizeof(type)) > 0); - - assert ((key & val_mask) == 0); - if (type == 12) key |= val_mask; - else assert(type == 13); - - if (false) { - uint64_t val = key & ~val_mask; - size_t type = key & val_mask ? 1 : 0; - static const char *type_str[] = {"set", "rov"}; - printf("%s %lu -> %s\n", type_str[type], val, name); - } - - assert(htable_put(table, key, (uint64_t)name).ok); - } -} - -void print_val(rill_val_t val, struct htable *table) -{ - uint64_t id = val & ~val_mask; - size_t type = val & val_mask ? 1 : 0; - static const char *type_str[] = {"set", "rov"}; - printf(" %s %lu", type_str[type], id); - - struct htable_ret ret = htable_get(table, val); - if (ret.ok) printf(" -> %s", (char *) ret.value); - printf("\n"); -} - -void dump_vals(const char *file, struct htable *table) -{ - struct rill_store *store = rill_store_open(file); - assert(store); - - size_t len = rill_store_vals(store); - rill_val_t *vals = calloc(1, len * sizeof(*vals)); - rill_store_dump_vals(store, vals, len); - - printf("values:\n"); - for (size_t i = 0; i < len; ++i) - print_val(vals[i], table); - - free(vals); -} - -void dump_keys(const char *file, struct htable *table) -{ - struct rill_store *store = rill_store_open(file); - if (!store) rill_exit(1); - - rill_store_print_head(store); - - rill_key_t current = 0; - struct rill_kv kv = {0}; - struct rill_store_it *it = rill_store_begin(store); - while (rill_store_it_next(it, &kv)) { - if (rill_kv_nil(&kv)) break; - - if (kv.key != current) { - current = kv.key; - printf("%p:\n", (void *) kv.key); - } - - 
print_val(kv.val, table); - } - - rill_store_close(store); -} - -int main(int argc, char **argv) -{ - const char *store_file = NULL; - const char *table_file = NULL; - bool vals_dump = false; - - if (argc == 4) { - if (strcmp(argv[1], "-v") != 0) { - printf("unknown arg '%s'", argv[1]); - return 1; - } - vals_dump = true; - } - - if (argc == 3 || argc == 4) { - table_file = argv[argc - 1]; - store_file = argv[argc - 2]; - } - else { - fprintf(stderr, - "invalid number of arguments\n" - " rill_historical_dump
"); - return 1; - } - - - struct htable table = {0}; - read_table(table_file, &table); - - if (vals_dump) - dump_vals(store_file, &table); - else - dump_keys(store_file, &table); - - return 0; -} diff --git a/src/rill_query.c b/src/rill_query.c index 907c0bf..a265833 100644 --- a/src/rill_query.c +++ b/src/rill_query.c @@ -26,6 +26,38 @@ void usage() exit(1); } +uint64_t read_u64(char *arg) +{ + size_t n = strnlen(arg, 128); + + bool is_hex = false; + if (n > 2 && arg[0] == '0' && arg[1] == 'x') { + if (n > 2 + 16) { + rill_fail("value too big '%s'\n", arg); + rill_exit(1); + } + + is_hex = true; + } + + uint64_t value = 0; + + for (size_t i = 2; i < n; ++i) { + char c = arg[i]; + value *= is_hex ? 16 : 10; + + if (c >= '0' && c <= '9') value += c - '0'; + else if (is_hex && c >= 'a' && c <= 'f') value += c - 'a' + 10; + else if (is_hex && c >= 'A' && c <= 'F') value += c - 'A' + 10; + else { + rill_fail("invalid character '%c' in '%s'\n", c, arg); + rill_exit(1); + } + } + + return value; +} + int main(int argc, char *argv[]) { rill_key_t key = 0; @@ -34,8 +66,8 @@ int main(int argc, char *argv[]) int opt = 0; while ((opt = getopt(argc, argv, "k:v:")) != -1) { switch (opt) { - case 'k': key = atoi(optarg); break; - case 'v': val = atoi(optarg); break; + case 'k': key = read_u64(optarg); break; + case 'v': val = read_u64(optarg); break; default: usage(); exit(1); } } @@ -69,7 +101,7 @@ int main(int argc, char *argv[]) if (!pairs) rill_exit(1); for (size_t i = 0; i < pairs->len; ++i) { - if (key) printf("%lu\n", pairs->data[i].val); + if (key) printf("%p\n", (void *) pairs->data[i].val); else printf("%p\n", (void *) pairs->data[i].key); } diff --git a/src/store.c b/src/store.c index b337472..4b38ad5 100644 --- a/src/store.c +++ b/src/store.c @@ -492,6 +492,11 @@ const char * rill_store_file(const struct rill_store *store) return store->file; } +unsigned rill_store_version(const struct rill_store *store) +{ + return store->head->version; +} + rill_ts_t 
rill_store_ts(const struct rill_store *store) { return store->head->ts; @@ -502,6 +507,11 @@ size_t rill_store_quant(const struct rill_store *store) return store->head->quant; } +size_t rill_store_keys(const struct rill_store *store) +{ + return store->index->len; +} + size_t rill_store_vals(const struct rill_store *store) { return store->vals->len; @@ -620,6 +630,17 @@ size_t rill_store_dump_vals( return len; } +size_t rill_store_dump_keys( + const struct rill_store *store, rill_key_t *out, size_t cap) +{ + size_t len = cap < store->index->len ? cap : store->index->len; + + for (size_t i = 0; i < len; ++i) + out[i] = store->index->data[i].key; + + return len; +} + struct rill_store_it { struct decoder decoder; }; @@ -641,43 +662,3 @@ bool rill_store_it_next(struct rill_store_it *it, struct rill_kv *kv) { return coder_decode(&it->decoder, kv); } - - -void rill_store_print_head(struct rill_store *store) -{ - printf("file: %s\n", store->file); - printf("version: %u\n", store->head->version); - printf("ts: %lu\n", store->head->ts); - printf("quant: %lu\n", store->head->quant); - printf("keys: %lu\n", store->head->keys); - printf("vals: %lu\n", store->vals->len); - printf("pairs: %lu\n", store->head->pairs); -} - -void rill_store_print(struct rill_store *store) -{ - vma_will_need(store); - - struct rill_kv kv = {0}; - struct decoder coder = store_decoder(store); - - const rill_key_t no_key = -1ULL; - rill_key_t key = no_key; - - for (size_t i = 0; i < store->head->pairs; ++i) { - if (!coder_decode(&coder, &kv)) goto fail; - if (rill_kv_nil(&kv)) break; - - if (kv.key == key) printf(", %p", (void *) kv.val); - else { - if (key != no_key) printf("]\n"); - printf("%p: [ %p", (void *) kv.key, (void *) kv.val); - key = kv.key; - } - } - - printf(" ]\n"); - - fail: - vma_dont_need(store); -} From 0ad07b07da4bdad7f87b90746623a255d48317e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Thu, 7 Dec 2017 13:52:31 -0500 Subject: [PATCH 55/91] Grab flock on 
rotation folder --- src/rotate.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/rotate.c b/src/rotate.c index 53a3be9..84bba5f 100644 --- a/src/rotate.c +++ b/src/rotate.c @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -182,8 +183,40 @@ static int store_cmp(const void *l, const void *r) return 0; } +// Note that an flock is released on process termination on linux. This means +// that we don't have to worry about cleaning up in case of segfaults or signal +// termination. +static int lock(const char *dir) +{ + int fd = open(dir, O_DIRECTORY | O_RDONLY); + if (fd == -1) { + rill_fail_errno("unable to open: %s\n", dir); + return -1; + } + + if (flock(fd, LOCK_EX | LOCK_NB) == -1) { + if (errno == EWOULDBLOCK) return 0; + + rill_fail_errno("unable acquire flock on '%s'\n", dir); + close(fd); + return -1; + } + + return fd; +} + +static void unlock(int fd) +{ + flock(fd, LOCK_UN); + close(fd); +} + bool rill_rotate(const char *dir, rill_ts_t now) { + int fd = lock(dir); + if (!fd) return true; + if (fd == -1) return false; + rotate_acc(dir, now); enum { cap = 1024 }; @@ -202,5 +235,6 @@ bool rill_rotate(const char *dir, rill_ts_t now) if (list[i]) rill_store_close(list[i]); } + unlock(fd); return len >= 0; } From b7b9821544f78dab8bdb9709220bd1876d9eac35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Thu, 7 Dec 2017 14:20:02 -0500 Subject: [PATCH 56/91] Split acc out of rotation --- src/rill_load.c | 15 +++++++++++++-- src/rotate.c | 14 -------------- test/rotate_test.c | 23 +++++++++++++++++++---- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/src/rill_load.c b/src/rill_load.c index bc2f578..4f28734 100644 --- a/src/rill_load.c +++ b/src/rill_load.c @@ -33,6 +33,14 @@ void rm(const char *path) rmdir(path); } +void acc_dump(struct rill_acc *acc, const char *dir, rill_ts_t ts) +{ + char file[PATH_MAX]; + snprintf(file, sizeof(file), "%s/%010lu.rill", dir, ts); + + if 
(!rill_acc_write(acc, file, ts)) rill_abort(); +} + int main(int argc, char **argv) { (void) argc, (void) argv; @@ -65,12 +73,15 @@ int main(int argc, char **argv) } if (ts % rotation_rate == 0) { + acc_dump(acc, "db", ts); if (!rill_rotate("db", ts)) rill_abort(); } } - rill_acc_close(acc); - if (!rill_rotate("db", seconds + 60 * 60)) rill_abort(); + rill_ts_t ts = seconds + 60 * 60; + acc_dump(acc, "db", ts); + if (!rill_rotate("db", ts)) rill_abort(); + rill_acc_close(acc); return 0; } diff --git a/src/rotate.c b/src/rotate.c index 84bba5f..6d3f24c 100644 --- a/src/rotate.c +++ b/src/rotate.c @@ -21,18 +21,6 @@ // rotate // ----------------------------------------------------------------------------- -static void rotate_acc(const char *dir, rill_ts_t now) -{ - struct rill_acc *acc = rill_acc_open(dir, rill_acc_read_only); - if (!acc) return; - - char file[PATH_MAX]; - snprintf(file, sizeof(file), "%s/%010lu.rill", dir, now); - - (void) rill_acc_write(acc, file, now); - rill_acc_close(acc); -} - static ssize_t expire(rill_ts_t now, struct rill_store **list, ssize_t len) { if (len < 0) return len; @@ -217,8 +205,6 @@ bool rill_rotate(const char *dir, rill_ts_t now) if (!fd) return true; if (fd == -1) return false; - rotate_acc(dir, now); - enum { cap = 1024 }; struct rill_store *list[cap]; size_t list_len = rill_scan_dir(dir, list, cap); diff --git a/test/rotate_test.c b/test/rotate_test.c index 3b52646..bd8e28a 100644 --- a/test/rotate_test.c +++ b/test/rotate_test.c @@ -10,6 +10,14 @@ // rotate // ----------------------------------------------------------------------------- +void acc_dump(struct rill_acc *acc, const char *dir, rill_ts_t ts) +{ + char file[PATH_MAX]; + snprintf(file, sizeof(file), "%s/%010lu.rill", dir, ts); + + if (!rill_acc_write(acc, file, ts)) rill_abort(); +} + bool test_rotate(void) { const char *dir = "test.rotate.db"; @@ -19,15 +27,17 @@ bool test_rotate(void) const uint64_t key = 1; enum { step = 10 * min_secs }; + struct rill_acc *acc = 
rill_acc_open(dir, 1); + { - struct rill_acc *acc = rill_acc_open(dir, 1); for (rill_ts_t ts = 0; ts < expire_secs; ts += step) { rill_acc_ingest(acc, key, ts + 1); + acc_dump(acc, dir, ts); rill_rotate(dir, ts); } - rill_acc_close(acc); + acc_dump(acc, dir, expire_secs); rill_rotate(dir, expire_secs); } @@ -46,8 +56,13 @@ bool test_rotate(void) rill_pairs_free(pairs); } - for (size_t i = 1; i <= 6; ++i) - rill_rotate(dir, (months_in_expire + i) * month_secs); + for (size_t i = 1; i <= 6; ++i) { + rill_ts_t ts = (months_in_expire + i) * month_secs; + acc_dump(acc, dir, ts); + rill_rotate(dir, ts); + } + + rill_acc_close(acc); { struct rill_query *query = rill_query_open(dir); From c83031ce412b067a06083c51ba451aff9c278f41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 11 Dec 2017 11:10:07 -0500 Subject: [PATCH 57/91] Use substring search for rill extension The trailing number used to disambiguate would otherwise not be detected by the rill file lister. --- src/rotate.c | 9 +++++++++ src/utils.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/rotate.c b/src/rotate.c index 6d3f24c..a590f16 100644 --- a/src/rotate.c +++ b/src/rotate.c @@ -125,6 +125,15 @@ static ssize_t merge_quant( assert(end > start); size_t next_ts = i + 1 != (size_t) len ? rill_store_ts(list[i + 1]) : -1UL; + + // Useful for debugging + /* printf("[%lu,%lu] file=%s, now=%lu(%lu), ts=%lu(%lu), next=%lu(%lu), earliest=%lu(%lu)\n", */ + /* quant, i, rill_store_file(list[i]), now, now / quant, */ + /* rill_store_ts(list[i]), rill_store_ts(list[i]) / quant, */ + /* i + 1 != (size_t) len ? rill_store_ts(list[i + 1]) : 0, */ + /* i + 1 != (size_t) len ? 
rill_store_ts(list[i + 1]) / quant : 0, */ + /* rill_store_ts(list[start]), rill_store_ts(list[start]) / quant); */ + if (next_ts / quant == current_quant) continue; rill_ts_t earliest_ts = rill_store_ts(list[start]); diff --git a/src/utils.c b/src/utils.c index 222f044..945b559 100644 --- a/src/utils.c +++ b/src/utils.c @@ -86,7 +86,7 @@ static bool is_rill_file(const char *name) size_t len = strnlen(name, NAME_MAX); if (len < sizeof(ext)) return false; - return !strcmp(name + (len - sizeof(ext) + 1), ext); + return strstr(name, ext); } size_t rill_scan_dir(const char *dir, struct rill_store **list, size_t cap) From 584f6842b048ceeac0bde284e0b63103ceda4d8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 11 Dec 2017 11:20:24 -0500 Subject: [PATCH 58/91] Add size asserts on index write Should help diagnose the source of a segfault when writting the index. --- src/index.c | 5 ++++- src/store.c | 4 +++- test/coder_test.c | 6 ++++-- test/indexer_test.c | 4 ++-- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/index.c b/src/index.c index da92200..19dbca8 100644 --- a/src/index.c +++ b/src/index.c @@ -64,11 +64,14 @@ static void indexer_put(struct indexer *indexer, rill_key_t key, uint64_t off) assert(indexer->len <= indexer->cap); } -static size_t indexer_write(struct indexer *indexer, struct index *index) +static size_t indexer_write( + struct indexer *indexer, struct index *index, size_t cap) { index->len = indexer->len; size_t len = indexer->len * sizeof(indexer->kvs[0]); + assert(len <= cap); + memcpy(index->data, indexer->kvs, len); return sizeof(*index) + len; diff --git a/src/store.c b/src/store.c index 4b38ad5..908d207 100644 --- a/src/store.c +++ b/src/store.c @@ -312,9 +312,11 @@ static void writer_close( struct rill_store *store, struct indexer *indexer, size_t len) { if (len) { + assert(len <= store->vma_len); + store->head->index_off = len; store->index = (void *) ((uintptr_t) store->vma + store->head->index_off); - 
len += indexer_write(indexer, store->index); + len += indexer_write(indexer, store->index, store->vma_len - len); if (ftruncate(store->fd, len) == -1) rill_fail_errno("unable to resize '%s'", store->file); diff --git a/test/coder_test.c b/test/coder_test.c index 8cff48d..7a6ebdd 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -144,7 +144,9 @@ void check_coder(struct rill_pairs *pairs) size_t cap = (pairs->len + 1) * (sizeof(pairs->data[0]) + 3); uint8_t *buffer = calloc(1, cap); struct vals *vals = vals_from_pairs(pairs); - struct index *index = calloc(1, sizeof(*index) + pairs->len * sizeof(index->data[0])); + + size_t index_cap = sizeof(struct index) + pairs->len * sizeof(struct index_kv); + struct index *index = calloc(1, index_cap); size_t len = 0; { @@ -157,7 +159,7 @@ void check_coder(struct rill_pairs *pairs) len = coder.it - buffer; assert(len <= cap); - indexer_write(indexer, index); + indexer_write(indexer, index, index_cap); indexer_free(indexer); } diff --git a/test/indexer_test.c b/test/indexer_test.c index db8c05f..edd106e 100644 --- a/test/indexer_test.c +++ b/test/indexer_test.c @@ -22,7 +22,7 @@ bool test_indexer_build(void) struct index *index = calloc(1, size); assert(index); - size_t n_written = indexer_write(indexer, index); + size_t n_written = indexer_write(indexer, index, size); assert(n_written == size); indexer_free(indexer); @@ -43,7 +43,7 @@ static struct index *make_index(rill_key_t *data, size_t n) indexer_put(indexer, data[i], i); struct index *index = calloc(1, indexer_cap(n)); - indexer_write(indexer, index); + indexer_write(indexer, index, indexer_cap(n)); indexer_free(indexer); return index; From 4a452a7d632410edaca9e915a713e2be587a2680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 11 Dec 2017 13:08:17 -0500 Subject: [PATCH 59/91] Fix coder cap estimates in edge cases --- src/coder.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coder.c b/src/coder.c index 
04ea21b..1494c20 100644 --- a/src/coder.c +++ b/src/coder.c @@ -69,7 +69,8 @@ static size_t coder_cap(size_t vals, size_t pairs) size_t bytes = 1; while (vals >= 1UL << (bytes * 7)) bytes++; - return bytes * (pairs + 1); + return (bytes + 1) // + 1 -> end-of-values terminator + * (pairs + 1); // + 1 -> end-of-pairs terminator } static uint64_t coder_off(struct encoder *coder) From e84b3601413e4c73a30e74349d8cbac11fd109e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 11 Dec 2017 13:11:31 -0500 Subject: [PATCH 60/91] Remove stamp version check 4 is no longer supported so meh --- src/store.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/store.c b/src/store.c index 908d207..a6c8e89 100644 --- a/src/store.c +++ b/src/store.c @@ -189,11 +189,9 @@ struct rill_store *rill_store_open(const char *file) goto fail_version; } - if (store->head->version >= 4) { - if (store->head->stamp != stamp) { - rill_fail("invalid stamp '%p' for '%s'", (void *) store->head->stamp, file); - goto fail_stamp; - } + if (store->head->stamp != stamp) { + rill_fail("invalid stamp '%p' for '%s'", (void *) store->head->stamp, file); + goto fail_stamp; } return store; From 802707bb59815ea71b53c7bb2f08f43a546a7dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Mon, 8 Jan 2018 15:18:05 -0500 Subject: [PATCH 61/91] Terminate pairs iteration in dump --- src/rill_dump.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rill_dump.c b/src/rill_dump.c index de9e462..34ff5cf 100644 --- a/src/rill_dump.c +++ b/src/rill_dump.c @@ -77,8 +77,10 @@ int main(int argc, char **argv) struct rill_store_it *it = rill_store_begin(store); printf("pairs:\n"); - while (rill_store_it_next(it, &kv)) + while (rill_store_it_next(it, &kv)) { + if (rill_kv_nil(&kv)) break; printf(" %p %p\n", (void *) kv.key, (void *) kv.val); + } rill_store_it_free(it); } From 5735d41b30e8ea2cebf80f1ca6d53a3287d9550c Mon Sep 17 00:00:00 2001 From: 
Simon Symeonidis Date: Tue, 23 Jan 2018 16:44:01 -0500 Subject: [PATCH 62/91] Add reverse lookup to rill - Fix store test - Fix coder test - Remove rill_invert - Remove keys from store header - Add query test - Simple sanity check to test querying col a and col b - Remove rill_query_all - Add rill space usage metrics in rill_dump --- compile.sh | 19 +- src/coder.c | 27 +-- src/index.c | 6 +- src/pairs.c | 12 +- src/query.c | 46 +--- src/rill.h | 48 ++-- src/rill_dump.c | 91 ++++--- src/rill_invert.c | 91 ------- src/rill_query.c | 8 +- src/store.c | 564 +++++++++++++++++++++++++++---------------- src/vals.c | 53 +--- test/coder_test.c | 165 ++++++++++--- test/query_test.c | 69 ++++++ test/rill_generate.c | 64 +++++ test/store_test.c | 119 +++++---- test/test.h | 2 + 16 files changed, 845 insertions(+), 539 deletions(-) delete mode 100644 src/rill_invert.c create mode 100644 test/query_test.c create mode 100644 test/rill_generate.c diff --git a/compile.sh b/compile.sh index 8027cf6..1ad531f 100755 --- a/compile.sh +++ b/compile.sh @@ -8,6 +8,10 @@ declare -a SRC SRC=(htable rng utils pairs store acc rotate query) CC=${OTHERC:-gcc} +LEAKCHECK_ENABLED=${LEAKCHECK_ENABLED:-} +LEAKCHECK=${OTHERMEMCHECK:-valgrind} +LEAKCHECK_ARGS="--leak-check=full --track-origins=yes --trace-children=yes --error-exitcode=1" + CFLAGS="-g -O3 -march=native -pipe -std=gnu11 -D_GNU_SOURCE" CFLAGS="$CFLAGS -I${PREFIX}/src" @@ -35,11 +39,24 @@ $CC -o rill_load "${PREFIX}/src/rill_load.c" librill.a $CFLAGS $CC -o rill_dump "${PREFIX}/src/rill_dump.c" librill.a $CFLAGS $CC -o rill_query "${PREFIX}/src/rill_query.c" librill.a $CFLAGS $CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS -$CC -o rill_invert "${PREFIX}/src/rill_invert.c" librill.a $CFLAGS $CC -o rill_ingest "${PREFIX}/src/rill_ingest.c" librill.a $CFLAGS $CC -o rill_merge "${PREFIX}/src/rill_merge.c" librill.a $CFLAGS +$CC -o rill_generate "${PREFIX}/test/rill_generate.c" librill.a $CFLAGS $CC -o test_indexer 
"${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && ./test_indexer $CC -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder $CC -o test_store "${PREFIX}/test/store_test.c" librill.a $CFLAGS && ./test_store $CC -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS +$CC -o test_query "${PREFIX}/test/query_test.c" librill.a $CFLAGS && ./test_query + +if [ -n "$LEAKCHECK_ENABLED" ] +then + echo test_indexer ======================================= + $LEAKCHECK $LEAKCHECK_ARGS ./test_indexer + echo test_coder ========================================= + $LEAKCHECK $LEAKCHECK_ARGS ./test_coder + echo test_store ========================================= + $LEAKCHECK $LEAKCHECK_ARGS ./test_store + echo test_query ========================================= + $LEAKCHECK $LEAKCHECK_ARGS ./test_query +fi diff --git a/src/coder.c b/src/coder.c index 1494c20..94781ac 100644 --- a/src/coder.c +++ b/src/coder.c @@ -143,7 +143,10 @@ static void coder_close(struct encoder *coder) } static struct encoder make_encoder( - uint8_t *start, uint8_t *end, struct vals *vals, struct indexer *indexer) + uint8_t *start, + uint8_t *end, + struct vals *vals, + struct indexer *indexer) { struct encoder coder = { .it = start, .start = start, .end = end, @@ -166,8 +169,10 @@ struct decoder size_t keys; rill_key_t key; - struct vals *vals; + struct index *lookup; struct index *index; + + struct vals *vals; }; static inline bool coder_read_val(struct decoder *coder, rill_val_t *val) @@ -178,7 +183,7 @@ static inline bool coder_read_val(struct decoder *coder, rill_val_t *val) return false; } - if (*val) *val = vals_itov(coder->vals, *val); + if (*val) *val = coder->lookup->data[*val - 1].key; return true; } @@ -199,24 +204,16 @@ static bool coder_decode(struct decoder *coder, struct rill_kv *kv) return coder_read_val(coder, &kv->val); } -static struct decoder make_decoder( - uint8_t *it, uint8_t *end, struct vals *vals, struct index *index) -{ - return (struct decoder) 
{ - .it = it, .end = end, - .vals = vals, - .index = index, - }; -} - static struct decoder make_decoder_at( uint8_t *it, uint8_t *end, - struct vals *vals, struct index *index, size_t key_idx) + struct index *lookup, + struct index *index, + size_t key_idx) { return (struct decoder) { .it = it, .end = end, .keys = key_idx, - .vals = vals, + .lookup = lookup, .index = index, }; } diff --git a/src/index.c b/src/index.c index 19dbca8..b3b5a92 100644 --- a/src/index.c +++ b/src/index.c @@ -65,15 +65,15 @@ static void indexer_put(struct indexer *indexer, rill_key_t key, uint64_t off) } static size_t indexer_write( - struct indexer *indexer, struct index *index, size_t cap) + struct indexer *indexer, struct index *index, + size_t cap) { index->len = indexer->len; - size_t len = indexer->len * sizeof(indexer->kvs[0]); + assert(len <= cap); memcpy(index->data, indexer->kvs, len); - return sizeof(*index) + len; } diff --git a/src/pairs.c b/src/pairs.c index db8acc0..c2d5275 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -74,7 +74,7 @@ struct rill_pairs *rill_pairs_reserve(struct rill_pairs *pairs, size_t cap) struct rill_pairs *rill_pairs_push( struct rill_pairs *pairs, rill_key_t key, rill_val_t val) { - assert(key && val); + assert(key && val && pairs); pairs = rill_pairs_reserve(pairs, pairs->len + 1); if (!pairs) return NULL; @@ -126,3 +126,13 @@ void rill_pairs_print(const struct rill_pairs *pairs) if (pairs->len) printf(" ]\n"); } + +void rill_pairs_invert(struct rill_pairs* pairs) +{ + for (size_t i = 0; i < pairs->len; ++i) { + pairs->data[i] = (struct rill_kv) { + .key = pairs->data[i].val, + .val = pairs->data[i].key, + }; + } +} diff --git a/src/query.c b/src/query.c index 2e7373f..6d60230 100644 --- a/src/query.c +++ b/src/query.c @@ -86,8 +86,10 @@ struct rill_pairs *rill_query_keys( struct rill_pairs *result = out; for (size_t i = 0; i < query->len; ++i) { - result = rill_store_scan_keys(query->list[i], keys, len, result); - if (!result) return NULL; + for 
(size_t j = 0; i < len; ++j) { + result = rill_store_query_key(query->list[i], keys[j], result); + if (!result) return NULL; + } } rill_pairs_compact(result); @@ -118,8 +120,10 @@ struct rill_pairs *rill_query_vals( struct rill_pairs *result = out; for (size_t i = 0; i < query->len; ++i) { - result = rill_store_scan_vals(query->list[i], sorted, len, result); - if (!result) goto fail_scan; + for (size_t j = 0; j < len; ++j) { + result = rill_store_query_value(query->list[i], sorted[j], result); + if (!result) goto fail_scan; + } } rill_pairs_compact(result); @@ -132,37 +136,3 @@ struct rill_pairs *rill_query_vals( // \todo potentially leaking result return NULL; } - -struct rill_pairs *rill_query_all(const struct rill_query *query) -{ - struct rill_pairs *result = rill_pairs_new(1); - for (size_t i = 0; i < query->len; ++i) { - - size_t pairs = rill_store_pairs(query->list[i]); - result = rill_pairs_reserve(result, result->len + pairs); - if (!result) goto fail_scan; - - struct rill_store_it *it = rill_store_begin(query->list[i]); - if (!it) goto fail_scan; - - struct rill_kv kv; - while (true) { - if (!rill_store_it_next(it, &kv)) { - rill_store_it_free(it); - goto fail_scan; - } - if (rill_kv_nil(&kv)) break; - - result = rill_pairs_push(result, kv.key, kv.val); - } - rill_store_it_free(it); - } - - rill_pairs_compact(result); - return result; - - fail_scan: - free(result); - return NULL; - -} diff --git a/src/rill.h b/src/rill.h index 920a0a4..8bd9e3e 100644 --- a/src/rill.h +++ b/src/rill.h @@ -90,20 +90,26 @@ void rill_pairs_compact(struct rill_pairs *pairs); void rill_pairs_print(const struct rill_pairs *pairs); +void rill_pairs_invert(struct rill_pairs* pairs); + // ----------------------------------------------------------------------------- // store // ----------------------------------------------------------------------------- +enum rill_col { rill_col_a = 0, rill_col_b = 1 }; + struct rill_store; struct rill_store_it; +struct rill_space; struct 
rill_store *rill_store_open(const char *file); void rill_store_close(struct rill_store *store); bool rill_store_write( const char *file, - rill_ts_t ts, size_t quant, + rill_ts_t ts, + size_t quant, struct rill_pairs *pairs); bool rill_store_merge( @@ -117,27 +123,30 @@ const char * rill_store_file(const struct rill_store *store); unsigned rill_store_version(const struct rill_store *store); rill_ts_t rill_store_ts(const struct rill_store *store); size_t rill_store_quant(const struct rill_store *store); -size_t rill_store_keys(const struct rill_store *store); -size_t rill_store_vals(const struct rill_store *store); +size_t rill_store_keys_count(const struct rill_store *store, enum rill_col column); size_t rill_store_pairs(const struct rill_store *store); +size_t rill_store_index_len(const struct rill_store *store, enum rill_col col); + +struct rill_space* rill_store_space(struct rill_store *store); +size_t rill_store_space_header(struct rill_space *space); +size_t rill_store_space_index(struct rill_space *space, enum rill_col col); +size_t rill_store_space_pairs(struct rill_space *space, enum rill_col col); +void rill_space_free(struct rill_space* space); + + +struct rill_pairs *rill_store_query_value( + struct rill_store *store, rill_val_t val, struct rill_pairs *out); struct rill_pairs *rill_store_query_key( struct rill_store *store, rill_key_t key, struct rill_pairs *out); -struct rill_pairs *rill_store_scan_keys( - struct rill_store *store, - const rill_key_t *keys, size_t len, - struct rill_pairs *out); -struct rill_pairs *rill_store_scan_vals( - struct rill_store *store, - const rill_val_t *vals, size_t len, - struct rill_pairs *out); -size_t rill_store_dump_keys( - const struct rill_store *store, rill_val_t *out, size_t cap); -size_t rill_store_dump_vals( - const struct rill_store *store, rill_val_t *out, size_t cap); -struct rill_store_it *rill_store_begin(struct rill_store *store); +size_t rill_store_keys( + const struct rill_store *store, rill_val_t *out, 
size_t cap, + enum rill_col column); + +struct rill_store_it *rill_store_begin( + struct rill_store *store, enum rill_col column); void rill_store_it_free(struct rill_store_it *it); bool rill_store_it_next(struct rill_store_it *it, struct rill_kv *kv); @@ -174,7 +183,9 @@ struct rill_query * rill_query_open(const char *dir); void rill_query_close(struct rill_query *db); struct rill_pairs *rill_query_key( - const struct rill_query *query, rill_key_t key, struct rill_pairs *out); + const struct rill_query *query, + rill_key_t key, + struct rill_pairs *out); struct rill_pairs *rill_query_keys( const struct rill_query *query, @@ -186,9 +197,6 @@ struct rill_pairs *rill_query_vals( const rill_val_t *vals, size_t len, struct rill_pairs *out); -struct rill_pairs *rill_query_all(const struct rill_query *query); - - // ----------------------------------------------------------------------------- // misc // ----------------------------------------------------------------------------- diff --git a/src/rill_dump.c b/src/rill_dump.c index 34ff5cf..b0623f8 100644 --- a/src/rill_dump.c +++ b/src/rill_dump.c @@ -12,79 +12,104 @@ void usage() { - fprintf(stderr, "rill_dump [-h] [-k] [-v] [-p] \n"); + fprintf(stderr, "rill_dump [-h] [-k] [-p] [-m] - \n"); exit(1); } int main(int argc, char **argv) { bool header = false; - bool keys = false; - bool vals = false; + bool key = false; bool pairs = false; + bool a = false; + bool b = false; + bool space = false; int opt = 0; - while ((opt = getopt(argc, argv, "+hkvp")) != -1) { + while ((opt = getopt(argc, argv, "+habkpm")) != -1) { switch (opt) { case 'h': header = true; break; - case 'k': keys = true; break; - case 'v': vals = true; break; + case 'k': key = true; break; case 'p': pairs = true; break; + case 'a': a = true; break; + case 'b': b = true; break; + case 'm': space = true; break; default: fprintf(stderr, "unknown argument: %c\n", opt); usage(); } } - if (!header && !keys && !vals && !pairs) usage(); + if (!header && !a && !b 
&& !a && !pairs && !key && !space) usage(); if (optind >= argc) usage(); struct rill_store *store = rill_store_open(argv[optind]); if (!store) rill_exit(1); if (header) { - printf("file: %s\n", rill_store_file(store)); - printf("version: %u\n", rill_store_version(store)); - printf("ts: %lu\n", rill_store_ts(store)); - printf("quant: %lu\n", rill_store_quant(store)); - printf("keys: %lu\n", rill_store_keys(store)); - printf("vals: %lu\n", rill_store_vals(store)); - printf("pairs: %lu\n", rill_store_pairs(store)); + printf("file: %s\n", rill_store_file(store)); + printf("version: %u\n", rill_store_version(store)); + printf("ts: %lu\n", rill_store_ts(store)); + printf("quant: %lu\n", rill_store_quant(store)); + printf("keys data a: %zu\n", rill_store_keys_count(store, rill_col_a)); + printf("keys data b: %zu\n", rill_store_keys_count(store, rill_col_b)); + printf("pairs: %lu\n", rill_store_pairs(store)); + printf("index a len: %zu\n", rill_store_index_len(store, rill_col_a)); + printf("index b len: %zu\n", rill_store_index_len(store, rill_col_b)); } - if (keys) { - size_t key_len = rill_store_keys(store); - rill_key_t *keys = calloc(key_len, sizeof(*keys)); - (void) rill_store_dump_keys(store, keys, key_len); - - printf("keys:\n"); - for (size_t i = 0; i < key_len; ++i) - printf(" %p\n", (void *) keys[i]); + if ((key || pairs) && !a && !b) { + fprintf(stderr, "you need to specify column a or b\n"); + return -1; } - if (vals) { - size_t val_len = rill_store_vals(store); - rill_val_t *vals = calloc(val_len, sizeof(*vals)); - (void) rill_store_dump_vals(store, vals, val_len); + if (key) { + const enum rill_col col = a ? rill_col_a : rill_col_b; + const size_t keys_len = rill_store_keys_count(store, col); + rill_key_t *keys = calloc(keys_len, sizeof(*keys)); + + (void) rill_store_keys(store, keys, keys_len, col); + + printf("vals %c:\n", col ? 
'b' : 'a'); - printf("vals:\n"); - for (size_t i = 0; i < val_len; ++i) - printf(" %p\n", (void *) vals[i]); + for (size_t i = 0; i < keys_len; ++i) + printf(" 0x%lx\n", keys[i]); } if (pairs) { - struct rill_kv kv; - struct rill_store_it *it = rill_store_begin(store); + struct rill_kv kv = {0}; + const enum rill_col col = a ? rill_col_a : rill_col_b; + struct rill_store_it *it = rill_store_begin(store, col); - printf("pairs:\n"); + printf("pairs %c:\n", a ? 'a' : 'b'); while (rill_store_it_next(it, &kv)) { if (rill_kv_nil(&kv)) break; - printf(" %p %p\n", (void *) kv.key, (void *) kv.val); + printf(" 0x%lx 0x%lx\n", kv.key, kv.val); } rill_store_it_free(it); } + if (space) { + struct rill_space* space = rill_store_space(store); + + printf( + "size stats : %s\n" + "header size : %zu\n" + "index a size: %zu\n" + "index b size: %zu\n" + "data a size : %zu\n" + "data b size : %zu\n", + rill_store_file(store), + rill_store_space_header(space), + rill_store_space_index(space, rill_col_a), + rill_store_space_index(space, rill_col_b), + rill_store_space_pairs(space, rill_col_a), + rill_store_space_pairs(space, rill_col_b)); + + free(space); + } + rill_store_close(store); return 0; } diff --git a/src/rill_invert.c b/src/rill_invert.c deleted file mode 100644 index a08146c..0000000 --- a/src/rill_invert.c +++ /dev/null @@ -1,91 +0,0 @@ -/* rill_invert.c - Rémi Attab (remi.attab@gmail.com), 02 Nov 2017 - FreeBSD-style copyright and disclaimer apply -*/ - -#include "rill.h" -#include "utils.h" - -#include -#include -#include - -enum { shards = 10 }; - -static struct rill_pairs *make_pairs(struct rill_store *store) -{ - size_t cap = (rill_store_pairs(store) - 1) / shards + 1; - size_t len = sizeof(struct rill_pairs) + cap * sizeof(struct rill_kv); - len = (len / page_len + 1) * page_len; - - // glibc's malloc chokes on large allocation for whatever reason... 
- struct rill_pairs *pairs = - mmap(0, len, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (pairs == MAP_FAILED) { - rill_fail_errno("unable mmap '%p' bytes", (void *) len); - rill_exit(1); - } - - pairs->cap = cap; - return pairs; -} - -int main(int argc, const char **argv) -{ - if (argc != 3) { - fprintf(stderr, "rill_invert \n"); - exit(1); - } - - fprintf(stderr, "opening '%s'...\n", argv[1]); - struct rill_store *src = rill_store_open(argv[1]); - if (!src) rill_exit(1); - - struct rill_pairs *pairs = make_pairs(src); - - size_t shard_i = 0; - struct rill_store *shard[shards] = {0}; - - rill_ts_t ts = rill_store_ts(src), quant = rill_store_quant(src); - - fprintf(stderr, "reading '%lu' pairs in shards of '%lu'...\n", - rill_store_pairs(src), pairs->cap); - - struct rill_kv kv; - struct rill_store_it *it = rill_store_begin(src); - - printf("%lu: reading...\n", shard_i); - - do { - if (!rill_store_it_next(it, &kv)) rill_exit(1); - if (!rill_kv_nil(&kv)) - pairs = rill_pairs_push(pairs, kv.val, kv.key); - - if (pairs->len == pairs->cap || rill_kv_nil(&kv)) { - printf("%lu: compacting...\n", shard_i); - - rill_pairs_compact(pairs); - - char path[256]; - snprintf(path, sizeof(path), "%s.%lu", argv[2], shard_i); - printf("%lu: writing '%s'...\n", shard_i, path); - - if (!rill_store_write(path, ts, quant, pairs)) rill_exit(1); - - shard[shard_i] = rill_store_open(path); - if (!shard[shard_i]) rill_exit(1); - - shard_i++; - rill_pairs_clear(pairs); - printf("%lu: reading...\n", shard_i); - } - - } while (!rill_kv_nil(&kv)); - - fprintf(stderr, "merging to '%s'...\n", argv[2]); - if (!rill_store_merge(argv[2], ts, quant, shard, shards)) rill_exit(1); - - for (size_t i = 0; i < shards; ++i) rill_store_rm(shard[i]); - - return 0; -} diff --git a/src/rill_query.c b/src/rill_query.c index a265833..aa2eb00 100644 --- a/src/rill_query.c +++ b/src/rill_query.c @@ -84,7 +84,7 @@ int main(int argc, char *argv[]) if (!store) rill_exit(1); if (key) pairs = 
rill_store_query_key(store, key, pairs); - else pairs = rill_store_scan_vals(store, &val, 1, pairs); + else pairs = rill_store_query_value(store, val, pairs); rill_store_close(store); } @@ -100,10 +100,8 @@ int main(int argc, char *argv[]) if (!pairs) rill_exit(1); - for (size_t i = 0; i < pairs->len; ++i) { - if (key) printf("%p\n", (void *) pairs->data[i].val); - else printf("%p\n", (void *) pairs->data[i].key); - } + for (size_t i = 0; i < pairs->len; ++i) + printf("0x%lx 0x%lx\n", pairs->data[i].key, pairs->data[i].val); rill_pairs_free(pairs); return 0; diff --git a/src/store.c b/src/store.c index a6c8e89..d84f07c 100644 --- a/src/store.c +++ b/src/store.c @@ -28,15 +28,17 @@ #include "index.c" #include "coder.c" - // ----------------------------------------------------------------------------- // store // ----------------------------------------------------------------------------- +/* version 6 introduces reverse lookup, and massive db format changes */ +static const uint32_t version = 6; + static const uint32_t magic = 0x4C4C4952; static const uint64_t stamp = 0xFFFFFFFFFFFFFFFFUL; -static const uint32_t version = 5; -static const uint32_t supported_versions[] = { 5 }; +/* version 6 can not support older dbs -- they'll need to be updated */ +static const uint32_t supported_versions[] = { 6 }; struct rill_packed header { @@ -46,14 +48,15 @@ struct rill_packed header uint64_t ts; uint64_t quant; - uint64_t keys; uint64_t pairs; - uint64_t vals_off; - uint64_t data_off; - uint64_t index_off; + uint64_t data_a_off; + uint64_t data_b_off; + + uint64_t index_a_off; + uint64_t index_b_off; - uint64_t reserved[3]; // for future use + uint64_t reserved[2]; uint64_t stamp; }; @@ -68,42 +71,82 @@ struct rill_store struct header *head; struct vals *vals; - uint8_t *data; - struct index *index; + + uint8_t *data_a; + uint8_t *data_b; + struct index *index_a; + struct index *index_b; uint8_t *end; }; +struct rill_space +{ + size_t header_bytes; + size_t index_bytes[2]; + 
size_t pairs_bytes[2]; +}; + // ----------------------------------------------------------------------------- // coder // ----------------------------------------------------------------------------- static struct encoder store_encoder( - struct rill_store *store, struct indexer *indexer) + struct rill_store *store, + struct indexer *indexer, + struct vals* vals, + uint64_t offset) { return make_encoder( - store->vma + store->head->data_off, - store->vma + store->vma_len, - store->vals, indexer); -} - -static struct decoder store_decoder(struct rill_store *store) -{ - return make_decoder( - store->vma + store->head->data_off, + store->vma + offset, store->vma + store->vma_len, - store->vals, store->index); + vals, + indexer); } static struct decoder store_decoder_at( - struct rill_store *store, size_t key_idx, uint64_t off) + struct rill_store *store, + size_t key_idx, + uint64_t curr_off, + enum rill_col column) { + size_t offset = 0; + size_t offset_end = 0; + struct index* lookup = NULL; + struct index* index = NULL; + + switch (column) { + case rill_col_a: + lookup = store->index_b; + index = store->index_a; + offset = store->head->data_a_off; + offset_end = store->head->data_b_off; + break; + case rill_col_b: + lookup = store->index_a; + index = store->index_b; + offset = store->head->data_b_off; + offset_end = store->vma_len; + break; + default: + rill_fail("improper rill col passed: %d", column); + break; + } + return make_decoder_at( - store->vma + store->head->data_off + off, - store->vma + store->vma_len, - store->vals, store->index, key_idx); + store->vma + offset + curr_off, + store->vma + offset_end, + lookup, + index, + key_idx); } +static struct decoder store_decoder( + struct rill_store *store, + enum rill_col column) +{ + return store_decoder_at(store, 0, 0, column); +} // ----------------------------------------------------------------------------- // vma @@ -174,9 +217,10 @@ struct rill_store *rill_store_open(const char *file) } store->head = 
store->vma; - store->vals = (void *) ((uintptr_t) store->vma + store->head->vals_off); - store->data = (void *) ((uintptr_t) store->vma + store->head->data_off); - store->index = (void *) ((uintptr_t) store->vma + store->head->index_off); + store->index_a = (void *) ((uintptr_t) store->vma + store->head->index_a_off); + store->index_b = (void *) ((uintptr_t) store->vma + store->head->index_b_off); + store->data_a = (void *) ((uintptr_t) store->vma + store->head->data_a_off); + store->data_b = (void *) ((uintptr_t) store->vma + store->head->data_b_off); store->end = (void *) ((uintptr_t) store->vma + store->vma_len); if (store->head->magic != magic) { @@ -185,12 +229,12 @@ struct rill_store *rill_store_open(const char *file) } if (!is_supported_version(store->head->version)) { - rill_fail("invalid version '%du' for '%s'", store->head->version, file); + rill_fail("invalid version '%u' for '%s'", store->head->version, file); goto fail_version; } if (store->head->stamp != stamp) { - rill_fail("invalid stamp '%p' for '%s'", (void *) store->head->stamp, file); + rill_fail("invalid stamp '%lx' for '%s'", store->head->stamp, file); goto fail_stamp; } @@ -239,8 +283,11 @@ bool rill_store_rm(struct rill_store *store) static bool writer_open( struct rill_store *store, const char *file, - struct vals *vals, size_t pairs, - rill_ts_t ts, size_t quant) + struct vals *vals, + struct vals *inverted_vals, + size_t pairs, + rill_ts_t ts, + size_t quant) { store->file = file; @@ -252,9 +299,10 @@ static bool writer_open( size_t len = sizeof(struct header) + - vals_cap(vals) + + indexer_cap(inverted_vals->len) + + indexer_cap(vals->len) + coder_cap(vals->len, pairs) + - indexer_cap(pairs); + coder_cap(inverted_vals->len, pairs); if (ftruncate(store->fd, len) == -1) { rill_fail_errno("unable to resize '%s'", file); @@ -269,7 +317,6 @@ static bool writer_open( } store->head = store->vma; - store->vals = (void *) ((uintptr_t) store->vma + sizeof(struct header)); store->end = (void *) 
((uintptr_t) store->vma + store->vma_len); *store->head = (struct header) { @@ -277,7 +324,6 @@ static bool writer_open( .version = version, .ts = ts, .quant = quant, - .vals_off = sizeof(struct header), }; return true; @@ -290,32 +336,23 @@ static bool writer_open( return false; } -static struct encoder writer_begin( - struct rill_store *store, - const struct vals *vals, - struct indexer *indexer) +static void writer_flush_indices( + struct rill_store *store, + struct indexer *indexer_a, + struct indexer *indexer_b) { - size_t len = sizeof(*vals) + sizeof(vals->data[0]) * vals->len; - assert(store->head->vals_off + len < store->vma_len); - - memcpy(store->vals, vals, len); + const size_t indexer_a_size = indexer_cap(indexer_a->len); + const size_t indexer_b_size = indexer_cap(indexer_b->len); - store->head->data_off = store->head->vals_off + len; - store->data = (void *) ((uintptr_t) store->vma + store->head->data_off); - - return store_encoder(store, indexer); + indexer_write(indexer_a, store->index_a, indexer_a_size); + indexer_write(indexer_b, store->index_b, indexer_b_size); } static void writer_close( - struct rill_store *store, struct indexer *indexer, size_t len) + struct rill_store *store, size_t len) { if (len) { assert(len <= store->vma_len); - - store->head->index_off = len; - store->index = (void *) ((uintptr_t) store->vma + store->head->index_off); - len += indexer_write(indexer, store->index, store->vma_len - len); - if (ftruncate(store->fd, len) == -1) rill_fail_errno("unable to resize '%s'", store->file); @@ -330,7 +367,6 @@ static void writer_close( store->head->stamp = stamp; if (fdatasync(store->fd) == -1) rill_fail_errno("unable to fdatasync stamp '%s'", store->file); - } else if (unlink(store->file) == -1) rill_fail_errno("unable to unlink '%s'", store->file); @@ -339,99 +375,176 @@ static void writer_close( close(store->fd); } +static void init_store_offsets( + struct rill_store* store, size_t vals, size_t inverse_vals) +{ + 
store->head->index_a_off = sizeof(struct header); + store->head->index_b_off = store->head->index_a_off + indexer_cap(inverse_vals); + store->head->data_a_off = store->head->index_b_off + indexer_cap(vals); + + store->index_a = (void *) ((uintptr_t) store->vma + store->head->index_a_off); + store->index_b = (void *) ((uintptr_t) store->vma + store->head->index_b_off); + store->data_a = (void *) ((uintptr_t) store->vma + store->head->data_a_off); +} + +static void prepare_col_b_offsets( + struct rill_store* store, struct encoder* coder_a) +{ + store->head->data_b_off = store->head->data_a_off + coder_off(coder_a); + store->data_b = (void *) ((uintptr_t) store->vma + store->head->data_b_off); +} + bool rill_store_write( const char *file, - rill_ts_t ts, size_t quant, + rill_ts_t ts, + size_t quant, struct rill_pairs *pairs) { rill_pairs_compact(pairs); if (!pairs->len) return true; - struct vals *vals = vals_from_pairs(pairs); + struct vals *vals = vals_cols_from_pairs(pairs, rill_col_b); if (!vals) goto fail_vals; + struct vals *invert_vals = vals_cols_from_pairs(pairs, rill_col_a); + if (!invert_vals) goto fail_invert_vals; struct rill_store store = {0}; - if (!writer_open(&store, file, vals, pairs->len, ts, quant)) { + if (!writer_open(&store, file, vals, invert_vals, + pairs->len, ts, quant)) { rill_fail("unable to create '%s'", file); goto fail_open; } - struct indexer *indexer = indexer_alloc(pairs->len); - struct encoder coder = writer_begin(&store, vals, indexer); + struct indexer *indexer_a = indexer_alloc(invert_vals->len); + if (!indexer_a) goto fail_indexer_a_alloc; + struct indexer *indexer_b = indexer_alloc(vals->len); + if (!indexer_b) goto fail_indexer_b_alloc; + + init_store_offsets(&store, vals->len, invert_vals->len); + + struct encoder coder_a = + store_encoder(&store, indexer_a, vals, store.head->data_a_off); for (size_t i = 0; i < pairs->len; ++i) { - if (!coder_encode(&coder, &pairs->data[i])) goto fail_encode; + if (!coder_encode(&coder_a, 
&pairs->data[i])) goto fail_encode_a; } - if (!coder_finish(&coder)) goto fail_encode; + if (!coder_finish(&coder_a)) goto fail_encode_a; - store.head->keys = coder.keys; - store.head->pairs = coder.pairs; + prepare_col_b_offsets(&store, &coder_a); - writer_close(&store, indexer, store.head->data_off + coder_off(&coder)); + struct encoder coder_b = + store_encoder(&store, indexer_b, invert_vals, store.head->data_b_off); + + rill_pairs_invert(pairs); + rill_pairs_compact(pairs); /* recompact mainly for sort */ + + for (size_t i = 0; i < pairs->len; ++i) { + if (!coder_encode(&coder_b, &pairs->data[i])) goto fail_encode_b; + } + if (!coder_finish(&coder_b)) goto fail_encode_b; + + writer_flush_indices(&store, indexer_a, indexer_b); + + store.head->pairs = coder_a.pairs; + + writer_close(&store, store.head->data_b_off + coder_off(&coder_b)); + + coder_close(&coder_a); + coder_close(&coder_b); + + indexer_free(indexer_a); + indexer_free(indexer_b); - coder_close(&coder); - indexer_free(indexer); free(vals); + free(invert_vals); + return true; - fail_encode: - coder_close(&coder); - writer_close(&store, indexer, 0); - indexer_free(indexer); + fail_encode_b: + coder_close(&coder_b); + fail_encode_a: + coder_close(&coder_a); + indexer_free(indexer_b); + fail_indexer_b_alloc: + indexer_free(indexer_a); + fail_indexer_a_alloc: + writer_close(&store, 0); + free(invert_vals); + fail_invert_vals: fail_open: free(vals); fail_vals: return false; } -bool rill_store_merge( - const char *file, - rill_ts_t ts, size_t quant, - struct rill_store **list, size_t list_len) +static struct vals *vals_merge_from_index( + struct vals *vals, struct index *merge) { - assert(list_len > 1); + assert(merge); + + if (!vals) { + size_t len = sizeof(*vals) + sizeof(vals->data[0]) * merge->len; + vals = calloc(1, len); + if (!vals) { + rill_fail("unable to allocate memory for vals: %lu", merge->len); + return NULL; + } - size_t keys = 0; - size_t pairs = 0; - struct vals *vals = NULL; + for (size_t i 
= 0; i < merge->len; ++i) + vals->data[i] = merge->data[i].key; - struct decoder decoders[list_len]; + vals->len = merge->len; + + return vals; + } + + vals = realloc(vals, + sizeof(*vals) + sizeof(vals->data[0]) * (vals->len + merge->len)); + + if (!vals) { + rill_fail("unable to allocate memory for vals: %lu + %lu", + vals->len, merge->len); + return NULL; + } + + for (size_t i = 0; i < merge->len; ++i) + vals->data[vals->len + i] = merge->data[i].key; + + vals->len += merge->len; + + vals_compact(vals); + + return vals; +} + + +static bool merge_with_config( + struct encoder* coder, + struct rill_store** list, + size_t list_len, + enum rill_col col) +{ struct rill_kv kvs[list_len]; + struct decoder decoders[list_len]; + size_t it_len = 0; for (size_t i = 0; i < list_len; ++i) { if (!list[i]) continue; - vma_will_need(list[i]); - - struct vals *ret = vals_merge(vals, list[i]->vals); - if (ret) { vals = ret; } else { goto fail_vals; } - - decoders[it_len] = store_decoder(list[i]); - pairs += list[i]->head->pairs; - keys += list[i]->head->keys; + decoders[it_len] = store_decoder(list[i], col); it_len++; } assert(it_len); - struct rill_store store = {0}; - if (!writer_open(&store, file, vals, pairs, ts, quant)) { - rill_fail("unable to create '%s'", file); - goto fail_open; - } - - struct indexer *indexer = indexer_alloc(keys); - if (!indexer) goto fail_index; - - struct encoder encoder = writer_begin(&store, vals, indexer); - for (size_t i = 0; i < it_len; ++i) { - if (!(coder_decode(&decoders[i], &kvs[i]))) goto fail_coder; + if (!(coder_decode(&decoders[i], &kvs[i]))) goto fail_decoder; } struct rill_kv prev = {0}; while (it_len > 0) { size_t target = 0; + for (size_t i = 1; i < it_len; ++i) { if (rill_kv_cmp(&kvs[i], &kvs[target]) < 0) target = i; @@ -439,12 +552,13 @@ bool rill_store_merge( struct rill_kv *kv = &kvs[target]; struct decoder *decoder = &decoders[target]; + if (rill_likely(rill_kv_nil(&prev) || rill_kv_cmp(&prev, kv) < 0)) { - if 
(!coder_encode(&encoder, kv)) goto fail_coder; + if (!coder_encode(coder, kv)) goto fail_decoder; prev = *kv; } - if (!coder_decode(decoder, kv)) goto fail_coder; + if (!coder_decode(decoder, kv)) goto fail_decoder; if (rill_unlikely(rill_kv_nil(kv))) { memmove(kvs + target, kvs + target + 1, @@ -456,27 +570,94 @@ bool rill_store_merge( } } - store.head->keys = encoder.keys; - store.head->pairs = encoder.pairs; + return true; - if (!coder_finish(&encoder)) goto fail_coder; - writer_close(&store, indexer, store.head->data_off + coder_off(&encoder)); + fail_decoder: + return false; +} + +bool rill_store_merge( + const char *file, + rill_ts_t ts, size_t quant, + struct rill_store **list, size_t list_len) +{ + assert(list_len > 1); + + size_t pairs = 0; + struct vals *vals = NULL; + struct vals *invert_vals = NULL; for (size_t i = 0; i < list_len; ++i) { - if (list[i]) vma_dont_need(list[i]); + if (!list[i]) continue; + vma_will_need(list[i]); + + struct vals *ret = vals_merge_from_index(vals, list[i]->index_b); + struct vals *iret = vals_merge_from_index(invert_vals, list[i]->index_a); + + pairs += list[i]->head->pairs; + + if (ret) vals = ret; else goto fail_vals; + if (iret) invert_vals = iret; else goto fail_invert_vals; } - coder_close(&encoder); - indexer_free(indexer); + struct rill_store store = {0}; + if (!writer_open(&store, file, vals, invert_vals, + pairs, ts, quant)) { + rill_fail("unable to create '%s'", file); + goto fail_open; + } + + init_store_offsets(&store, vals->len, invert_vals->len); + + struct indexer *indexer_a = indexer_alloc(invert_vals->len); + if (!indexer_a) goto fail_index_a; + + struct indexer *indexer_b = indexer_alloc(vals->len); + if (!indexer_b) goto fail_index_b; + + struct encoder encoder_a = store_encoder(&store, indexer_a, vals, store.head->data_a_off); + if (!merge_with_config(&encoder_a, list, list_len, rill_col_a)) goto fail_merge_with_config_a; + if (!coder_finish(&encoder_a)) goto fail_coder_a; + + 
prepare_col_b_offsets(&store, &encoder_a); + + struct encoder encoder_b = + store_encoder(&store, indexer_b, invert_vals, store.head->data_b_off); + + if (!merge_with_config(&encoder_b, list, list_len, rill_col_b)) goto fail_merge_with_config_b; + if (!coder_finish(&encoder_b)) goto fail_coder_b; + + writer_flush_indices(&store, indexer_a, indexer_b); + + store.head->pairs = encoder_a.pairs; + + writer_close(&store, store.head->data_b_off + coder_off(&encoder_b)); + + for (size_t i = 0; i < list_len; ++i) + if (list[i]) vma_dont_need(list[i]); + + coder_close(&encoder_a); + coder_close(&encoder_b); + indexer_free(indexer_a); + indexer_free(indexer_b); free(vals); + free(invert_vals); return true; - fail_coder: - coder_close(&encoder); - writer_close(&store, indexer, 0); - indexer_free(indexer); - fail_index: + fail_coder_b: + coder_close(&encoder_b); + fail_coder_a: + fail_merge_with_config_a: + coder_close(&encoder_a); + writer_close(&store, 0); + indexer_free(indexer_a); + fail_merge_with_config_b: + fail_index_b: + free(indexer_a); + fail_index_a: fail_open: + fail_invert_vals: + free(invert_vals); fail_vals: free(vals); return false; @@ -507,14 +688,12 @@ size_t rill_store_quant(const struct rill_store *store) return store->head->quant; } -size_t rill_store_keys(const struct rill_store *store) +size_t rill_store_keys_count(const struct rill_store *store, enum rill_col column) { - return store->index->len; -} - -size_t rill_store_vals(const struct rill_store *store) -{ - return store->vals->len; + assert(column == rill_col_a || column == rill_col_b); + const struct index* + ix = column == rill_col_a ? 
store->index_a : store->index_b; + return ix->len; } size_t rill_store_pairs(const struct rill_store *store) @@ -522,18 +701,28 @@ size_t rill_store_pairs(const struct rill_store *store) return store->head->pairs; } +size_t rill_store_index_len(const struct rill_store *store, enum rill_col col) +{ + assert(col == rill_col_a || col == rill_col_b); + return col == rill_col_a ? store->index_a->len : store->index_b->len; +} -struct rill_pairs *rill_store_query_key( - struct rill_store *store, rill_key_t key, struct rill_pairs *out) +static struct rill_pairs *store_query_key_or_value( + struct rill_store *store, + rill_key_t key, + struct rill_pairs *out, + enum rill_col column) { struct rill_pairs *result = out; - size_t key_idx = 0; uint64_t off = 0; - if (!index_find(store->index, key, &key_idx, &off)) return result; + struct index *ix = + column == rill_col_a ? store->index_a : store->index_b; + + if (!index_find(ix, key, &key_idx, &off)) return result; struct rill_kv kv = {0}; - struct decoder coder = store_decoder_at(store, key_idx, off); + struct decoder coder = store_decoder_at(store, key_idx, off, column); while (true) { if (!coder_decode(&coder, &kv)) goto fail; @@ -551,92 +740,31 @@ struct rill_pairs *rill_store_query_key( return NULL; } - -struct rill_pairs *rill_store_scan_keys( - struct rill_store *store, - const rill_key_t *keys, size_t len, - struct rill_pairs *out) +struct rill_pairs *rill_store_query_key( + struct rill_store *store, rill_val_t key, struct rill_pairs *out) { - vma_will_need(store); - - struct rill_kv kv = {0}; - struct rill_pairs *result = out; - struct decoder coder = store_decoder(store); - - while (true) { - if (!coder_decode(&coder, &kv)) goto fail; - if (rill_kv_nil(&kv)) break; - - for (size_t j = 0; j < len; ++j) { - if (kv.key != keys[j]) continue; - - result = rill_pairs_push(result, kv.key, kv.val); - if (!result) goto fail; - } - } - - vma_dont_need(store); - return result; - - fail: - // \todo potentially leaking result - 
vma_dont_need(store); - return NULL; + return store_query_key_or_value(store, key, out, rill_col_a); } -struct rill_pairs *rill_store_scan_vals( - struct rill_store *store, - const rill_val_t *vals, size_t len, - struct rill_pairs *out) +struct rill_pairs *rill_store_query_value( + struct rill_store *store, rill_val_t key, struct rill_pairs *out) { - if (!vals_contains(store->vals, vals, len)) return out; - - vma_will_need(store); - - struct rill_kv kv = {0}; - struct rill_pairs *result = out; - struct decoder coder = store_decoder(store); - - size_t i = 0; - rill_key_t current = 0; - - while (true) { - if (!coder_decode(&coder, &kv)) goto fail; - if (rill_kv_nil(&kv)) break; - - if (current != kv.key) { i = 0; current = kv.key; } - while (i < len && vals[i] < kv.val) ++i; - - if (vals[i] == kv.val) { - result = rill_pairs_push(result, kv.key, kv.val); - if (!result) goto fail; - } - } - - vma_dont_need(store); - return result; - - fail: - // \todo potentially leaking result - vma_dont_need(store); - return NULL; + return store_query_key_or_value(store, key, out, rill_col_b); } -size_t rill_store_dump_vals( - const struct rill_store *store, rill_val_t *out, size_t cap) +size_t rill_store_keys( + const struct rill_store *store, rill_key_t *out, size_t cap, + enum rill_col column) { - size_t len = cap < store->vals->len ? cap : store->vals->len; - memcpy(out, store->vals->data, len * sizeof(*out)); - return len; -} + assert(column == rill_col_a || column == rill_col_b); -size_t rill_store_dump_keys( - const struct rill_store *store, rill_key_t *out, size_t cap) -{ - size_t len = cap < store->index->len ? cap : store->index->len; + const struct index* ix = + column == rill_col_a ? store->index_a : store->index_b; + + size_t len = cap < ix->len ? 
cap : ix->len; for (size_t i = 0; i < len; ++i) - out[i] = store->index->data[i].key; + out[i] = ix->data[i].key; return len; } @@ -644,12 +772,13 @@ size_t rill_store_dump_keys( struct rill_store_it { struct decoder decoder; }; -struct rill_store_it *rill_store_begin(struct rill_store *store) +struct rill_store_it *rill_store_begin( + struct rill_store *store, enum rill_col column) { struct rill_store_it *it = calloc(1, sizeof(*it)); if (!it) return NULL; - it->decoder = store_decoder(store); + it->decoder = store_decoder(store, column); return it; } @@ -662,3 +791,30 @@ bool rill_store_it_next(struct rill_store_it *it, struct rill_kv *kv) { return coder_decode(&it->decoder, kv); } + +struct rill_space* rill_store_space(struct rill_store* store) +{ + struct rill_space *ret = calloc(1, sizeof(*ret)); + + *ret = (struct rill_space) { + .header_bytes = sizeof(*store->head), + .index_bytes[rill_col_a] = store->head->index_b_off - store->head->index_a_off, + .index_bytes[rill_col_b] = store->head->data_a_off - store->head->index_b_off, + .pairs_bytes[rill_col_a] = store->head->data_b_off - store->head->data_a_off, + .pairs_bytes[rill_col_b] = store->vma_len - store->head->data_b_off, + }; + + return ret; +} + +size_t rill_store_space_header(struct rill_space* space) { + return space->header_bytes; +} +size_t rill_store_space_index(struct rill_space* space, enum rill_col col) { + assert(col == rill_col_a || col == rill_col_b); + return space->index_bytes[col]; +} +size_t rill_store_space_pairs(struct rill_space* space, enum rill_col col) { + assert(col == rill_col_a || col == rill_col_b); + return space->pairs_bytes[col]; +} diff --git a/src/vals.c b/src/vals.c index bb078d9..543ddc5 100644 --- a/src/vals.c +++ b/src/vals.c @@ -15,17 +15,6 @@ struct rill_packed vals typedef struct htable vals_rev_t; -static size_t vals_cap(struct vals *vals) -{ - return sizeof(*vals) + vals->len * sizeof(vals->data[0]); -} - -static rill_val_t vals_itov(struct vals *vals, size_t index) 
-{ - assert(index <= vals->len); - return vals->data[index - 1]; -} - static size_t vals_vtoi(vals_rev_t *rev, rill_val_t val) { if (!val) return 0; // \todo giant hack for coder_finish @@ -71,53 +60,17 @@ static void vals_compact(struct vals *vals) vals->len = j + 1; } -static struct vals *vals_from_pairs(struct rill_pairs *pairs) +static struct vals *vals_cols_from_pairs(struct rill_pairs *pairs, enum rill_col col) { struct vals *vals = calloc(1, sizeof(*vals) + sizeof(vals->data[0]) * pairs->len); + if (!vals) return NULL; vals->len = pairs->len; for (size_t i = 0; i < pairs->len; ++i) - vals->data[i] = pairs->data[i].val; - - vals_compact(vals); - return vals; -} - -static struct vals *vals_merge(struct vals *vals, struct vals *merge) -{ - if (!vals) { - size_t len = sizeof(*vals) + sizeof(vals->data[0]) * merge->len; - vals = calloc(1, len); - memcpy(vals, merge, len); - return vals; - } - - vals = realloc(vals, - sizeof(*vals) + sizeof(vals->data[0]) * (vals->len + merge->len)); - if (!vals) { - rill_fail("unable to allocate memory for vals: %lu + %lu", - vals->len, merge->len); - return NULL; - } - - memcpy( vals->data + vals->len, - merge->data, - sizeof(merge->data[0]) * merge->len); - vals->len += merge->len; + vals->data[i] = col == rill_col_a ? pairs->data[i].key : pairs->data[i].val; vals_compact(vals); return vals; } - -static bool vals_contains( - struct vals *vals, const rill_val_t *items, size_t len) -{ - for (size_t i = 0, j = 0; i < len; ++i) { - while (j < vals->len && vals->data[j] < items[i]) j++; - if (vals->data[j] == items[i]) return true; - } - - return false; -} diff --git a/test/coder_test.c b/test/coder_test.c index 7a6ebdd..f3a0b14 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -64,6 +64,16 @@ bool test_leb128(void) make_vals_impl(vals, sizeof(vals) / sizeof(vals[0])); \ }) +#define make_index(...) 
\ + ({ \ + rill_val_t kvs[] = { __VA_ARGS__ }; \ + size_t len = sizeof(kvs) / sizeof(kvs[0]); \ + struct indexer* indexer = indexer_alloc(len); \ + for (size_t i = 0; i < len; ++i) \ + indexer_put(indexer, kvs[i], 1); \ + make_index_impl(indexer, len); \ + }) + static struct vals *make_vals_impl(rill_val_t *list, size_t len) { struct vals *vals = calloc(1, sizeof(struct vals) + sizeof(list[0]) * len); @@ -75,9 +85,18 @@ static struct vals *make_vals_impl(rill_val_t *list, size_t len) return vals; } +static struct index* make_index_impl(struct indexer* indexer, size_t len) +{ + const size_t cap = indexer_cap(len); + struct index* index = calloc(1, cap); + indexer_write(indexer, index, cap); + indexer_free(indexer); + return index; +} + static void check_vals(struct rill_pairs *pairs, struct vals *exp) { - struct vals *vals = vals_from_pairs(pairs); + struct vals *vals = vals_cols_from_pairs(pairs, rill_col_b); assert(vals->len == exp->len); for (size_t i = 0; i < exp->len; ++i) @@ -88,7 +107,7 @@ static void check_vals(struct rill_pairs *pairs, struct vals *exp) for (size_t i = 0; i < exp->len; ++i) { size_t index = vals_vtoi(&rev, exp->data[i]); - assert(vals_itov(vals, index) == exp->data[i]); + assert(vals->data[index - 1] == exp->data[i]); } free(vals); @@ -97,9 +116,9 @@ static void check_vals(struct rill_pairs *pairs, struct vals *exp) htable_reset(&rev); } -static void check_vals_merge(struct vals *a, struct vals *b, struct vals *exp) +static void check_vals_merge(struct vals *a, struct index *b, struct vals *exp) { - struct vals *result = vals_merge(a, b); + struct vals *result = vals_merge_from_index(a, b); assert(result->len == exp->len); for (size_t i = 0; i < exp->len; ++i) @@ -123,11 +142,14 @@ bool test_vals(void) check_vals(make_pair(kv(2, 20), kv(1, 10)), make_vals(10, 20)); check_vals(make_pair(kv(1, 20), kv(1, 10)), make_vals(10, 20)); - check_vals_merge(make_vals(10), make_vals(10), make_vals(10)); - check_vals_merge(make_vals(10), make_vals(20), 
make_vals(10, 20)); + check_vals_merge(make_vals(10), make_index(10), make_vals(10)); + check_vals_merge(make_vals(10), make_index(20), make_vals(10, 20)); - check_vals_merge(make_vals(10, 20), make_vals(20), make_vals(10, 20)); - check_vals_merge(make_vals(10, 20), make_vals(20, 30), make_vals(10, 20, 30)); + check_vals_merge(NULL, make_index(10, 20), make_vals(10, 20)); + check_vals_merge(make_vals(10, 20), make_index(20), make_vals(10, 20)); + check_vals_merge(make_vals(10, 20), make_index(20, 30), make_vals(10, 20, 30)); + check_vals_merge(make_vals(10, 20), make_index(20, 30, 40, 50, 60), + make_vals(10, 20, 30, 40, 50, 60)); return true; } @@ -141,29 +163,60 @@ void check_coder(struct rill_pairs *pairs) { rill_pairs_compact(pairs); - size_t cap = (pairs->len + 1) * (sizeof(pairs->data[0]) + 3); + struct rill_pairs *inverted = rill_pairs_new(pairs->len); + for (size_t i = 0; i < pairs->len; ++i) + rill_pairs_push(inverted, pairs->data[i].val, pairs->data[i].key); + rill_pairs_compact(inverted); + + size_t cap = + (pairs->len + 1) * (sizeof(pairs->data[0]) + 3) + + (inverted->len + 1) * (sizeof(inverted->data[0]) + 3); + uint8_t *buffer = calloc(1, cap); - struct vals *vals = vals_from_pairs(pairs); + struct vals *vals_a = vals_cols_from_pairs(pairs, rill_col_b); + struct vals *vals_b = vals_cols_from_pairs(inverted, rill_col_b); - size_t index_cap = sizeof(struct index) + pairs->len * sizeof(struct index_kv); - struct index *index = calloc(1, index_cap); + const size_t index_a_cap = sizeof(struct index) + pairs->len * sizeof(struct index_kv); + const size_t index_b_cap = sizeof(struct index) + inverted->len * sizeof(struct index_kv); - size_t len = 0; + struct index *index_a = calloc(1, index_a_cap); + struct index *index_b = calloc(1, index_b_cap); + + size_t len = 0, len_a = 0, len_b = 0; { - struct indexer *indexer = indexer_alloc(pairs->len); - struct encoder coder = make_encoder(buffer, buffer + cap, vals, indexer); + struct indexer *indexer_a = 
indexer_alloc(pairs->len); + struct indexer *indexer_b = indexer_alloc(inverted->len); + + struct encoder coder_a = + make_encoder(buffer, buffer + cap, vals_a, indexer_a); + for (size_t i = 0; i < pairs->len; ++i) - assert(coder_encode(&coder, &pairs->data[i])); - assert(coder_finish(&coder)); + assert(coder_encode(&coder_a, &pairs->data[i])); + assert(coder_finish(&coder_a)); + + len_a = len = coder_a.it - buffer; + assert(len <= cap); + + struct encoder coder_b = + make_encoder(buffer + len_a, buffer + cap, vals_b, indexer_b); + for (size_t i = 0; i < inverted->len; ++i) + assert(coder_encode(&coder_b, &inverted->data[i])); + assert(coder_finish(&coder_b)); - len = coder.it - buffer; + len_b = len = coder_b.it - buffer; assert(len <= cap); - indexer_write(indexer, index, index_cap); - indexer_free(indexer); + indexer_write(indexer_a, index_a, index_a_cap); + indexer_write(indexer_b, index_b, index_b_cap); + indexer_free(indexer_a); + indexer_free(indexer_b); + + coder_close(&coder_a); + coder_close(&coder_b); } if (false) { // hex dump for debuging + printf("offset b: %zu\n", len_a); printf("buffer: start=%p, len=%lu\n", (void *) buffer, len); for (size_t i = 0; i < cap;) { printf("%6p: ", (void *) i); @@ -175,8 +228,11 @@ void check_coder(struct rill_pairs *pairs) } } - { - struct decoder coder = make_decoder(buffer, buffer + len, vals, index); + { /* Coder A */ + struct decoder coder = + make_decoder_at(buffer, + buffer + len_a, + index_b, index_a, 0); struct rill_kv kv = {0}; for (size_t i = 0; i < pairs->len; ++i) { @@ -188,23 +244,68 @@ void check_coder(struct rill_pairs *pairs) assert(rill_kv_nil(&kv)); } - for (size_t i = 0; i < pairs->len; ++i) { - size_t key_idx; uint64_t off; - assert(index_find(index, pairs->data[i].key, &key_idx, &off)); - struct decoder coder = make_decoder_at( - buffer + off, buffer + len, vals, index, key_idx); + { /* Coder B */ + struct decoder coder = + make_decoder_at(buffer + len_a, + buffer + len_b, + index_a, index_b, 0); 
struct rill_kv kv = {0}; - do { + for (size_t i = 0; i < pairs->len; ++i) { assert(coder_decode(&coder, &kv)); - assert(kv.key == pairs->data[i].key); - } while (kv.val != pairs->data[i].val); + assert(rill_kv_cmp(&kv, &inverted->data[i]) == 0); + } + + assert(coder_decode(&coder, &kv)); + assert(rill_kv_nil(&kv)); + } + + { /* Decode A */ + for (size_t i = 0; i < pairs->len; ++i) { + size_t key_idx = 0; + uint64_t off = 0; + + assert(index_find(index_a, pairs->data[i].key, &key_idx, &off)); + + struct decoder coder = make_decoder_at( + buffer + off, buffer + len, index_b, index_a, key_idx); + + struct rill_kv kv = {0}; + do { + assert(coder_decode(&coder, &kv)); + assert(kv.key == pairs->data[i].key); + } while (kv.val != pairs->data[i].val); + } + } + + { /* Decode B */ + for (size_t i = 0; i < inverted->len; ++i) { + size_t key_idx = 0; + uint64_t off = 0; + + assert(index_find(index_b, inverted->data[i].key, &key_idx, &off)); + + struct decoder coder = make_decoder_at( + buffer + len_a + off, buffer + len_b, + index_a, index_b, + key_idx); + + struct rill_kv kv = {0}; + do { + assert(coder_decode(&coder, &kv)); + assert(kv.key && kv.val); + assert(kv.key == inverted->data[i].key); + } while (kv.val != inverted->data[i].val); + } } free(buffer); - free(index); - free(vals); + free(index_a); + free(index_b); + free(vals_a); + free(vals_b); free(pairs); + free(inverted); } diff --git a/test/query_test.c b/test/query_test.c new file mode 100644 index 0000000..760c146 --- /dev/null +++ b/test/query_test.c @@ -0,0 +1,69 @@ +#include "test.h" + +bool test_sequence() +{ + const char* name = "test.query.sequence.rill"; + unlink(name); + + const size_t max_keys = 1000; + const size_t max_values = 100; + struct rill_pairs* pairs = rill_pairs_new(1024); + + for (size_t i = 1; i < max_keys + 1; ++i) + for (size_t j = 1; j < max_values + 1; ++j) + pairs = rill_pairs_push(pairs, i, j); + + rill_store_write(name, 666, 666, pairs); + rill_pairs_free(pairs); + + struct 
rill_store* store = rill_store_open(name); + + { + struct rill_pairs* result = rill_pairs_new(256); + + for (size_t i = 1; i < max_keys + 1; ++i) { + result = rill_store_query_key(store, i, result); + rill_pairs_compact(result); + + assert(result->len == max_values); + for (size_t x = 0; x < max_values; ++x) + assert(result->data[x].key == i && + result->data[x].val == x + 1); + + rill_pairs_clear(result); + } + + rill_pairs_free(result); + } + + { + struct rill_pairs* result = rill_pairs_new(256); + + for (size_t i = 1; i < max_values + 1; ++i) { + result = rill_store_query_value(store, i, result); + rill_pairs_compact(result); + + assert(result->len == max_keys); + for (size_t x = 0; x < max_keys; ++x) + assert(result->data[x].key == i && + result->data[x].val == x + 1); + + rill_pairs_clear(result); + } + + rill_pairs_free(result); + } + + rill_store_close(store); + + unlink(name); + + unlink(name); + return true; +} + +int main(int argc, char **argv) +{ + (void) argc, (void) argv; + return test_sequence() ? 
0 : 1; +} diff --git a/test/rill_generate.c b/test/rill_generate.c new file mode 100644 index 0000000..f04ea70 --- /dev/null +++ b/test/rill_generate.c @@ -0,0 +1,64 @@ +#include "test.h" + +static struct rill_store *make_store( + const char *name, struct rill_pairs *pairs) +{ + unlink(name); + assert(rill_store_write(name, 0, 0, pairs)); + return 0; +} + +bool generate() +{ + struct rng rng = rng_make(0); + struct rill_pairs *pairs = make_rng_pairs(&rng); + make_store("test.store.generated", pairs); + rill_pairs_free(pairs); + return true; +} + +bool generate_simple() +{ + const size_t len = 20; + struct rill_pairs *pairs = rill_pairs_new(len); + + for (size_t i = 0; i < len; ++i) { + rill_pairs_push(pairs, i + 1, 3 + i * 20); + } + + make_store("test.store.simple", pairs); + rill_pairs_free(pairs); + + return true; +} + +bool generate_with_multiple_values() +{ + const size_t len = 100; + struct rill_pairs *pairs = rill_pairs_new(len); + + for (size_t i = 0; i < 20; ++i) + for (size_t j = 1; j < 5; ++j) + rill_pairs_push(pairs, i + 1, j * 100); + + make_store("test.store.multvals", pairs); + rill_pairs_free(pairs); + + return true; +} + +// ----------------------------------------------------------------------------- +// main +// ----------------------------------------------------------------------------- + +int main(int argc, char **argv) +{ + (void) argc, (void) argv; + + (void) generate(); + (void) generate_simple(); + (void) generate_with_multiple_values(); + + printf("generated some rill database(s)\n"); + return 0; +} diff --git a/test/store_test.c b/test/store_test.c index da234d9..654a65d 100644 --- a/test/store_test.c +++ b/test/store_test.c @@ -58,25 +58,36 @@ static struct list *make_rng_list(struct rng *rng, uint64_t max) // query_key // ----------------------------------------------------------------------------- +static struct rill_pairs* duplicate_pairs(struct rill_pairs* pairs) +{ + struct rill_pairs* copy = rill_pairs_new(pairs->len); + for 
(size_t i = 0; i < pairs->len; ++i) + rill_pairs_push(copy, pairs->data[i].key, pairs->data[i].val); + return copy; +} + static void check_query_key(struct rill_pairs *pairs) { + struct rill_pairs *expected = duplicate_pairs(pairs); struct rill_store *store = make_store("test.store.query_key", pairs); - struct rill_pairs *result = rill_pairs_new(128); + rill_pairs_compact(pairs); + rill_pairs_compact(expected); - for (size_t i = 0; i < pairs->len;) { + for (size_t i = 0; i < expected->len;) { rill_pairs_clear(result); - result = rill_store_query_key(store, pairs->data[i].key, result); + result = rill_store_query_key(store, expected->data[i].key, result); - assert(pairs->len - i >= result->len); + assert(expected->len - i >= result->len); for (size_t j = 0; j < result->len; ++j, ++i) - assert(!rill_kv_cmp(&pairs->data[i], &result->data[j])); + assert(!rill_kv_cmp(&expected->data[i], &result->data[j])); } free(result); - free(store); - free(pairs); + rill_store_close(store); + rill_pairs_free(pairs); + rill_pairs_free(expected); } bool test_query_key(void) @@ -104,7 +115,8 @@ static void check_scan_keys( struct rill_pairs *result = rill_pairs_new(128); rill_pairs_compact(pairs); - result = rill_store_scan_keys(store, keys->data, keys->len, result); + for (size_t i = 0; i < keys->len; ++i) + result = rill_store_query_key(store, keys->data[i], result); struct rill_pairs *exp = rill_pairs_new(128); for (size_t i = 0; i < pairs->len; ++i) { @@ -129,46 +141,52 @@ bool test_scan_keys(void) { struct rill_pairs *pairs = make_pair(kv(2, 10)); + struct rill_pairs *copy = duplicate_pairs(pairs); struct rill_store *store = make_store(name, pairs); - check_scan_keys(store, pairs, make_list(1)); - check_scan_keys(store, pairs, make_list(2)); - check_scan_keys(store, pairs, make_list(3)); - check_scan_keys(store, pairs, make_list(1, 2)); - check_scan_keys(store, pairs, make_list(2, 3)); - check_scan_keys(store, pairs, make_list(1, 3)); + check_scan_keys(store, copy, make_list(1)); 
+ check_scan_keys(store, copy, make_list(2)); + check_scan_keys(store, copy, make_list(3)); + check_scan_keys(store, copy, make_list(1, 2)); + check_scan_keys(store, copy, make_list(2, 3)); + check_scan_keys(store, copy, make_list(1, 3)); - free(store); + rill_store_close(store); + free(copy); free(pairs); } { struct rill_pairs *pairs = make_pair(kv(2, 10), kv(3, 10), kv(3, 20), kv(4, 30)); + struct rill_pairs *copy = duplicate_pairs(pairs); struct rill_store *store = make_store(name, pairs); - check_scan_keys(store, pairs, make_list(1)); - check_scan_keys(store, pairs, make_list(3)); - check_scan_keys(store, pairs, make_list(5)); - check_scan_keys(store, pairs, make_list(1, 3)); - check_scan_keys(store, pairs, make_list(3, 5)); - check_scan_keys(store, pairs, make_list(2, 3)); - check_scan_keys(store, pairs, make_list(2, 3, 4)); + check_scan_keys(store, copy, make_list(1)); + check_scan_keys(store, copy, make_list(3)); + check_scan_keys(store, copy, make_list(5)); + check_scan_keys(store, copy, make_list(1, 3)); + check_scan_keys(store, copy, make_list(3, 5)); + check_scan_keys(store, copy, make_list(2, 3)); + check_scan_keys(store, copy, make_list(2, 3, 4)); - free(store); + rill_store_close(store); + free(copy); free(pairs); } { struct rng rng = rng_make(0); struct rill_pairs *pairs = make_rng_pairs(&rng); + struct rill_pairs *copy = duplicate_pairs(pairs); struct rill_store *store = make_store(name, pairs); for (size_t iterations = 0; iterations < 10; ++iterations) - check_scan_keys(store, pairs, make_rng_list(&rng, rng_range_key)); + check_scan_keys(store, copy, make_rng_list(&rng, rng_range_key)); - free(store); + rill_store_close(store); free(pairs); + free(copy); } return true; @@ -185,16 +203,20 @@ static void check_scan_vals( struct rill_pairs *result = rill_pairs_new(128); rill_pairs_compact(pairs); - result = rill_store_scan_vals(store, vals->data, vals->len, result); + for (size_t i = 0; i < vals->len; ++i) + result = rill_store_query_value(store, 
vals->data[i], result); struct rill_pairs *exp = rill_pairs_new(128); for (size_t i = 0; i < pairs->len; ++i) { for (size_t j = 0; j < vals->len; ++j) { struct rill_kv *kv = &pairs->data[i]; - if (kv->val == vals->data[j]) exp = rill_pairs_push(exp, kv->key, kv->val); + if (kv->val == vals->data[j]) + exp = rill_pairs_push(exp, kv->val, kv->key); } } + rill_pairs_compact(exp); + assert(exp->len == result->len); for (size_t i = 0; i < exp->len; ++i) assert(!rill_kv_cmp(&exp->data[i], &result->data[i])); @@ -210,47 +232,52 @@ bool test_scan_vals(void) { struct rill_pairs *pairs = make_pair(kv(2, 20)); + struct rill_pairs *copy = duplicate_pairs(pairs); struct rill_store *store = make_store(name, pairs); - check_scan_vals(store, pairs, make_list(10)); - check_scan_vals(store, pairs, make_list(20)); - check_scan_vals(store, pairs, make_list(30)); - check_scan_vals(store, pairs, make_list(10, 20)); - check_scan_vals(store, pairs, make_list(20, 30)); - check_scan_vals(store, pairs, make_list(10, 30)); + check_scan_vals(store, copy, make_list(10)); + check_scan_vals(store, copy, make_list(20)); + check_scan_vals(store, copy, make_list(30)); + check_scan_vals(store, copy, make_list(10, 20)); + check_scan_vals(store, copy, make_list(20, 30)); + check_scan_vals(store, copy, make_list(10, 30)); - free(store); + rill_store_close(store); free(pairs); + free(copy); } - { struct rill_pairs *pairs = make_pair(kv(2, 20), kv(3, 20), kv(3, 30), kv(4, 40)); + struct rill_pairs *copy = duplicate_pairs(pairs); struct rill_store *store = make_store(name, pairs); - check_scan_vals(store, pairs, make_list(10)); - check_scan_vals(store, pairs, make_list(20)); - check_scan_vals(store, pairs, make_list(30)); - check_scan_vals(store, pairs, make_list(50)); - check_scan_vals(store, pairs, make_list(10, 20)); - check_scan_vals(store, pairs, make_list(20, 40)); - check_scan_vals(store, pairs, make_list(20, 50)); - check_scan_vals(store, pairs, make_list(20, 30, 40)); + check_scan_vals(store, 
copy, make_list(10)); + check_scan_vals(store, copy, make_list(20)); + check_scan_vals(store, copy, make_list(30)); + check_scan_vals(store, copy, make_list(50)); + check_scan_vals(store, copy, make_list(10, 20)); + check_scan_vals(store, copy, make_list(20, 40)); + check_scan_vals(store, copy, make_list(20, 50)); + check_scan_vals(store, copy, make_list(20, 30, 40)); - free(store); + rill_store_close(store); free(pairs); + free(copy); } { struct rng rng = rng_make(0); struct rill_pairs *pairs = make_rng_pairs(&rng); + struct rill_pairs *copy = duplicate_pairs(pairs); struct rill_store *store = make_store(name, pairs); for (size_t iterations = 0; iterations < 10; ++iterations) - check_scan_vals(store, pairs, make_rng_list(&rng, rng_range_val)); + check_scan_vals(store, copy, make_rng_list(&rng, rng_range_val)); - free(store); + rill_store_close(store); free(pairs); + free(copy); } return true; diff --git a/test/test.h b/test/test.h index fd82104..af02df0 100644 --- a/test/test.h +++ b/test/test.h @@ -57,6 +57,8 @@ struct rill_pairs *make_rng_pairs(struct rng *rng) assert(pairs); } + rill_pairs_compact(pairs); + return pairs; } From bbeba0b370ef11d8fdc98292b506add3a66813b0 Mon Sep 17 00:00:00 2001 From: Simon Symeonidis Date: Wed, 18 Apr 2018 17:11:42 -0400 Subject: [PATCH 63/91] Readd rill query all --- src/query.c | 34 ++++++++++++++++++++++++++++++++++ src/rill.h | 4 ++++ 2 files changed, 38 insertions(+) diff --git a/src/query.c b/src/query.c index 6d60230..9bcc91e 100644 --- a/src/query.c +++ b/src/query.c @@ -136,3 +136,37 @@ struct rill_pairs *rill_query_vals( // \todo potentially leaking result return NULL; } + +struct rill_pairs *rill_query_all( + const struct rill_query *query, enum rill_col col) +{ + struct rill_pairs *result = rill_pairs_new(1); + for (size_t i = 0; i < query->len; ++i) { + size_t pairs = rill_store_pairs(query->list[i]); + result = rill_pairs_reserve(result, result->len + pairs); + if (!result) goto fail_scan; + + struct rill_store_it 
*it = rill_store_begin(query->list[i], col); + if (!it) goto fail_scan; + + struct rill_kv kv; + while (true) { + if (!rill_store_it_next(it, &kv)) { + rill_store_it_free(it); + goto fail_scan; + } + if (rill_kv_nil(&kv)) break; + + result = rill_pairs_push(result, kv.key, kv.val); + } + rill_store_it_free(it); + } + + rill_pairs_compact(result); + return result; + + fail_scan: + free(result); + return NULL; + +} diff --git a/src/rill.h b/src/rill.h index 8bd9e3e..9ec3e56 100644 --- a/src/rill.h +++ b/src/rill.h @@ -197,6 +197,10 @@ struct rill_pairs *rill_query_vals( const rill_val_t *vals, size_t len, struct rill_pairs *out); +struct rill_pairs *rill_query_all( + const struct rill_query *query, enum rill_col col); + + // ----------------------------------------------------------------------------- // misc // ----------------------------------------------------------------------------- From a06e28ee3c6d056ed608b1f4c320b46a8bc324ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 27 Apr 2018 12:10:10 -0400 Subject: [PATCH 64/91] Remove the indexer Now that we have the index for both columns we can just use the region to build the index. Simplifies the code and avoids a bunch of unecessary work. 
--- src/coder.c | 8 ++-- src/index.c | 53 ++------------------------ src/store.c | 81 +++++++++------------------------------ test/coder_test.c | 93 +++++++++++++++++++++++++-------------------- test/indexer_test.c | 68 ++++++++++++++++++--------------- 5 files changed, 116 insertions(+), 187 deletions(-) diff --git a/src/coder.c b/src/coder.c index 94781ac..49f4d19 100644 --- a/src/coder.c +++ b/src/coder.c @@ -59,7 +59,7 @@ struct encoder rill_key_t key; vals_rev_t rev; - struct indexer *indexer; + struct index *index; size_t pairs; }; @@ -119,7 +119,7 @@ static bool coder_encode(struct encoder *coder, const struct rill_kv *kv) if (!coder_write_sep(coder)) return false; } - indexer_put(coder->indexer, kv->key, coder_off(coder)); + index_put(coder->index, kv->key, coder_off(coder)); coder->key = kv->key; coder->keys++; } @@ -146,11 +146,11 @@ static struct encoder make_encoder( uint8_t *start, uint8_t *end, struct vals *vals, - struct indexer *indexer) + struct index *index) { struct encoder coder = { .it = start, .start = start, .end = end, - .indexer = indexer, + .index = index, }; vals_rev_make(vals, &coder.rev); diff --git a/src/index.c b/src/index.c index b3b5a92..ebe510e 100644 --- a/src/index.c +++ b/src/index.c @@ -21,60 +21,15 @@ struct rill_packed index struct index_kv data[]; }; -// ----------------------------------------------------------------------------- -// indexer -// ----------------------------------------------------------------------------- - -struct indexer -{ - size_t len, cap; - struct index_kv kvs[]; -}; - -static size_t indexer_cap(size_t pairs) +static size_t index_cap(size_t pairs) { return sizeof(struct index) + pairs * sizeof(struct index_kv); } - -static struct indexer *indexer_alloc(size_t cap) -{ - assert(cap); - - struct indexer *indexer = calloc(1, sizeof(*indexer) + cap * sizeof(indexer->kvs[0])); - if (!indexer) { - rill_fail("unable to allocate indexer: %lu", cap); - return NULL; - } - - indexer->cap = cap; - return indexer; 
-} - -static void indexer_free(struct indexer *indexer) -{ - free(indexer); -} - -static void indexer_put(struct indexer *indexer, rill_key_t key, uint64_t off) +static void index_put(struct index *index, rill_key_t key, uint64_t off) { - indexer->kvs[indexer->len] = (struct index_kv) { .key = key, .off = off }; - indexer->len++; - - assert(indexer->len <= indexer->cap); -} - -static size_t indexer_write( - struct indexer *indexer, struct index *index, - size_t cap) -{ - index->len = indexer->len; - size_t len = indexer->len * sizeof(indexer->kvs[0]); - - assert(len <= cap); - - memcpy(index->data, indexer->kvs, len); - return sizeof(*index) + len; + index->data[index->len] = (struct index_kv) { .key = key, .off = off }; + index->len++; } // RIP fancy pants interpolation search :( diff --git a/src/store.c b/src/store.c index d84f07c..b279cc5 100644 --- a/src/store.c +++ b/src/store.c @@ -93,7 +93,7 @@ struct rill_space static struct encoder store_encoder( struct rill_store *store, - struct indexer *indexer, + struct index *index, struct vals* vals, uint64_t offset) { @@ -101,7 +101,7 @@ static struct encoder store_encoder( store->vma + offset, store->vma + store->vma_len, vals, - indexer); + index); } static struct decoder store_decoder_at( @@ -299,8 +299,8 @@ static bool writer_open( size_t len = sizeof(struct header) + - indexer_cap(inverted_vals->len) + - indexer_cap(vals->len) + + index_cap(inverted_vals->len) + + index_cap(vals->len) + coder_cap(vals->len, pairs) + coder_cap(inverted_vals->len, pairs); @@ -336,18 +336,6 @@ static bool writer_open( return false; } -static void writer_flush_indices( - struct rill_store *store, - struct indexer *indexer_a, - struct indexer *indexer_b) -{ - const size_t indexer_a_size = indexer_cap(indexer_a->len); - const size_t indexer_b_size = indexer_cap(indexer_b->len); - - indexer_write(indexer_a, store->index_a, indexer_a_size); - indexer_write(indexer_b, store->index_b, indexer_b_size); -} - static void writer_close( 
struct rill_store *store, size_t len) { @@ -379,8 +367,8 @@ static void init_store_offsets( struct rill_store* store, size_t vals, size_t inverse_vals) { store->head->index_a_off = sizeof(struct header); - store->head->index_b_off = store->head->index_a_off + indexer_cap(inverse_vals); - store->head->data_a_off = store->head->index_b_off + indexer_cap(vals); + store->head->index_b_off = store->head->index_a_off + index_cap(inverse_vals); + store->head->data_a_off = store->head->index_b_off + index_cap(vals); store->index_a = (void *) ((uintptr_t) store->vma + store->head->index_a_off); store->index_b = (void *) ((uintptr_t) store->vma + store->head->index_b_off); @@ -415,15 +403,10 @@ bool rill_store_write( goto fail_open; } - struct indexer *indexer_a = indexer_alloc(invert_vals->len); - if (!indexer_a) goto fail_indexer_a_alloc; - struct indexer *indexer_b = indexer_alloc(vals->len); - if (!indexer_b) goto fail_indexer_b_alloc; - init_store_offsets(&store, vals->len, invert_vals->len); struct encoder coder_a = - store_encoder(&store, indexer_a, vals, store.head->data_a_off); + store_encoder(&store, store.index_a, vals, store.head->data_a_off); for (size_t i = 0; i < pairs->len; ++i) { if (!coder_encode(&coder_a, &pairs->data[i])) goto fail_encode_a; @@ -433,7 +416,7 @@ bool rill_store_write( prepare_col_b_offsets(&store, &coder_a); struct encoder coder_b = - store_encoder(&store, indexer_b, invert_vals, store.head->data_b_off); + store_encoder(&store, store.index_b, invert_vals, store.head->data_b_off); rill_pairs_invert(pairs); rill_pairs_compact(pairs); /* recompact mainly for sort */ @@ -443,8 +426,6 @@ bool rill_store_write( } if (!coder_finish(&coder_b)) goto fail_encode_b; - writer_flush_indices(&store, indexer_a, indexer_b); - store.head->pairs = coder_a.pairs; writer_close(&store, store.head->data_b_off + coder_off(&coder_b)); @@ -452,9 +433,6 @@ bool rill_store_write( coder_close(&coder_a); coder_close(&coder_b); - indexer_free(indexer_a); - 
indexer_free(indexer_b); - free(vals); free(invert_vals); @@ -464,21 +442,16 @@ bool rill_store_write( coder_close(&coder_b); fail_encode_a: coder_close(&coder_a); - indexer_free(indexer_b); - fail_indexer_b_alloc: - indexer_free(indexer_a); - fail_indexer_a_alloc: writer_close(&store, 0); + fail_open: free(invert_vals); fail_invert_vals: - fail_open: free(vals); fail_vals: return false; } -static struct vals *vals_merge_from_index( - struct vals *vals, struct index *merge) +static struct vals *vals_merge_from_index(struct vals *vals, struct index *merge) { assert(merge); @@ -609,26 +582,18 @@ bool rill_store_merge( init_store_offsets(&store, vals->len, invert_vals->len); - struct indexer *indexer_a = indexer_alloc(invert_vals->len); - if (!indexer_a) goto fail_index_a; - - struct indexer *indexer_b = indexer_alloc(vals->len); - if (!indexer_b) goto fail_index_b; - - struct encoder encoder_a = store_encoder(&store, indexer_a, vals, store.head->data_a_off); - if (!merge_with_config(&encoder_a, list, list_len, rill_col_a)) goto fail_merge_with_config_a; + struct encoder encoder_a = + store_encoder(&store, store.index_a, vals, store.head->data_a_off); + if (!merge_with_config(&encoder_a, list, list_len, rill_col_a)) goto fail_coder_a; if (!coder_finish(&encoder_a)) goto fail_coder_a; prepare_col_b_offsets(&store, &encoder_a); struct encoder encoder_b = - store_encoder(&store, indexer_b, invert_vals, store.head->data_b_off); - - if (!merge_with_config(&encoder_b, list, list_len, rill_col_b)) goto fail_merge_with_config_b; + store_encoder(&store, store.index_b, invert_vals, store.head->data_b_off); + if (!merge_with_config(&encoder_b, list, list_len, rill_col_b)) goto fail_coder_b; if (!coder_finish(&encoder_b)) goto fail_coder_b; - writer_flush_indices(&store, indexer_a, indexer_b); - store.head->pairs = encoder_a.pairs; writer_close(&store, store.head->data_b_off + coder_off(&encoder_b)); @@ -638,28 +603,20 @@ bool rill_store_merge( coder_close(&encoder_a); 
coder_close(&encoder_b); - indexer_free(indexer_a); - indexer_free(indexer_b); free(vals); free(invert_vals); return true; - fail_coder_b: coder_close(&encoder_b); - fail_coder_a: - fail_merge_with_config_a: + fail_coder_b: coder_close(&encoder_a); + fail_coder_a: writer_close(&store, 0); - indexer_free(indexer_a); - fail_merge_with_config_b: - fail_index_b: - free(indexer_a); - fail_index_a: fail_open: - fail_invert_vals: free(invert_vals); - fail_vals: + fail_invert_vals: free(vals); + fail_vals: return false; } diff --git a/test/coder_test.c b/test/coder_test.c index f3a0b14..ba921d0 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -8,6 +8,16 @@ #include "store.c" +// ----------------------------------------------------------------------------- +// utils +// ----------------------------------------------------------------------------- + +static struct index *index_alloc(size_t pairs) +{ + return calloc(1, index_cap(pairs)); +} + + // ----------------------------------------------------------------------------- // leb128 // ----------------------------------------------------------------------------- @@ -68,10 +78,10 @@ bool test_leb128(void) ({ \ rill_val_t kvs[] = { __VA_ARGS__ }; \ size_t len = sizeof(kvs) / sizeof(kvs[0]); \ - struct indexer* indexer = indexer_alloc(len); \ + struct index *index = index_alloc(len); \ for (size_t i = 0; i < len; ++i) \ - indexer_put(indexer, kvs[i], 1); \ - make_index_impl(indexer, len); \ + index_put(index, kvs[i], 1); \ + index; \ }) static struct vals *make_vals_impl(rill_val_t *list, size_t len) @@ -85,15 +95,6 @@ static struct vals *make_vals_impl(rill_val_t *list, size_t len) return vals; } -static struct index* make_index_impl(struct indexer* indexer, size_t len) -{ - const size_t cap = indexer_cap(len); - struct index* index = calloc(1, cap); - indexer_write(indexer, index, cap); - indexer_free(indexer); - return index; -} - static void check_vals(struct rill_pairs *pairs, struct vals *exp) { struct vals *vals 
= vals_cols_from_pairs(pairs, rill_col_b); @@ -168,56 +169,46 @@ void check_coder(struct rill_pairs *pairs) rill_pairs_push(inverted, pairs->data[i].val, pairs->data[i].key); rill_pairs_compact(inverted); - size_t cap = - (pairs->len + 1) * (sizeof(pairs->data[0]) + 3) + - (inverted->len + 1) * (sizeof(inverted->data[0]) + 3); - - uint8_t *buffer = calloc(1, cap); struct vals *vals_a = vals_cols_from_pairs(pairs, rill_col_b); struct vals *vals_b = vals_cols_from_pairs(inverted, rill_col_b); - const size_t index_a_cap = sizeof(struct index) + pairs->len * sizeof(struct index_kv); - const size_t index_b_cap = sizeof(struct index) + inverted->len * sizeof(struct index_kv); + const size_t pairs_a_cap = coder_cap(vals_a->len, pairs->len); + const size_t pairs_b_cap = coder_cap(vals_b->len, inverted->len); - struct index *index_a = calloc(1, index_a_cap); - struct index *index_b = calloc(1, index_b_cap); + size_t cap = pairs_a_cap + pairs_b_cap; + uint8_t *buffer = calloc(1, cap); + struct index *index_a = index_alloc(vals_b->len); + struct index *index_b = index_alloc(vals_a->len); size_t len = 0, len_a = 0, len_b = 0; { - struct indexer *indexer_a = indexer_alloc(pairs->len); - struct indexer *indexer_b = indexer_alloc(inverted->len); - struct encoder coder_a = - make_encoder(buffer, buffer + cap, vals_a, indexer_a); + make_encoder(buffer, buffer + cap, vals_a, index_a); for (size_t i = 0; i < pairs->len; ++i) assert(coder_encode(&coder_a, &pairs->data[i])); assert(coder_finish(&coder_a)); len_a = len = coder_a.it - buffer; - assert(len <= cap); + assert(len <= pairs_a_cap); struct encoder coder_b = - make_encoder(buffer + len_a, buffer + cap, vals_b, indexer_b); + make_encoder(buffer + len_a, buffer + cap, vals_b, index_b); for (size_t i = 0; i < inverted->len; ++i) assert(coder_encode(&coder_b, &inverted->data[i])); assert(coder_finish(&coder_b)); - len_b = len = coder_b.it - buffer; - assert(len <= cap); - - indexer_write(indexer_a, index_a, index_a_cap); - 
indexer_write(indexer_b, index_b, index_b_cap); - indexer_free(indexer_a); - indexer_free(indexer_b); + len_b = coder_b.it - coder_a.it; + assert(len_b <= pairs_b_cap); + len = coder_b.it - buffer; coder_close(&coder_a); coder_close(&coder_b); } if (false) { // hex dump for debuging - printf("offset b: %zu\n", len_a); - printf("buffer: start=%p, len=%lu\n", (void *) buffer, len); + rill_pairs_print(pairs); + printf("buffer: start=%p, len=%lu(%lu, %lu)\n", (void *) buffer, len, len_a, len_b); for (size_t i = 0; i < cap;) { printf("%6p: ", (void *) i); for (size_t j = 0; j < 16 && i < cap; ++i, ++j) { @@ -226,12 +217,27 @@ void check_coder(struct rill_pairs *pairs) } printf("\n"); } + + printf("index_a: [ "); + for (size_t i = 0; i < index_a->len; ++i) { + struct index_kv *kv = &index_a->data[i]; + printf("{%p, %p} ", (void *) kv->key, (void *) kv->off); + } + printf("]\n"); + + printf("index_b: [ "); + for (size_t i = 0; i < index_b->len; ++i) { + struct index_kv *kv = &index_b->data[i]; + printf("{%p, %p} ", (void *) kv->key, (void *) kv->off); + } + printf("]\n"); } { /* Coder A */ + uint8_t *start = buffer; struct decoder coder = - make_decoder_at(buffer, - buffer + len_a, + make_decoder_at(start, + start + len_a, index_b, index_a, 0); struct rill_kv kv = {0}; @@ -245,9 +251,10 @@ void check_coder(struct rill_pairs *pairs) } { /* Coder B */ + uint8_t *start = buffer + len_a; struct decoder coder = - make_decoder_at(buffer + len_a, - buffer + len_b, + make_decoder_at(start, + start + len_b, index_a, index_b, 0); struct rill_kv kv = {0}; @@ -267,8 +274,9 @@ void check_coder(struct rill_pairs *pairs) assert(index_find(index_a, pairs->data[i].key, &key_idx, &off)); + uint8_t *start = buffer; struct decoder coder = make_decoder_at( - buffer + off, buffer + len, index_b, index_a, key_idx); + start + off, start + len_a, index_b, index_a, key_idx); struct rill_kv kv = {0}; do { @@ -285,8 +293,9 @@ void check_coder(struct rill_pairs *pairs) assert(index_find(index_b, 
inverted->data[i].key, &key_idx, &off)); + uint8_t *start = buffer + len_a; struct decoder coder = make_decoder_at( - buffer + len_a + off, buffer + len_b, + start + off, start + len_b, index_a, index_b, key_idx); diff --git a/test/indexer_test.c b/test/indexer_test.c index edd106e..8185f81 100644 --- a/test/indexer_test.c +++ b/test/indexer_test.c @@ -2,49 +2,57 @@ #include "index.c" -enum { - CAP = 10 -}; +// ----------------------------------------------------------------------------- +// utils +// ----------------------------------------------------------------------------- -bool test_indexer_build(void) +static struct index *index_alloc(size_t pairs) { - struct indexer *indexer = indexer_alloc(CAP); - assert(indexer); - assert(indexer->len == 0); - assert(indexer->cap == CAP); - - rill_key_t data[CAP] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18}; - for (size_t i = 0; i < CAP; i++) - indexer_put(indexer, data[i], i); - assert(indexer->len == indexer->cap); - - const size_t size = indexer_cap(CAP); - struct index *index = calloc(1, size); + struct index *index = calloc(1, index_cap(pairs)); + assert(index); + assert(index->len == 0); + + return index; +} - size_t n_written = indexer_write(indexer, index, size); - assert(n_written == size); - indexer_free(indexer); +// ----------------------------------------------------------------------------- +// test_index_build +// ----------------------------------------------------------------------------- + +static bool test_index_build(void) +{ + enum { pairs = 10 }; + struct index *index = index_alloc(pairs); + + rill_key_t data[pairs] = {0}; + for (size_t i = 1; i < pairs; ++i) data[i] = data[i - 1] += 2; + + for (size_t i = 0; i < pairs; i++) + index_put(index, data[i], i); + + assert(index->len == pairs); for (size_t i = 0; i < index->len; i++) assert(index_get(index, i) == data[i]); + assert(index_get(index, index->len) == 0); free(index); - return true; } -static struct index *make_index(rill_key_t *data, size_t n) + +// 
----------------------------------------------------------------------------- +// test_index_lookup +// ----------------------------------------------------------------------------- + +static struct index *make_index(rill_key_t *data, size_t n) { - struct indexer *indexer = indexer_alloc(n); + struct index *index = index_alloc(n); for (size_t i = 0; i < n; i++) - indexer_put(indexer, data[i], i); - - struct index *index = calloc(1, indexer_cap(n)); - indexer_write(indexer, index, indexer_cap(n)); - indexer_free(indexer); + index_put(index, data[i], i); return index; } @@ -74,7 +82,7 @@ static struct index *make_index(rill_key_t *data, size_t n) assert(!index_find(index, keys[i], &key_idx, &val)); \ } -bool test_indexer_lookup(void) +bool test_index_lookup(void) { struct index *index; @@ -103,8 +111,8 @@ int main(int argc, char **argv) (void) argc, (void) argv; bool ret = true; - ret = ret && test_indexer_build(); - ret = ret && test_indexer_lookup(); + ret = ret && test_index_build(); + ret = ret && test_index_lookup(); return ret ? 
0 : 1; } From 04d45bd7ed0a866a025fc9be15fd81cffdbc5c32 Mon Sep 17 00:00:00 2001 From: Simon Symeonidis Date: Fri, 13 Apr 2018 11:02:32 -0400 Subject: [PATCH 65/91] Add rill_count written by @RAttab --- compile.sh | 1 + src/rill_count.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 src/rill_count.c diff --git a/compile.sh b/compile.sh index 1ad531f..2c615da 100755 --- a/compile.sh +++ b/compile.sh @@ -41,6 +41,7 @@ $CC -o rill_query "${PREFIX}/src/rill_query.c" librill.a $CFLAGS $CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS $CC -o rill_ingest "${PREFIX}/src/rill_ingest.c" librill.a $CFLAGS $CC -o rill_merge "${PREFIX}/src/rill_merge.c" librill.a $CFLAGS +$CC -o rill_count "${PREFIX}/src/rill_count.c" librill.a $CFLAGS $CC -o rill_generate "${PREFIX}/test/rill_generate.c" librill.a $CFLAGS $CC -o test_indexer "${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && ./test_indexer diff --git a/src/rill_count.c b/src/rill_count.c new file mode 100644 index 0000000..cad7393 --- /dev/null +++ b/src/rill_count.c @@ -0,0 +1,63 @@ +/* rill_count.c + Rémi Attab (remi.attab@gmail.com), 07 Sep 2017 + FreeBSD-style copyright and disclaimer apply +*/ + +#include "rill.h" +#include "utils.h" + +#include +#include +#include + +void usage() +{ + fprintf(stderr, "rill_count <-a|-b> \n"); + exit(1); +} + +void count(struct rill_store *store, enum rill_col col) +{ + struct rill_kv kv; + struct rill_store_it *it = rill_store_begin(store, col); + + rill_key_t key = 0; + size_t count = 0; + while (rill_store_it_next(it, &kv)) { + if (rill_kv_nil(&kv)) break; + + if (kv.key == key) count++; + else { + if (key) printf("%lu %p\n", count, (void *) key); + count = 1; + key = kv.key; + } + } + + rill_store_it_free(it); +} + +int main(int argc, char **argv) +{ + if (argc != 3) usage(); + + int opt = 0; + bool col_a = false, col_b = false; + + while ((opt = getopt(argc, argv, "ab")) != -1) { + switch(opt) { + case 'a': 
col_a = true; break; + case 'b': col_b = true; break; + default: usage(); + } + } + + struct rill_store *store = rill_store_open(argv[optind]); + if (!store) rill_exit(1); + + if (col_a) count(store, rill_col_a); + if (col_b) count(store, rill_col_b); + + rill_store_close(store); + return 0; +} From 3580e4fd273990e8a85017e776ef2845947dfd9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Thu, 14 Jun 2018 12:00:45 +0200 Subject: [PATCH 66/91] Mass rename --- src/acc.c | 34 +++++------ src/coder.c | 36 ++++++------ src/index.c | 26 ++++----- src/pairs.c | 114 ++++++++++++++++++------------------- src/query.c | 44 +++++++-------- src/rill.h | 82 +++++++++++++-------------- src/rill_count.c | 12 ++-- src/rill_dump.c | 28 +++++----- src/rill_ingest.c | 18 +++--- src/rill_query.c | 20 +++---- src/store.c | 114 ++++++++++++++++++------------------- src/vals.c | 10 ++-- test/coder_test.c | 130 +++++++++++++++++++++---------------------- test/indexer_test.c | 24 ++++---- test/query_test.c | 24 ++++---- test/rill_generate.c | 26 ++++----- test/rotate_test.c | 18 +++--- test/store_test.c | 126 ++++++++++++++++++++--------------------- test/test.h | 30 +++++----- 19 files changed, 457 insertions(+), 459 deletions(-) diff --git a/src/acc.c b/src/acc.c index 0a05292..0a5f1f0 100644 --- a/src/acc.c +++ b/src/acc.c @@ -38,7 +38,7 @@ struct rill_packed header atomic_size_t write; }; -struct rill_packed kv +struct rill_packed row { uint64_t key, val; }; @@ -52,7 +52,7 @@ struct rill_acc size_t vma_len; struct header *head; - struct kv *data; + struct row *data; }; enum { min_cap = 32 }; @@ -178,16 +178,16 @@ void rill_acc_close(struct rill_acc *acc) free(acc); } -void rill_acc_ingest(struct rill_acc *acc, rill_key_t key, rill_val_t val) +void rill_acc_ingest(struct rill_acc *acc, rill_val_t key, rill_val_t val) { assert(key && val); size_t write = atomic_load_explicit(&acc->head->write, memory_order_relaxed); size_t index = write % acc->head->len; - struct kv *kv = 
&acc->data[index]; + struct row *row = &acc->data[index]; - kv->key = key; - kv->val = val; + row->key = key; + row->val = val; atomic_store_explicit(&acc->head->write, write + 1, memory_order_release); } @@ -205,32 +205,32 @@ bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) start = end - acc->head->len; } - struct rill_pairs *pairs = rill_pairs_new(end - start); - if (!pairs) { - rill_fail("unable to allocate pairs for len '%lu'", acc->head->len); - goto fail_pairs_alloc; + struct rill_rows *rows = rill_rows_new(end - start); + if (!rows) { + rill_fail("unable to allocate rows for len '%lu'", acc->head->len); + goto fail_rows_alloc; } for (size_t i = start; i < end; ++i) { size_t index = i % acc->head->len; - struct kv *kv = &acc->data[index]; + struct row *row = &acc->data[index]; - struct rill_pairs *ret = rill_pairs_push(pairs, kv->key, kv->val); - assert(ret == pairs); + struct rill_rows *ret = rill_rows_push(rows, row->key, row->val); + assert(ret == rows); } - if (!rill_store_write(file, now, 0, pairs)) { + if (!rill_store_write(file, now, 0, rows)) { rill_fail("unable to write acc file '%s'", file); goto fail_write; } atomic_store_explicit(&acc->head->read, end, memory_order_release); - rill_pairs_free(pairs); + rill_rows_free(rows); return true; fail_write: - rill_pairs_free(pairs); - fail_pairs_alloc: + rill_rows_free(rows); + fail_rows_alloc: return false; } diff --git a/src/coder.c b/src/coder.c index 49f4d19..f29404a 100644 --- a/src/coder.c +++ b/src/coder.c @@ -56,21 +56,21 @@ struct encoder uint8_t *it, *start, *end; size_t keys; - rill_key_t key; + rill_val_t key; vals_rev_t rev; struct index *index; - size_t pairs; + size_t rows; }; -static size_t coder_cap(size_t vals, size_t pairs) +static size_t coder_cap(size_t vals, size_t rows) { size_t bytes = 1; while (vals >= 1UL << (bytes * 7)) bytes++; return (bytes + 1) // + 1 -> end-of-values terminator - * (pairs + 1); // + 1 -> end-of-pairs terminator + * (rows + 1); // + 1 
-> end-of-rows terminator } static uint64_t coder_off(struct encoder *coder) @@ -112,21 +112,21 @@ static inline bool coder_write_val(struct encoder *coder, rill_val_t val) return true; } -static bool coder_encode(struct encoder *coder, const struct rill_kv *kv) +static bool coder_encode(struct encoder *coder, const struct rill_row *row) { - if (coder->key != kv->key) { + if (coder->key != row->key) { if (rill_likely(coder->key)) { if (!coder_write_sep(coder)) return false; } - index_put(coder->index, kv->key, coder_off(coder)); - coder->key = kv->key; + index_put(coder->index, row->key, coder_off(coder)); + coder->key = row->key; coder->keys++; } - if (!coder_write_val(coder, kv->val)) return false; + if (!coder_write_val(coder, row->val)) return false; - coder->pairs++; + coder->rows++; return true; } @@ -167,7 +167,7 @@ struct decoder uint8_t *it, *end; size_t keys; - rill_key_t key; + rill_val_t key; struct index *lookup; struct index *index; @@ -187,21 +187,21 @@ static inline bool coder_read_val(struct decoder *coder, rill_val_t *val) return true; } -static bool coder_decode(struct decoder *coder, struct rill_kv *kv) +static bool coder_decode(struct decoder *coder, struct rill_row *row) { if (rill_likely(coder->key)) { - kv->key = coder->key; - if (!coder_read_val(coder, &kv->val)) return false; - if (kv->val) return true; + row->key = coder->key; + if (!coder_read_val(coder, &row->val)) return false; + if (row->val) return true; } coder->key = index_get(coder->index, coder->keys); coder->keys++; - kv->key = coder->key; - if (!kv->key) return true; // eof + row->key = coder->key; + if (!row->key) return true; // eof - return coder_read_val(coder, &kv->val); + return coder_read_val(coder, &row->val); } static struct decoder make_decoder_at( diff --git a/src/index.c b/src/index.c index ebe510e..93a83ee 100644 --- a/src/index.c +++ b/src/index.c @@ -8,9 +8,9 @@ // config // ----------------------------------------------------------------------------- -struct 
rill_packed index_kv +struct rill_packed index_row { - rill_key_t key; + rill_val_t key; uint64_t off; }; @@ -18,27 +18,27 @@ struct rill_packed index { uint64_t len; uint64_t __unused; // kept for backwards compatibility - struct index_kv data[]; + struct index_row data[]; }; -static size_t index_cap(size_t pairs) +static size_t index_cap(size_t rows) { - return sizeof(struct index) + pairs * sizeof(struct index_kv); + return sizeof(struct index) + rows * sizeof(struct index_row); } -static void index_put(struct index *index, rill_key_t key, uint64_t off) +static void index_put(struct index *index, rill_val_t key, uint64_t off) { - index->data[index->len] = (struct index_kv) { .key = key, .off = off }; + index->data[index->len] = (struct index_row) { .key = key, .off = off }; index->len++; } // RIP fancy pants interpolation search :( static bool index_find( - struct index *index, rill_key_t key, size_t *key_idx, uint64_t *off) + struct index *index, rill_val_t key, size_t *key_idx, uint64_t *off) { size_t idx = 0; size_t len = index->len; - struct index_kv *low = index->data; + struct index_row *low = index->data; while (len > 1) { size_t mid = len / 2; @@ -46,14 +46,14 @@ static bool index_find( else { low += mid; len -= mid; idx += mid;} } - struct index_kv *kv = &index->data[idx]; - if (kv->key != key) return false; + struct index_row *row = &index->data[idx]; + if (row->key != key) return false; *key_idx = idx; - *off = kv->off; + *off = row->off; return true; } -static rill_key_t index_get(struct index *index, size_t i) +static rill_val_t index_get(struct index *index, size_t i) { return i < index->len ? 
index->data[i].key : 0; } diff --git a/src/pairs.c b/src/pairs.c index c2d5275..0b11073 100644 --- a/src/pairs.c +++ b/src/pairs.c @@ -1,4 +1,4 @@ -/* pairs.c +/* rows.c Rémi Attab (remi.attab@gmail.com), 02 Sep 2017 FreeBSD-style copyright and disclaimer apply */ @@ -12,15 +12,15 @@ // ----------------------------------------------------------------------------- -// kv +// row // ----------------------------------------------------------------------------- -extern inline bool rill_kv_nil(const struct rill_kv *); -extern inline int rill_kv_cmp(const struct rill_kv *, const struct rill_kv *); +extern inline bool rill_row_nil(const struct rill_row *); +extern inline int rill_row_cmp(const struct rill_row *, const struct rill_row *); // ----------------------------------------------------------------------------- -// pairs +// rows // ----------------------------------------------------------------------------- static size_t adjust_cap(size_t cap, size_t len) @@ -29,110 +29,110 @@ static size_t adjust_cap(size_t cap, size_t len) return cap; } -struct rill_pairs *rill_pairs_new(size_t cap) +struct rill_rows *rill_rows_new(size_t cap) { cap = adjust_cap(1, cap); - struct rill_pairs *pairs = - calloc(1, sizeof(*pairs) + cap * sizeof(pairs->data[0])); - if (!pairs) { - rill_fail("unable to alloc pairs: cap=%lu", cap); + struct rill_rows *rows = + calloc(1, sizeof(*rows) + cap * sizeof(rows->data[0])); + if (!rows) { + rill_fail("unable to alloc rows: cap=%lu", cap); return NULL; } - pairs->cap = cap; - return pairs; + rows->cap = cap; + return rows; } -void rill_pairs_free(struct rill_pairs *pairs) +void rill_rows_free(struct rill_rows *rows) { - free(pairs); + free(rows); } -void rill_pairs_clear(struct rill_pairs *pairs) +void rill_rows_clear(struct rill_rows *rows) { - pairs->len = 0; + rows->len = 0; } -struct rill_pairs *rill_pairs_reserve(struct rill_pairs *pairs, size_t cap) +struct rill_rows *rill_rows_reserve(struct rill_rows *rows, size_t cap) { - if 
(rill_likely(cap <= pairs->cap)) return pairs; - cap = adjust_cap(pairs->cap, cap); + if (rill_likely(cap <= rows->cap)) return rows; + cap = adjust_cap(rows->cap, cap); - pairs = realloc(pairs, sizeof(*pairs) + cap * sizeof(pairs->data[0])); - if (!pairs) { - rill_fail("unable to realloc pairs: cap=%lu", cap); + rows = realloc(rows, sizeof(*rows) + cap * sizeof(rows->data[0])); + if (!rows) { + rill_fail("unable to realloc rows: cap=%lu", cap); return NULL; } - pairs->cap = cap; - return pairs; + rows->cap = cap; + return rows; } -struct rill_pairs *rill_pairs_push( - struct rill_pairs *pairs, rill_key_t key, rill_val_t val) +struct rill_rows *rill_rows_push( + struct rill_rows *rows, rill_val_t key, rill_val_t val) { - assert(key && val && pairs); + assert(key && val && rows); - pairs = rill_pairs_reserve(pairs, pairs->len + 1); - if (!pairs) return NULL; + rows = rill_rows_reserve(rows, rows->len + 1); + if (!rows) return NULL; - pairs->data[pairs->len] = (struct rill_kv) { .key = key, .val = val }; - pairs->len++; + rows->data[rows->len] = (struct rill_row) { .key = key, .val = val }; + rows->len++; - return pairs; + return rows; } -static int kv_cmp(const void *lhs, const void *rhs) +static int row_cmp(const void *lhs, const void *rhs) { - return rill_kv_cmp(lhs, rhs); + return rill_row_cmp(lhs, rhs); } -void rill_pairs_compact(struct rill_pairs *pairs) +void rill_rows_compact(struct rill_rows *rows) { - if (pairs->len <= 1) return; - qsort(pairs->data, pairs->len, sizeof(*pairs->data), &kv_cmp); + if (rows->len <= 1) return; + qsort(rows->data, rows->len, sizeof(*rows->data), &row_cmp); size_t j = 0; - for (size_t i = 1; i < pairs->len; ++i) { - if (!rill_kv_cmp(&pairs->data[i], &pairs->data[j])) continue; + for (size_t i = 1; i < rows->len; ++i) { + if (!rill_row_cmp(&rows->data[i], &rows->data[j])) continue; ++j; - if (j != i) pairs->data[j] = pairs->data[i]; + if (j != i) rows->data[j] = rows->data[i]; } - assert(j + 1 <= pairs->len); - pairs->len = j + 1; 
+ assert(j + 1 <= rows->len); + rows->len = j + 1; } -void rill_pairs_print(const struct rill_pairs *pairs) +void rill_rows_print(const struct rill_rows *rows) { - const rill_key_t no_key = -1ULL; - rill_key_t key = no_key; + const rill_val_t no_key = -1ULL; + rill_val_t key = no_key; - printf("pairs(%p, %lu, %lu):\n", (void *) pairs, pairs->len, pairs->cap); + printf("rows(%p, %lu, %lu):\n", (void *) rows, rows->len, rows->cap); - for (size_t i = 0; i < pairs->len; ++i) { - const struct rill_kv *kv = &pairs->data[i]; + for (size_t i = 0; i < rows->len; ++i) { + const struct rill_row *row = &rows->data[i]; - if (kv->key == key) printf(", %lu", kv->val); + if (row->key == key) printf(", %lu", row->val); else { if (key != no_key) printf("]\n"); - printf(" %p: [ %lu", (void *) kv->key, kv->val); - key = kv->key; + printf(" %p: [ %lu", (void *) row->key, row->val); + key = row->key; } } - if (pairs->len) printf(" ]\n"); + if (rows->len) printf(" ]\n"); } -void rill_pairs_invert(struct rill_pairs* pairs) +void rill_rows_invert(struct rill_rows* rows) { - for (size_t i = 0; i < pairs->len; ++i) { - pairs->data[i] = (struct rill_kv) { - .key = pairs->data[i].val, - .val = pairs->data[i].key, + for (size_t i = 0; i < rows->len; ++i) { + rows->data[i] = (struct rill_row) { + .key = rows->data[i].val, + .val = rows->data[i].key, }; } } diff --git a/src/query.c b/src/query.c index 9bcc91e..9ccdd38 100644 --- a/src/query.c +++ b/src/query.c @@ -62,29 +62,29 @@ void rill_query_close(struct rill_query *query) free(query); } -struct rill_pairs *rill_query_key( - const struct rill_query *query, rill_key_t key, struct rill_pairs *out) +struct rill_rows *rill_query_key( + const struct rill_query *query, rill_val_t key, struct rill_rows *out) { if (!key) return out; - struct rill_pairs *result = out; + struct rill_rows *result = out; for (size_t i = 0; i < query->len; ++i) { result = rill_store_query_key(query->list[i], key, result); if (!result) return NULL; } - 
rill_pairs_compact(result); + rill_rows_compact(result); return result; } -struct rill_pairs *rill_query_keys( +struct rill_rows *rill_query_keys( const struct rill_query *query, - const rill_key_t *keys, size_t len, - struct rill_pairs *out) + const rill_val_t *keys, size_t len, + struct rill_rows *out) { if (!len) return out; - struct rill_pairs *result = out; + struct rill_rows *result = out; for (size_t i = 0; i < query->len; ++i) { for (size_t j = 0; i < len; ++j) { result = rill_store_query_key(query->list[i], keys[j], result); @@ -92,7 +92,7 @@ struct rill_pairs *rill_query_keys( } } - rill_pairs_compact(result); + rill_rows_compact(result); return result; } @@ -105,10 +105,10 @@ static int compare_rill_values(const void *v1, const void *v2) { return 0; } -struct rill_pairs *rill_query_vals( +struct rill_rows *rill_query_vals( const struct rill_query *query, const rill_val_t *vals, size_t len, - struct rill_pairs *out) + struct rill_rows *out) { if (!len) return out; @@ -118,7 +118,7 @@ struct rill_pairs *rill_query_vals( memcpy(sorted, vals, sizeof(vals[0]) * len); qsort(sorted, len, sizeof(vals[0]), compare_rill_values); - struct rill_pairs *result = out; + struct rill_rows *result = out; for (size_t i = 0; i < query->len; ++i) { for (size_t j = 0; j < len; ++j) { result = rill_store_query_value(query->list[i], sorted[j], result); @@ -126,7 +126,7 @@ struct rill_pairs *rill_query_vals( } } - rill_pairs_compact(result); + rill_rows_compact(result); free(sorted); return result; @@ -137,32 +137,32 @@ struct rill_pairs *rill_query_vals( return NULL; } -struct rill_pairs *rill_query_all( +struct rill_rows *rill_query_all( const struct rill_query *query, enum rill_col col) { - struct rill_pairs *result = rill_pairs_new(1); + struct rill_rows *result = rill_rows_new(1); for (size_t i = 0; i < query->len; ++i) { - size_t pairs = rill_store_pairs(query->list[i]); - result = rill_pairs_reserve(result, result->len + pairs); + size_t rows = 
rill_store_rows(query->list[i]); + result = rill_rows_reserve(result, result->len + rows); if (!result) goto fail_scan; struct rill_store_it *it = rill_store_begin(query->list[i], col); if (!it) goto fail_scan; - struct rill_kv kv; + struct rill_row row; while (true) { - if (!rill_store_it_next(it, &kv)) { + if (!rill_store_it_next(it, &row)) { rill_store_it_free(it); goto fail_scan; } - if (rill_kv_nil(&kv)) break; + if (rill_row_nil(&row)) break; - result = rill_pairs_push(result, kv.key, kv.val); + result = rill_rows_push(result, row.key, row.val); } rill_store_it_free(it); } - rill_pairs_compact(result); + rill_rows_compact(result); return result; fail_scan: diff --git a/src/rill.h b/src/rill.h index 9ec3e56..b505d65 100644 --- a/src/rill.h +++ b/src/rill.h @@ -36,61 +36,59 @@ size_t rill_strerror(struct rill_error *err, char *dest, size_t len); // ----------------------------------------------------------------------------- typedef uint64_t rill_ts_t; -typedef uint64_t rill_key_t; typedef uint64_t rill_val_t; // ----------------------------------------------------------------------------- -// kv +// row // ----------------------------------------------------------------------------- -struct rill_kv +struct rill_row { - rill_key_t key; - rill_val_t val; + rill_val_t a, b; }; -inline bool rill_kv_nil(const struct rill_kv *kv) +inline bool rill_row_nil(const struct rill_row *row) { - return !kv->key && !kv->val; + return !row->a && !row->b; } -inline int rill_kv_cmp(const struct rill_kv *lhs, const struct rill_kv *rhs) +inline int rill_row_cmp(const struct rill_row *lhs, const struct rill_row *rhs) { - if (lhs->key < rhs->key) return -1; - if (lhs->key > rhs->key) return +1; + if (lhs->a < rhs->a) return -1; + if (lhs->a > rhs->a) return +1; - if (lhs->val < rhs->val) return -1; - if (lhs->val > rhs->val) return +1; + if (lhs->b < rhs->b) return -1; + if (lhs->b > rhs->b) return +1; return 0; } // 
----------------------------------------------------------------------------- -// pairs +// rows // ----------------------------------------------------------------------------- -struct rill_pairs +struct rill_rows { size_t len, cap; - struct rill_kv data[]; + struct rill_row *data; }; -struct rill_pairs *rill_pairs_new(size_t cap); -void rill_pairs_free(struct rill_pairs *pairs); -void rill_pairs_clear(struct rill_pairs *pairs); +struct rill_rows *rill_rows_new(size_t cap); +void rill_rows_free(struct rill_rows *rows); +void rill_rows_clear(struct rill_rows *rows); -struct rill_pairs *rill_pairs_reserve(struct rill_pairs *pairs, size_t cap); +struct rill_rows *rill_rows_reserve(struct rill_rows *rows, size_t cap); -struct rill_pairs *rill_pairs_push( - struct rill_pairs *pairs, rill_key_t key, rill_val_t val); +struct rill_rows *rill_rows_push( + struct rill_rows *rows, rill_val_t key, rill_val_t val); -void rill_pairs_compact(struct rill_pairs *pairs); +void rill_rows_compact(struct rill_rows *rows); -void rill_pairs_print(const struct rill_pairs *pairs); +void rill_rows_print(const struct rill_rows *rows); -void rill_pairs_invert(struct rill_pairs* pairs); +void rill_rows_invert(struct rill_rows* rows); // ----------------------------------------------------------------------------- @@ -110,7 +108,7 @@ bool rill_store_write( const char *file, rill_ts_t ts, size_t quant, - struct rill_pairs *pairs); + struct rill_rows *rows); bool rill_store_merge( const char *file, @@ -124,21 +122,21 @@ unsigned rill_store_version(const struct rill_store *store); rill_ts_t rill_store_ts(const struct rill_store *store); size_t rill_store_quant(const struct rill_store *store); size_t rill_store_keys_count(const struct rill_store *store, enum rill_col column); -size_t rill_store_pairs(const struct rill_store *store); +size_t rill_store_rows(const struct rill_store *store); size_t rill_store_index_len(const struct rill_store *store, enum rill_col col); struct rill_space* 
rill_store_space(struct rill_store *store); size_t rill_store_space_header(struct rill_space *space); size_t rill_store_space_index(struct rill_space *space, enum rill_col col); -size_t rill_store_space_pairs(struct rill_space *space, enum rill_col col); +size_t rill_store_space_rows(struct rill_space *space, enum rill_col col); void rill_space_free(struct rill_space* space); -struct rill_pairs *rill_store_query_value( - struct rill_store *store, rill_val_t val, struct rill_pairs *out); -struct rill_pairs *rill_store_query_key( - struct rill_store *store, rill_key_t key, struct rill_pairs *out); +struct rill_rows *rill_store_query_value( + struct rill_store *store, rill_val_t val, struct rill_rows *out); +struct rill_rows *rill_store_query_key( + struct rill_store *store, rill_val_t key, struct rill_rows *out); size_t rill_store_keys( @@ -148,7 +146,7 @@ size_t rill_store_keys( struct rill_store_it *rill_store_begin( struct rill_store *store, enum rill_col column); void rill_store_it_free(struct rill_store_it *it); -bool rill_store_it_next(struct rill_store_it *it, struct rill_kv *kv); +bool rill_store_it_next(struct rill_store_it *it, struct rill_row *row); // ----------------------------------------------------------------------------- @@ -162,7 +160,7 @@ enum { rill_acc_read_only = 0 }; struct rill_acc *rill_acc_open(const char *dir, size_t cap); void rill_acc_close(struct rill_acc *acc); -void rill_acc_ingest(struct rill_acc *acc, rill_key_t key, rill_val_t val); +void rill_acc_ingest(struct rill_acc *acc, rill_val_t key, rill_val_t val); bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now); @@ -182,22 +180,22 @@ struct rill_query; struct rill_query * rill_query_open(const char *dir); void rill_query_close(struct rill_query *db); -struct rill_pairs *rill_query_key( +struct rill_rows *rill_query_key( const struct rill_query *query, - rill_key_t key, - struct rill_pairs *out); + rill_val_t key, + struct rill_rows *out); -struct rill_pairs 
*rill_query_keys( +struct rill_rows *rill_query_keys( const struct rill_query *query, - const rill_key_t *keys, size_t len, - struct rill_pairs *out); + const rill_val_t *keys, size_t len, + struct rill_rows *out); -struct rill_pairs *rill_query_vals( +struct rill_rows *rill_query_vals( const struct rill_query *query, const rill_val_t *vals, size_t len, - struct rill_pairs *out); + struct rill_rows *out); -struct rill_pairs *rill_query_all( +struct rill_rows *rill_query_all( const struct rill_query *query, enum rill_col col); diff --git a/src/rill_count.c b/src/rill_count.c index cad7393..8026b48 100644 --- a/src/rill_count.c +++ b/src/rill_count.c @@ -18,19 +18,19 @@ void usage() void count(struct rill_store *store, enum rill_col col) { - struct rill_kv kv; + struct rill_row row; struct rill_store_it *it = rill_store_begin(store, col); - rill_key_t key = 0; + rill_val_t key = 0; size_t count = 0; - while (rill_store_it_next(it, &kv)) { - if (rill_kv_nil(&kv)) break; + while (rill_store_it_next(it, &row)) { + if (rill_row_nil(&row)) break; - if (kv.key == key) count++; + if (row.key == key) count++; else { if (key) printf("%lu %p\n", count, (void *) key); count = 1; - key = kv.key; + key = row.key; } } diff --git a/src/rill_dump.c b/src/rill_dump.c index b0623f8..a02ee25 100644 --- a/src/rill_dump.c +++ b/src/rill_dump.c @@ -20,7 +20,7 @@ int main(int argc, char **argv) { bool header = false; bool key = false; - bool pairs = false; + bool rows = false; bool a = false; bool b = false; bool space = false; @@ -30,7 +30,7 @@ int main(int argc, char **argv) switch (opt) { case 'h': header = true; break; case 'k': key = true; break; - case 'p': pairs = true; break; + case 'p': rows = true; break; case 'a': a = true; break; case 'b': b = true; break; case 'm': space = true; break; @@ -40,7 +40,7 @@ int main(int argc, char **argv) } } - if (!header && !a && !b && !a && !pairs && !key && !space) usage(); + if (!header && !a && !b && !a && !rows && !key && !space) usage(); 
if (optind >= argc) usage(); struct rill_store *store = rill_store_open(argv[optind]); @@ -53,12 +53,12 @@ int main(int argc, char **argv) printf("quant: %lu\n", rill_store_quant(store)); printf("keys data a: %zu\n", rill_store_keys_count(store, rill_col_a)); printf("keys data b: %zu\n", rill_store_keys_count(store, rill_col_b)); - printf("pairs: %lu\n", rill_store_pairs(store)); + printf("rows: %lu\n", rill_store_rows(store)); printf("index a len: %zu\n", rill_store_index_len(store, rill_col_a)); printf("index b len: %zu\n", rill_store_index_len(store, rill_col_b)); } - if ((key || pairs) && !a && !b) { + if ((key || rows) && !a && !b) { fprintf(stderr, "you need to specify column a or b\n"); return -1; } @@ -66,7 +66,7 @@ int main(int argc, char **argv) if (key) { const enum rill_col col = a ? rill_col_a : rill_col_b; const size_t keys_len = rill_store_keys_count(store, col); - rill_key_t *keys = calloc(keys_len, sizeof(*keys)); + rill_val_t *keys = calloc(keys_len, sizeof(*keys)); (void) rill_store_keys(store, keys, keys_len, col); @@ -76,15 +76,15 @@ int main(int argc, char **argv) printf(" 0x%lx\n", keys[i]); } - if (pairs) { - struct rill_kv kv = {0}; + if (rows) { + struct rill_row row = {0}; const enum rill_col col = a ? rill_col_a : rill_col_b; struct rill_store_it *it = rill_store_begin(store, col); - printf("pairs %c:\n", a ? 'a' : 'b'); - while (rill_store_it_next(it, &kv)) { - if (rill_kv_nil(&kv)) break; - printf(" 0x%lx 0x%lx\n", kv.key, kv.val); + printf("rows %c:\n", a ? 
'a' : 'b'); + while (rill_store_it_next(it, &row)) { + if (rill_row_nil(&row)) break; + printf(" 0x%lx 0x%lx\n", row.key, row.val); } rill_store_it_free(it); @@ -104,8 +104,8 @@ int main(int argc, char **argv) rill_store_space_header(space), rill_store_space_index(space, rill_col_a), rill_store_space_index(space, rill_col_b), - rill_store_space_pairs(space, rill_col_a), - rill_store_space_pairs(space, rill_col_b)); + rill_store_space_rows(space, rill_col_a), + rill_store_space_rows(space, rill_col_b)); free(space); } diff --git a/src/rill_ingest.c b/src/rill_ingest.c index 91483c9..2b640b5 100644 --- a/src/rill_ingest.c +++ b/src/rill_ingest.c @@ -52,24 +52,24 @@ struct rill_store *load_file(const char *file, rill_ts_t ts, rill_ts_t quant) rill_exit(1); } - struct rill_kv *it = data; - struct rill_kv *end = it + (st.st_size / sizeof(*it)); + struct rill_row *it = data; + struct rill_row *end = it + (st.st_size / sizeof(*it)); for (; it < end; ++it) { - rill_key_t key = endian_btol(it->val); + rill_val_t key = endian_btol(it->val); rill_val_t val = endian_btol(it->key); - *it = (struct rill_kv) { .key = key, .val = val }; + *it = (struct rill_row) { .key = key, .val = val }; } - struct rill_pairs *pairs = ((struct rill_pairs *)data) - 1; - if (!pairs) rill_exit(1); + struct rill_rows *rows = ((struct rill_rows *)data) - 1; + if (!rows) rill_exit(1); - pairs->cap = pairs->len = st.st_size / sizeof(pairs->data[0]); - rill_pairs_compact(pairs); + rows->cap = rows->len = st.st_size / sizeof(rows->data[0]); + rill_rows_compact(rows); char file_rill[PATH_MAX]; snprintf(file_rill, sizeof(file_rill), "%s.rill", file); - if (!rill_store_write(file_rill, ts, quant, pairs)) rill_exit(1); + if (!rill_store_write(file_rill, ts, quant, rows)) rill_exit(1); munmap(ptr, page_len); munmap(data, len); diff --git a/src/rill_query.c b/src/rill_query.c index aa2eb00..8cf7b68 100644 --- a/src/rill_query.c +++ b/src/rill_query.c @@ -60,7 +60,7 @@ uint64_t read_u64(char *arg) int main(int 
argc, char *argv[]) { - rill_key_t key = 0; + rill_val_t key = 0; rill_val_t val = 0; int opt = 0; @@ -77,14 +77,14 @@ int main(int argc, char *argv[]) if (optind >= argc) { usage(); } const char *db = argv[optind]; - struct rill_pairs *pairs = rill_pairs_new(100); + struct rill_rows *rows = rill_rows_new(100); if (is_file(db)) { struct rill_store *store = rill_store_open(db); if (!store) rill_exit(1); - if (key) pairs = rill_store_query_key(store, key, pairs); - else pairs = rill_store_query_value(store, val, pairs); + if (key) rows = rill_store_query_key(store, key, rows); + else rows = rill_store_query_value(store, val, rows); rill_store_close(store); } @@ -92,17 +92,17 @@ int main(int argc, char *argv[]) struct rill_query *query = rill_query_open(db); if (!query) rill_exit(1); - if (key) pairs = rill_query_key(query, key, pairs); - else pairs = rill_query_vals(query, &val, 1, pairs); + if (key) rows = rill_query_key(query, key, rows); + else rows = rill_query_vals(query, &val, 1, rows); rill_query_close(query); } - if (!pairs) rill_exit(1); + if (!rows) rill_exit(1); - for (size_t i = 0; i < pairs->len; ++i) - printf("0x%lx 0x%lx\n", pairs->data[i].key, pairs->data[i].val); + for (size_t i = 0; i < rows->len; ++i) + printf("0x%lx 0x%lx\n", rows->data[i].key, rows->data[i].val); - rill_pairs_free(pairs); + rill_rows_free(rows); return 0; } diff --git a/src/store.c b/src/store.c index b279cc5..73edf43 100644 --- a/src/store.c +++ b/src/store.c @@ -48,7 +48,7 @@ struct rill_packed header uint64_t ts; uint64_t quant; - uint64_t pairs; + uint64_t rows; uint64_t data_a_off; uint64_t data_b_off; @@ -83,7 +83,7 @@ struct rill_space { size_t header_bytes; size_t index_bytes[2]; - size_t pairs_bytes[2]; + size_t rows_bytes[2]; }; @@ -285,7 +285,7 @@ static bool writer_open( const char *file, struct vals *vals, struct vals *inverted_vals, - size_t pairs, + size_t rows, rill_ts_t ts, size_t quant) { @@ -301,8 +301,8 @@ static bool writer_open( sizeof(struct header) + 
index_cap(inverted_vals->len) + index_cap(vals->len) + - coder_cap(vals->len, pairs) + - coder_cap(inverted_vals->len, pairs); + coder_cap(vals->len, rows) + + coder_cap(inverted_vals->len, rows); if (ftruncate(store->fd, len) == -1) { rill_fail_errno("unable to resize '%s'", file); @@ -386,19 +386,19 @@ bool rill_store_write( const char *file, rill_ts_t ts, size_t quant, - struct rill_pairs *pairs) + struct rill_rows *rows) { - rill_pairs_compact(pairs); - if (!pairs->len) return true; + rill_rows_compact(rows); + if (!rows->len) return true; - struct vals *vals = vals_cols_from_pairs(pairs, rill_col_b); + struct vals *vals = vals_cols_from_rows(rows, rill_col_b); if (!vals) goto fail_vals; - struct vals *invert_vals = vals_cols_from_pairs(pairs, rill_col_a); + struct vals *invert_vals = vals_cols_from_rows(rows, rill_col_a); if (!invert_vals) goto fail_invert_vals; struct rill_store store = {0}; if (!writer_open(&store, file, vals, invert_vals, - pairs->len, ts, quant)) { + rows->len, ts, quant)) { rill_fail("unable to create '%s'", file); goto fail_open; } @@ -408,8 +408,8 @@ bool rill_store_write( struct encoder coder_a = store_encoder(&store, store.index_a, vals, store.head->data_a_off); - for (size_t i = 0; i < pairs->len; ++i) { - if (!coder_encode(&coder_a, &pairs->data[i])) goto fail_encode_a; + for (size_t i = 0; i < rows->len; ++i) { + if (!coder_encode(&coder_a, &rows->data[i])) goto fail_encode_a; } if (!coder_finish(&coder_a)) goto fail_encode_a; @@ -418,15 +418,15 @@ bool rill_store_write( struct encoder coder_b = store_encoder(&store, store.index_b, invert_vals, store.head->data_b_off); - rill_pairs_invert(pairs); - rill_pairs_compact(pairs); /* recompact mainly for sort */ + rill_rows_invert(rows); + rill_rows_compact(rows); /* recompact mainly for sort */ - for (size_t i = 0; i < pairs->len; ++i) { - if (!coder_encode(&coder_b, &pairs->data[i])) goto fail_encode_b; + for (size_t i = 0; i < rows->len; ++i) { + if (!coder_encode(&coder_b, 
&rows->data[i])) goto fail_encode_b; } if (!coder_finish(&coder_b)) goto fail_encode_b; - store.head->pairs = coder_a.pairs; + store.head->rows = coder_a.rows; writer_close(&store, store.head->data_b_off + coder_off(&coder_b)); @@ -497,7 +497,7 @@ static bool merge_with_config( size_t list_len, enum rill_col col) { - struct rill_kv kvs[list_len]; + struct rill_row rows[list_len]; struct decoder decoders[list_len]; @@ -510,32 +510,32 @@ static bool merge_with_config( assert(it_len); for (size_t i = 0; i < it_len; ++i) { - if (!(coder_decode(&decoders[i], &kvs[i]))) goto fail_decoder; + if (!(coder_decode(&decoders[i], &rows[i]))) goto fail_decoder; } - struct rill_kv prev = {0}; + struct rill_row prev = {0}; while (it_len > 0) { size_t target = 0; for (size_t i = 1; i < it_len; ++i) { - if (rill_kv_cmp(&kvs[i], &kvs[target]) < 0) + if (rill_row_cmp(&rows[i], &rows[target]) < 0) target = i; } - struct rill_kv *kv = &kvs[target]; + struct rill_row *row = &rows[target]; struct decoder *decoder = &decoders[target]; - if (rill_likely(rill_kv_nil(&prev) || rill_kv_cmp(&prev, kv) < 0)) { - if (!coder_encode(coder, kv)) goto fail_decoder; - prev = *kv; + if (rill_likely(rill_row_nil(&prev) || rill_row_cmp(&prev, row) < 0)) { + if (!coder_encode(coder, row)) goto fail_decoder; + prev = *row; } - if (!coder_decode(decoder, kv)) goto fail_decoder; - if (rill_unlikely(rill_kv_nil(kv))) { - memmove(kvs + target, - kvs + target + 1, - (it_len - target - 1) * sizeof(kvs[0])); + if (!coder_decode(decoder, row)) goto fail_decoder; + if (rill_unlikely(rill_row_nil(row))) { + memmove(rows + target, + rows + target + 1, + (it_len - target - 1) * sizeof(rows[0])); memmove(decoders + target, decoders + target + 1, (it_len - target - 1) * sizeof(decoders[0])); @@ -556,7 +556,7 @@ bool rill_store_merge( { assert(list_len > 1); - size_t pairs = 0; + size_t rows = 0; struct vals *vals = NULL; struct vals *invert_vals = NULL; @@ -567,7 +567,7 @@ bool rill_store_merge( struct vals *ret = 
vals_merge_from_index(vals, list[i]->index_b); struct vals *iret = vals_merge_from_index(invert_vals, list[i]->index_a); - pairs += list[i]->head->pairs; + rows += list[i]->head->rows; if (ret) vals = ret; else goto fail_vals; if (iret) invert_vals = iret; else goto fail_invert_vals; @@ -575,7 +575,7 @@ bool rill_store_merge( struct rill_store store = {0}; if (!writer_open(&store, file, vals, invert_vals, - pairs, ts, quant)) { + rows, ts, quant)) { rill_fail("unable to create '%s'", file); goto fail_open; } @@ -594,7 +594,7 @@ bool rill_store_merge( if (!merge_with_config(&encoder_b, list, list_len, rill_col_b)) goto fail_coder_b; if (!coder_finish(&encoder_b)) goto fail_coder_b; - store.head->pairs = encoder_a.pairs; + store.head->rows = encoder_a.rows; writer_close(&store, store.head->data_b_off + coder_off(&encoder_b)); @@ -653,9 +653,9 @@ size_t rill_store_keys_count(const struct rill_store *store, enum rill_col colum return ix->len; } -size_t rill_store_pairs(const struct rill_store *store) +size_t rill_store_rows(const struct rill_store *store) { - return store->head->pairs; + return store->head->rows; } size_t rill_store_index_len(const struct rill_store *store, enum rill_col col) @@ -664,13 +664,13 @@ size_t rill_store_index_len(const struct rill_store *store, enum rill_col col) return col == rill_col_a ? 
store->index_a->len : store->index_b->len; } -static struct rill_pairs *store_query_key_or_value( +static struct rill_rows *store_query_key_or_value( struct rill_store *store, - rill_key_t key, - struct rill_pairs *out, + rill_val_t key, + struct rill_rows *out, enum rill_col column) { - struct rill_pairs *result = out; + struct rill_rows *result = out; size_t key_idx = 0; uint64_t off = 0; struct index *ix = @@ -678,15 +678,15 @@ static struct rill_pairs *store_query_key_or_value( if (!index_find(ix, key, &key_idx, &off)) return result; - struct rill_kv kv = {0}; + struct rill_row row = {0}; struct decoder coder = store_decoder_at(store, key_idx, off, column); while (true) { - if (!coder_decode(&coder, &kv)) goto fail; - if (rill_kv_nil(&kv)) break; - if (kv.key != key) break; + if (!coder_decode(&coder, &row)) goto fail; + if (rill_row_nil(&row)) break; + if (row.key != key) break; - result = rill_pairs_push(result, kv.key, kv.val); + result = rill_rows_push(result, row.key, row.val); if (!result) goto fail; } @@ -697,20 +697,20 @@ static struct rill_pairs *store_query_key_or_value( return NULL; } -struct rill_pairs *rill_store_query_key( - struct rill_store *store, rill_val_t key, struct rill_pairs *out) +struct rill_rows *rill_store_query_key( + struct rill_store *store, rill_val_t key, struct rill_rows *out) { return store_query_key_or_value(store, key, out, rill_col_a); } -struct rill_pairs *rill_store_query_value( - struct rill_store *store, rill_val_t key, struct rill_pairs *out) +struct rill_rows *rill_store_query_value( + struct rill_store *store, rill_val_t key, struct rill_rows *out) { return store_query_key_or_value(store, key, out, rill_col_b); } size_t rill_store_keys( - const struct rill_store *store, rill_key_t *out, size_t cap, + const struct rill_store *store, rill_val_t *out, size_t cap, enum rill_col column) { assert(column == rill_col_a || column == rill_col_b); @@ -744,9 +744,9 @@ void rill_store_it_free(struct rill_store_it *it) free(it); } 
-bool rill_store_it_next(struct rill_store_it *it, struct rill_kv *kv) +bool rill_store_it_next(struct rill_store_it *it, struct rill_row *row) { - return coder_decode(&it->decoder, kv); + return coder_decode(&it->decoder, row); } struct rill_space* rill_store_space(struct rill_store* store) @@ -757,8 +757,8 @@ struct rill_space* rill_store_space(struct rill_store* store) .header_bytes = sizeof(*store->head), .index_bytes[rill_col_a] = store->head->index_b_off - store->head->index_a_off, .index_bytes[rill_col_b] = store->head->data_a_off - store->head->index_b_off, - .pairs_bytes[rill_col_a] = store->head->data_b_off - store->head->data_a_off, - .pairs_bytes[rill_col_b] = store->vma_len - store->head->data_b_off, + .rows_bytes[rill_col_a] = store->head->data_b_off - store->head->data_a_off, + .rows_bytes[rill_col_b] = store->vma_len - store->head->data_b_off, }; return ret; @@ -771,7 +771,7 @@ size_t rill_store_space_index(struct rill_space* space, enum rill_col col) { assert(col == rill_col_a || col == rill_col_b); return space->index_bytes[col]; } -size_t rill_store_space_pairs(struct rill_space* space, enum rill_col col) { +size_t rill_store_space_rows(struct rill_space* space, enum rill_col col) { assert(col == rill_col_a || col == rill_col_b); - return space->pairs_bytes[col]; + return space->rows_bytes[col]; } diff --git a/src/vals.c b/src/vals.c index 543ddc5..449db81 100644 --- a/src/vals.c +++ b/src/vals.c @@ -60,16 +60,16 @@ static void vals_compact(struct vals *vals) vals->len = j + 1; } -static struct vals *vals_cols_from_pairs(struct rill_pairs *pairs, enum rill_col col) +static struct vals *vals_cols_from_rows(struct rill_rows *rows, enum rill_col col) { struct vals *vals = - calloc(1, sizeof(*vals) + sizeof(vals->data[0]) * pairs->len); + calloc(1, sizeof(*vals) + sizeof(vals->data[0]) * rows->len); if (!vals) return NULL; - vals->len = pairs->len; - for (size_t i = 0; i < pairs->len; ++i) - vals->data[i] = col == rill_col_a ? 
pairs->data[i].key : pairs->data[i].val; + vals->len = rows->len; + for (size_t i = 0; i < rows->len; ++i) + vals->data[i] = col == rill_col_a ? rows->data[i].key : rows->data[i].val; vals_compact(vals); return vals; diff --git a/test/coder_test.c b/test/coder_test.c index ba921d0..b31df3b 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -12,9 +12,9 @@ // utils // ----------------------------------------------------------------------------- -static struct index *index_alloc(size_t pairs) +static struct index *index_alloc(size_t rows) { - return calloc(1, index_cap(pairs)); + return calloc(1, index_cap(rows)); } @@ -76,11 +76,11 @@ bool test_leb128(void) #define make_index(...) \ ({ \ - rill_val_t kvs[] = { __VA_ARGS__ }; \ - size_t len = sizeof(kvs) / sizeof(kvs[0]); \ + rill_val_t rows[] = { __VA_ARGS__ }; \ + size_t len = sizeof(rows) / sizeof(rows[0]); \ struct index *index = index_alloc(len); \ for (size_t i = 0; i < len; ++i) \ - index_put(index, kvs[i], 1); \ + index_put(index, rows[i], 1); \ index; \ }) @@ -95,9 +95,9 @@ static struct vals *make_vals_impl(rill_val_t *list, size_t len) return vals; } -static void check_vals(struct rill_pairs *pairs, struct vals *exp) +static void check_vals(struct rill_rows *rows, struct vals *exp) { - struct vals *vals = vals_cols_from_pairs(pairs, rill_col_b); + struct vals *vals = vals_cols_from_rows(rows, rill_col_b); assert(vals->len == exp->len); for (size_t i = 0; i < exp->len; ++i) @@ -113,7 +113,7 @@ static void check_vals(struct rill_pairs *pairs, struct vals *exp) free(vals); free(exp); - free(pairs); + free(rows); htable_reset(&rev); } @@ -132,16 +132,16 @@ static void check_vals_merge(struct vals *a, struct index *b, struct vals *exp) bool test_vals(void) { - check_vals(make_pair(kv(1, 10)), make_vals(10)); + check_vals(make_pair(row(1, 10)), make_vals(10)); - check_vals(make_pair(kv(1, 10), kv(1, 10)), make_vals(10)); - check_vals(make_pair(kv(1, 10), kv(2, 10)), make_vals(10)); + 
check_vals(make_pair(row(1, 10), row(1, 10)), make_vals(10)); + check_vals(make_pair(row(1, 10), row(2, 10)), make_vals(10)); - check_vals(make_pair(kv(1, 10), kv(1, 20)), make_vals(10, 20)); - check_vals(make_pair(kv(1, 10), kv(2, 20)), make_vals(10, 20)); + check_vals(make_pair(row(1, 10), row(1, 20)), make_vals(10, 20)); + check_vals(make_pair(row(1, 10), row(2, 20)), make_vals(10, 20)); - check_vals(make_pair(kv(2, 20), kv(1, 10)), make_vals(10, 20)); - check_vals(make_pair(kv(1, 20), kv(1, 10)), make_vals(10, 20)); + check_vals(make_pair(row(2, 20), row(1, 10)), make_vals(10, 20)); + check_vals(make_pair(row(1, 20), row(1, 10)), make_vals(10, 20)); check_vals_merge(make_vals(10), make_index(10), make_vals(10)); check_vals_merge(make_vals(10), make_index(20), make_vals(10, 20)); @@ -160,22 +160,22 @@ bool test_vals(void) // coder // ----------------------------------------------------------------------------- -void check_coder(struct rill_pairs *pairs) +void check_coder(struct rill_rows *rows) { - rill_pairs_compact(pairs); + rill_rows_compact(rows); - struct rill_pairs *inverted = rill_pairs_new(pairs->len); - for (size_t i = 0; i < pairs->len; ++i) - rill_pairs_push(inverted, pairs->data[i].val, pairs->data[i].key); - rill_pairs_compact(inverted); + struct rill_rows *inverted = rill_rows_new(rows->len); + for (size_t i = 0; i < rows->len; ++i) + rill_rows_push(inverted, rows->data[i].val, rows->data[i].key); + rill_rows_compact(inverted); - struct vals *vals_a = vals_cols_from_pairs(pairs, rill_col_b); - struct vals *vals_b = vals_cols_from_pairs(inverted, rill_col_b); + struct vals *vals_a = vals_cols_from_rows(rows, rill_col_b); + struct vals *vals_b = vals_cols_from_rows(inverted, rill_col_b); - const size_t pairs_a_cap = coder_cap(vals_a->len, pairs->len); - const size_t pairs_b_cap = coder_cap(vals_b->len, inverted->len); + const size_t rows_a_cap = coder_cap(vals_a->len, rows->len); + const size_t rows_b_cap = coder_cap(vals_b->len, inverted->len); - 
size_t cap = pairs_a_cap + pairs_b_cap; + size_t cap = rows_a_cap + rows_b_cap; uint8_t *buffer = calloc(1, cap); struct index *index_a = index_alloc(vals_b->len); struct index *index_b = index_alloc(vals_a->len); @@ -185,12 +185,12 @@ void check_coder(struct rill_pairs *pairs) struct encoder coder_a = make_encoder(buffer, buffer + cap, vals_a, index_a); - for (size_t i = 0; i < pairs->len; ++i) - assert(coder_encode(&coder_a, &pairs->data[i])); + for (size_t i = 0; i < rows->len; ++i) + assert(coder_encode(&coder_a, &rows->data[i])); assert(coder_finish(&coder_a)); len_a = len = coder_a.it - buffer; - assert(len <= pairs_a_cap); + assert(len <= rows_a_cap); struct encoder coder_b = make_encoder(buffer + len_a, buffer + cap, vals_b, index_b); @@ -199,7 +199,7 @@ void check_coder(struct rill_pairs *pairs) assert(coder_finish(&coder_b)); len_b = coder_b.it - coder_a.it; - assert(len_b <= pairs_b_cap); + assert(len_b <= rows_b_cap); len = coder_b.it - buffer; coder_close(&coder_a); @@ -207,7 +207,7 @@ void check_coder(struct rill_pairs *pairs) } if (false) { // hex dump for debuging - rill_pairs_print(pairs); + rill_rows_print(rows); printf("buffer: start=%p, len=%lu(%lu, %lu)\n", (void *) buffer, len, len_a, len_b); for (size_t i = 0; i < cap;) { printf("%6p: ", (void *) i); @@ -220,15 +220,15 @@ void check_coder(struct rill_pairs *pairs) printf("index_a: [ "); for (size_t i = 0; i < index_a->len; ++i) { - struct index_kv *kv = &index_a->data[i]; - printf("{%p, %p} ", (void *) kv->key, (void *) kv->off); + struct index_row *row = &index_a->data[i]; + printf("{%p, %p} ", (void *) row->key, (void *) row->off); } printf("]\n"); printf("index_b: [ "); for (size_t i = 0; i < index_b->len; ++i) { - struct index_kv *kv = &index_b->data[i]; - printf("{%p, %p} ", (void *) kv->key, (void *) kv->off); + struct index_row *row = &index_b->data[i]; + printf("{%p, %p} ", (void *) row->key, (void *) row->off); } printf("]\n"); } @@ -240,14 +240,14 @@ void check_coder(struct 
rill_pairs *pairs) start + len_a, index_b, index_a, 0); - struct rill_kv kv = {0}; - for (size_t i = 0; i < pairs->len; ++i) { - assert(coder_decode(&coder, &kv)); - assert(rill_kv_cmp(&kv, &pairs->data[i]) == 0); + struct rill_row row = {0}; + for (size_t i = 0; i < rows->len; ++i) { + assert(coder_decode(&coder, &row)); + assert(rill_row_cmp(&row, &rows->data[i]) == 0); } - assert(coder_decode(&coder, &kv)); - assert(rill_kv_nil(&kv)); + assert(coder_decode(&coder, &row)); + assert(rill_row_nil(&row)); } { /* Coder B */ @@ -257,32 +257,32 @@ void check_coder(struct rill_pairs *pairs) start + len_b, index_a, index_b, 0); - struct rill_kv kv = {0}; - for (size_t i = 0; i < pairs->len; ++i) { - assert(coder_decode(&coder, &kv)); - assert(rill_kv_cmp(&kv, &inverted->data[i]) == 0); + struct rill_row row = {0}; + for (size_t i = 0; i < rows->len; ++i) { + assert(coder_decode(&coder, &row)); + assert(rill_row_cmp(&row, &inverted->data[i]) == 0); } - assert(coder_decode(&coder, &kv)); - assert(rill_kv_nil(&kv)); + assert(coder_decode(&coder, &row)); + assert(rill_row_nil(&row)); } { /* Decode A */ - for (size_t i = 0; i < pairs->len; ++i) { + for (size_t i = 0; i < rows->len; ++i) { size_t key_idx = 0; uint64_t off = 0; - assert(index_find(index_a, pairs->data[i].key, &key_idx, &off)); + assert(index_find(index_a, rows->data[i].key, &key_idx, &off)); uint8_t *start = buffer; struct decoder coder = make_decoder_at( start + off, start + len_a, index_b, index_a, key_idx); - struct rill_kv kv = {0}; + struct rill_row row = {0}; do { - assert(coder_decode(&coder, &kv)); - assert(kv.key == pairs->data[i].key); - } while (kv.val != pairs->data[i].val); + assert(coder_decode(&coder, &row)); + assert(row.key == rows->data[i].key); + } while (row.val != rows->data[i].val); } } @@ -299,12 +299,12 @@ void check_coder(struct rill_pairs *pairs) index_a, index_b, key_idx); - struct rill_kv kv = {0}; + struct rill_row row = {0}; do { - assert(coder_decode(&coder, &kv)); - assert(kv.key 
&& kv.val); - assert(kv.key == inverted->data[i].key); - } while (kv.val != inverted->data[i].val); + assert(coder_decode(&coder, &row)); + assert(row.key && row.val); + assert(row.key == inverted->data[i].key); + } while (row.val != inverted->data[i].val); } } @@ -313,22 +313,22 @@ void check_coder(struct rill_pairs *pairs) free(index_b); free(vals_a); free(vals_b); - free(pairs); + free(rows); free(inverted); } bool test_coder(void) { - check_coder(make_pair(kv(1, 10))); - check_coder(make_pair(kv(1, 10), kv(1, 20))); - check_coder(make_pair(kv(1, 10), kv(2, 20))); - check_coder(make_pair(kv(1, 10), kv(1, 20), kv(2, 30))); - check_coder(make_pair(kv(1, 10), kv(1, 20), kv(2, 10))); + check_coder(make_pair(row(1, 10))); + check_coder(make_pair(row(1, 10), row(1, 20))); + check_coder(make_pair(row(1, 10), row(2, 20))); + check_coder(make_pair(row(1, 10), row(1, 20), row(2, 30))); + check_coder(make_pair(row(1, 10), row(1, 20), row(2, 10))); struct rng rng = rng_make(0); for (size_t iterations = 0; iterations < 100; ++iterations) - check_coder(make_rng_pairs(&rng)); + check_coder(make_rng_rows(&rng)); return true; } diff --git a/test/indexer_test.c b/test/indexer_test.c index 8185f81..804ba2b 100644 --- a/test/indexer_test.c +++ b/test/indexer_test.c @@ -6,9 +6,9 @@ // utils // ----------------------------------------------------------------------------- -static struct index *index_alloc(size_t pairs) +static struct index *index_alloc(size_t rows) { - struct index *index = calloc(1, index_cap(pairs)); + struct index *index = calloc(1, index_cap(rows)); assert(index); assert(index->len == 0); @@ -23,17 +23,17 @@ static struct index *index_alloc(size_t pairs) static bool test_index_build(void) { - enum { pairs = 10 }; + enum { rows = 10 }; - struct index *index = index_alloc(pairs); + struct index *index = index_alloc(rows); - rill_key_t data[pairs] = {0}; - for (size_t i = 1; i < pairs; ++i) data[i] = data[i - 1] += 2; + rill_val_t data[rows] = {0}; + for (size_t i = 
1; i < rows; ++i) data[i] = data[i - 1] += 2; - for (size_t i = 0; i < pairs; i++) + for (size_t i = 0; i < rows; i++) index_put(index, data[i], i); - assert(index->len == pairs); + assert(index->len == rows); for (size_t i = 0; i < index->len; i++) assert(index_get(index, i) == data[i]); @@ -48,7 +48,7 @@ static bool test_index_build(void) // test_index_lookup // ----------------------------------------------------------------------------- -static struct index *make_index(rill_key_t *data, size_t n) +static struct index *make_index(rill_val_t *data, size_t n) { struct index *index = index_alloc(n); for (size_t i = 0; i < n; i++) @@ -59,12 +59,12 @@ static struct index *make_index(rill_key_t *data, size_t n) #define index_from_keys(...) \ ({ \ - rill_key_t keys[] = { __VA_ARGS__ }; \ + rill_val_t keys[] = { __VA_ARGS__ }; \ make_index(keys, sizeof(keys) / sizeof(keys[0])); \ }) #define assert_found(index, ...) { \ - rill_key_t keys[] = { __VA_ARGS__ }; \ + rill_val_t keys[] = { __VA_ARGS__ }; \ size_t key_idx; \ uint64_t val; \ for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) { \ @@ -75,7 +75,7 @@ static struct index *make_index(rill_key_t *data, size_t n) } #define assert_not_found(index, ...) 
{ \ - rill_key_t keys[] = { __VA_ARGS__ }; \ + rill_val_t keys[] = { __VA_ARGS__ }; \ size_t key_idx; \ uint64_t val; \ for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) \ diff --git a/test/query_test.c b/test/query_test.c index 760c146..33b0101 100644 --- a/test/query_test.c +++ b/test/query_test.c @@ -7,51 +7,51 @@ bool test_sequence() const size_t max_keys = 1000; const size_t max_values = 100; - struct rill_pairs* pairs = rill_pairs_new(1024); + struct rill_rows* rows = rill_rows_new(1024); for (size_t i = 1; i < max_keys + 1; ++i) for (size_t j = 1; j < max_values + 1; ++j) - pairs = rill_pairs_push(pairs, i, j); + rows = rill_rows_push(rows, i, j); - rill_store_write(name, 666, 666, pairs); - rill_pairs_free(pairs); + rill_store_write(name, 666, 666, rows); + rill_rows_free(rows); struct rill_store* store = rill_store_open(name); { - struct rill_pairs* result = rill_pairs_new(256); + struct rill_rows* result = rill_rows_new(256); for (size_t i = 1; i < max_keys + 1; ++i) { result = rill_store_query_key(store, i, result); - rill_pairs_compact(result); + rill_rows_compact(result); assert(result->len == max_values); for (size_t x = 0; x < max_values; ++x) assert(result->data[x].key == i && result->data[x].val == x + 1); - rill_pairs_clear(result); + rill_rows_clear(result); } - rill_pairs_free(result); + rill_rows_free(result); } { - struct rill_pairs* result = rill_pairs_new(256); + struct rill_rows* result = rill_rows_new(256); for (size_t i = 1; i < max_values + 1; ++i) { result = rill_store_query_value(store, i, result); - rill_pairs_compact(result); + rill_rows_compact(result); assert(result->len == max_keys); for (size_t x = 0; x < max_keys; ++x) assert(result->data[x].key == i && result->data[x].val == x + 1); - rill_pairs_clear(result); + rill_rows_clear(result); } - rill_pairs_free(result); + rill_rows_free(result); } rill_store_close(store); diff --git a/test/rill_generate.c b/test/rill_generate.c index f04ea70..0d49222 100644 --- 
a/test/rill_generate.c +++ b/test/rill_generate.c @@ -1,33 +1,33 @@ #include "test.h" static struct rill_store *make_store( - const char *name, struct rill_pairs *pairs) + const char *name, struct rill_rows *rows) { unlink(name); - assert(rill_store_write(name, 0, 0, pairs)); + assert(rill_store_write(name, 0, 0, rows)); return 0; } bool generate() { struct rng rng = rng_make(0); - struct rill_pairs *pairs = make_rng_pairs(&rng); - make_store("test.store.generated", pairs); - rill_pairs_free(pairs); + struct rill_rows *rows = make_rng_rows(&rng); + make_store("test.store.generated", rows); + rill_rows_free(rows); return true; } bool generate_simple() { const size_t len = 20; - struct rill_pairs *pairs = rill_pairs_new(len); + struct rill_rows *rows = rill_rows_new(len); for (size_t i = 0; i < len; ++i) { - rill_pairs_push(pairs, i + 1, 3 + i * 20); + rill_rows_push(rows, i + 1, 3 + i * 20); } - make_store("test.store.simple", pairs); - rill_pairs_free(pairs); + make_store("test.store.simple", rows); + rill_rows_free(rows); return true; } @@ -35,14 +35,14 @@ bool generate_simple() bool generate_with_multiple_values() { const size_t len = 100; - struct rill_pairs *pairs = rill_pairs_new(len); + struct rill_rows *rows = rill_rows_new(len); for (size_t i = 0; i < 20; ++i) for (size_t j = 1; j < 5; ++j) - rill_pairs_push(pairs, i + 1, j * 100); + rill_rows_push(rows, i + 1, j * 100); - make_store("test.store.multvals", pairs); - rill_pairs_free(pairs); + make_store("test.store.multvals", rows); + rill_rows_free(rows); return true; } diff --git a/test/rotate_test.c b/test/rotate_test.c index bd8e28a..2cee814 100644 --- a/test/rotate_test.c +++ b/test/rotate_test.c @@ -43,17 +43,17 @@ bool test_rotate(void) { struct rill_query *query = rill_query_open(dir); - struct rill_pairs *pairs = rill_query_keys(query, &key, 1, rill_pairs_new(1)); + struct rill_rows *rows = rill_query_keys(query, &key, 1, rill_rows_new(1)); rill_query_close(query); size_t i = 0; for (rill_ts_t ts = 
0; ts < expire_secs; ts += step) { - assert(pairs->data[i].key == key); - assert(pairs->data[i].val == ts + 1); + assert(rows->data[i].key == key); + assert(rows->data[i].val == ts + 1); ++i; } - rill_pairs_free(pairs); + rill_rows_free(rows); } for (size_t i = 1; i <= 6; ++i) { @@ -66,15 +66,15 @@ bool test_rotate(void) { struct rill_query *query = rill_query_open(dir); - struct rill_pairs *pairs = rill_query_keys(query, &key, 1, rill_pairs_new(1)); + struct rill_rows *rows = rill_query_keys(query, &key, 1, rill_rows_new(1)); rill_query_close(query); - for (size_t i = 0; i < pairs->len; ++i) { - assert(pairs->data[i].key == key); - assert(pairs->data[i].val >= (5 * month_secs) + 1); + for (size_t i = 0; i < rows->len; ++i) { + assert(rows->data[i].key == key); + assert(rows->data[i].val >= (5 * month_secs) + 1); } - rill_pairs_free(pairs); + rill_rows_free(rows); } rm(dir); diff --git a/test/store_test.c b/test/store_test.c index 654a65d..42e8b7b 100644 --- a/test/store_test.c +++ b/test/store_test.c @@ -10,10 +10,10 @@ // utils // ----------------------------------------------------------------------------- -static struct rill_store *make_store(const char *name, struct rill_pairs *pairs) +static struct rill_store *make_store(const char *name, struct rill_rows *rows) { unlink(name); - assert(rill_store_write(name, 0, 0, pairs)); + assert(rill_store_write(name, 0, 0, rows)); struct rill_store *store = rill_store_open(name); assert(store); @@ -58,48 +58,48 @@ static struct list *make_rng_list(struct rng *rng, uint64_t max) // query_key // ----------------------------------------------------------------------------- -static struct rill_pairs* duplicate_pairs(struct rill_pairs* pairs) +static struct rill_rows* duplicate_rows(struct rill_rows* rows) { - struct rill_pairs* copy = rill_pairs_new(pairs->len); - for (size_t i = 0; i < pairs->len; ++i) - rill_pairs_push(copy, pairs->data[i].key, pairs->data[i].val); + struct rill_rows* copy = rill_rows_new(rows->len); + for 
(size_t i = 0; i < rows->len; ++i) + rill_rows_push(copy, rows->data[i].key, rows->data[i].val); return copy; } -static void check_query_key(struct rill_pairs *pairs) +static void check_query_key(struct rill_rows *rows) { - struct rill_pairs *expected = duplicate_pairs(pairs); - struct rill_store *store = make_store("test.store.query_key", pairs); - struct rill_pairs *result = rill_pairs_new(128); + struct rill_rows *expected = duplicate_rows(rows); + struct rill_store *store = make_store("test.store.query_key", rows); + struct rill_rows *result = rill_rows_new(128); - rill_pairs_compact(pairs); - rill_pairs_compact(expected); + rill_rows_compact(rows); + rill_rows_compact(expected); for (size_t i = 0; i < expected->len;) { - rill_pairs_clear(result); + rill_rows_clear(result); result = rill_store_query_key(store, expected->data[i].key, result); assert(expected->len - i >= result->len); for (size_t j = 0; j < result->len; ++j, ++i) - assert(!rill_kv_cmp(&expected->data[i], &result->data[j])); + assert(!rill_row_cmp(&expected->data[i], &result->data[j])); } free(result); rill_store_close(store); - rill_pairs_free(pairs); - rill_pairs_free(expected); + rill_rows_free(rows); + rill_rows_free(expected); } bool test_query_key(void) { - check_query_key(make_pair(kv(1, 10))); - check_query_key(make_pair(kv(1, 10), kv(2, 20))); - check_query_key(make_pair(kv(1, 10), kv(1, 20), kv(2, 20))); - check_query_key(make_pair(kv(1, 10), kv(1, 20), kv(1, 20), kv(1, 30))); + check_query_key(make_pair(row(1, 10))); + check_query_key(make_pair(row(1, 10), row(2, 20))); + check_query_key(make_pair(row(1, 10), row(1, 20), row(2, 20))); + check_query_key(make_pair(row(1, 10), row(1, 20), row(1, 20), row(1, 30))); struct rng rng = rng_make(0); for (size_t iterations = 0; iterations < 10; ++iterations) - check_query_key(make_rng_pairs(&rng)); + check_query_key(make_rng_rows(&rng)); return true; } @@ -110,25 +110,25 @@ bool test_query_key(void) // 
----------------------------------------------------------------------------- static void check_scan_keys( - struct rill_store *store, struct rill_pairs *pairs, struct list *keys) + struct rill_store *store, struct rill_rows *rows, struct list *keys) { - struct rill_pairs *result = rill_pairs_new(128); - rill_pairs_compact(pairs); + struct rill_rows *result = rill_rows_new(128); + rill_rows_compact(rows); for (size_t i = 0; i < keys->len; ++i) result = rill_store_query_key(store, keys->data[i], result); - struct rill_pairs *exp = rill_pairs_new(128); - for (size_t i = 0; i < pairs->len; ++i) { + struct rill_rows *exp = rill_rows_new(128); + for (size_t i = 0; i < rows->len; ++i) { for (size_t j = 0; j < keys->len; ++j) { - struct rill_kv *kv = &pairs->data[i]; - if (kv->key == keys->data[j]) exp = rill_pairs_push(exp, kv->key, kv->val); + struct rill_row *row = &rows->data[i]; + if (row->key == keys->data[j]) exp = rill_rows_push(exp, row->key, row->val); } } assert(exp->len == result->len); for (size_t i = 0; i < exp->len; ++i) - assert(!rill_kv_cmp(&exp->data[i], &result->data[i])); + assert(!rill_row_cmp(&exp->data[i], &result->data[i])); free(exp); free(result); @@ -140,9 +140,9 @@ bool test_scan_keys(void) static const char *name = "test.store.scan_keys"; { - struct rill_pairs *pairs = make_pair(kv(2, 10)); - struct rill_pairs *copy = duplicate_pairs(pairs); - struct rill_store *store = make_store(name, pairs); + struct rill_rows *rows = make_pair(row(2, 10)); + struct rill_rows *copy = duplicate_rows(rows); + struct rill_store *store = make_store(name, rows); check_scan_keys(store, copy, make_list(1)); check_scan_keys(store, copy, make_list(2)); @@ -153,14 +153,14 @@ bool test_scan_keys(void) rill_store_close(store); free(copy); - free(pairs); + free(rows); } { - struct rill_pairs *pairs = make_pair(kv(2, 10), kv(3, 10), kv(3, 20), kv(4, 30)); - struct rill_pairs *copy = duplicate_pairs(pairs); - struct rill_store *store = make_store(name, pairs); + struct 
rill_rows *rows = make_pair(row(2, 10), row(3, 10), row(3, 20), row(4, 30)); + struct rill_rows *copy = duplicate_rows(rows); + struct rill_store *store = make_store(name, rows); check_scan_keys(store, copy, make_list(1)); check_scan_keys(store, copy, make_list(3)); @@ -172,20 +172,20 @@ bool test_scan_keys(void) rill_store_close(store); free(copy); - free(pairs); + free(rows); } { struct rng rng = rng_make(0); - struct rill_pairs *pairs = make_rng_pairs(&rng); - struct rill_pairs *copy = duplicate_pairs(pairs); - struct rill_store *store = make_store(name, pairs); + struct rill_rows *rows = make_rng_rows(&rng); + struct rill_rows *copy = duplicate_rows(rows); + struct rill_store *store = make_store(name, rows); for (size_t iterations = 0; iterations < 10; ++iterations) check_scan_keys(store, copy, make_rng_list(&rng, rng_range_key)); rill_store_close(store); - free(pairs); + free(rows); free(copy); } @@ -198,28 +198,28 @@ bool test_scan_keys(void) // ----------------------------------------------------------------------------- static void check_scan_vals( - struct rill_store *store, struct rill_pairs *pairs, struct list *vals) + struct rill_store *store, struct rill_rows *rows, struct list *vals) { - struct rill_pairs *result = rill_pairs_new(128); - rill_pairs_compact(pairs); + struct rill_rows *result = rill_rows_new(128); + rill_rows_compact(rows); for (size_t i = 0; i < vals->len; ++i) result = rill_store_query_value(store, vals->data[i], result); - struct rill_pairs *exp = rill_pairs_new(128); - for (size_t i = 0; i < pairs->len; ++i) { + struct rill_rows *exp = rill_rows_new(128); + for (size_t i = 0; i < rows->len; ++i) { for (size_t j = 0; j < vals->len; ++j) { - struct rill_kv *kv = &pairs->data[i]; - if (kv->val == vals->data[j]) - exp = rill_pairs_push(exp, kv->val, kv->key); + struct rill_row *row = &rows->data[i]; + if (row->val == vals->data[j]) + exp = rill_rows_push(exp, row->val, row->key); } } - rill_pairs_compact(exp); + rill_rows_compact(exp); 
assert(exp->len == result->len); for (size_t i = 0; i < exp->len; ++i) - assert(!rill_kv_cmp(&exp->data[i], &result->data[i])); + assert(!rill_row_cmp(&exp->data[i], &result->data[i])); free(exp); free(result); @@ -231,9 +231,9 @@ bool test_scan_vals(void) static const char *name = "test.store.scan_vals"; { - struct rill_pairs *pairs = make_pair(kv(2, 20)); - struct rill_pairs *copy = duplicate_pairs(pairs); - struct rill_store *store = make_store(name, pairs); + struct rill_rows *rows = make_pair(row(2, 20)); + struct rill_rows *copy = duplicate_rows(rows); + struct rill_store *store = make_store(name, rows); check_scan_vals(store, copy, make_list(10)); check_scan_vals(store, copy, make_list(20)); @@ -243,14 +243,14 @@ bool test_scan_vals(void) check_scan_vals(store, copy, make_list(10, 30)); rill_store_close(store); - free(pairs); + free(rows); free(copy); } { - struct rill_pairs *pairs = make_pair(kv(2, 20), kv(3, 20), kv(3, 30), kv(4, 40)); - struct rill_pairs *copy = duplicate_pairs(pairs); - struct rill_store *store = make_store(name, pairs); + struct rill_rows *rows = make_pair(row(2, 20), row(3, 20), row(3, 30), row(4, 40)); + struct rill_rows *copy = duplicate_rows(rows); + struct rill_store *store = make_store(name, rows); check_scan_vals(store, copy, make_list(10)); check_scan_vals(store, copy, make_list(20)); @@ -262,21 +262,21 @@ bool test_scan_vals(void) check_scan_vals(store, copy, make_list(20, 30, 40)); rill_store_close(store); - free(pairs); + free(rows); free(copy); } { struct rng rng = rng_make(0); - struct rill_pairs *pairs = make_rng_pairs(&rng); - struct rill_pairs *copy = duplicate_pairs(pairs); - struct rill_store *store = make_store(name, pairs); + struct rill_rows *rows = make_rng_rows(&rng); + struct rill_rows *copy = duplicate_rows(rows); + struct rill_store *store = make_store(name, rows); for (size_t iterations = 0; iterations < 10; ++iterations) check_scan_vals(store, copy, make_rng_list(&rng, rng_range_val)); 
rill_store_close(store); - free(pairs); + free(rows); free(copy); } diff --git a/test/test.h b/test/test.h index af02df0..429a560 100644 --- a/test/test.h +++ b/test/test.h @@ -21,45 +21,45 @@ // ----------------------------------------------------------------------------- -// pairs +// rows // ----------------------------------------------------------------------------- -struct rill_kv kv(rill_key_t key, rill_val_t val) +struct rill_row row(rill_val_t key, rill_val_t val) { - return (struct rill_kv) { .key = key, .val = val }; + return (struct rill_row) { .key = key, .val = val }; } #define make_pair(...) \ ({ \ - struct rill_kv kvs[] = { __VA_ARGS__ }; \ - make_pair_impl(kvs, sizeof(kvs) / sizeof(kvs[0])); \ + struct rill_row rows[] = { __VA_ARGS__ }; \ + make_pair_impl(rows, sizeof(rows) / sizeof(rows[0])); \ }) -struct rill_pairs *make_pair_impl(const struct rill_kv *kv, size_t len) +struct rill_rows *make_pair_impl(const struct rill_row *row, size_t len) { - struct rill_pairs *pairs = rill_pairs_new(len); + struct rill_rows *rows = rill_rows_new(len); for (size_t i = 0; i < len; ++i) - pairs = rill_pairs_push(pairs, kv[i].key, kv[i].val); - return pairs; + rows = rill_rows_push(rows, row[i].key, row[i].val); + return rows; } enum { rng_range_key = 500, rng_range_val = 100 }; -struct rill_pairs *make_rng_pairs(struct rng *rng) +struct rill_rows *make_rng_rows(struct rng *rng) { enum { len = 1000 }; - struct rill_pairs *pairs = rill_pairs_new(len); + struct rill_rows *rows = rill_rows_new(len); for (size_t i = 0; i < len; ++i) { uint64_t key = rng_gen_range(rng, 1, rng_range_key); uint64_t val = rng_gen_range(rng, 1, rng_range_val); - pairs = rill_pairs_push(pairs, key, val); - assert(pairs); + rows = rill_rows_push(rows, key, val); + assert(rows); } - rill_pairs_compact(pairs); + rill_rows_compact(rows); - return pairs; + return rows; } From dc20b2b9c352b4fa473ab401a30ed201e56cc057 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Thu, 14 Jun 
2018 12:14:30 +0200 Subject: [PATCH 67/91] rework rill_rows interface prevents all the hard to fix memory leaks throughout the code --- compile.sh | 2 +- src/rill.h | 18 +++++-------- src/{pairs.c => rows.c} | 59 ++++++++++++++--------------------------- test/coder_test.c | 6 ++--- test/test.h | 4 +-- 5 files changed, 33 insertions(+), 56 deletions(-) rename src/{pairs.c => rows.c} (62%) diff --git a/compile.sh b/compile.sh index 2c615da..ef72129 100755 --- a/compile.sh +++ b/compile.sh @@ -5,7 +5,7 @@ set -o errexit -o nounset -o pipefail -o xtrace : ${PREFIX:="."} declare -a SRC -SRC=(htable rng utils pairs store acc rotate query) +SRC=(htable rng utils rows store acc rotate query) CC=${OTHERC:-gcc} LEAKCHECK_ENABLED=${LEAKCHECK_ENABLED:-} diff --git a/src/rill.h b/src/rill.h index b505d65..eb3a8be 100644 --- a/src/rill.h +++ b/src/rill.h @@ -75,20 +75,16 @@ struct rill_rows struct rill_row *data; }; -struct rill_rows *rill_rows_new(size_t cap); -void rill_rows_free(struct rill_rows *rows); -void rill_rows_clear(struct rill_rows *rows); +void rill_rows_free(struct rill_rows *); -struct rill_rows *rill_rows_reserve(struct rill_rows *rows, size_t cap); +bool rill_rows_push(struct rill_rows *, rill_val_t a, rill_val_t b); +bool rill_rows_reserve(struct rill_rows *, size_t cap); +void rill_rows_clear(struct rill_rows *); -struct rill_rows *rill_rows_push( - struct rill_rows *rows, rill_val_t key, rill_val_t val); +void rill_rows_invert(struct rill_rows *); +void rill_rows_compact(struct rill_rows *); -void rill_rows_compact(struct rill_rows *rows); - -void rill_rows_print(const struct rill_rows *rows); - -void rill_rows_invert(struct rill_rows* rows); +void rill_rows_print(const struct rill_rows *); // ----------------------------------------------------------------------------- diff --git a/src/pairs.c b/src/rows.c similarity index 62% rename from src/pairs.c rename to src/rows.c index 0b11073..cea3504 100644 --- a/src/pairs.c +++ b/src/rows.c @@ -25,59 +25,40 @@ 
extern inline int rill_row_cmp(const struct rill_row *, const struct rill_row *) static size_t adjust_cap(size_t cap, size_t len) { - while (len > cap) cap *= 2; return cap; } -struct rill_rows *rill_rows_new(size_t cap) -{ - cap = adjust_cap(1, cap); - - struct rill_rows *rows = - calloc(1, sizeof(*rows) + cap * sizeof(rows->data[0])); - if (!rows) { - rill_fail("unable to alloc rows: cap=%lu", cap); - return NULL; - } - - rows->cap = cap; - return rows; -} - - void rill_rows_free(struct rill_rows *rows) { - free(rows); + free(rows->data); } - void rill_rows_clear(struct rill_rows *rows) { rows->len = 0; } -struct rill_rows *rill_rows_reserve(struct rill_rows *rows, size_t cap) +bool rill_rows_reserve(struct rill_rows *rows, size_t cap) { - if (rill_likely(cap <= rows->cap)) return rows; - cap = adjust_cap(rows->cap, cap); + if (rill_likely(cap <= rows->cap)) return true; - rows = realloc(rows, sizeof(*rows) + cap * sizeof(rows->data[0])); - if (!rows) { - rill_fail("unable to realloc rows: cap=%lu", cap); - return NULL; + size_t new_cap = rows->cap; + while (new_cap < cap) new_cap *= 2; + + rows->data = realloc(rows->data, new_cap * sizeof(rows->data[0])); + if (!rows->data) { + rill_fail("unable to realloc rows: cap=%lu", new_cap); + return false; } - rows->cap = cap; - return rows; + rows->cap = new_cap; + return true; } -struct rill_rows *rill_rows_push( - struct rill_rows *rows, rill_val_t key, rill_val_t val) +bool rill_rows_push(struct rill_rows *rows, rill_val_t a, rill_val_t b) { - assert(key && val && rows); - - rows = rill_rows_reserve(rows, rows->len + 1); - if (!rows) return NULL; + assert(a && b); + if (!rill_rows_reserve(rows, rows->len + 1)) return false; rows->data[rows->len] = (struct rill_row) { .key = key, .val = val }; rows->len++; @@ -111,15 +92,15 @@ void rill_rows_print(const struct rill_rows *rows) const rill_val_t no_key = -1ULL; rill_val_t key = no_key; - printf("rows(%p, %lu, %lu):\n", (void *) rows, rows->len, rows->cap); + 
printf("rows(%lu, %lu):\n", rows->len, rows->cap); for (size_t i = 0; i < rows->len; ++i) { const struct rill_row *row = &rows->data[i]; - if (row->key == key) printf(", %lu", row->val); + if (row->key == key) printf(", %p", row->b); else { if (key != no_key) printf("]\n"); - printf(" %p: [ %lu", (void *) row->key, row->val); + printf(" %p: [ %p", (void *) row->a, row->b); key = row->key; } } @@ -131,8 +112,8 @@ void rill_rows_invert(struct rill_rows* rows) { for (size_t i = 0; i < rows->len; ++i) { rows->data[i] = (struct rill_row) { - .key = rows->data[i].val, - .val = rows->data[i].key, + .a = rows->data[i].b, + .b = rows->data[i].a, }; } } diff --git a/test/coder_test.c b/test/coder_test.c index b31df3b..e4a39eb 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -76,11 +76,11 @@ bool test_leb128(void) #define make_index(...) \ ({ \ - rill_val_t rows[] = { __VA_ARGS__ }; \ - size_t len = sizeof(rows) / sizeof(rows[0]); \ + rill_val_t rows[] = { __VA_ARGS__ }; \ + size_t len = sizeof(rows) / sizeof(rows[0]); \ struct index *index = index_alloc(len); \ for (size_t i = 0; i < len; ++i) \ - index_put(index, rows[i], 1); \ + index_put(index, rows[i], 1); \ index; \ }) diff --git a/test/test.h b/test/test.h index 429a560..10705bc 100644 --- a/test/test.h +++ b/test/test.h @@ -31,8 +31,8 @@ struct rill_row row(rill_val_t key, rill_val_t val) #define make_pair(...) 
\ ({ \ - struct rill_row rows[] = { __VA_ARGS__ }; \ - make_pair_impl(rows, sizeof(rows) / sizeof(rows[0])); \ + struct rill_row rows[] = { __VA_ARGS__ }; \ + make_pair_impl(rows, sizeof(rows) / sizeof(rows[0])); \ }) struct rill_rows *make_pair_impl(const struct rill_row *row, size_t len) From 5b1572557b51b574835101e2177f63581c69c7d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Thu, 14 Jun 2018 12:34:59 +0200 Subject: [PATCH 68/91] fix store --- src/coder.c | 2 + src/index.c | 16 +- src/rill.h | 69 +++++---- src/rows.c | 22 +-- src/store.c | 434 +++++++++++++++++++--------------------------------- src/vals.c | 40 ++++- 6 files changed, 258 insertions(+), 325 deletions(-) diff --git a/src/coder.c b/src/coder.c index f29404a..9a9fa6d 100644 --- a/src/coder.c +++ b/src/coder.c @@ -93,6 +93,8 @@ static inline bool coder_write_sep(struct encoder *coder) return true; } +// \todo might want to just write directly to region since out-of-bounds are the +// rare case. static inline bool coder_write_val(struct encoder *coder, rill_val_t val) { val = vals_vtoi(&coder->rev, val); diff --git a/src/index.c b/src/index.c index 93a83ee..02348ec 100644 --- a/src/index.c +++ b/src/index.c @@ -8,9 +8,9 @@ // config // ----------------------------------------------------------------------------- -struct rill_packed index_row +struct rill_packed index_kv { - rill_val_t key; + uint64_t key; uint64_t off; }; @@ -18,17 +18,17 @@ struct rill_packed index { uint64_t len; uint64_t __unused; // kept for backwards compatibility - struct index_row data[]; + struct index_kv data[]; }; -static size_t index_cap(size_t rows) +static size_t index_cap(size_t len) { - return sizeof(struct index) + rows * sizeof(struct index_row); + return sizeof(struct index) + len * sizeof(struct index_kv); } static void index_put(struct index *index, rill_val_t key, uint64_t off) { - index->data[index->len] = (struct index_row) { .key = key, .off = off }; + index->data[index->len] = (struct 
index_kv) { .key = key, .off = off }; index->len++; } @@ -38,7 +38,7 @@ static bool index_find( { size_t idx = 0; size_t len = index->len; - struct index_row *low = index->data; + struct index_kv *low = index->data; while (len > 1) { size_t mid = len / 2; @@ -46,7 +46,7 @@ static bool index_find( else { low += mid; len -= mid; idx += mid;} } - struct index_row *row = &index->data[idx]; + struct index_kv *row = &index->data[idx]; if (row->key != key) return false; *key_idx = idx; *off = row->off; diff --git a/src/rill.h b/src/rill.h index eb3a8be..90f6360 100644 --- a/src/rill.h +++ b/src/rill.h @@ -39,6 +39,19 @@ typedef uint64_t rill_ts_t; typedef uint64_t rill_val_t; +// ----------------------------------------------------------------------------- +// col +// ----------------------------------------------------------------------------- + +enum { rill_cols = 2 }; +enum rill_col { rill_col_a = 0, rill_col_b = 1 }; + +inline enum rill_col rill_col_flip(enum rill_col col) +{ + return 1 - col; +} + + // ----------------------------------------------------------------------------- // row // ----------------------------------------------------------------------------- @@ -64,6 +77,12 @@ inline int rill_row_cmp(const struct rill_row *lhs, const struct rill_row *rhs) return 0; } +inline rill_val_t rill_row_get(const struct rill_row *row, enum rill_col col) +{ + // Avoids branches but could be dangerous if col happens to be giberrish. 
+ return ((rill_val_t *) row)[col]; +} + // ----------------------------------------------------------------------------- // rows @@ -91,11 +110,8 @@ void rill_rows_print(const struct rill_rows *); // store // ----------------------------------------------------------------------------- -enum rill_col { rill_col_a = 0, rill_col_b = 1 }; - struct rill_store; struct rill_store_it; -struct rill_space; struct rill_store *rill_store_open(const char *file); void rill_store_close(struct rill_store *store); @@ -111,38 +127,33 @@ bool rill_store_merge( rill_ts_t ts, size_t quant, struct rill_store **list, size_t len); -bool rill_store_rm(struct rill_store *store); +bool rill_store_rm(struct rill_store *); -const char * rill_store_file(const struct rill_store *store); -unsigned rill_store_version(const struct rill_store *store); -rill_ts_t rill_store_ts(const struct rill_store *store); -size_t rill_store_quant(const struct rill_store *store); -size_t rill_store_keys_count(const struct rill_store *store, enum rill_col column); -size_t rill_store_rows(const struct rill_store *store); -size_t rill_store_index_len(const struct rill_store *store, enum rill_col col); +const char * rill_store_file(const struct rill_store *); +unsigned rill_store_version(const struct rill_store *); +rill_ts_t rill_store_ts(const struct rill_store *); +size_t rill_store_quant(const struct rill_store *); +size_t rill_store_rows(const struct rill_store *); +size_t rill_store_vals( + const struct rill_store *, enum rill_col, rill_val_t *out, size_t len); +size_t rill_store_vals_count(const struct rill_store *, enum rill_col); -struct rill_space* rill_store_space(struct rill_store *store); -size_t rill_store_space_header(struct rill_space *space); -size_t rill_store_space_index(struct rill_space *space, enum rill_col col); -size_t rill_store_space_rows(struct rill_space *space, enum rill_col col); -void rill_space_free(struct rill_space* space); +ssize_t rill_store_query( + const struct rill_store *, 
enum rill_col, rill_val_t, struct rill_rows *out); +struct rill_store_it *rill_store_begin(const const struct rill_store *, enum rill_col); +void rill_store_it_free(struct rill_store_it *); +bool rill_store_it_next(struct rill_store_it *, struct rill_row *out); -struct rill_rows *rill_store_query_value( - struct rill_store *store, rill_val_t val, struct rill_rows *out); -struct rill_rows *rill_store_query_key( - struct rill_store *store, rill_val_t key, struct rill_rows *out); - - -size_t rill_store_keys( - const struct rill_store *store, rill_val_t *out, size_t cap, - enum rill_col column); +struct rill_store_stats +{ + size_t header_bytes; + size_t index_bytes[2]; + size_t rows_bytes[2]; +}; -struct rill_store_it *rill_store_begin( - struct rill_store *store, enum rill_col column); -void rill_store_it_free(struct rill_store_it *it); -bool rill_store_it_next(struct rill_store_it *it, struct rill_row *row); +bool rill_store_stats(const struct rill_store *, struct rill_store_stats *); // ----------------------------------------------------------------------------- diff --git a/src/rows.c b/src/rows.c index cea3504..3d62988 100644 --- a/src/rows.c +++ b/src/rows.c @@ -87,6 +87,18 @@ void rill_rows_compact(struct rill_rows *rows) rows->len = j + 1; } +void rill_rows_invert(struct rill_rows* rows) +{ + for (size_t i = 0; i < rows->len; ++i) { + rows->data[i] = (struct rill_row) { + .a = rows->data[i].b, + .b = rows->data[i].a, + }; + } + + qsort(rows->data, rows->len, sizeof(*rows->data), &row_cmp); +} + void rill_rows_print(const struct rill_rows *rows) { const rill_val_t no_key = -1ULL; @@ -107,13 +119,3 @@ void rill_rows_print(const struct rill_rows *rows) if (rows->len) printf(" ]\n"); } - -void rill_rows_invert(struct rill_rows* rows) -{ - for (size_t i = 0; i < rows->len; ++i) { - rows->data[i] = (struct rill_row) { - .a = rows->data[i].b, - .b = rows->data[i].a, - }; - } -} diff --git a/src/store.c b/src/store.c index 73edf43..4939275 100644 --- a/src/store.c 
+++ b/src/store.c @@ -24,8 +24,8 @@ // impl // ----------------------------------------------------------------------------- -#include "vals.c" #include "index.c" +#include "vals.c" #include "coder.c" // ----------------------------------------------------------------------------- @@ -50,13 +50,10 @@ struct rill_packed header uint64_t rows; - uint64_t data_a_off; - uint64_t data_b_off; + uint64_t data_off[rill_cols]; + uint64_t index_off[rill_cols]; - uint64_t index_a_off; - uint64_t index_b_off; - - uint64_t reserved[2]; + uint64_t __unused[2]; uint64_t stamp; }; @@ -72,101 +69,68 @@ struct rill_store struct header *head; struct vals *vals; - uint8_t *data_a; - uint8_t *data_b; - struct index *index_a; - struct index *index_b; + uint8_t *data[rill_cols]; + struct index *index[rill_cols]; uint8_t *end; }; -struct rill_space -{ - size_t header_bytes; - size_t index_bytes[2]; - size_t rows_bytes[2]; -}; // ----------------------------------------------------------------------------- // coder // ----------------------------------------------------------------------------- +static inline void *store_ptr(struct rill_store *store, uint64_t off) +{ + return (void *) ((uintptr_t) store->vma + off); +} + static struct encoder store_encoder( struct rill_store *store, - struct index *index, - struct vals* vals, - uint64_t offset) + enum rill_col col, + struct vals *vals[rill_cols]) { + size_t start = store->head->data_off[col]; + size_t end = col == rill_col_a ? 
+ store->head->data_off[other_col] : store->vma_len; + return make_encoder( - store->vma + offset, - store->vma + store->vma_len, - vals, - index); + store->vma + start, + store->vma + end, + vals[col], + store->index[col]); } static struct decoder store_decoder_at( struct rill_store *store, + enum rill_col col, size_t key_idx, - uint64_t curr_off, - enum rill_col column) + uint64_t off) { - size_t offset = 0; - size_t offset_end = 0; - struct index* lookup = NULL; - struct index* index = NULL; - - switch (column) { - case rill_col_a: - lookup = store->index_b; - index = store->index_a; - offset = store->head->data_a_off; - offset_end = store->head->data_b_off; - break; - case rill_col_b: - lookup = store->index_a; - index = store->index_b; - offset = store->head->data_b_off; - offset_end = store->vma_len; - break; - default: - rill_fail("improper rill col passed: %d", column); - break; - } - - return make_decoder_at( - store->vma + offset + curr_off, - store->vma + offset_end, - lookup, - index, - key_idx); -} + enum rill_col other_col = rill_col_flip(col); -static struct decoder store_decoder( - struct rill_store *store, - enum rill_col column) -{ - return store_decoder_at(store, 0, 0, column); -} + struct index *index = store->index[col]; + struct index *lookup = store->index[other_col]; -// ----------------------------------------------------------------------------- -// vma -// ----------------------------------------------------------------------------- + size_t start = store->head->data_off[col]; + size_t end = col == rill_col_a ? 
+ store->head->data_off[other_col] : store->vma_len; -static inline void vma_will_need(struct rill_store *store) -{ - if (madvise(store->vma, store->vma_len, MADV_WILLNEED) == -1) - rill_fail("unable to madvise '%s'", store->file); + return make_decoder_at( + store->vma + start + off, + store->vma + end, + lookup, index, + key_idx); } -static inline void vma_dont_need(struct rill_store *store) +static struct decoder store_decoder(struct rill_store *store, enum rill_col col) { - if (madvise(store->vma, store->vma_len, MADV_DONTNEED) == -1) - rill_fail("unable to madvise '%s'", store->file); + return store_decoder_at(store, 0, 0, col); } // ----------------------------------------------------------------------------- -// reader +// open // ----------------------------------------------------------------------------- static bool is_supported_version(uint32_t version) @@ -217,11 +181,11 @@ struct rill_store *rill_store_open(const char *file) } store->head = store->vma; - store->index_a = (void *) ((uintptr_t) store->vma + store->head->index_a_off); - store->index_b = (void *) ((uintptr_t) store->vma + store->head->index_b_off); - store->data_a = (void *) ((uintptr_t) store->vma + store->head->data_a_off); - store->data_b = (void *) ((uintptr_t) store->vma + store->head->data_b_off); - store->end = (void *) ((uintptr_t) store->vma + store->vma_len); + for (size_t col = 0; col < rill_cols; ++col) { + store->index[col] = store_ptr(store, store->head->index_off[col]); + store->data[col] = store_ptr(store, store->head->data_off[col]); + } + store->end = store_ptr(store, store->vma_len); if (store->head->magic != magic) { rill_fail("invalid magic '0x%x' for '%s'", store->head->magic, file); @@ -283,8 +247,7 @@ bool rill_store_rm(struct rill_store *store) static bool writer_open( struct rill_store *store, const char *file, - struct vals *vals, - struct vals *inverted_vals, + struct vals *vals[rill_cols], size_t rows, rill_ts_t ts, size_t quant) @@ -297,12 +260,11 @@ static 
bool writer_open( goto fail_open; } - size_t len = - sizeof(struct header) + - index_cap(inverted_vals->len) + - index_cap(vals->len) + - coder_cap(vals->len, rows) + - coder_cap(inverted_vals->len, rows); + size_t len = sizeof(struct header); + for (size_t col = 0; col < rill_cols; ++col) { + len += index_cap(vals[col]->len); + len += coder_cap(vals[col]->len, rows); + } if (ftruncate(store->fd, len) == -1) { rill_fail_errno("unable to resize '%s'", file); @@ -317,7 +279,7 @@ static bool writer_open( } store->head = store->vma; - store->end = (void *) ((uintptr_t) store->vma + store->vma_len); + store->end = store_ptr(store, store->vma_len); *store->head = (struct header) { .magic = magic, @@ -337,7 +299,7 @@ static bool writer_open( } static void writer_close( - struct rill_store *store, size_t len) + struct rill_store *store, size_t len) { if (len) { assert(len <= store->vma_len); @@ -363,78 +325,78 @@ static void writer_close( close(store->fd); } -static void init_store_offsets( - struct rill_store* store, size_t vals, size_t inverse_vals) +static void writer_offsets_init( + struct rill_store *store, struct vals *vals[rill_cols]) { - store->head->index_a_off = sizeof(struct header); - store->head->index_b_off = store->head->index_a_off + index_cap(inverse_vals); - store->head->data_a_off = store->head->index_b_off + index_cap(vals); + off = sizeof(struct header); + + store->head->index_off[rill_col_a] = off; + store->index[rill_col_a] = store_ptr(store, off); + + off += index_cap(vals[rill_col_b]->len); - store->index_a = (void *) ((uintptr_t) store->vma + store->head->index_a_off); - store->index_b = (void *) ((uintptr_t) store->vma + store->head->index_b_off); - store->data_a = (void *) ((uintptr_t) store->vma + store->head->data_a_off); + store->head->index_off[rill_col_b] = off; + store->index[rill_col_b] = store_ptr(store, off); + + off += index_cap(vals[rill_col_a]->len); + + store->head->data_off[rill_col_a] = off; + store->data[rill_col_a] = 
store_ptr(store, off); } -static void prepare_col_b_offsets( - struct rill_store* store, struct encoder* coder_a) +static void writer_offsets_finish(struct rill_store *store, size_t off) { - store->head->data_b_off = store->head->data_a_off + coder_off(coder_a); - store->data_b = (void *) ((uintptr_t) store->vma + store->head->data_b_off); + store->head->data_off[rill_col_b] = off; + store->data[rill_col_b] = store_ptr(store, off); } bool rill_store_write( const char *file, - rill_ts_t ts, - size_t quant, + rill_ts_t ts, size_t quant, struct rill_rows *rows) { rill_rows_compact(rows); if (!rows->len) return true; - struct vals *vals = vals_cols_from_rows(rows, rill_col_b); - if (!vals) goto fail_vals; - struct vals *invert_vals = vals_cols_from_rows(rows, rill_col_a); - if (!invert_vals) goto fail_invert_vals; + struct vals *vals[rill_cols] = {0}; + for (size_t col = 0; col < rill_cols; ++col) { + vals[i] = vals_for_col(rows, i); + if (!vals[i]) goto fail_vals; + } struct rill_store store = {0}; - if (!writer_open(&store, file, vals, invert_vals, - rows->len, ts, quant)) { + if (!writer_open(&store, file, vals[rill_col_a], vals[rill_col_b], + rows->len, ts, quant)) + { rill_fail("unable to create '%s'", file); goto fail_open; } - init_store_offsets(&store, vals->len, invert_vals->len); - - struct encoder coder_a = - store_encoder(&store, store.index_a, vals, store.head->data_a_off); + writer_offsets_init(&store, vals); + struct encoder coder_a = store_encoder(&store, rill_col_a, vals); for (size_t i = 0; i < rows->len; ++i) { if (!coder_encode(&coder_a, &rows->data[i])) goto fail_encode_a; } if (!coder_finish(&coder_a)) goto fail_encode_a; - prepare_col_b_offsets(&store, &coder_a); - - struct encoder coder_b = - store_encoder(&store, store.index_b, invert_vals, store.head->data_b_off); - + writer_offsets_finish(&store, coder_off(&coder_a)); rill_rows_invert(rows); - rill_rows_compact(rows); /* recompact mainly for sort */ + struct encoder coder_b = 
store_encoder(&store, rill_col_b, vals); for (size_t i = 0; i < rows->len; ++i) { if (!coder_encode(&coder_b, &rows->data[i])) goto fail_encode_b; } if (!coder_finish(&coder_b)) goto fail_encode_b; - store.head->rows = coder_a.rows; - - writer_close(&store, store.head->data_b_off + coder_off(&coder_b)); + store.head->rows = rows->len; + writer_close(&store, store.head->data_off[rill_col_b] + coder_off(&coder_b)); coder_close(&coder_a); coder_close(&coder_b); - free(vals); - free(invert_vals); + for (size_t col = 0; col < rill_cols; ++col) + free(vals[col]); return true; @@ -444,61 +406,19 @@ bool rill_store_write( coder_close(&coder_a); writer_close(&store, 0); fail_open: - free(invert_vals); - fail_invert_vals: - free(vals); + for (size_t col = 0; col < rill_cols; ++col) free(vals[col]); fail_vals: return false; } -static struct vals *vals_merge_from_index(struct vals *vals, struct index *merge) -{ - assert(merge); - - if (!vals) { - size_t len = sizeof(*vals) + sizeof(vals->data[0]) * merge->len; - vals = calloc(1, len); - if (!vals) { - rill_fail("unable to allocate memory for vals: %lu", merge->len); - return NULL; - } - - for (size_t i = 0; i < merge->len; ++i) - vals->data[i] = merge->data[i].key; - - vals->len = merge->len; - return vals; - } - - vals = realloc(vals, - sizeof(*vals) + sizeof(vals->data[0]) * (vals->len + merge->len)); - - if (!vals) { - rill_fail("unable to allocate memory for vals: %lu + %lu", - vals->len, merge->len); - return NULL; - } - - for (size_t i = 0; i < merge->len; ++i) - vals->data[vals->len + i] = merge->data[i].key; - - vals->len += merge->len; - - vals_compact(vals); - - return vals; -} - - -static bool merge_with_config( - struct encoder* coder, - struct rill_store** list, - size_t list_len, - enum rill_col col) +static bool store_merge_col( + struct rill_store** list, + size_t list_len, + enum rill_col col, + struct encoder* coder) { struct rill_row rows[list_len]; - struct decoder decoders[list_len]; size_t it_len = 0; @@ 
-514,7 +434,6 @@ static bool merge_with_config( } struct rill_row prev = {0}; - while (it_len > 0) { size_t target = 0; @@ -557,54 +476,47 @@ bool rill_store_merge( assert(list_len > 1); size_t rows = 0; - struct vals *vals = NULL; - struct vals *invert_vals = NULL; + struct vals *vals[rill_cols] = {0}; for (size_t i = 0; i < list_len; ++i) { if (!list[i]) continue; - vma_will_need(list[i]); - struct vals *ret = vals_merge_from_index(vals, list[i]->index_b); - struct vals *iret = vals_merge_from_index(invert_vals, list[i]->index_a); + for (size_t col = 0; i < rill_cols; ++i) { + struct vals *ret = val_add_index(vals[col], list[i]->index[col]); + if (!vals) goto fail_vals; + vals[col] = ret; + } rows += list[i]->head->rows; - - if (ret) vals = ret; else goto fail_vals; - if (iret) invert_vals = iret; else goto fail_invert_vals; } struct rill_store store = {0}; - if (!writer_open(&store, file, vals, invert_vals, - rows, ts, quant)) { + if (!writer_open(&store, file, vals[rill_col_a], vals[rill_col_b], + rows, ts, quant)) + { rill_fail("unable to create '%s'", file); goto fail_open; } - init_store_offsets(&store, vals->len, invert_vals->len); + writer_offsets_init(&store, vals); - struct encoder encoder_a = - store_encoder(&store, store.index_a, vals, store.head->data_a_off); - if (!merge_with_config(&encoder_a, list, list_len, rill_col_a)) goto fail_coder_a; + struct encoder encoder_a = store_encoder(&store, rill_col_a, vals); + if (!store_merge_col(list, list_len, rill_col_a, &encoder_a)) goto fail_coder_a; if (!coder_finish(&encoder_a)) goto fail_coder_a; - prepare_col_b_offsets(&store, &encoder_a); + writer_offsets_finish(&store, coder_off(&encoder_a)); - struct encoder encoder_b = - store_encoder(&store, store.index_b, invert_vals, store.head->data_b_off); - if (!merge_with_config(&encoder_b, list, list_len, rill_col_b)) goto fail_coder_b; + struct encoder encoder_b = store_encoder(&store, rill_col_b, vals); + if (!store_merge_col(list, list_len, rill_col_b, 
&encoder_b)) goto fail_coder_b; if (!coder_finish(&encoder_b)) goto fail_coder_b; store.head->rows = encoder_a.rows; - writer_close(&store, store.head->data_b_off + coder_off(&encoder_b)); - for (size_t i = 0; i < list_len; ++i) - if (list[i]) vma_dont_need(list[i]); - coder_close(&encoder_a); coder_close(&encoder_b); - free(vals); - free(invert_vals); + + for (size_t col = 0; col < rill_cols; ++col) free(vals[col]); return true; coder_close(&encoder_b); @@ -613,16 +525,14 @@ bool rill_store_merge( fail_coder_a: writer_close(&store, 0); fail_open: - free(invert_vals); - fail_invert_vals: - free(vals); fail_vals: + for (size_t col = 0; col < rill_cols; ++col) free(vals[col]); return false; } // ----------------------------------------------------------------------------- -// scan +// header info // ----------------------------------------------------------------------------- const char * rill_store_file(const struct rill_store *store) @@ -645,97 +555,77 @@ size_t rill_store_quant(const struct rill_store *store) return store->head->quant; } -size_t rill_store_keys_count(const struct rill_store *store, enum rill_col column) +size_t rill_store_rows(const struct rill_store *store) { - assert(column == rill_col_a || column == rill_col_b); - const struct index* - ix = column == rill_col_a ? 
store->index_a : store->index_b; - return ix->len; + return store->head->rows; } -size_t rill_store_rows(const struct rill_store *store) + +// ----------------------------------------------------------------------------- +// query +// ----------------------------------------------------------------------------- + +size_t rill_store_vals_count(const struct rill_store *store, enum rill_col col) { - return store->head->rows; + return store->index[col]->len; } -size_t rill_store_index_len(const struct rill_store *store, enum rill_col col) +size_t rill_store_vals( + const struct rill_store *store, + enum rill_col col, + rill_val_t *out, + size_t cap) { - assert(col == rill_col_a || col == rill_col_b); - return col == rill_col_a ? store->index_a->len : store->index_b->len; + const struct index* index = store->index[col]; + size_t len = cap < index->len ? cap : index->len; + + for (size_t i = 0; i < len; ++i) + out[i] = index->data[i].key; + + return len; } -static struct rill_rows *store_query_key_or_value( - struct rill_store *store, + +ssize_t rill_store_query( + const struct rill_store *store, + enum rill_col col, rill_val_t key, - struct rill_rows *out, - enum rill_col column) + struct rill_rows *out) { - struct rill_rows *result = out; - size_t key_idx = 0; uint64_t off = 0; - struct index *ix = - column == rill_col_a ? 
store->index_a : store->index_b; - - if (!index_find(ix, key, &key_idx, &off)) return result; + size_t key_idx = 0; + if (!index_find(store->index[col], key, &key_idx, &off)) return 0; struct rill_row row = {0}; - struct decoder coder = store_decoder_at(store, key_idx, off, column); + struct decoder coder = store_decoder_at(store, col, key_idx, off); + ssize_t count = 0; while (true) { - if (!coder_decode(&coder, &row)) goto fail; + if (!coder_decode(&coder, &row)) return -1; if (rill_row_nil(&row)) break; - if (row.key != key) break; + assert(row.key == key); - result = rill_rows_push(result, row.key, row.val); - if (!result) goto fail; + if (!rill_rows_push(result, row.key, row.val)) return -1; + count++; } - return result; - - fail: - // \todo potentially leaking result - return NULL; -} - -struct rill_rows *rill_store_query_key( - struct rill_store *store, rill_val_t key, struct rill_rows *out) -{ - return store_query_key_or_value(store, key, out, rill_col_a); + return count; } -struct rill_rows *rill_store_query_value( - struct rill_store *store, rill_val_t key, struct rill_rows *out) -{ - return store_query_key_or_value(store, key, out, rill_col_b); -} - -size_t rill_store_keys( - const struct rill_store *store, rill_val_t *out, size_t cap, - enum rill_col column) -{ - assert(column == rill_col_a || column == rill_col_b); - - const struct index* ix = - column == rill_col_a ? store->index_a : store->index_b; - - size_t len = cap < ix->len ? 
cap : ix->len; - - for (size_t i = 0; i < len; ++i) - out[i] = ix->data[i].key; - - return len; -} +// ----------------------------------------------------------------------------- +// iterators +// ----------------------------------------------------------------------------- struct rill_store_it { struct decoder decoder; }; struct rill_store_it *rill_store_begin( - struct rill_store *store, enum rill_col column) + const struct rill_store *store, enum rill_col col) { struct rill_store_it *it = calloc(1, sizeof(*it)); if (!it) return NULL; - it->decoder = store_decoder(store, column); + it->decoder = store_decoder(store, col); return it; } @@ -749,11 +639,15 @@ bool rill_store_it_next(struct rill_store_it *it, struct rill_row *row) return coder_decode(&it->decoder, row); } -struct rill_space* rill_store_space(struct rill_store* store) -{ - struct rill_space *ret = calloc(1, sizeof(*ret)); - *ret = (struct rill_space) { +// ----------------------------------------------------------------------------- +// stats +// ----------------------------------------------------------------------------- + +void rill_store_space( + const struct rill_store* store, struct rill_store_stats *out) +{ + *out = (struct rill_store_stats) { .header_bytes = sizeof(*store->head), .index_bytes[rill_col_a] = store->head->index_b_off - store->head->index_a_off, .index_bytes[rill_col_b] = store->head->data_a_off - store->head->index_b_off, @@ -763,15 +657,3 @@ struct rill_space* rill_store_space(struct rill_store* store) return ret; } - -size_t rill_store_space_header(struct rill_space* space) { - return space->header_bytes; -} -size_t rill_store_space_index(struct rill_space* space, enum rill_col col) { - assert(col == rill_col_a || col == rill_col_b); - return space->index_bytes[col]; -} -size_t rill_store_space_rows(struct rill_space* space, enum rill_col col) { - assert(col == rill_col_a || col == rill_col_b); - return space->rows_bytes[col]; -} diff --git a/src/vals.c b/src/vals.c index 
449db81..8f4f438 100644 --- a/src/vals.c +++ b/src/vals.c @@ -24,6 +24,8 @@ static size_t vals_vtoi(vals_rev_t *rev, rill_val_t val) return ret.value; } +// \todo should technically return bool for htable resize errors. Need to fix +// htable interface. static void vals_rev_make(struct vals *vals, vals_rev_t *rev) { htable_reset(rev); @@ -60,7 +62,7 @@ static void vals_compact(struct vals *vals) vals->len = j + 1; } -static struct vals *vals_cols_from_rows(struct rill_rows *rows, enum rill_col col) +static struct vals *vals_for_col(struct rill_rows *rows, enum rill_col col) { struct vals *vals = calloc(1, sizeof(*vals) + sizeof(vals->data[0]) * rows->len); @@ -69,7 +71,41 @@ static struct vals *vals_cols_from_rows(struct rill_rows *rows, enum rill_col co vals->len = rows->len; for (size_t i = 0; i < rows->len; ++i) - vals->data[i] = col == rill_col_a ? rows->data[i].key : rows->data[i].val; + vals->data[i] = rill_row_get(&rows->data[i], col); + + vals_compact(vals); + return vals; +} + +static struct vals *vals_add_index(struct vals *vals, struct index *index) +{ + assert(merge); + + if (!vals) { + vals = calloc(1, sizeof(*vals) + index->len * sizeof(vals->data[0])); + if (!vals) { + rill_fail("unable to allocate memory for vals: %lu", index->len); + return NULL; + } + + for (size_t i = 0; i < index->len; ++i) + vals->data[i] = index->data[i].key; + vals->len = index->len; + + return vals; + } + + size_t len = vals->len + index->len; + vals = realloc(vals, sizeof(*vals) + len * sizeof(vals->data[0])); + if (!vals) { + rill_fail("unable to allocate memory for vals: %lu + %lu", + vals->len, index->len); + return NULL; + } + + for (size_t i = 0; i < index->len; ++i) + vals->data[vals->len + i] = index->data[i].key; + vals->len += index->len; vals_compact(vals); return vals; From 3c02a239c80bdefefca280d4a3a133ba4160635a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 10:57:27 -0400 Subject: [PATCH 69/91] clean up acc and query --- 
src/acc.c | 13 +++---- src/query.c | 103 ++++++++-------------------------------------------- src/rill.h | 14 ++----- 3 files changed, 24 insertions(+), 106 deletions(-) diff --git a/src/acc.c b/src/acc.c index 0a5f1f0..726bebf 100644 --- a/src/acc.c +++ b/src/acc.c @@ -205,18 +205,14 @@ bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) start = end - acc->head->len; } - struct rill_rows *rows = rill_rows_new(end - start); - if (!rows) { - rill_fail("unable to allocate rows for len '%lu'", acc->head->len); - goto fail_rows_alloc; - } + struct rill_rows rows = {0}; + if (!rill_rows_reserve(end - start)) goto fail_rows_reserve; for (size_t i = start; i < end; ++i) { size_t index = i % acc->head->len; struct row *row = &acc->data[index]; - struct rill_rows *ret = rill_rows_push(rows, row->key, row->val); - assert(ret == rows); + if (!rill_rows_push(rows, row->key, row->val)) goto fail_rows_push; } if (!rill_store_write(file, now, 0, rows)) { @@ -230,7 +226,8 @@ bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) return true; fail_write: + fail_rows_push: rill_rows_free(rows); - fail_rows_alloc: + fail_rows_reserve: return false; } diff --git a/src/query.c b/src/query.c index 9ccdd38..4cf8464 100644 --- a/src/query.c +++ b/src/query.c @@ -62,111 +62,38 @@ void rill_query_close(struct rill_query *query) free(query); } -struct rill_rows *rill_query_key( - const struct rill_query *query, rill_val_t key, struct rill_rows *out) +bool rill_query_key( + const struct rill_query *query, + enum rill_col col, + rill_val_t key, + struct rill_rows *out) { - if (!key) return out; + if (!key) return false; - struct rill_rows *result = out; for (size_t i = 0; i < query->len; ++i) { - result = rill_store_query_key(query->list[i], key, result); - if (!result) return NULL; + if (!rill_store_query(query->list[i], col, key, out)) + return false; } - rill_rows_compact(result); - return result; + rill_rows_compact(out); + return true; } struct 
rill_rows *rill_query_keys( const struct rill_query *query, + enum rill_col col, const rill_val_t *keys, size_t len, struct rill_rows *out) { - if (!len) return out; + if (!len) return true; - struct rill_rows *result = out; for (size_t i = 0; i < query->len; ++i) { for (size_t j = 0; i < len; ++j) { - result = rill_store_query_key(query->list[i], keys[j], result); - if (!result) return NULL; - } - } - - rill_rows_compact(result); - return result; -} - -static int compare_rill_values(const void *v1, const void *v2) { - const rill_val_t rv1 = *(rill_val_t*)v1; - const rill_val_t rv2 = *(rill_val_t*)v2; - - if (rv1 > rv2) return 1; - if (rv1 < rv2) return -1; - return 0; -} - -struct rill_rows *rill_query_vals( - const struct rill_query *query, - const rill_val_t *vals, size_t len, - struct rill_rows *out) -{ - if (!len) return out; - - rill_val_t *sorted = malloc(sizeof(vals[0]) * len); - if (!sorted) goto fail_alloc; - - memcpy(sorted, vals, sizeof(vals[0]) * len); - qsort(sorted, len, sizeof(vals[0]), compare_rill_values); - - struct rill_rows *result = out; - for (size_t i = 0; i < query->len; ++i) { - for (size_t j = 0; j < len; ++j) { - result = rill_store_query_value(query->list[i], sorted[j], result); - if (!result) goto fail_scan; - } - } - - rill_rows_compact(result); - free(sorted); - return result; - - fail_scan: - free(sorted); - fail_alloc: - // \todo potentially leaking result - return NULL; -} - -struct rill_rows *rill_query_all( - const struct rill_query *query, enum rill_col col) -{ - struct rill_rows *result = rill_rows_new(1); - for (size_t i = 0; i < query->len; ++i) { - size_t rows = rill_store_rows(query->list[i]); - result = rill_rows_reserve(result, result->len + rows); - if (!result) goto fail_scan; - - struct rill_store_it *it = rill_store_begin(query->list[i], col); - if (!it) goto fail_scan; - - struct rill_row row; - while (true) { - if (!rill_store_it_next(it, &row)) { - rill_store_it_free(it); - goto fail_scan; - } - if 
(rill_row_nil(&row)) break; - - result = rill_rows_push(result, row.key, row.val); + if (!rill_store_query_key(query->list[i], col, keys[j], out)) + return false; } - rill_store_it_free(it); } rill_rows_compact(result); - return result; - - fail_scan: - free(result); - return NULL; - + return true; } diff --git a/src/rill.h b/src/rill.h index 90f6360..3b1ff14 100644 --- a/src/rill.h +++ b/src/rill.h @@ -187,24 +187,18 @@ struct rill_query; struct rill_query * rill_query_open(const char *dir); void rill_query_close(struct rill_query *db); -struct rill_rows *rill_query_key( +bool rill_query_key( const struct rill_query *query, + enum rill_col col, rill_val_t key, struct rill_rows *out); -struct rill_rows *rill_query_keys( +bool rill_query_keys( const struct rill_query *query, + enum rill_col col, const rill_val_t *keys, size_t len, struct rill_rows *out); -struct rill_rows *rill_query_vals( - const struct rill_query *query, - const rill_val_t *vals, size_t len, - struct rill_rows *out); - -struct rill_rows *rill_query_all( - const struct rill_query *query, enum rill_col col); - // ----------------------------------------------------------------------------- // misc From a7e942948237c40a62dca224de9f0c6e5a792ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 12:03:34 -0400 Subject: [PATCH 70/91] Fix up coder tests --- src/rill.h | 1 + src/rows.c | 10 +++ src/vals.c | 12 +-- test/coder_test.c | 222 +++++++++++++++++----------------------------- test/test.h | 43 ++++++--- 5 files changed, 127 insertions(+), 161 deletions(-) diff --git a/src/rill.h b/src/rill.h index 3b1ff14..bdf234d 100644 --- a/src/rill.h +++ b/src/rill.h @@ -103,6 +103,7 @@ void rill_rows_clear(struct rill_rows *); void rill_rows_invert(struct rill_rows *); void rill_rows_compact(struct rill_rows *); +bool rill_rows_copy(const struct rill_rows *, struct rill_rows *out); void rill_rows_print(const struct rill_rows *); diff --git a/src/rows.c b/src/rows.c index 
3d62988..9293420 100644 --- a/src/rows.c +++ b/src/rows.c @@ -99,6 +99,16 @@ void rill_rows_invert(struct rill_rows* rows) qsort(rows->data, rows->len, sizeof(*rows->data), &row_cmp); } +bool rill_rows_copy(const struct rill_rows *rows, struct rill_rows *out) +{ + if (!rill_rows_reserve(out, rows->len)) return false; + + memcpy(out->data, rows->data, rows->len * sizeof(rows->data[0])); + out->len = rows->len; + + return true; +} + void rill_rows_print(const struct rill_rows *rows) { const rill_val_t no_key = -1ULL; diff --git a/src/vals.c b/src/vals.c index 8f4f438..97c8878 100644 --- a/src/vals.c +++ b/src/vals.c @@ -15,7 +15,7 @@ struct rill_packed vals typedef struct htable vals_rev_t; -static size_t vals_vtoi(vals_rev_t *rev, rill_val_t val) +static size_t vals_vtoi(const vals_rev_t *rev, rill_val_t val) { if (!val) return 0; // \todo giant hack for coder_finish @@ -26,7 +26,7 @@ static size_t vals_vtoi(vals_rev_t *rev, rill_val_t val) // \todo should technically return bool for htable resize errors. Need to fix // htable interface. 
-static void vals_rev_make(struct vals *vals, vals_rev_t *rev) +static void vals_rev_make(const struct vals *vals, vals_rev_t *rev) { htable_reset(rev); htable_reserve(rev, vals->len); @@ -39,8 +39,8 @@ static void vals_rev_make(struct vals *vals, vals_rev_t *rev) static int val_cmp(const void *l, const void *r) { - rill_val_t lhs = *((rill_val_t *) l); - rill_val_t rhs = *((rill_val_t *) r); + rill_val_t lhs = *((const rill_val_t *) l); + rill_val_t rhs = *((const rill_val_t *) r); if (lhs < rhs) return -1; if (lhs > rhs) return 1; @@ -62,7 +62,7 @@ static void vals_compact(struct vals *vals) vals->len = j + 1; } -static struct vals *vals_for_col(struct rill_rows *rows, enum rill_col col) +static struct vals *vals_for_col(const struct rill_rows *rows, enum rill_col col) { struct vals *vals = calloc(1, sizeof(*vals) + sizeof(vals->data[0]) * rows->len); @@ -77,7 +77,7 @@ static struct vals *vals_for_col(struct rill_rows *rows, enum rill_col col) return vals; } -static struct vals *vals_add_index(struct vals *vals, struct index *index) +static struct vals *vals_add_index(struct vals *vals, const struct index *index) { assert(merge); diff --git a/test/coder_test.c b/test/coder_test.c index e4a39eb..db1061e 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -76,11 +76,11 @@ bool test_leb128(void) #define make_index(...) 
\ ({ \ - rill_val_t rows[] = { __VA_ARGS__ }; \ - size_t len = sizeof(rows) / sizeof(rows[0]); \ + rill_val_t vals[] = { __VA_ARGS__ }; \ + size_t len = sizeof(vals) / sizeof(vals[0]); \ struct index *index = index_alloc(len); \ for (size_t i = 0; i < len; ++i) \ - index_put(index, rows[i], 1); \ + index_put(index, vals[i], 1); \ index; \ }) @@ -95,9 +95,9 @@ static struct vals *make_vals_impl(rill_val_t *list, size_t len) return vals; } -static void check_vals(struct rill_rows *rows, struct vals *exp) +static void check_vals(struct rill_rows rows, struct vals *exp) { - struct vals *vals = vals_cols_from_rows(rows, rill_col_b); + struct vals *vals = vals_for_col(&rows, rill_col_b); assert(vals->len == exp->len); for (size_t i = 0; i < exp->len; ++i) @@ -113,13 +113,13 @@ static void check_vals(struct rill_rows *rows, struct vals *exp) free(vals); free(exp); - free(rows); + rill_rows_free(rows); htable_reset(&rev); } static void check_vals_merge(struct vals *a, struct index *b, struct vals *exp) { - struct vals *result = vals_merge_from_index(a, b); + struct vals *result = vals_add_index(a, b); assert(result->len == exp->len); for (size_t i = 0; i < exp->len; ++i) @@ -132,16 +132,16 @@ static void check_vals_merge(struct vals *a, struct index *b, struct vals *exp) bool test_vals(void) { - check_vals(make_pair(row(1, 10)), make_vals(10)); + check_vals(make_rows(row(1, 10)), make_vals(10)); - check_vals(make_pair(row(1, 10), row(1, 10)), make_vals(10)); - check_vals(make_pair(row(1, 10), row(2, 10)), make_vals(10)); + check_vals(make_rows(row(1, 10), row(1, 10)), make_vals(10)); + check_vals(make_rows(row(1, 10), row(2, 10)), make_vals(10)); - check_vals(make_pair(row(1, 10), row(1, 20)), make_vals(10, 20)); - check_vals(make_pair(row(1, 10), row(2, 20)), make_vals(10, 20)); + check_vals(make_rows(row(1, 10), row(1, 20)), make_vals(10, 20)); + check_vals(make_rows(row(1, 10), row(2, 20)), make_vals(10, 20)); - check_vals(make_pair(row(2, 20), row(1, 10)), 
make_vals(10, 20)); - check_vals(make_pair(row(1, 20), row(1, 10)), make_vals(10, 20)); + check_vals(make_rows(row(2, 20), row(1, 10)), make_vals(10, 20)); + check_vals(make_rows(row(1, 20), row(1, 10)), make_vals(10, 20)); check_vals_merge(make_vals(10), make_index(10), make_vals(10)); check_vals_merge(make_vals(10), make_index(20), make_vals(10, 20)); @@ -160,171 +160,107 @@ bool test_vals(void) // coder // ----------------------------------------------------------------------------- -void check_coder(struct rill_rows *rows) +static struct index *lookup_alloc(struct vals *vals) { - rill_rows_compact(rows); - - struct rill_rows *inverted = rill_rows_new(rows->len); - for (size_t i = 0; i < rows->len; ++i) - rill_rows_push(inverted, rows->data[i].val, rows->data[i].key); - rill_rows_compact(inverted); + struct index *lookup = calloc(1, index_cap(vals->len)); + lookup->len = vals->len; - struct vals *vals_a = vals_cols_from_rows(rows, rill_col_b); - struct vals *vals_b = vals_cols_from_rows(inverted, rill_col_b); + for (size_t i = 0; i < lookup->len; ++i) + lookup->data[i].key = vals->data[i]; - const size_t rows_a_cap = coder_cap(vals_a->len, rows->len); - const size_t rows_b_cap = coder_cap(vals_b->len, inverted->len); + return lookup; +} - size_t cap = rows_a_cap + rows_b_cap; - uint8_t *buffer = calloc(1, cap); - struct index *index_a = index_alloc(vals_b->len); - struct index *index_b = index_alloc(vals_a->len); +static void check_coder(struct rill_rows rows) +{ + rill_rows_compact(rows); - size_t len = 0, len_a = 0, len_b = 0; - { - struct encoder coder_a = - make_encoder(buffer, buffer + cap, vals_a, index_a); + struct vals *vals[2] = { + vals_for_col(&rows, rill_col_a), + vals_for_col(&rows, rill_col_b), + }; - for (size_t i = 0; i < rows->len; ++i) - assert(coder_encode(&coder_a, &rows->data[i])); - assert(coder_finish(&coder_a)); + struct index *index = index_alloc(vals[rill_col_a]->len); + struct index *lookup = lookup_alloc(vals[rill_col_b]); - len_a = 
len = coder_a.it - buffer; - assert(len <= rows_a_cap); + size_t cap = coder_cap(vals[rill_col_b]->len, rows.len); + uint8_t buffer = calloc(1, cap); - struct encoder coder_b = - make_encoder(buffer + len_a, buffer + cap, vals_b, index_b); - for (size_t i = 0; i < inverted->len; ++i) - assert(coder_encode(&coder_b, &inverted->data[i])); - assert(coder_finish(&coder_b)); + size_t len = 0; + { + struct encoder coder = + make_encoder(buffer, buffer + cap, vals[rill_col_b], index); - len_b = coder_b.it - coder_a.it; - assert(len_b <= rows_b_cap); + for (size_t i = 0; i < rows.len; ++i) + assert(coder_encode(&coder, &rows->data[i])); - len = coder_b.it - buffer; - coder_close(&coder_a); - coder_close(&coder_b); + assert(coder_finish(&coder)); + len = len = coder.it - buffer; } - if (false) { // hex dump for debuging - rill_rows_print(rows); - printf("buffer: start=%p, len=%lu(%lu, %lu)\n", (void *) buffer, len, len_a, len_b); - for (size_t i = 0; i < cap;) { - printf("%6p: ", (void *) i); - for (size_t j = 0; j < 16 && i < cap; ++i, ++j) { - if (j % 2 == 0) printf(" "); - printf("%02x", buffer[i]); - } - printf("\n"); - } + if (false) { + printf("input: "); rill_rows_print(rows); - printf("index_a: [ "); - for (size_t i = 0; i < index_a->len; ++i) { - struct index_row *row = &index_a->data[i]; - printf("{%p, %p} ", (void *) row->key, (void *) row->off); - } - printf("]\n"); + printf("buffer: start=%p, len=%lu\n", (void *) buffer, len); + hexdump(buffer, cap); - printf("index_b: [ "); - for (size_t i = 0; i < index_b->len; ++i) { - struct index_row *row = &index_b->data[i]; + printf("index: [ "); + for (size_t i = 0; i < index->len; ++i) { + struct index_row *row = &index->data[i]; printf("{%p, %p} ", (void *) row->key, (void *) row->off); } printf("]\n"); - } - - { /* Coder A */ - uint8_t *start = buffer; - struct decoder coder = - make_decoder_at(start, - start + len_a, - index_b, index_a, 0); - struct rill_row row = {0}; - for (size_t i = 0; i < rows->len; ++i) { - 
assert(coder_decode(&coder, &row)); - assert(rill_row_cmp(&row, &rows->data[i]) == 0); + printf("lookup: [ "); + for (size_t i = 0; i < lookup->len; ++i) { + struct index_row *row = &lookup->data[i]; + printf("%p ", (void *) row->key); } - - assert(coder_decode(&coder, &row)); - assert(rill_row_nil(&row)); + printf("]\n"); } - { /* Coder B */ - uint8_t *start = buffer + len_a; + { struct decoder coder = - make_decoder_at(start, - start + len_b, - index_a, index_b, 0); + make_decoder_at(buffer, buffer + len, lookup, index, 0); - struct rill_row row = {0}; + struct rill_kv kv = {0}; for (size_t i = 0; i < rows->len; ++i) { - assert(coder_decode(&coder, &row)); - assert(rill_row_cmp(&row, &inverted->data[i]) == 0); + assert(coder_decode(&coder, &kv)); + assert(rill_kv_cmp(&kv, &rows->data[i]) == 0); } - assert(coder_decode(&coder, &row)); - assert(rill_row_nil(&row)); + assert(coder_decode(&coder, &kv)); + assert(rill_kv_nil(&kv)); } - { /* Decode A */ - for (size_t i = 0; i < rows->len; ++i) { - size_t key_idx = 0; - uint64_t off = 0; - - assert(index_find(index_a, rows->data[i].key, &key_idx, &off)); - - uint8_t *start = buffer; - struct decoder coder = make_decoder_at( - start + off, start + len_a, index_b, index_a, key_idx); - - struct rill_row row = {0}; - do { - assert(coder_decode(&coder, &row)); - assert(row.key == rows->data[i].key); - } while (row.val != rows->data[i].val); - } - } - - { /* Decode B */ - for (size_t i = 0; i < inverted->len; ++i) { - size_t key_idx = 0; - uint64_t off = 0; - - assert(index_find(index_b, inverted->data[i].key, &key_idx, &off)); - - uint8_t *start = buffer + len_a; - struct decoder coder = make_decoder_at( - start + off, start + len_b, - index_a, index_b, - key_idx); - - struct rill_row row = {0}; - do { - assert(coder_decode(&coder, &row)); - assert(row.key && row.val); - assert(row.key == inverted->data[i].key); - } while (row.val != inverted->data[i].val); - } + for (size_t i = 0; i < rows->len; ++i) { + size_t key_idx; 
uint64_t off; + assert(index_find(index, rows->data[i].key, &key_idx, &off)); + struct decoder coder = make_decoder_at( + buffer + off, buffer + len, vals, index, key_idx); + + struct rill_kv kv = {0}; + do { + assert(coder_decode(&coder, &kv)); + assert(kv.key == rows->data[i].key); + } while (kv.val != rows->data[i].val); } free(buffer); - free(index_a); - free(index_b); - free(vals_a); - free(vals_b); - free(rows); - free(inverted); + free(lookup); + free(index); + for (size_t col = 0; col < rill_cols; ++col) free(vals[col]); + rill_rows_free(&rows); } bool test_coder(void) { - check_coder(make_pair(row(1, 10))); - check_coder(make_pair(row(1, 10), row(1, 20))); - check_coder(make_pair(row(1, 10), row(2, 20))); - check_coder(make_pair(row(1, 10), row(1, 20), row(2, 30))); - check_coder(make_pair(row(1, 10), row(1, 20), row(2, 10))); + check_coder(make_rows(row(1, 10))); + check_coder(make_rows(row(1, 10), row(1, 20))); + check_coder(make_rows(row(1, 10), row(2, 20))); + check_coder(make_rows(row(1, 10), row(1, 20), row(2, 30))); + check_coder(make_rows(row(1, 10), row(1, 20), row(2, 10))); struct rng rng = rng_make(0); for (size_t iterations = 0; iterations < 100; ++iterations) diff --git a/test/test.h b/test/test.h index 10705bc..2babe66 100644 --- a/test/test.h +++ b/test/test.h @@ -29,36 +29,38 @@ struct rill_row row(rill_val_t key, rill_val_t val) return (struct rill_row) { .key = key, .val = val }; } -#define make_pair(...) \ +#define make_rows(...) 
\ ({ \ struct rill_row rows[] = { __VA_ARGS__ }; \ - make_pair_impl(rows, sizeof(rows) / sizeof(rows[0])); \ + make_row_impl(rows, sizeof(rows) / sizeof(rows[0])); \ }) -struct rill_rows *make_pair_impl(const struct rill_row *row, size_t len) +struct rill_rows make_rows_impl(const struct rill_row *rows, size_t len) { - struct rill_rows *rows = rill_rows_new(len); + struct rill_rows result = {0}; + assert(rill_rows_reserve(&result, len)); + for (size_t i = 0; i < len; ++i) - rows = rill_rows_push(rows, row[i].key, row[i].val); - return rows; + assert(rill_rows_push(result, rows[i].key, rows[i].val)); + + return result; } enum { rng_range_key = 500, rng_range_val = 100 }; -struct rill_rows *make_rng_rows(struct rng *rng) +struct rill_rows make_rng_rows(struct rng *rng) { enum { len = 1000 }; - struct rill_rows *rows = rill_rows_new(len); + struct rill_rows rows = {0}; + rill_rows_reserve(&rows, len); for (size_t i = 0; i < len; ++i) { uint64_t key = rng_gen_range(rng, 1, rng_range_key); uint64_t val = rng_gen_range(rng, 1, rng_range_val); - rows = rill_rows_push(rows, key, val); - assert(rows); + rill_rows_push(&rows, key, val); } - rill_rows_compact(rows); - + rill_rows_compact(&rows); return rows; } @@ -85,3 +87,20 @@ void rm(const char *path) closedir(dir); rmdir(path); } + + +// ----------------------------------------------------------------------------- +// hexdump +// ----------------------------------------------------------------------------- + +void hexdump(const void *buffer, size_t len) +{ + for (size_t i = 0; i < len;) { + printf("%6p: ", (void *) i); + for (size_t j = 0; j < 16 && i < len; ++i, ++j) { + if (j % 2 == 0) printf(" "); + printf("%02x", buffer[i]); + } + printf("\n"); + } +} From 6bb5bcf2868f7ff96463ee0ecac11fd29bafb264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 13:35:02 -0400 Subject: [PATCH 71/91] fix rotate test --- test/rotate_test.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff 
--git a/test/rotate_test.c b/test/rotate_test.c index 2cee814..f2b7a97 100644 --- a/test/rotate_test.c +++ b/test/rotate_test.c @@ -43,7 +43,8 @@ bool test_rotate(void) { struct rill_query *query = rill_query_open(dir); - struct rill_rows *rows = rill_query_keys(query, &key, 1, rill_rows_new(1)); + struct rill_rows rows = {0}; + assert(rill_query_key(query, rill_col_a, key, &rows)); rill_query_close(query); size_t i = 0; @@ -66,7 +67,8 @@ bool test_rotate(void) { struct rill_query *query = rill_query_open(dir); - struct rill_rows *rows = rill_query_keys(query, &key, 1, rill_rows_new(1)); + struct rill_rows rows = {0}; + assert(rill_query_keys(query, rill_col_a, key, 1, &rows)); rill_query_close(query); for (size_t i = 0; i < rows->len; ++i) { From ec5793463da519a0b6c303bb79c1a77827be8b5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 13:35:09 -0400 Subject: [PATCH 72/91] fix store test --- test/store_test.c | 286 +++++++++++++--------------------------------- test/test.h | 2 +- 2 files changed, 82 insertions(+), 206 deletions(-) diff --git a/test/store_test.c b/test/store_test.c index 42e8b7b..2acef9f 100644 --- a/test/store_test.c +++ b/test/store_test.c @@ -21,81 +21,45 @@ static struct rill_store *make_store(const char *name, struct rill_rows *rows) return store; } -struct list { size_t len; uint64_t data[]; }; - -#define make_list(...) 
\ - ({ \ - uint64_t list[] = { __VA_ARGS__ }; \ - make_list_impl(list, sizeof(list) / sizeof(list[0])); \ - }) - -static struct list *make_list_impl(uint64_t *data, size_t len) -{ - struct list *list = calloc(1, sizeof(struct list) + sizeof(data[0]) * len); - - list->len = len; - memcpy(list->data, data, sizeof(data[0]) * len); - - return list; -} - -static struct list *make_rng_list(struct rng *rng, uint64_t max) -{ - struct list *list = calloc(1, sizeof(struct list) + sizeof(list->data[0]) * max); - - for (uint64_t val = 0; val < max; ++val) { - if (rng_gen(rng) > rng_max() / 2) continue; - - list->data[list->len] = val; - list->len++; - } - - return list; -} - // ----------------------------------------------------------------------------- -// query_key +// query // ----------------------------------------------------------------------------- -static struct rill_rows* duplicate_rows(struct rill_rows* rows) +static void check_query(struct rill_rows rows) { - struct rill_rows* copy = rill_rows_new(rows->len); - for (size_t i = 0; i < rows->len; ++i) - rill_rows_push(copy, rows->data[i].key, rows->data[i].val); - return copy; -} + struct rill_rows expected = {0}; + rill_rows_copy(&rows, &expected); + rill_rows_compact(&expected); -static void check_query_key(struct rill_rows *rows) -{ - struct rill_rows *expected = duplicate_rows(rows); - struct rill_store *store = make_store("test.store.query_key", rows); - struct rill_rows *result = rill_rows_new(128); + struct rill_store *store = make_store("test.store.query", &rows); + struct rill_rows result = {0}; - rill_rows_compact(rows); - rill_rows_compact(expected); + for (size_t col = 0; col < rill_cols; ++col) { + for (size_t i = 0; i < expected.len;) { + rill_rows_clear(&result); + assert(rill_store_query(store, col, expected.data[i].key, &result)); - for (size_t i = 0; i < expected->len;) { - rill_rows_clear(result); - result = rill_store_query_key(store, expected->data[i].key, result); + assert(expected.len - i >= 
result.len); + for (size_t j = 0; j < result.len; ++j, ++i) + assert(!rill_row_cmp(&expected.data[i], &result.data[j])); + } - assert(expected->len - i >= result->len); - for (size_t j = 0; j < result->len; ++j, ++i) - assert(!rill_row_cmp(&expected->data[i], &result->data[j])); + rill_rows_invert(&expected); // setup for next iteration. } - free(result); rill_store_close(store); rill_rows_free(rows); rill_rows_free(expected); + rill_rows_free(result); } -bool test_query_key(void) +bool test_query(void) { - check_query_key(make_pair(row(1, 10))); - check_query_key(make_pair(row(1, 10), row(2, 20))); - check_query_key(make_pair(row(1, 10), row(1, 20), row(2, 20))); - check_query_key(make_pair(row(1, 10), row(1, 20), row(1, 20), row(1, 30))); + check_query(make_rows(row(1, 10))); + check_query(make_rows(row(1, 10), row(2, 20))); + check_query(make_rows(row(1, 10), row(1, 20), row(2, 20))); + check_query(make_rows(row(1, 10), row(1, 20), row(1, 20), row(1, 30))); struct rng rng = rng_make(0); for (size_t iterations = 0; iterations < 10; ++iterations) @@ -106,179 +70,91 @@ bool test_query_key(void) // ----------------------------------------------------------------------------- -// scan_keys +// vals // ----------------------------------------------------------------------------- -static void check_scan_keys( - struct rill_store *store, struct rill_rows *rows, struct list *keys) +static void check_vals(struct rill_rows rows) { - struct rill_rows *result = rill_rows_new(128); - rill_rows_compact(rows); + struct vals *exp[2] = { + vals_for_col(&rows, rill_col_a), + vals_for_col(&rows, rill_col_b), + }; - for (size_t i = 0; i < keys->len; ++i) - result = rill_store_query_key(store, keys->data[i], result); + struct rill_store *store = make_store("test.store.vals", &rows); - struct rill_rows *exp = rill_rows_new(128); - for (size_t i = 0; i < rows->len; ++i) { - for (size_t j = 0; j < keys->len; ++j) { - struct rill_row *row = &rows->data[i]; - if (row->key == 
keys->data[j]) exp = rill_rows_push(exp, row->key, row->val); - } - } + for (size_t col = 0; col < rill_cols; ++col) { + size_t len = rill_store_vals_count(store, col); + rill_val_t vals[len] = {0}; - assert(exp->len == result->len); - for (size_t i = 0; i < exp->len; ++i) - assert(!rill_row_cmp(&exp->data[i], &result->data[i])); + assert(rill_store_vals(store, col, vals, len) == len); - free(exp); - free(result); - free(keys); -} + for (size_t i = 0; i < len; ++i) + assert(vals[i] == exp[col]->data[i]); -bool test_scan_keys(void) -{ - static const char *name = "test.store.scan_keys"; - - { - struct rill_rows *rows = make_pair(row(2, 10)); - struct rill_rows *copy = duplicate_rows(rows); - struct rill_store *store = make_store(name, rows); - - check_scan_keys(store, copy, make_list(1)); - check_scan_keys(store, copy, make_list(2)); - check_scan_keys(store, copy, make_list(3)); - check_scan_keys(store, copy, make_list(1, 2)); - check_scan_keys(store, copy, make_list(2, 3)); - check_scan_keys(store, copy, make_list(1, 3)); - - rill_store_close(store); - free(copy); - free(rows); + free(exp[col]; } +} +bool test_vals(void) +{ + check_vals(make_rows(row(1, 10))); + check_vals(make_rows(row(1, 10), row(1, 20))); + check_vals(make_rows(row(1, 10), row(2, 10))); + check_vals(make_rows(row(1, 10), row(1, 20), row(2, 10), row(2, 20))); + check_vals(make_rows(row(1, 10), row(1, 20), row(2, 20), row(3, 30))); - { - struct rill_rows *rows = make_pair(row(2, 10), row(3, 10), row(3, 20), row(4, 30)); - struct rill_rows *copy = duplicate_rows(rows); - struct rill_store *store = make_store(name, rows); - - check_scan_keys(store, copy, make_list(1)); - check_scan_keys(store, copy, make_list(3)); - check_scan_keys(store, copy, make_list(5)); - check_scan_keys(store, copy, make_list(1, 3)); - check_scan_keys(store, copy, make_list(3, 5)); - check_scan_keys(store, copy, make_list(2, 3)); - check_scan_keys(store, copy, make_list(2, 3, 4)); - - rill_store_close(store); - free(copy); - 
free(rows); - } - - { - struct rng rng = rng_make(0); - struct rill_rows *rows = make_rng_rows(&rng); - struct rill_rows *copy = duplicate_rows(rows); - struct rill_store *store = make_store(name, rows); - - for (size_t iterations = 0; iterations < 10; ++iterations) - check_scan_keys(store, copy, make_rng_list(&rng, rng_range_key)); - - rill_store_close(store); - free(rows); - free(copy); - } + struct rng rng = rng_make(0); + for (size_t iterations = 0; iterations < 10; ++iterations) + check_query_key(make_rng_rows(&rng)); return true; } // ----------------------------------------------------------------------------- -// scan_vals +// it // ----------------------------------------------------------------------------- -static void check_scan_vals( - struct rill_store *store, struct rill_rows *rows, struct list *vals) +static void check_it(struct rill_rows rows) { - struct rill_rows *result = rill_rows_new(128); - rill_rows_compact(rows); - - for (size_t i = 0; i < vals->len; ++i) - result = rill_store_query_value(store, vals->data[i], result); - - struct rill_rows *exp = rill_rows_new(128); - for (size_t i = 0; i < rows->len; ++i) { - for (size_t j = 0; j < vals->len; ++j) { - struct rill_row *row = &rows->data[i]; - if (row->val == vals->data[j]) - exp = rill_rows_push(exp, row->val, row->key); - } - } + struct rill_rows expected = {0}; + rill_rows_copy(&rows, &expected); + rill_rows_compact(&expected); - rill_rows_compact(exp); + struct rill_store *store = make_store("test.store.query", &rows); - assert(exp->len == result->len); - for (size_t i = 0; i < exp->len; ++i) - assert(!rill_row_cmp(&exp->data[i], &result->data[i])); + for (size_t col = 0; col < rill_cols; ++col) { + struct rill_store_it *it = rill_store_begin(store, col); - free(exp); - free(result); - free(vals); -} + struct rill_row row = {0}; + for (size_t i = 0; i < expected.len; ++i) { + assert(rill_store_it_next(it, &row)); + assert(!rill_row_cmp(&expected.data[i], &row)); + } -bool 
test_scan_vals(void) -{ - static const char *name = "test.store.scan_vals"; - - { - struct rill_rows *rows = make_pair(row(2, 20)); - struct rill_rows *copy = duplicate_rows(rows); - struct rill_store *store = make_store(name, rows); - - check_scan_vals(store, copy, make_list(10)); - check_scan_vals(store, copy, make_list(20)); - check_scan_vals(store, copy, make_list(30)); - check_scan_vals(store, copy, make_list(10, 20)); - check_scan_vals(store, copy, make_list(20, 30)); - check_scan_vals(store, copy, make_list(10, 30)); - - rill_store_close(store); - free(rows); - free(copy); - } + assert(rill_store_it_next(it, &row)); + assert(rill_row_nil(&row)); + + rill_store_it_free(it); - { - struct rill_rows *rows = make_pair(row(2, 20), row(3, 20), row(3, 30), row(4, 40)); - struct rill_rows *copy = duplicate_rows(rows); - struct rill_store *store = make_store(name, rows); - - check_scan_vals(store, copy, make_list(10)); - check_scan_vals(store, copy, make_list(20)); - check_scan_vals(store, copy, make_list(30)); - check_scan_vals(store, copy, make_list(50)); - check_scan_vals(store, copy, make_list(10, 20)); - check_scan_vals(store, copy, make_list(20, 40)); - check_scan_vals(store, copy, make_list(20, 50)); - check_scan_vals(store, copy, make_list(20, 30, 40)); - - rill_store_close(store); - free(rows); - free(copy); + rill_rows_invert(&expected); // setup for next iteration. 
} - { - struct rng rng = rng_make(0); - struct rill_rows *rows = make_rng_rows(&rng); - struct rill_rows *copy = duplicate_rows(rows); - struct rill_store *store = make_store(name, rows); + rill_store_close(store); + rill_rows_free(rows); + rill_rows_free(expected); +} - for (size_t iterations = 0; iterations < 10; ++iterations) - check_scan_vals(store, copy, make_rng_list(&rng, rng_range_val)); +bool test_it(void) +{ + check_it(make_rows(row(1, 10))); + check_it(make_rows(row(1, 10), row(2, 20))); + check_it(make_rows(row(1, 10), row(1, 20), row(2, 20))); + check_it(make_rows(row(1, 10), row(1, 20), row(1, 20), row(1, 30))); - rill_store_close(store); - free(rows); - free(copy); - } + struct rng rng = rng_make(0); + for (size_t iterations = 0; iterations < 10; ++iterations) + check_query_key(make_rng_rows(&rng)); return true; } @@ -293,9 +169,9 @@ int main(int argc, char **argv) (void) argc, (void) argv; bool ret = true; - ret = ret && test_query_key(); - ret = ret && test_scan_keys(); - ret = ret && test_scan_vals(); + ret = ret && test_query(); + ret = ret && test_vals(); + ret = ret && test_it(); return ret ? 0 : 1; } diff --git a/test/test.h b/test/test.h index 2babe66..aca2b03 100644 --- a/test/test.h +++ b/test/test.h @@ -46,7 +46,7 @@ struct rill_rows make_rows_impl(const struct rill_row *rows, size_t len) return result; } -enum { rng_range_key = 500, rng_range_val = 100 }; +enum { rng_range_key = 250, rng_range_val = 100 }; struct rill_rows make_rng_rows(struct rng *rng) { From 47be1e9b7f05f71f135b3e3772f840a4d22e2606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 13:36:15 -0400 Subject: [PATCH 73/91] remove query test largely made redundant by the store tests and did not actually test the query interface. 
--- test/query_test.c | 69 ----------------------------------------------- 1 file changed, 69 deletions(-) delete mode 100644 test/query_test.c diff --git a/test/query_test.c b/test/query_test.c deleted file mode 100644 index 33b0101..0000000 --- a/test/query_test.c +++ /dev/null @@ -1,69 +0,0 @@ -#include "test.h" - -bool test_sequence() -{ - const char* name = "test.query.sequence.rill"; - unlink(name); - - const size_t max_keys = 1000; - const size_t max_values = 100; - struct rill_rows* rows = rill_rows_new(1024); - - for (size_t i = 1; i < max_keys + 1; ++i) - for (size_t j = 1; j < max_values + 1; ++j) - rows = rill_rows_push(rows, i, j); - - rill_store_write(name, 666, 666, rows); - rill_rows_free(rows); - - struct rill_store* store = rill_store_open(name); - - { - struct rill_rows* result = rill_rows_new(256); - - for (size_t i = 1; i < max_keys + 1; ++i) { - result = rill_store_query_key(store, i, result); - rill_rows_compact(result); - - assert(result->len == max_values); - for (size_t x = 0; x < max_values; ++x) - assert(result->data[x].key == i && - result->data[x].val == x + 1); - - rill_rows_clear(result); - } - - rill_rows_free(result); - } - - { - struct rill_rows* result = rill_rows_new(256); - - for (size_t i = 1; i < max_values + 1; ++i) { - result = rill_store_query_value(store, i, result); - rill_rows_compact(result); - - assert(result->len == max_keys); - for (size_t x = 0; x < max_keys; ++x) - assert(result->data[x].key == i && - result->data[x].val == x + 1); - - rill_rows_clear(result); - } - - rill_rows_free(result); - } - - rill_store_close(store); - - unlink(name); - - unlink(name); - return true; -} - -int main(int argc, char **argv) -{ - (void) argc, (void) argv; - return test_sequence() ? 
0 : 1; -} From f7b1ea44ecc090ad9958013cbbd0347f374291c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 13:37:13 -0400 Subject: [PATCH 74/91] Move rill_generate to src folder --- {test => src}/rill_generate.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {test => src}/rill_generate.c (100%) diff --git a/test/rill_generate.c b/src/rill_generate.c similarity index 100% rename from test/rill_generate.c rename to src/rill_generate.c From 519dc8fa44e57a6ac64551e4dd16d37080ee2e07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 13:54:06 -0400 Subject: [PATCH 75/91] rename indexer_test --- test/{indexer_test.c => index_test.c} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/{indexer_test.c => index_test.c} (100%) diff --git a/test/indexer_test.c b/test/index_test.c similarity index 100% rename from test/indexer_test.c rename to test/index_test.c From f9fd5566f72a66b15cc800aaec07fc70d9390924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 13:54:14 -0400 Subject: [PATCH 76/91] Fix compilation errors --- compile.sh | 7 ++----- src/acc.c | 20 +++++++++---------- src/coder.c | 20 +++++++++---------- src/query.c | 6 +++--- src/rill.h | 6 +++--- src/rows.c | 19 +++++++----------- src/store.c | 57 +++++++++++++++++++++++++++-------------------------- src/vals.c | 4 ++-- 8 files changed, 66 insertions(+), 73 deletions(-) diff --git a/compile.sh b/compile.sh index ef72129..4ae8254 100755 --- a/compile.sh +++ b/compile.sh @@ -42,13 +42,12 @@ $CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS $CC -o rill_ingest "${PREFIX}/src/rill_ingest.c" librill.a $CFLAGS $CC -o rill_merge "${PREFIX}/src/rill_merge.c" librill.a $CFLAGS $CC -o rill_count "${PREFIX}/src/rill_count.c" librill.a $CFLAGS +$CC -o rill_count "${PREFIX}/src/rill_generate.c" librill.a $CFLAGS -$CC -o rill_generate "${PREFIX}/test/rill_generate.c" librill.a $CFLAGS -$CC -o 
test_indexer "${PREFIX}/test/indexer_test.c" librill.a $CFLAGS && ./test_indexer +$CC -o test_indexer "${PREFIX}/test/index_test.c" librill.a $CFLAGS && ./test_index $CC -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder $CC -o test_store "${PREFIX}/test/store_test.c" librill.a $CFLAGS && ./test_store $CC -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS -$CC -o test_query "${PREFIX}/test/query_test.c" librill.a $CFLAGS && ./test_query if [ -n "$LEAKCHECK_ENABLED" ] then @@ -58,6 +57,4 @@ then $LEAKCHECK $LEAKCHECK_ARGS ./test_coder echo test_store ========================================= $LEAKCHECK $LEAKCHECK_ARGS ./test_store - echo test_query ========================================= - $LEAKCHECK $LEAKCHECK_ARGS ./test_query fi diff --git a/src/acc.c b/src/acc.c index 726bebf..eff7527 100644 --- a/src/acc.c +++ b/src/acc.c @@ -40,7 +40,7 @@ struct rill_packed header struct rill_packed row { - uint64_t key, val; + uint64_t a, b; }; struct rill_acc @@ -178,16 +178,16 @@ void rill_acc_close(struct rill_acc *acc) free(acc); } -void rill_acc_ingest(struct rill_acc *acc, rill_val_t key, rill_val_t val) +void rill_acc_ingest(struct rill_acc *acc, rill_val_t a, rill_val_t b) { - assert(key && val); + assert(a && b); size_t write = atomic_load_explicit(&acc->head->write, memory_order_relaxed); size_t index = write % acc->head->len; struct row *row = &acc->data[index]; - row->key = key; - row->val = val; + row->a = a; + row->b = b; atomic_store_explicit(&acc->head->write, write + 1, memory_order_release); } @@ -206,28 +206,28 @@ bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now) } struct rill_rows rows = {0}; - if (!rill_rows_reserve(end - start)) goto fail_rows_reserve; + if (!rill_rows_reserve(&rows, end - start)) goto fail_rows_reserve; for (size_t i = start; i < end; ++i) { size_t index = i % acc->head->len; struct row *row = &acc->data[index]; - if (!rill_rows_push(rows, row->key, row->val)) goto 
fail_rows_push; + if (!rill_rows_push(&rows, row->a, row->b)) goto fail_rows_push; } - if (!rill_store_write(file, now, 0, rows)) { + if (!rill_store_write(file, now, 0, &rows)) { rill_fail("unable to write acc file '%s'", file); goto fail_write; } atomic_store_explicit(&acc->head->read, end, memory_order_release); - rill_rows_free(rows); + rill_rows_free(&rows); return true; fail_write: fail_rows_push: - rill_rows_free(rows); + rill_rows_free(&rows); fail_rows_reserve: return false; } diff --git a/src/coder.c b/src/coder.c index 9a9fa6d..d6176ae 100644 --- a/src/coder.c +++ b/src/coder.c @@ -116,17 +116,17 @@ static inline bool coder_write_val(struct encoder *coder, rill_val_t val) static bool coder_encode(struct encoder *coder, const struct rill_row *row) { - if (coder->key != row->key) { + if (coder->key != row->a) { if (rill_likely(coder->key)) { if (!coder_write_sep(coder)) return false; } - index_put(coder->index, row->key, coder_off(coder)); - coder->key = row->key; + index_put(coder->index, row->a, coder_off(coder)); + coder->key = row->a; coder->keys++; } - if (!coder_write_val(coder, row->val)) return false; + if (!coder_write_val(coder, row->b)) return false; coder->rows++; return true; @@ -192,18 +192,18 @@ static inline bool coder_read_val(struct decoder *coder, rill_val_t *val) static bool coder_decode(struct decoder *coder, struct rill_row *row) { if (rill_likely(coder->key)) { - row->key = coder->key; - if (!coder_read_val(coder, &row->val)) return false; - if (row->val) return true; + row->a = coder->key; + if (!coder_read_val(coder, &row->b)) return false; + if (row->b) return true; } coder->key = index_get(coder->index, coder->keys); coder->keys++; - row->key = coder->key; - if (!row->key) return true; // eof + row->a = coder->key; + if (!row->a) return true; // eof - return coder_read_val(coder, &row->val); + return coder_read_val(coder, &row->b); } static struct decoder make_decoder_at( diff --git a/src/query.c b/src/query.c index 
4cf8464..7470c1d 100644 --- a/src/query.c +++ b/src/query.c @@ -79,7 +79,7 @@ bool rill_query_key( return true; } -struct rill_rows *rill_query_keys( +bool rill_query_keys( const struct rill_query *query, enum rill_col col, const rill_val_t *keys, size_t len, @@ -89,11 +89,11 @@ struct rill_rows *rill_query_keys( for (size_t i = 0; i < query->len; ++i) { for (size_t j = 0; i < len; ++j) { - if (!rill_store_query_key(query->list[i], col, keys[j], out)) + if (!rill_store_query(query->list[i], col, keys[j], out)) return false; } } - rill_rows_compact(result); + rill_rows_compact(out); return true; } diff --git a/src/rill.h b/src/rill.h index bdf234d..d5427b4 100644 --- a/src/rill.h +++ b/src/rill.h @@ -140,10 +140,10 @@ size_t rill_store_vals( const struct rill_store *, enum rill_col, rill_val_t *out, size_t len); size_t rill_store_vals_count(const struct rill_store *, enum rill_col); -ssize_t rill_store_query( +bool rill_store_query( const struct rill_store *, enum rill_col, rill_val_t, struct rill_rows *out); -struct rill_store_it *rill_store_begin(const const struct rill_store *, enum rill_col); +struct rill_store_it *rill_store_begin(const struct rill_store *, enum rill_col); void rill_store_it_free(struct rill_store_it *); bool rill_store_it_next(struct rill_store_it *, struct rill_row *out); @@ -168,7 +168,7 @@ enum { rill_acc_read_only = 0 }; struct rill_acc *rill_acc_open(const char *dir, size_t cap); void rill_acc_close(struct rill_acc *acc); -void rill_acc_ingest(struct rill_acc *acc, rill_val_t key, rill_val_t val); +void rill_acc_ingest(struct rill_acc *acc, rill_val_t a, rill_val_t b); bool rill_acc_write(struct rill_acc *acc, const char *file, rill_ts_t now); diff --git a/src/rows.c b/src/rows.c index 9293420..2a2bdb1 100644 --- a/src/rows.c +++ b/src/rows.c @@ -23,11 +23,6 @@ extern inline int rill_row_cmp(const struct rill_row *, const struct rill_row *) // rows // ----------------------------------------------------------------------------- -static 
size_t adjust_cap(size_t cap, size_t len) -{ - return cap; -} - void rill_rows_free(struct rill_rows *rows) { free(rows->data); @@ -60,7 +55,7 @@ bool rill_rows_push(struct rill_rows *rows, rill_val_t a, rill_val_t b) assert(a && b); if (!rill_rows_reserve(rows, rows->len + 1)) return false; - rows->data[rows->len] = (struct rill_row) { .key = key, .val = val }; + rows->data[rows->len] = (struct rill_row) { .a = a, .b = b }; rows->len++; return rows; @@ -111,19 +106,19 @@ bool rill_rows_copy(const struct rill_rows *rows, struct rill_rows *out) void rill_rows_print(const struct rill_rows *rows) { - const rill_val_t no_key = -1ULL; - rill_val_t key = no_key; + const rill_val_t nil = -1ULL; + rill_val_t key = nil; printf("rows(%lu, %lu):\n", rows->len, rows->cap); for (size_t i = 0; i < rows->len; ++i) { const struct rill_row *row = &rows->data[i]; - if (row->key == key) printf(", %p", row->b); + if (row->a == key) printf(", %p", (void *) row->b); else { - if (key != no_key) printf("]\n"); - printf(" %p: [ %p", (void *) row->a, row->b); - key = row->key; + if (key != nil) printf("]\n"); + printf(" %p: [ %p", (void *) row->a, (void *) row->b); + key = row->a; } } diff --git a/src/store.c b/src/store.c index 4939275..a41d7cd 100644 --- a/src/store.c +++ b/src/store.c @@ -90,6 +90,8 @@ static struct encoder store_encoder( enum rill_col col, struct vals *vals[rill_cols]) { + enum rill_col other_col = rill_col_flip(col); + size_t start = store->head->data_off[col]; size_t end = col == rill_col_a ? 
store->head->data_off[other_col] : store->vma_len; @@ -102,7 +104,7 @@ static struct encoder store_encoder( } static struct decoder store_decoder_at( - struct rill_store *store, + const struct rill_store *store, enum rill_col col, size_t key_idx, uint64_t off) @@ -123,7 +125,8 @@ static struct decoder store_decoder_at( key_idx); } -static struct decoder store_decoder(struct rill_store *store, enum rill_col col) +static struct decoder store_decoder( + const struct rill_store *store, enum rill_col col) { return store_decoder_at(store, 0, 0, col); } @@ -328,7 +331,7 @@ static void writer_close( static void writer_offsets_init( struct rill_store *store, struct vals *vals[rill_cols]) { - off = sizeof(struct header); + uint64_t off = sizeof(struct header); store->head->index_off[rill_col_a] = off; store->index[rill_col_a] = store_ptr(store, off); @@ -360,14 +363,12 @@ bool rill_store_write( struct vals *vals[rill_cols] = {0}; for (size_t col = 0; col < rill_cols; ++col) { - vals[i] = vals_for_col(rows, i); - if (!vals[i]) goto fail_vals; + vals[col] = vals_for_col(rows, col); + if (!vals[col]) goto fail_vals; } struct rill_store store = {0}; - if (!writer_open(&store, file, vals[rill_col_a], vals[rill_col_b], - rows->len, ts, quant)) - { + if (!writer_open(&store, file, vals, rows->len, ts, quant)) { rill_fail("unable to create '%s'", file); goto fail_open; } @@ -482,8 +483,8 @@ bool rill_store_merge( if (!list[i]) continue; for (size_t col = 0; i < rill_cols; ++i) { - struct vals *ret = val_add_index(vals[col], list[i]->index[col]); - if (!vals) goto fail_vals; + struct vals *ret = vals_add_index(vals[col], list[i]->index[col]); + if (!ret) goto fail_vals; vals[col] = ret; } @@ -491,9 +492,7 @@ bool rill_store_merge( } struct rill_store store = {0}; - if (!writer_open(&store, file, vals[rill_col_a], vals[rill_col_b], - rows, ts, quant)) - { + if (!writer_open(&store, file, vals, rows, ts, quant)) { rill_fail("unable to create '%s'", file); goto fail_open; } @@ -511,7 
+510,7 @@ bool rill_store_merge( if (!coder_finish(&encoder_b)) goto fail_coder_b; store.head->rows = encoder_a.rows; - writer_close(&store, store.head->data_b_off + coder_off(&encoder_b)); + writer_close(&store, store.head->data_off[rill_col_b] + coder_off(&encoder_b)); coder_close(&encoder_a); coder_close(&encoder_b); @@ -586,7 +585,7 @@ size_t rill_store_vals( } -ssize_t rill_store_query( +bool rill_store_query( const struct rill_store *store, enum rill_col col, rill_val_t key, @@ -594,22 +593,20 @@ ssize_t rill_store_query( { uint64_t off = 0; size_t key_idx = 0; - if (!index_find(store->index[col], key, &key_idx, &off)) return 0; + if (!index_find(store->index[col], key, &key_idx, &off)) return true; struct rill_row row = {0}; struct decoder coder = store_decoder_at(store, col, key_idx, off); - ssize_t count = 0; while (true) { - if (!coder_decode(&coder, &row)) return -1; + if (!coder_decode(&coder, &row)) return false; if (rill_row_nil(&row)) break; - assert(row.key == key); + assert(row.a == key); - if (!rill_rows_push(result, row.key, row.val)) return -1; - count++; + if (!rill_rows_push(out, row.a, row.b)) return false; } - return count; + return true; } @@ -649,11 +646,15 @@ void rill_store_space( { *out = (struct rill_store_stats) { .header_bytes = sizeof(*store->head), - .index_bytes[rill_col_a] = store->head->index_b_off - store->head->index_a_off, - .index_bytes[rill_col_b] = store->head->data_a_off - store->head->index_b_off, - .rows_bytes[rill_col_a] = store->head->data_b_off - store->head->data_a_off, - .rows_bytes[rill_col_b] = store->vma_len - store->head->data_b_off, + + .index_bytes[rill_col_a] = store->head->index_off[rill_col_b] - + store->head->index_off[rill_col_a], + .index_bytes[rill_col_b] = store->head->data_off[rill_col_a] - + store->head->index_off[rill_col_b], + + .rows_bytes[rill_col_a] = store->head->data_off[rill_col_b] - + store->head->data_off[rill_col_a], + .rows_bytes[rill_col_b] = store->vma_len - + 
store->head->data_off[rill_col_b], }; - - return ret; } diff --git a/src/vals.c b/src/vals.c index 97c8878..c7c64f8 100644 --- a/src/vals.c +++ b/src/vals.c @@ -15,7 +15,7 @@ struct rill_packed vals typedef struct htable vals_rev_t; -static size_t vals_vtoi(const vals_rev_t *rev, rill_val_t val) +static size_t vals_vtoi(vals_rev_t *rev, rill_val_t val) { if (!val) return 0; // \todo giant hack for coder_finish @@ -79,7 +79,7 @@ static struct vals *vals_for_col(const struct rill_rows *rows, enum rill_col col static struct vals *vals_add_index(struct vals *vals, const struct index *index) { - assert(merge); + assert(index); if (!vals) { vals = calloc(1, sizeof(*vals) + index->len * sizeof(vals->data[0])); From a1757d662ce3be0866b8a9085c635d1640c52349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 14:21:04 -0400 Subject: [PATCH 77/91] fix rill_dump --- src/rill.h | 2 +- src/rill_dump.c | 151 +++++++++++++++++++++++++----------------------- src/store.c | 4 +- 3 files changed, 82 insertions(+), 75 deletions(-) diff --git a/src/rill.h b/src/rill.h index d5427b4..dae5b7c 100644 --- a/src/rill.h +++ b/src/rill.h @@ -154,7 +154,7 @@ struct rill_store_stats size_t rows_bytes[2]; }; -bool rill_store_stats(const struct rill_store *, struct rill_store_stats *); +void rill_store_stats(const struct rill_store *, struct rill_store_stats *); // ----------------------------------------------------------------------------- diff --git a/src/rill_dump.c b/src/rill_dump.c index a02ee25..bdb43c4 100644 --- a/src/rill_dump.c +++ b/src/rill_dump.c @@ -10,105 +10,112 @@ #include #include -void usage() + +// ----------------------------------------------------------------------------- +// dump +// ----------------------------------------------------------------------------- + +static void dump_headers(struct rill_store *store) +{ + printf("file: %s\n", rill_store_file(store)); + printf("version: %u\n", rill_store_version(store)); + printf("ts: %lu\n", 
rill_store_ts(store)); + printf("quant: %lu\n", rill_store_quant(store)); + printf("rows: %lu\n", rill_store_rows(store)); + printf("vals[a]: %zu\n", rill_store_vals_count(store, rill_col_a)); + printf("vals[b]: %zu\n", rill_store_vals_count(store, rill_col_b)); +} + +static void dump_stats(struct rill_store *store) +{ + struct rill_store_stats stats = {0}; + rill_store_stats(store, &stats); + + printf("file: %s\n", rill_store_file(store)); + printf("header: %zu\n", stats.header_bytes); + printf("index[a]: %zu\n", stats.index_bytes[rill_col_a]); + printf("index[b]: %zu\n", stats.index_bytes[rill_col_b]); + printf("rows[a]: %zu\n", stats.rows_bytes[rill_col_a]); + printf("rows[b]: %zu\n", stats.rows_bytes[rill_col_b]); +} + +static void dump_vals(struct rill_store *store, enum rill_col col) +{ + const size_t vals_len = rill_store_vals_count(store, col); + rill_val_t *vals = calloc(vals_len, sizeof(*vals)); + + (void) rill_store_vals(store, col, vals, vals_len); + + for (size_t i = 0; i < vals_len; ++i) + printf("0x%lx\n", vals[i]); + + free(vals); +} + +static void dump_rows(struct rill_store *store, enum rill_col col) { - fprintf(stderr, "rill_dump [-h] [-k] [-p] [-m] - \n"); + struct rill_store_it *it = rill_store_begin(store, col); + struct rill_row row = {0}; + + while (rill_store_it_next(it, &row)) { + if (rill_row_nil(&row)) break; + printf("0x%lx 0x%lx\n", row.a, row.b); + } + + rill_store_it_free(it); +} + + +// ----------------------------------------------------------------------------- +// main +// ----------------------------------------------------------------------------- + +static void usage() +{ + fprintf(stderr, "rill_dump - \n"); + fprintf(stderr, "rill_dump - - \n"); exit(1); } int main(int argc, char **argv) { - bool header = false; - bool key = false; + bool headers = false; + bool stats = false; + bool vals = false; bool rows = false; bool a = false; bool b = false; - bool space = false; int opt = 0; - while ((opt = getopt(argc, argv, 
"+habkpm")) != -1) { + while ((opt = getopt(argc, argv, "+hsvrab")) != -1) { switch (opt) { - case 'h': header = true; break; - case 'k': key = true; break; - case 'p': rows = true; break; + case 'h': headers = true; break; + case 's': stats = true; break; + case 'v': vals = true; break; + case 'r': rows = true; break; case 'a': a = true; break; case 'b': b = true; break; - case 'm': space = true; break; default: fprintf(stderr, "unknown argument: %c\n", opt); usage(); } } - if (!header && !a && !b && !a && !rows && !key && !space) usage(); if (optind >= argc) usage(); struct rill_store *store = rill_store_open(argv[optind]); if (!store) rill_exit(1); - if (header) { - printf("file: %s\n", rill_store_file(store)); - printf("version: %u\n", rill_store_version(store)); - printf("ts: %lu\n", rill_store_ts(store)); - printf("quant: %lu\n", rill_store_quant(store)); - printf("keys data a: %zu\n", rill_store_keys_count(store, rill_col_a)); - printf("keys data b: %zu\n", rill_store_keys_count(store, rill_col_b)); - printf("rows: %lu\n", rill_store_rows(store)); - printf("index a len: %zu\n", rill_store_index_len(store, rill_col_a)); - printf("index b len: %zu\n", rill_store_index_len(store, rill_col_b)); - } - - if ((key || rows) && !a && !b) { - fprintf(stderr, "you need to specify column a or b\n"); - return -1; - } - - if (key) { - const enum rill_col col = a ? rill_col_a : rill_col_b; - const size_t keys_len = rill_store_keys_count(store, col); - rill_val_t *keys = calloc(keys_len, sizeof(*keys)); - - (void) rill_store_keys(store, keys, keys_len, col); + if (!headers && !stats && !vals && !rows) usage(); + + if (headers) dump_headers(store); + if (stats) dump_stats(store); - printf("vals %c:\n", col ? 'b' : 'a'); + if ((a && b) || (!a && !b)) usage(); + enum rill_col col = a ? rill_col_a : rill_col_b; - for (size_t i = 0; i < keys_len; ++i) - printf(" 0x%lx\n", keys[i]); - } - - if (rows) { - struct rill_row row = {0}; - const enum rill_col col = a ? 
rill_col_a : rill_col_b; - struct rill_store_it *it = rill_store_begin(store, col); - - printf("rows %c:\n", a ? 'a' : 'b'); - while (rill_store_it_next(it, &row)) { - if (rill_row_nil(&row)) break; - printf(" 0x%lx 0x%lx\n", row.key, row.val); - } - - rill_store_it_free(it); - } - - if (space) { - struct rill_space* space = rill_store_space(store); - - printf( - "size stats : %s\n" - "header size : %zu\n" - "index a size: %zu\n" - "index b size: %zu\n" - "data a size : %zu\n" - "data b size : %zu\n", - rill_store_file(store), - rill_store_space_header(space), - rill_store_space_index(space, rill_col_a), - rill_store_space_index(space, rill_col_b), - rill_store_space_rows(space, rill_col_a), - rill_store_space_rows(space, rill_col_b)); - - free(space); - } + if (vals) dump_vals(store, col); + if (rows) dump_rows(store, col); rill_store_close(store); return 0; diff --git a/src/store.c b/src/store.c index a41d7cd..23861c4 100644 --- a/src/store.c +++ b/src/store.c @@ -641,8 +641,8 @@ bool rill_store_it_next(struct rill_store_it *it, struct rill_row *row) // stats // ----------------------------------------------------------------------------- -void rill_store_space( - const struct rill_store* store, struct rill_store_stats *out) +void rill_store_stats( + const struct rill_store *store, struct rill_store_stats *out) { *out = (struct rill_store_stats) { .header_bytes = sizeof(*store->head), From e6bf3c0deb5e31cbb720c7c5966e7db5ad7d42e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 14:34:53 -0400 Subject: [PATCH 78/91] fix rill_query --- src/rill_query.c | 91 ++++++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/src/rill_query.c b/src/rill_query.c index 8cf7b68..4f6da70 100644 --- a/src/rill_query.c +++ b/src/rill_query.c @@ -13,20 +13,57 @@ #include #include -bool is_file(const char *path) + +// ----------------------------------------------------------------------------- +// 
query +// ----------------------------------------------------------------------------- + +static bool is_file(const char *path) { struct stat st = {0}; stat(path, &st); return S_ISREG(st.st_mode); } -void usage() +static void query(const char *db, enum rill_col col, rill_val_t val) +{ + struct rill_rows rows = {0}; + + if (is_file(db)) { + struct rill_store *store = rill_store_open(db); + if (!store) rill_exit(1); + + if (!rill_store_query(store, col, val, &rows)) rill_exit(1); + + rill_store_close(store); + } + else { + struct rill_query *query = rill_query_open(db); + if (!query) rill_exit(1); + + if (!rill_query_key(query, col, val, &rows)) rill_exit(1); + + rill_query_close(query); + } + + for (size_t i = 0; i < rows.len; ++i) + printf("0x%lx 0x%lx\n", rows.data[i].a, rows.data[i].b); + + rill_rows_free(&rows); +} + + +// ----------------------------------------------------------------------------- +// main +// ----------------------------------------------------------------------------- + +static void usage() { - fprintf(stderr, "rill_query [-k |-v ] \n"); + fprintf(stderr, "rill_query - \n"); exit(1); } -uint64_t read_u64(char *arg) +static uint64_t read_u64(char *arg) { size_t n = strnlen(arg, 128); @@ -60,49 +97,27 @@ uint64_t read_u64(char *arg) int main(int argc, char *argv[]) { - rill_val_t key = 0; - rill_val_t val = 0; + bool a = false; + bool b = false; int opt = 0; - while ((opt = getopt(argc, argv, "k:v:")) != -1) { + while ((opt = getopt(argc, argv, "+ab")) != -1) { switch (opt) { - case 'k': key = read_u64(optarg); break; - case 'v': val = read_u64(optarg); break; + case 'a': a = true; break; + case 'b': b = true; break; default: usage(); exit(1); } } - if (key && val) { usage(); } - if (!key && !val) { usage(); } - if (optind >= argc) { usage(); } - - const char *db = argv[optind]; - struct rill_rows *rows = rill_rows_new(100); - - if (is_file(db)) { - struct rill_store *store = rill_store_open(db); - if (!store) rill_exit(1); - - if (key) rows 
= rill_store_query_key(store, key, rows); - else rows = rill_store_query_value(store, val, rows); - - rill_store_close(store); - } - else { - struct rill_query *query = rill_query_open(db); - if (!query) rill_exit(1); - - if (key) rows = rill_query_key(query, key, rows); - else rows = rill_query_vals(query, &val, 1, rows); - - rill_query_close(query); - } + if (optind + 1 >= argc) usage(); - if (!rows) rill_exit(1); + if ((a && b) || (!a && !b)) usage(); + enum rill_col col = a ? rill_col_a : rill_col_b; - for (size_t i = 0; i < rows->len; ++i) - printf("0x%lx 0x%lx\n", rows->data[i].key, rows->data[i].val); + rill_val_t val = read_u64(argv[optind]); + const char *db = argv[optind + 1]; - rill_rows_free(rows); + query(db, col, val); + return 0; } From e1a1de7a52cf0781f83f510fb29f1d9be7e29dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 14:41:32 -0400 Subject: [PATCH 79/91] Centralized a,b args handling --- src/rill_dump.c | 12 ++++++------ src/rill_query.c | 12 ++++++------ src/utils.h | 12 ++++++++++++ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/rill_dump.c b/src/rill_dump.c index bdb43c4..e33dc4a 100644 --- a/src/rill_dump.c +++ b/src/rill_dump.c @@ -83,8 +83,8 @@ int main(int argc, char **argv) bool stats = false; bool vals = false; bool rows = false; - bool a = false; - bool b = false; + bool col_a = false; + bool col_b = false; int opt = 0; while ((opt = getopt(argc, argv, "+hsvrab")) != -1) { @@ -93,8 +93,8 @@ int main(int argc, char **argv) case 's': stats = true; break; case 'v': vals = true; break; case 'r': rows = true; break; - case 'a': a = true; break; - case 'b': b = true; break; + case 'a': col_a = true; break; + case 'b': col_b = true; break; default: fprintf(stderr, "unknown argument: %c\n", opt); usage(); @@ -111,8 +111,8 @@ int main(int argc, char **argv) if (headers) dump_headers(store); if (stats) dump_stats(store); - if ((a && b) || (!a && !b)) usage(); - enum rill_col col = a ? 
rill_col_a : rill_col_b; + enum rill_col col; + if (!rill_args_col(col_a, col_b, &col)) usage(); if (vals) dump_vals(store, col); if (rows) dump_rows(store, col); diff --git a/src/rill_query.c b/src/rill_query.c index 4f6da70..32bd86c 100644 --- a/src/rill_query.c +++ b/src/rill_query.c @@ -97,22 +97,22 @@ static uint64_t read_u64(char *arg) int main(int argc, char *argv[]) { - bool a = false; - bool b = false; + bool col_a = false; + bool col_b = false; int opt = 0; while ((opt = getopt(argc, argv, "+ab")) != -1) { switch (opt) { - case 'a': a = true; break; - case 'b': b = true; break; + case 'a': col_a = true; break; + case 'b': col_b = true; break; default: usage(); exit(1); } } if (optind + 1 >= argc) usage(); - if ((a && b) || (!a && !b)) usage(); - enum rill_col col = a ? rill_col_a : rill_col_b; + enum rill_col col; + if (!rill_args_col(col_a, col_b, &col)) usage(); rill_val_t val = read_u64(argv[optind]); const char *db = argv[optind + 1]; diff --git a/src/utils.h b/src/utils.h index 2e15d85..93b5eb0 100644 --- a/src/utils.h +++ b/src/utils.h @@ -84,3 +84,15 @@ static inline size_t to_vma_len(size_t len) if (!(len % page_len)) return len; return (len & ~(page_len - 1)) + page_len; } + + +// ----------------------------------------------------------------------------- +// args +// ----------------------------------------------------------------------------- + +inline bool rill_args_col(bool a, bool b, enum rill_col *out) +{ + if ((a && b) || (!a && !b)) return false; + *out = a ? 
rill_col_a : rill_col_b; + return true; +} From c61988f0c0a99f4684625247ea55a761544ba306 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 14:45:57 -0400 Subject: [PATCH 80/91] fix rill_ingest --- src/rill_ingest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rill_ingest.c b/src/rill_ingest.c index 2b640b5..f3d5d2c 100644 --- a/src/rill_ingest.c +++ b/src/rill_ingest.c @@ -55,9 +55,9 @@ struct rill_store *load_file(const char *file, rill_ts_t ts, rill_ts_t quant) struct rill_row *it = data; struct rill_row *end = it + (st.st_size / sizeof(*it)); for (; it < end; ++it) { - rill_val_t key = endian_btol(it->val); - rill_val_t val = endian_btol(it->key); - *it = (struct rill_row) { .key = key, .val = val }; + rill_val_t a = endian_btol(it->a); + rill_val_t b = endian_btol(it->b); + *it = (struct rill_row) { .a = a, .b = b }; } struct rill_rows *rows = ((struct rill_rows *)data) - 1; From f7cb23dd9d40a252feab9627049601d48a2e7d91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 14:46:14 -0400 Subject: [PATCH 81/91] Fix rill_count --- src/rill_count.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/rill_count.c b/src/rill_count.c index 8026b48..0a388a3 100644 --- a/src/rill_count.c +++ b/src/rill_count.c @@ -10,13 +10,12 @@ #include #include -void usage() -{ - fprintf(stderr, "rill_count <-a|-b> \n"); - exit(1); -} -void count(struct rill_store *store, enum rill_col col) +// ----------------------------------------------------------------------------- +// count +// ----------------------------------------------------------------------------- + +static void count(struct rill_store *store, enum rill_col col) { struct rill_row row; struct rill_store_it *it = rill_store_begin(store, col); @@ -26,17 +25,28 @@ void count(struct rill_store *store, enum rill_col col) while (rill_store_it_next(it, &row)) { if 
(rill_row_nil(&row)) break; - if (row.key == key) count++; + if (row.a == key) count++; else { if (key) printf("%lu %p\n", count, (void *) key); count = 1; - key = row.key; + key = row.a; } } rill_store_it_free(it); } + +// ----------------------------------------------------------------------------- +// main +// ----------------------------------------------------------------------------- + +static void usage() +{ + fprintf(stderr, "rill_count - \n"); + exit(1); +} + int main(int argc, char **argv) { if (argc != 3) usage(); @@ -44,7 +54,7 @@ int main(int argc, char **argv) int opt = 0; bool col_a = false, col_b = false; - while ((opt = getopt(argc, argv, "ab")) != -1) { + while ((opt = getopt(argc, argv, "+ab")) != -1) { switch(opt) { case 'a': col_a = true; break; case 'b': col_b = true; break; @@ -52,11 +62,15 @@ int main(int argc, char **argv) } } + if (optind >= argc) usage(); + + enum rill_col col; + if (!rill_args_col(col_a, col_b, &col)) usage(); + struct rill_store *store = rill_store_open(argv[optind]); if (!store) rill_exit(1); - if (col_a) count(store, rill_col_a); - if (col_b) count(store, rill_col_b); + count(store, col); rill_store_close(store); return 0; From c221f3550693bd03eccf5532d06352f5bd2ce30c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 14:51:24 -0400 Subject: [PATCH 82/91] Remove generate for now --- compile.sh | 1 - src/rill_generate.c | 64 --------------------------------------------- 2 files changed, 65 deletions(-) delete mode 100644 src/rill_generate.c diff --git a/compile.sh b/compile.sh index 4ae8254..2619b82 100755 --- a/compile.sh +++ b/compile.sh @@ -42,7 +42,6 @@ $CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS $CC -o rill_ingest "${PREFIX}/src/rill_ingest.c" librill.a $CFLAGS $CC -o rill_merge "${PREFIX}/src/rill_merge.c" librill.a $CFLAGS $CC -o rill_count "${PREFIX}/src/rill_count.c" librill.a $CFLAGS -$CC -o rill_count "${PREFIX}/src/rill_generate.c" librill.a $CFLAGS $CC 
-o test_indexer "${PREFIX}/test/index_test.c" librill.a $CFLAGS && ./test_index $CC -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder diff --git a/src/rill_generate.c b/src/rill_generate.c deleted file mode 100644 index 0d49222..0000000 --- a/src/rill_generate.c +++ /dev/null @@ -1,64 +0,0 @@ -#include "test.h" - -static struct rill_store *make_store( - const char *name, struct rill_rows *rows) -{ - unlink(name); - assert(rill_store_write(name, 0, 0, rows)); - return 0; -} - -bool generate() -{ - struct rng rng = rng_make(0); - struct rill_rows *rows = make_rng_rows(&rng); - make_store("test.store.generated", rows); - rill_rows_free(rows); - return true; -} - -bool generate_simple() -{ - const size_t len = 20; - struct rill_rows *rows = rill_rows_new(len); - - for (size_t i = 0; i < len; ++i) { - rill_rows_push(rows, i + 1, 3 + i * 20); - } - - make_store("test.store.simple", rows); - rill_rows_free(rows); - - return true; -} - -bool generate_with_multiple_values() -{ - const size_t len = 100; - struct rill_rows *rows = rill_rows_new(len); - - for (size_t i = 0; i < 20; ++i) - for (size_t j = 1; j < 5; ++j) - rill_rows_push(rows, i + 1, j * 100); - - make_store("test.store.multvals", rows); - rill_rows_free(rows); - - return true; -} - -// ----------------------------------------------------------------------------- -// main -// ----------------------------------------------------------------------------- - -int main(int argc, char **argv) -{ - (void) argc, (void) argv; - - (void) generate(); - (void) generate_simple(); - (void) generate_with_multiple_values(); - - printf("generated some rill database(s)\n"); - return 0; -} From 0e738003d00ac3b6aaafaeba50e2140c011bdeeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 14:58:17 -0400 Subject: [PATCH 83/91] compile tests --- test/coder_test.c | 42 +++++++++++++++++++++--------------------- test/rotate_test.c | 16 ++++++++-------- test/store_test.c | 24 
+++++++++++++----------- test/test.h | 18 +++++++++--------- 4 files changed, 51 insertions(+), 49 deletions(-) diff --git a/test/coder_test.c b/test/coder_test.c index db1061e..036c8ee 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -113,7 +113,7 @@ static void check_vals(struct rill_rows rows, struct vals *exp) free(vals); free(exp); - rill_rows_free(rows); + rill_rows_free(&rows); htable_reset(&rev); } @@ -173,7 +173,7 @@ static struct index *lookup_alloc(struct vals *vals) static void check_coder(struct rill_rows rows) { - rill_rows_compact(rows); + rill_rows_compact(&rows); struct vals *vals[2] = { vals_for_col(&rows, rill_col_a), @@ -184,7 +184,7 @@ static void check_coder(struct rill_rows rows) struct index *lookup = lookup_alloc(vals[rill_col_b]); size_t cap = coder_cap(vals[rill_col_b]->len, rows.len); - uint8_t buffer = calloc(1, cap); + uint8_t *buffer = calloc(1, cap); size_t len = 0; { @@ -192,28 +192,28 @@ static void check_coder(struct rill_rows rows) make_encoder(buffer, buffer + cap, vals[rill_col_b], index); for (size_t i = 0; i < rows.len; ++i) - assert(coder_encode(&coder, &rows->data[i])); + assert(coder_encode(&coder, &rows.data[i])); assert(coder_finish(&coder)); - len = len = coder.it - buffer; + len = coder.it - buffer; } if (false) { - printf("input: "); rill_rows_print(rows); + printf("input: "); rill_rows_print(&rows); printf("buffer: start=%p, len=%lu\n", (void *) buffer, len); hexdump(buffer, cap); printf("index: [ "); for (size_t i = 0; i < index->len; ++i) { - struct index_row *row = &index->data[i]; + struct index_kv *row = &index->data[i]; printf("{%p, %p} ", (void *) row->key, (void *) row->off); } printf("]\n"); printf("lookup: [ "); for (size_t i = 0; i < lookup->len; ++i) { - struct index_row *row = &lookup->data[i]; + struct index_kv *row = &lookup->data[i]; printf("%p ", (void *) row->key); } printf("]\n"); @@ -223,27 +223,27 @@ static void check_coder(struct rill_rows rows) struct decoder coder = 
make_decoder_at(buffer, buffer + len, lookup, index, 0); - struct rill_kv kv = {0}; - for (size_t i = 0; i < rows->len; ++i) { - assert(coder_decode(&coder, &kv)); - assert(rill_kv_cmp(&kv, &rows->data[i]) == 0); + struct rill_row row = {0}; + for (size_t i = 0; i < rows.len; ++i) { + assert(coder_decode(&coder, &row)); + assert(rill_row_cmp(&row, &rows.data[i]) == 0); } - assert(coder_decode(&coder, &kv)); - assert(rill_kv_nil(&kv)); + assert(coder_decode(&coder, &row)); + assert(rill_row_nil(&row)); } - for (size_t i = 0; i < rows->len; ++i) { + for (size_t i = 0; i < rows.len; ++i) { size_t key_idx; uint64_t off; - assert(index_find(index, rows->data[i].key, &key_idx, &off)); + assert(index_find(index, rows.data[i].a, &key_idx, &off)); struct decoder coder = make_decoder_at( - buffer + off, buffer + len, vals, index, key_idx); + buffer + off, buffer + len, lookup, index, key_idx); - struct rill_kv kv = {0}; + struct rill_row row = {0}; do { - assert(coder_decode(&coder, &kv)); - assert(kv.key == rows->data[i].key); - } while (kv.val != rows->data[i].val); + assert(coder_decode(&coder, &row)); + assert(row.a == rows.data[i].a); + } while (row.b != rows.data[i].b); } free(buffer); diff --git a/test/rotate_test.c b/test/rotate_test.c index f2b7a97..bfa4b09 100644 --- a/test/rotate_test.c +++ b/test/rotate_test.c @@ -49,12 +49,12 @@ bool test_rotate(void) size_t i = 0; for (rill_ts_t ts = 0; ts < expire_secs; ts += step) { - assert(rows->data[i].key == key); - assert(rows->data[i].val == ts + 1); + assert(rows.data[i].a == key); + assert(rows.data[i].b == ts + 1); ++i; } - rill_rows_free(rows); + rill_rows_free(&rows); } for (size_t i = 1; i <= 6; ++i) { @@ -68,15 +68,15 @@ bool test_rotate(void) { struct rill_query *query = rill_query_open(dir); struct rill_rows rows = {0}; - assert(rill_query_keys(query, rill_col_a, key, 1, &rows)); + assert(rill_query_key(query, rill_col_a, key, &rows)); rill_query_close(query); - for (size_t i = 0; i < rows->len; ++i) { - 
assert(rows->data[i].key == key); - assert(rows->data[i].val >= (5 * month_secs) + 1); + for (size_t i = 0; i < rows.len; ++i) { + assert(rows.data[i].a == key); + assert(rows.data[i].b >= (5 * month_secs) + 1); } - rill_rows_free(rows); + rill_rows_free(&rows); } rm(dir); diff --git a/test/store_test.c b/test/store_test.c index 2acef9f..be07906 100644 --- a/test/store_test.c +++ b/test/store_test.c @@ -4,6 +4,7 @@ */ #include "test.h" +#include "store.c" // ----------------------------------------------------------------------------- @@ -38,7 +39,7 @@ static void check_query(struct rill_rows rows) for (size_t col = 0; col < rill_cols; ++col) { for (size_t i = 0; i < expected.len;) { rill_rows_clear(&result); - assert(rill_store_query(store, col, expected.data[i].key, &result)); + assert(rill_store_query(store, col, expected.data[i].a, &result)); assert(expected.len - i >= result.len); for (size_t j = 0; j < result.len; ++j, ++i) @@ -49,9 +50,9 @@ static void check_query(struct rill_rows rows) } rill_store_close(store); - rill_rows_free(rows); - rill_rows_free(expected); - rill_rows_free(result); + rill_rows_free(&rows); + rill_rows_free(&expected); + rill_rows_free(&result); } bool test_query(void) @@ -63,7 +64,7 @@ bool test_query(void) struct rng rng = rng_make(0); for (size_t iterations = 0; iterations < 10; ++iterations) - check_query_key(make_rng_rows(&rng)); + check_query(make_rng_rows(&rng)); return true; } @@ -84,14 +85,15 @@ static void check_vals(struct rill_rows rows) for (size_t col = 0; col < rill_cols; ++col) { size_t len = rill_store_vals_count(store, col); - rill_val_t vals[len] = {0}; + rill_val_t *vals = calloc(len, sizeof(*vals)); assert(rill_store_vals(store, col, vals, len) == len); for (size_t i = 0; i < len; ++i) assert(vals[i] == exp[col]->data[i]); - free(exp[col]; + free(exp[col]); + free(vals); } } @@ -105,7 +107,7 @@ bool test_vals(void) struct rng rng = rng_make(0); for (size_t iterations = 0; iterations < 10; ++iterations) - 
check_query_key(make_rng_rows(&rng)); + check_vals(make_rng_rows(&rng)); return true; } @@ -141,8 +143,8 @@ static void check_it(struct rill_rows rows) } rill_store_close(store); - rill_rows_free(rows); - rill_rows_free(expected); + rill_rows_free(&rows); + rill_rows_free(&expected); } bool test_it(void) @@ -154,7 +156,7 @@ bool test_it(void) struct rng rng = rng_make(0); for (size_t iterations = 0; iterations < 10; ++iterations) - check_query_key(make_rng_rows(&rng)); + check_it(make_rng_rows(&rng)); return true; } diff --git a/test/test.h b/test/test.h index aca2b03..d93747d 100644 --- a/test/test.h +++ b/test/test.h @@ -24,15 +24,15 @@ // rows // ----------------------------------------------------------------------------- -struct rill_row row(rill_val_t key, rill_val_t val) +struct rill_row row(rill_val_t a, rill_val_t b) { - return (struct rill_row) { .key = key, .val = val }; + return (struct rill_row) { .a = a, .b = b }; } #define make_rows(...) \ ({ \ struct rill_row rows[] = { __VA_ARGS__ }; \ - make_row_impl(rows, sizeof(rows) / sizeof(rows[0])); \ + make_rows_impl(rows, sizeof(rows) / sizeof(rows[0])); \ }) struct rill_rows make_rows_impl(const struct rill_row *rows, size_t len) @@ -41,12 +41,12 @@ struct rill_rows make_rows_impl(const struct rill_row *rows, size_t len) assert(rill_rows_reserve(&result, len)); for (size_t i = 0; i < len; ++i) - assert(rill_rows_push(result, rows[i].key, rows[i].val)); + assert(rill_rows_push(&result, rows[i].a, rows[i].b)); return result; } -enum { rng_range_key = 250, rng_range_val = 100 }; +enum { rng_range_a = 250, rng_range_b = 100 }; struct rill_rows make_rng_rows(struct rng *rng) { @@ -55,9 +55,9 @@ struct rill_rows make_rng_rows(struct rng *rng) rill_rows_reserve(&rows, len); for (size_t i = 0; i < len; ++i) { - uint64_t key = rng_gen_range(rng, 1, rng_range_key); - uint64_t val = rng_gen_range(rng, 1, rng_range_val); - rill_rows_push(&rows, key, val); + uint64_t a = rng_gen_range(rng, 1, rng_range_a); + uint64_t 
b = rng_gen_range(rng, 1, rng_range_b); + rill_rows_push(&rows, a, b); } rill_rows_compact(&rows); @@ -93,7 +93,7 @@ void rm(const char *path) // hexdump // ----------------------------------------------------------------------------- -void hexdump(const void *buffer, size_t len) +void hexdump(const uint8_t *buffer, size_t len) { for (size_t i = 0; i < len;) { printf("%6p: ", (void *) i); From 740f0a9fd0134d185a195d10250b898ecf013681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 6 Jul 2018 15:27:52 -0400 Subject: [PATCH 84/91] dumb bugs --- compile.sh | 2 +- src/rows.c | 2 +- src/store.c | 11 +++++------ test/store_test.c | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/compile.sh b/compile.sh index 2619b82..54a3379 100755 --- a/compile.sh +++ b/compile.sh @@ -43,7 +43,7 @@ $CC -o rill_ingest "${PREFIX}/src/rill_ingest.c" librill.a $CFLAGS $CC -o rill_merge "${PREFIX}/src/rill_merge.c" librill.a $CFLAGS $CC -o rill_count "${PREFIX}/src/rill_count.c" librill.a $CFLAGS -$CC -o test_indexer "${PREFIX}/test/index_test.c" librill.a $CFLAGS && ./test_index +$CC -o test_index "${PREFIX}/test/index_test.c" librill.a $CFLAGS && ./test_index $CC -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder $CC -o test_store "${PREFIX}/test/store_test.c" librill.a $CFLAGS && ./test_store $CC -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS diff --git a/src/rows.c b/src/rows.c index 2a2bdb1..24504fe 100644 --- a/src/rows.c +++ b/src/rows.c @@ -37,7 +37,7 @@ bool rill_rows_reserve(struct rill_rows *rows, size_t cap) { if (rill_likely(cap <= rows->cap)) return true; - size_t new_cap = rows->cap; + size_t new_cap = rows->cap ? 
rows->cap : 1; while (new_cap < cap) new_cap *= 2; rows->data = realloc(rows->data, new_cap * sizeof(rows->data[0])); diff --git a/src/store.c b/src/store.c index 23861c4..db7a447 100644 --- a/src/store.c +++ b/src/store.c @@ -91,15 +91,14 @@ static struct encoder store_encoder( struct vals *vals[rill_cols]) { enum rill_col other_col = rill_col_flip(col); - + size_t start = store->head->data_off[col]; - size_t end = col == rill_col_a ? - store->head->data_off[other_col] : store->vma_len; + size_t end = store->vma_len; return make_encoder( store->vma + start, store->vma + end, - vals[col], + vals[other_col], store->index[col]); } @@ -646,12 +645,12 @@ void rill_store_stats( { *out = (struct rill_store_stats) { .header_bytes = sizeof(*store->head), - + .index_bytes[rill_col_a] = store->head->index_off[rill_col_b] - store->head->index_off[rill_col_a], .index_bytes[rill_col_b] = store->head->data_off[rill_col_a] - store->head->index_off[rill_col_b], - + .rows_bytes[rill_col_a] = store->head->data_off[rill_col_b] - store->head->data_off[rill_col_a], .rows_bytes[rill_col_b] = store->vma_len - diff --git a/test/store_test.c b/test/store_test.c index be07906..16fbef6 100644 --- a/test/store_test.c +++ b/test/store_test.c @@ -17,7 +17,7 @@ static struct rill_store *make_store(const char *name, struct rill_rows *rows) assert(rill_store_write(name, 0, 0, rows)); struct rill_store *store = rill_store_open(name); - assert(store); + if (!store) rill_abort(); return store; } From e9104bb18c7f1ae93b5b8260dfa3fcdb247b7660 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Fri, 10 Aug 2018 16:42:15 -0400 Subject: [PATCH 85/91] Fix store --- src/store.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/store.c b/src/store.c index db7a447..fabcb28 100644 --- a/src/store.c +++ b/src/store.c @@ -164,7 +164,7 @@ struct rill_store *rill_store_open(const char *file) size_t len = stat_ret.st_size; if (len < sizeof(struct header)) { - 
rill_fail("invalid size for '%s'", file); + rill_fail("invalid size '%lu' for '%s'", len, file); goto fail_size; } @@ -348,7 +348,7 @@ static void writer_offsets_init( static void writer_offsets_finish(struct rill_store *store, size_t off) { - store->head->data_off[rill_col_b] = off; + store->head->data_off[rill_col_b] = store->head->data_off[rill_col_a] + off; store->data[rill_col_b] = store_ptr(store, off); } @@ -367,10 +367,7 @@ bool rill_store_write( } struct rill_store store = {0}; - if (!writer_open(&store, file, vals, rows->len, ts, quant)) { - rill_fail("unable to create '%s'", file); - goto fail_open; - } + if (!writer_open(&store, file, vals, rows->len, ts, quant)) goto fail_open; writer_offsets_init(&store, vals); @@ -491,10 +488,7 @@ bool rill_store_merge( } struct rill_store store = {0}; - if (!writer_open(&store, file, vals, rows, ts, quant)) { - rill_fail("unable to create '%s'", file); - goto fail_open; - } + if (!writer_open(&store, file, vals, rows, ts, quant)) goto fail_open; writer_offsets_init(&store, vals); From b976d79d044c9e9fae53ce68d616e3280fb73a91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 6 Oct 2018 14:06:16 -0400 Subject: [PATCH 86/91] Tweak compile script --- compile.sh | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/compile.sh b/compile.sh index 54a3379..0e9793f 100755 --- a/compile.sh +++ b/compile.sh @@ -6,13 +6,17 @@ set -o errexit -o nounset -o pipefail -o xtrace declare -a SRC SRC=(htable rng utils rows store acc rotate query) -CC=${OTHERC:-gcc} +declare -a BIN +BIN=(load dump query rotate ingest merge count) + +declare -a TEST +TEST=(index coder store rotate) + +CC=${OTHERC:-gcc} LEAKCHECK_ENABLED=${LEAKCHECK_ENABLED:-} -LEAKCHECK=${OTHERMEMCHECK:-valgrind} -LEAKCHECK_ARGS="--leak-check=full --track-origins=yes --trace-children=yes --error-exitcode=1" -CFLAGS="-g -O3 -march=native -pipe -std=gnu11 -D_GNU_SOURCE" +CFLAGS="-ggdb 
-O3 -march=native -pipe -std=gnu11 -D_GNU_SOURCE" CFLAGS="$CFLAGS -I${PREFIX}/src" CFLAGS="$CFLAGS -Werror -Wall -Wextra" @@ -35,25 +39,22 @@ for src in "${SRC[@]}"; do done ar rcs librill.a $OBJ -$CC -o rill_load "${PREFIX}/src/rill_load.c" librill.a $CFLAGS -$CC -o rill_dump "${PREFIX}/src/rill_dump.c" librill.a $CFLAGS -$CC -o rill_query "${PREFIX}/src/rill_query.c" librill.a $CFLAGS -$CC -o rill_rotate "${PREFIX}/src/rill_rotate.c" librill.a $CFLAGS -$CC -o rill_ingest "${PREFIX}/src/rill_ingest.c" librill.a $CFLAGS -$CC -o rill_merge "${PREFIX}/src/rill_merge.c" librill.a $CFLAGS -$CC -o rill_count "${PREFIX}/src/rill_count.c" librill.a $CFLAGS - -$CC -o test_index "${PREFIX}/test/index_test.c" librill.a $CFLAGS && ./test_index -$CC -o test_coder "${PREFIX}/test/coder_test.c" librill.a $CFLAGS && ./test_coder -$CC -o test_store "${PREFIX}/test/store_test.c" librill.a $CFLAGS && ./test_store -$CC -o test_rotate "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS - -if [ -n "$LEAKCHECK_ENABLED" ] -then - echo test_indexer ======================================= - $LEAKCHECK $LEAKCHECK_ARGS ./test_indexer - echo test_coder ========================================= - $LEAKCHECK $LEAKCHECK_ARGS ./test_coder - echo test_store ========================================= - $LEAKCHECK $LEAKCHECK_ARGS ./test_store +for bin in "${BIN[@]}"; do + $CC -o "rill_$bin" "${PREFIX}/src/rill_$bin.c" librill.a $CFLAGS +done + +for test in "${TEST[@]}"; do + $CC -o "test_$test" "${PREFIX}/test/${test}_test.c" librill.a $CFLAGS + "./test_$test" +done + +if [ -n "$LEAKCHECK_ENABLED" ]; then + for test in "{TEST[@]}"; do + valgrind \ + --leak-check=full \ + --track-origins=yes \ + --trace-children=yes \ + --error-exitcode=1 \ + "./test_$test" + done fi From 8000b65b3d69a33796d3e9540604745c2db93622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 6 Oct 2018 14:56:51 -0400 Subject: [PATCH 87/91] Fix store test --- src/store.c | 9 ++++----- test/store_test.c | 2 +- 2 
files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/store.c b/src/store.c index fabcb28..8225003 100644 --- a/src/store.c +++ b/src/store.c @@ -127,7 +127,7 @@ static struct decoder store_decoder_at( static struct decoder store_decoder( const struct rill_store *store, enum rill_col col) { - return store_decoder_at(store, 0, 0, col); + return store_decoder_at(store, col, 0, 0); } @@ -335,12 +335,12 @@ static void writer_offsets_init( store->head->index_off[rill_col_a] = off; store->index[rill_col_a] = store_ptr(store, off); - off += index_cap(vals[rill_col_b]->len); + off += index_cap(vals[rill_col_a]->len); store->head->index_off[rill_col_b] = off; store->index[rill_col_b] = store_ptr(store, off); - off += index_cap(vals[rill_col_a]->len); + off += index_cap(vals[rill_col_b]->len); store->head->data_off[rill_col_a] = off; store->data[rill_col_a] = store_ptr(store, off); @@ -593,8 +593,7 @@ bool rill_store_query( while (true) { if (!coder_decode(&coder, &row)) return false; - if (rill_row_nil(&row)) break; - assert(row.a == key); + if (rill_row_nil(&row) || row.a != key) break; if (!rill_rows_push(out, row.a, row.b)) return false; } diff --git a/test/store_test.c b/test/store_test.c index 16fbef6..11d72bb 100644 --- a/test/store_test.c +++ b/test/store_test.c @@ -123,7 +123,7 @@ static void check_it(struct rill_rows rows) rill_rows_copy(&rows, &expected); rill_rows_compact(&expected); - struct rill_store *store = make_store("test.store.query", &rows); + struct rill_store *store = make_store("test.store.it", &rows); for (size_t col = 0; col < rill_cols; ++col) { struct rill_store_it *it = rill_store_begin(store, col); From a1a001a259232fc47276507d2a91934daedf3b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 6 Oct 2018 15:21:07 -0400 Subject: [PATCH 88/91] Fix merge typo --- src/store.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/store.c b/src/store.c index 8225003..2007fe7 100644 --- a/src/store.c +++ 
b/src/store.c @@ -478,7 +478,7 @@ bool rill_store_merge( for (size_t i = 0; i < list_len; ++i) { if (!list[i]) continue; - for (size_t col = 0; i < rill_cols; ++i) { + for (size_t col = 0; col < rill_cols; ++col) { struct vals *ret = vals_add_index(vals[col], list[i]->index[col]); if (!ret) goto fail_vals; vals[col] = ret; From cfd68e0530c78f40652ceacb9c02339a2af92f40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 6 Oct 2018 15:28:44 -0400 Subject: [PATCH 89/91] Tweak rill_dump a bit --- src/rill_dump.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/rill_dump.c b/src/rill_dump.c index e33dc4a..686a89c 100644 --- a/src/rill_dump.c +++ b/src/rill_dump.c @@ -17,7 +17,6 @@ static void dump_headers(struct rill_store *store) { - printf("file: %s\n", rill_store_file(store)); printf("version: %u\n", rill_store_version(store)); printf("ts: %lu\n", rill_store_ts(store)); printf("quant: %lu\n", rill_store_quant(store)); @@ -31,7 +30,6 @@ static void dump_stats(struct rill_store *store) struct rill_store_stats stats = {0}; rill_store_stats(store, &stats); - printf("file: %s\n", rill_store_file(store)); printf("header: %zu\n", stats.header_bytes); printf("index[a]: %zu\n", stats.index_bytes[rill_col_a]); printf("index[b]: %zu\n", stats.index_bytes[rill_col_b]); @@ -108,8 +106,12 @@ int main(int argc, char **argv) if (!headers && !stats && !vals && !rows) usage(); - if (headers) dump_headers(store); - if (stats) dump_stats(store); + if (headers || stats) { + printf("file: %s\n", rill_store_file(store)); + if (headers) dump_headers(store); + if (stats) dump_stats(store); + return 0; + } enum rill_col col; if (!rill_args_col(col_a, col_b, &col)) usage(); From cce2168bf0a1950d58b2f92277ca20f4ed214e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 6 Oct 2018 15:29:46 -0400 Subject: [PATCH 90/91] Singled out rotate_test in build --- compile.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) 
diff --git a/compile.sh b/compile.sh index 0e9793f..c5b3ed0 100755 --- a/compile.sh +++ b/compile.sh @@ -11,7 +11,7 @@ declare -a BIN BIN=(load dump query rotate ingest merge count) declare -a TEST -TEST=(index coder store rotate) +TEST=(index coder store) CC=${OTHERC:-gcc} LEAKCHECK_ENABLED=${LEAKCHECK_ENABLED:-} @@ -48,6 +48,10 @@ for test in "${TEST[@]}"; do "./test_$test" done +# this one takes a while so it's usually run manually +$CC -o "test_rotate" "${PREFIX}/test/rotate_test.c" librill.a $CFLAGS + + if [ -n "$LEAKCHECK_ENABLED" ]; then for test in "{TEST[@]}"; do valgrind \ From d31de7fe91719d1fab49bf6027046f7cfdfba019 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Attab?= Date: Sat, 6 Oct 2018 15:49:56 -0400 Subject: [PATCH 91/91] Add merge test to store tests --- src/rill.h | 1 + src/rows.c | 10 ++++++ test/store_test.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++ test/test.h | 3 +- 4 files changed, 91 insertions(+), 2 deletions(-) diff --git a/src/rill.h b/src/rill.h index dae5b7c..8e2e107 100644 --- a/src/rill.h +++ b/src/rill.h @@ -102,6 +102,7 @@ void rill_rows_clear(struct rill_rows *); void rill_rows_invert(struct rill_rows *); void rill_rows_compact(struct rill_rows *); +bool rill_rows_append(struct rill_rows *, struct rill_rows *other); bool rill_rows_copy(const struct rill_rows *, struct rill_rows *out); void rill_rows_print(const struct rill_rows *); diff --git a/src/rows.c b/src/rows.c index 24504fe..b666091 100644 --- a/src/rows.c +++ b/src/rows.c @@ -104,6 +104,16 @@ bool rill_rows_copy(const struct rill_rows *rows, struct rill_rows *out) return true; } +bool rill_rows_append(struct rill_rows *rows, struct rill_rows *other) +{ + if (!rill_rows_reserve(rows, rows->len + other->len)) return false; + memcpy(rows->data + rows->len, other->data, other->len * sizeof(other->data[0])); + rows->len += other->len; + + return true; +} + + void rill_rows_print(const struct rill_rows *rows) { const rill_val_t nil = -1ULL; diff --git 
a/test/store_test.c b/test/store_test.c index 11d72bb..317d03f 100644 --- a/test/store_test.c +++ b/test/store_test.c @@ -162,6 +162,84 @@ bool test_it(void) } +// ----------------------------------------------------------------------------- +// merge +// ----------------------------------------------------------------------------- + +static void check_merge(struct rill_rows a, struct rill_rows b) +{ + struct rill_rows expected = {0}; + rill_rows_append(&expected, &a); + rill_rows_append(&expected, &b); + rill_rows_compact(&expected); + + struct rill_store *to_merge[] = { + make_store("test.store.merge.a", &a), + make_store("test.store.merge.b", &b) + }; + + const char *file = "test.store.merge.result"; + unlink(file); + if (!rill_store_merge(file, 0, 0, to_merge, 2)) rill_abort(); + struct rill_store *store = rill_store_open(file); + assert(store); + + + for (size_t col = 0; col < rill_cols; ++col) { + struct rill_store_it *it = rill_store_begin(store, col); + + struct rill_row row = {0}; + for (size_t i = 0; i < expected.len; ++i) { + assert(rill_store_it_next(it, &row)); + assert(!rill_row_cmp(&expected.data[i], &row)); + } + + assert(rill_store_it_next(it, &row)); + assert(rill_row_nil(&row)); + + rill_store_it_free(it); + + rill_rows_invert(&expected); // setup for next iteration. 
+ } + + rill_store_close(to_merge[0]); + rill_store_close(to_merge[1]); + rill_store_close(store); + rill_rows_free(&a); + rill_rows_free(&b); + rill_rows_free(&expected); +} + +bool test_merge(void) +{ + check_merge( + make_rows(row(1, 10)), + make_rows(row(1, 10))); + check_merge( + make_rows(row(1, 10)), + make_rows(row(2, 20))); + check_merge( + make_rows(row(1, 10), row(1, 20)), + make_rows(row(2, 10), row(2, 20))); + check_merge( + make_rows(row(1, 10), row(1, 20)), + make_rows(row(1, 20), row(2, 10))); + check_merge( + make_rows(row(1, 10)), + make_rows(row(1, 10), row(2, 10), row(2, 20))); + check_merge( + make_rows(row(1, 10), row(2, 10), row(2, 20)), + make_rows(row(1, 10))); + + struct rng rng = rng_make(0); + for (size_t iterations = 0; iterations < 10; ++iterations) + check_merge(make_rng_rows(&rng), make_rng_rows(&rng)); + + return true; + +} + + // ----------------------------------------------------------------------------- // main // ----------------------------------------------------------------------------- @@ -174,6 +252,7 @@ int main(int argc, char **argv) ret = ret && test_query(); ret = ret && test_vals(); ret = ret && test_it(); + ret = ret && test_merge(); return ret ? 0 : 1; } diff --git a/test/test.h b/test/test.h index d93747d..9d4df57 100644 --- a/test/test.h +++ b/test/test.h @@ -50,7 +50,7 @@ enum { rng_range_a = 250, rng_range_b = 100 }; struct rill_rows make_rng_rows(struct rng *rng) { - enum { len = 1000 }; + const size_t len = 1UL << rng_gen_range(rng, 9, 12); struct rill_rows rows = {0}; rill_rows_reserve(&rows, len); @@ -60,7 +60,6 @@ struct rill_rows make_rng_rows(struct rng *rng) rill_rows_push(&rows, a, b); } - rill_rows_compact(&rows); return rows; }