diff --git a/.gitignore b/.gitignore index 7367812..c925fbb 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,6 @@ build-packages/ package/ packages/ cscope.* +lib-tzcode/libtz.a +target/ +output/ diff --git a/GNUmakefile b/GNUmakefile index 1433ce2..e688b82 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -1,17 +1,22 @@ -LIBRARIES= kit +DLLIBRARIES = libkit +MAKE_DEBS = 1 DST.lib+= $(DST.dir)/libkit$(EXT.lib) libsources = $(wildcard $(1)/*.c) $(subst $(OS_class)/,,$(wildcard $(1)/$(OS_class)/*.c)) libobjects = $(foreach SOURCE,$(call libsources,$(1)),$(patsubst $(1)/%.c,$(1)/$(DST.dir)/%$(EXT.obj),$(SOURCE))) -DST.obj = $(foreach PACKAGE,$(ALL_LIBRARIES),$(call libobjects,$(COM.dir)/lib-$(PACKAGE))) +DST.obj = $(foreach PACKAGE,$(filter-out tzcode,$(ALL_LIBRARIES)),$(call libobjects,$(COM.dir)/lib-$(PACKAGE))) +TZCODE.dir = $(COM.dir)/lib-tzcode +DST.obj += $(TZCODE.dir)/asctime.o $(TZCODE.dir)/difftime.o $(TZCODE.dir)/localtime.o $(TZCODE.dir)/strftime.o libheaders= $(wildcard $(1)/*.h) $(wildcard $(1)/$(DST.dir)/*.h) $(wildcard $(1)/$(OS_class)/*.h) -SRC.inc.lib= $(filter-out %-private.h,$(foreach PACKAGE,$(ALL_LIBRARIES),$(call libheaders,$(COM.dir)/lib-$(PACKAGE)))) -DST.inc.lib= $(filter-out %-private.h,$(foreach PACKAGE,$(ALL_LIBRARIES),$(subst /$(OS_class),,$(subst ./lib-$(PACKAGE),$(DST.dir)/include,$(call libheaders,$(COM.dir)/lib-$(PACKAGE)))))) +SRC.inc.lib= $(filter-out %/private.h %-private.h,$(foreach PACKAGE,$(ALL_LIBRARIES),$(call libheaders,$(COM.dir)/lib-$(PACKAGE)))) +DST.inc.lib= $(filter-out %/private.h %-private.h,$(foreach PACKAGE,$(ALL_LIBRARIES),$(subst /$(OS_class),,$(subst ./lib-$(PACKAGE),$(DST.dir)/include,$(call libheaders,$(COM.dir)/lib-$(PACKAGE)))))) SRC.inc= $(SRC.inc.lib) DST.inc= $(DST.inc.lib) +DEB_BUILD_NUMBER= $(shell echo -$${BUILD_NUMBER-dev}) + include dependencies.mak ifdef MAKE_DEBUG @@ -21,9 +26,28 @@ endif include: @$(MAKE_PERL_ECHO_BOLD) "make[$(MAKELEVEL)]: updating: $(DST.dir)/include" -$(MAKE_RUN)$(MKDIR) $(call 
OSPATH,$(DST.dir)/include) $(TO_NUL) $(FAKE_PASS) - $(MAKE_RUN) $(COPYFILES2DIR) $(SRC.inc.lib) $(DST.dir)/include + $(COPYFILES2DIR) $(SRC.inc.lib) $(DST.dir)/include -ifeq ($(filter remote,$(MAKECMDGOALS)),) clean:: $(DEP.dirs) + rm -rf target + rm -rf output + realclean:: $(DEP.dirs) -endif + rm -rf target + rm -rf output + +package: + if [ ! -e build-linux-64-release/libkit.a ]; then $(MAKE) release; fi + mkdir -p target/include + cp -rp build-linux-64-release/include/*.h target/include + cp build-linux-64-release/libkit.a target + cp build-linux-64-release/libkit.so.$(DEB.ver) target + cp -p bin/kit-alloc-analyze target + ln -sf libkit.so.$(DEB.ver) target/libkit.so.$(DEB.ver.maj) + ln -sf libkit.so.$(DEB.ver.maj) target/libkit.so + cp -rp debian target + @err=$$(dpkg-parsechangelog -l target/debian/changelog 2>&1 >/dev/null); [ -z "$$err" ] || { echo "$$err"; false; } +# Rewrite the packaged changelog in place (-i) so that every version carries the build number suffix + perl -i -nale '$$_ =~ s/libkit \(([0-9.-]+)\) (\w+); urgency=(\w+)/libkit ($$1$(DEB_BUILD_NUMBER)) $$2; urgency=$$3/g;print' target/debian/changelog + cd target && dpkg-buildpackage -b -rfakeroot -uc -us + mkdir -p output + mv libkit_*_amd64.deb libkit-dev_*_amd64.deb libkit_*_amd64.changes libkit_*_amd64.buildinfo output diff --git a/README.md b/README.md index 39de189..eeb8eaf 100644 --- a/README.md +++ b/README.md @@ -4,5 +4,9 @@ libkit This library contains useful standalone routines, supplying a basic "kit" for developing C code. +The library contains tensor mathematical operations in kit-tensor.c +that implement operations similar to those in PyTorch. For documentation +see: pytorch.org. + The build depends on the mak, libsxe and libjemalloc submodules which must be checked out alongside libkit. 
diff --git a/bin/kit-alloc-analyze b/bin/kit-alloc-analyze index b4aee63..4abc62f 100755 --- a/bin/kit-alloc-analyze +++ b/bin/kit-alloc-analyze @@ -60,7 +60,7 @@ for (my $line_number = 1; my $line = <>; $line_number++) { } } - if ($line =~ m{- ([^: ]+): (?:\d+): 0x([\da-f]+) = kit_((?:m|c|re)alloc|strdup)\((?:0x([\da-f]+)|\(nil\))?(?:, |\[)?(\d+)}) { + if ($line =~ m{- ([^: ]+): (?:\d+): 0x([\da-f]+) = kit_((?:m|c|re)alloc|memalign|strn?dup)\((?:0x([\da-f]+)|\(nil\))?(?:, |\[)?(\d+)}) { # This is an alloc, realloc, or strdup, lines look like this: # 20210407 222641.037 T 26795 ------ 6 - conf.c: 119: 0x7f8b69c17180 = kit_realloc((nil), 88) # 20210407 222641.037 T 26795 ------ 6 - conf.c: 670: 0x7f8b69c17180 = kit_realloc(0x7f8b69c17180, 88) @@ -120,4 +120,4 @@ foreach my $address (sort {$mem{$a}{line_number} cmp $mem{$b}{line_number}} keys print " " . ($opts{'n'} ? "$mem{$address}{line_number}:" : "") . " $mem{$address}{line}"; } -print "kit_alloc debug logging may not be enabled\n" if not %filestats; \ No newline at end of file +print "kit_alloc debug logging may not be enabled\n" if not %filestats; diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..f49bd20 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,159 @@ +libkit (2.11) stable; urgency=low + * DPT-3563 - SonarQube 'bug's reported in 2.10 release + * DPT-3592 - Need sxe-cdb.h module in libkit + + -- Jim Belton Thu, 31 Jul 2025 15:35:42 +0000 + +libkit (2.10) stable; urgency=low + * DPT-3518 - Handle invalid timezones passed into kit_timezone_time_to_localtime + + -- Sean Mooney Mon, 14 Jul 2025 05:39:18 +0000 + +libkit (2.9) stable; urgency=low + * DPT-3064 - Support concatenated arrays + + -- Jim Belton Wed, 18 Jun 2025 14:54:42 +0000 + +libkit (2.8) stable; urgency=low + * DPT-3415 - Handle cached timezones with no tzdata + + -- Brian Somers Tue, 6 May 2025 21:11:42 +0000 + +libkit (2.7) stable; urgency=low + * DPT-3261 - sxe_jitson_stack_push_string_reversed + * DPT-1364 
- Added new guid, deviceid and strto utility functions + * DPT-3307 - Support the SXE_JITSON_TYPE_REVERSED flag + + -- Jim Belton Mon, 24 Mar 2025 15:21:42 +0000 + +libkit (2.6) stable; urgency=low + * DPT-3232 - Support Intersect operator in WHERE + + -- Jim Belton Thu, 20 Feb 2025 14:50:42 +0000 + +libkit (2.5) stable; urgency=low + * DPT-3141 - sxe-jitson: yet another unicode bug + + -- Jim Belton Wed, 8 Jan 2025 12:22:42 +0000 + +libkit (2.4) stable; urgency=low + * DPT-3052 - Further fix to unicode parsing + * DPT-3072 - Reduce lock contentions during sxe-jitson object/array indexing + + -- Jim Belton Tue, 03 Dec 2024 01:03:42 +0000 + +libkit (2.3) stable; urgency=low + * DPT-2866 - Make object indexing thread safe + * DPT-2771 - Deprecate support for Debian-10 + * DPT-3052 - Don't overallocate UTF-8 strings + + -- Jim Belton Tue, 12 Nov 2024 12:21:42 +0000 + +libkit (2.2) stable; urgency=low + * DPT-2828 - Add a build-number suffix to package names + * DPT-2930 - Optimize tensor calculations + * DPT-2866 - Try to make indexing thread safe + + -- Brian Somers Sat, 7 Sep 2024 00:29:42 +0000 + +libkit (2.1) stable; urgency=low + * DPT-2823 - SXE_TIME values are not Y2038 safe + * DPT-2833 - Review security hotspots in sonarqube for libkit + * DPT-2909 - Dereference operator arguments + * THR-4904 - Remove memory allocations from kit_tensor_matmul + + -- Jim Belton Wed, 18 Jul 2024 17:16:42 +0000 + +libkit (2.00) stable; urgency=low + * DPT-2718 - Combine nonevent-driven libsxe modules into libkit + * DPT-2719 - Create a libkit-2.0 debian package build + * DPT-2739 - Integrate libkit-2.0 dpkg into opendnscache + * DPT-2745 - Core Dump in sxe_jitson_is_reference + * DPT-2787 - libkit should build on ubuntu 22.04 and debian 12 + * DPT-2793 - Build .deb packages that aptly 1.3 supports + * DPT-2742 - Changes to allow external casts to use jitson string functions + * DPT-2817 - Silence libkit sonarQube noise + + -- Jim Belton Wed, 03 Jul 2024 01:38:17 +0000 + +libkit 
(1.20) stable; urgency=low + * DPT-2716 - Clean up code smells + + -- Brian Somers Thu, 14 Sep 2023 22:10:42 +0000 + +libkit (1.19) stable; urgency=low + * DPT-2479 - Add tensor math operations code into libkit + + -- Dejan Donin Wed, 8 May 2024 13:40:42 +0000 + +libkit (1.17) stable; urgency=low + * Added iana.org/time-zones tzcode (version 2023c) + * DPT-2183 - Build tzcode and a kit-timezone wrapper + * DPT-2501 - Add a graphite-thread-started callback + + -- Jim Belton Mon, 19 Feb 2024 18:36:42 +0000 + +libkit (1.16) stable; urgency=low + * DPT-2266 - New MOCKERROR macro + + -- Sean Mooney Tue, 19 Dec 2023 22:23:00 +0000 + +libkit (1.14) stable; urgency=low + * DPT-2262 - Allow graphitelog frequency to be changed quickly + * DPT-1574 - Graphite log write timeout set to log_timeout_ms + + -- Jim Belton Tue, 17 Oct 2023 00:14:01 +0000 + +libkit (1.12) stable; urgency=low + * DPT-2245 - Report graphite statistics relative to wall time + + -- Brian Somers Thu, 14 Sep 2023 22:10:42 +0000 + +libkit (1.11) stable; urgency=low + * DPT-2096 - Make malloc guard bytes work with libssl3 + + -- Jim Belton Tue, 8 Aug 2023 10:35:42 +0000 + +libkit (1.8) stable; urgency=low + * DPT-2004 - Correct an encoding error + * DPT-2052 - Introduce ASAN - the address sanitizer + * DPT-2046 - kit_memory should support guard words + * DPT-2036 - Add additional checks in opendnscache code + * DPT-2046 - Disable memory guard words in the debug build + + -- Brian Somers Fri, 23 Jun 2023 05:08:42 +0000 + +libkit (1.7) stable; urgency=low + * DPT-1946 - Make kit_stub_resolve() more robust + + -- Brian Somers Tue, 9 May 2023 23:37:42 +0000 + +libkit (1.6) stable; urgency=low + * DPT-1699 - Add kit_bool_from_str() + + -- Brian Somers Mon, 24 Apr 2023 17:21:42 +0000 + +libkit (1.5) stable; urgency=low + * DPT-1854 - Remove ZEUS artifacts + + -- Brian Somers Tue, 4 Apr 2023 19:06:42 +0000 + +libkit (1.4) stable; urgency=low + * DPT-1769 - Add support for Ubuntu-22.04 + + -- Prashanth Suvarna Tue, 
28 Mar 2023 03:17:42 +0000 + +libkit (1.3) stable; urgency=low + * DPT-1812 - ok to call kit_memory_allocations before kit_counters_initialize + + -- Jim Belton Mon, 20 Mar 2023 16:20:42 +0000 + +libkit (1.2) stable; urgency=low + * DPT-1632 - SCR0: Handle hardcoded references to opendns.com + + -- Jake Zhang Wed, 1 Mar 2023 22:14:42 +0000 + +libkit (1.1) stable; urgency=low + * DPT-1771 - Fix policy server memory instability in kit-counters + + -- Jim Belton Fri, 24 Feb 2023 19:39:42 +0000 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..f599e28 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +10 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..7068f44 --- /dev/null +++ b/debian/control @@ -0,0 +1,18 @@ +Source: libkit +Maintainer: cie-eng.dns-platform@cisco.com +Section: devel +Priority: optional +Build-Depends: debhelper (>= 10) +Standards-Version: 4.7.0.0 + +Package: libkit-dev +Section: libdevel +Architecture: any +Depends: libkit (= ${binary:Version}), ${misc:Depends} +Description: Cisco C toolkit library headers + +Package: libkit +Section: libs +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Description: Cisco C toolkit shared library diff --git a/debian/libkit-dev.install b/debian/libkit-dev.install new file mode 100644 index 0000000..7d81eb1 --- /dev/null +++ b/debian/libkit-dev.install @@ -0,0 +1,4 @@ +libkit.a usr/lib +include/*.h usr/include/kit +libkit.so usr/lib/x86_64-linux-gnu +kit-alloc-analyze usr/bin diff --git a/debian/libkit.install b/debian/libkit.install new file mode 100644 index 0000000..7f8251c --- /dev/null +++ b/debian/libkit.install @@ -0,0 +1 @@ +libkit.so.* usr/lib/x86_64-linux-gnu diff --git a/debian/rules b/debian/rules new file mode 100644 index 0000000..ad5c05b --- /dev/null +++ b/debian/rules @@ -0,0 +1,20 @@ +#!/usr/bin/make -f + +export DEB_BUILD_MAINT_OPTIONS = hardening=+all +export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed + +%: + dh $@ + 
+override_dh_auto_test: + dh_auto_test --no-parallel + +# Required until aptly is upgraded to 1.5 or above +override_dh_builddeb: + dh_builddeb -- -Zxz + +override_dh_installchangelogs: + dh_installchangelogs -XChangeLog + +override_dh_strip: + @sync diff --git a/dependencies.mak b/dependencies.mak index 34b0b03..714486d 100644 --- a/dependencies.mak +++ b/dependencies.mak @@ -5,14 +5,24 @@ TOP.dir = $(COM.dir)/.. # This is used by both the package GNUmakefiles and the top level GNUmakefile # remove_to = $(if $(filter $(1),$(2)),$(call remove_to,$(1),$(wordlist 2,$(words $(2)),$(2))),$(2)) -ALL_LIBRARIES = kit +ALL_LIBRARIES = sxe-jitson kit sxe-dict sxe-cdb sxe-hash sxe-pool sxe-thread sxe-mmap sxe-list kit-alloc sxe-md5 sxe-util \ + sxe-log kit-mock sxe-test tzcode LIB_DEPENDENCIES = $(call remove_to,$(LIBRARIES),$(ALL_LIBRARIES)) +MAK_VERSION ?= 2 # By default, use the libtap package; set this to 1 to use libtap built in to libsxe CONVENTION_OPTOUT_LIST = lib-kit/kit-queue.h -MAKE_ALLOW_LOWERCASE_TYPEDEF = 1 +MAKE_ALLOW_LOWERCASE_TYPEDEF = 1 +MAKE_ALLOW_LOWERCASE_HASH_DEFINE = 1 +MAKE_ALLOW_SPACE_AFTER_ASTERISK = 1 + +COVERAGE_OPTOUT_LIST = lib-tzcode include $(TOP.dir)/mak/mak-common.mak -ifneq ($(MAK_VERSION),1) # Versions of mak > 1 use an external tap libary - LINK_FLAGS += -ltap +CFLAGS += -D_GNU_SOURCE=1 -D_FORTIFY_SOURCE=2 -pthread +LINK_DLLS += -lxxhash -lresolv -lrt -rdynamic -pthread -ldl +LINK_FLAGS += -pie -z noexecstack + +ifeq ($(OS_name), linux) +LINK_DLLS += -lbsd -ljemalloc endif diff --git a/dev-setup.sh b/dev-setup.sh new file mode 100644 index 0000000..43f95ca --- /dev/null +++ b/dev-setup.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +# This script should be run with sudo to add the dev packages required to build libkit + +set -e + +case $(uname -o) in +FreeBSD) +# jemalloc is the default memory allocator in FreeBSD +# PATH=/sbin:$PATH + +# for pkg in xxhash; do +# if ! 
pkg info -q $pkg ; then +# pkg install -y $pkg +# fi +# done + ;; + +GNU/Linux) + export DEBIAN_FRONTEND=noninteractive + apt-get -y install debhelper libbsd-dev libjemalloc-dev libxxhash-dev + ;; + +*) + echo "I don't know what I'm doing" >&2 + exit 1 + ;; +esac + +exit 0 diff --git a/lib-kit-alloc/GNUmakefile b/lib-kit-alloc/GNUmakefile new file mode 100644 index 0000000..9657238 --- /dev/null +++ b/lib-kit-alloc/GNUmakefile @@ -0,0 +1,4 @@ +LIBRARIES = kit-alloc + +include ../dependencies.mak + diff --git a/lib-kit/kit-alloc-private.h b/lib-kit-alloc/kit-alloc-private.h similarity index 96% rename from lib-kit/kit-alloc-private.h rename to lib-kit-alloc/kit-alloc-private.h index bfc4bdb..7eb06cd 100644 --- a/lib-kit/kit-alloc-private.h +++ b/lib-kit-alloc/kit-alloc-private.h @@ -26,6 +26,6 @@ #include "kit-alloc.h" -extern void kit_memory_init_internal(bool hard); +void kit_memory_initialize_counters(void); #endif diff --git a/lib-kit-alloc/kit-alloc.c b/lib-kit-alloc/kit-alloc.c new file mode 100644 index 0000000..8a4300d --- /dev/null +++ b/lib-kit-alloc/kit-alloc.c @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include +#include +#include + +#ifdef __linux__ +#include // CONVENTION EXCLUSION: To support glibc memory growth tracking +#endif + +#include "kit-alloc-private.h" +#include "kit-mockfail.h" +#include "sxe-util.h" + +#ifdef __GLIBC__ +#pragma GCC diagnostic ignored "-Waggregate-return" // To allow use of glibc mallinfo +#endif + +/*- + * As of December 2012, the stock malloc implementation in linux is not + * clever enough to handle the allocations requirements of opendnscache. + * + * This can be demonstrated by monitoring the following data: + * + * VSZ of the process + * ps -axwwovsz,comm | sed -n 's, opednscache,,p' + * + * # Inflight allocations & total + * my %stats = map { m{^"(\S+)\s(.+)"$} } `dig +short txt stats.opendns.com \@127.0.0.1`; + * my $inflight = $stats{'memory.calloc'} + + * $stats{'memory.malloc'} - + * $stats{'memory.free'}; + * my $total = $stats{'memory.bytes'}; + * + * On a busy resolver running with -c 800000000, you will see $inflight + * settle at around 8,044,000 allocations and 1565 MB total. It will + * increase as pref files are loaded and then settle back to these numbers + * afterwards. + * + * When it is settled, using jemalloc, the process VSZ starts at about + * 2000 MB and increases to around 2450 MB after about 10 minutes. + * It stabilizes at this value. + * + * Using the stock system malloc, the process VSZ starts at about + * 2200 MB and increases to around 3219 MB in the first 10 minutes. + * After that, on a machine with 3GB of RAM, it starts to swap and + * takes so long to load a prefs file that there's another one there + * before it's done. + */ + +#define KIT_ALLOC_LOG(...) 
do { if (kit_memory_diagnostics) SXEL5(__VA_ARGS__); } while (0) + +struct kit_memory_guard { // If overflow detection is enabled, allocate one of these before the memory, and a stamp after + size_t size; // Size of memory allocated (rounded up) + void *stamp; // A stamp value which is the address of the allocated memory +}; + +static_assert(sizeof(struct kit_memory_guard) == 16, "Unexpected size for kit_memory_guard; must be a power of 2"); + +#if SXE_DEBUG // In a debug build of libkit, check for memory overflows by default + static unsigned kit_memory_flags = KIT_MEMORY_CHECK_OVERFLOWS; +#else // In the release build of libkit, overflow checking is disabled by default + static unsigned kit_memory_flags = 0; +#endif + +struct kit_memory_counters kit_memory_counters; // Global counter ids +size_t kit_memory_allocated_max = 0; // Tracks the high watermark ever allocated with jemalloc +int kit_memory_diagnostics = 0; // Set >0 to turn on kit memory log messages +__thread uint64_t kit_thread_allocated_last = 0; // Used by kit_memory_probe diagnostic macro +static bool kit_memory_initialized = false; // Set once initialized +static bool kit_memory_using_counters = false; // Set by kit_counters_initialize + +/* Global counters used until kit-counters are initialized. Not particularly thread safe. + */ +static unsigned long long count_bytes = 0; +static unsigned long long count_calloc = 0; +static unsigned long long count_fail = 0; +static unsigned long long count_free = 0; +static unsigned long long count_malloc = 0; +static unsigned long long count_realloc = 0; + +#ifdef __linux__ +static int proc_statm_fd = -1; // Open file descriptor on /proc/<pid>/statm; must be opened before calling chroot +#endif + +static unsigned long long +counter_bytes_combine_handler(int threadnum) +{ + return threadnum <= 0 ? 
kit_allocated_bytes() : 0; +} + +/** + * Set flags that affect the behaviour of the kit memory management interface + * + * @param flags KIT_MEMORY_ABORT_ON_ENOMEM to abort on memory allocation failure (returns ENOMEM if unset) + * KIT_MEMORY_CHECK_OVERFLOWS to check for memory overflows on realloc/free and abort if found + * + * @note The flags must not be changed once this module has been initialized + */ +void +kit_memory_set_flags(unsigned flags) +{ + SXEA1(!kit_memory_initialized, "%s must be called before kit_memory_initialize or kit_counters_initialize", __func__); + SXEA1(count_calloc + count_malloc == 0, "%s must be called before any memory is allocated", __func__); + kit_memory_flags = flags; +} + +/** + * Set the behaviour of the kit memory management interface if an enomem error occurs + * + * @param assert_on_enomem true to abort on memory allocation failure, false (default) to return the error + */ +void +kit_memory_set_assert_on_enomem(bool assert_on_enomem) +{ + if (assert_on_enomem) + kit_memory_flags |= KIT_MEMORY_ABORT_ON_ENOMEM; + else + kit_memory_flags &= ~KIT_MEMORY_ABORT_ON_ENOMEM; +} + +/** + * Initialize the kit memory management interface; this should be done once, normally on an application wide basis + * + * @param flags KIT_MEMORY_ABORT_ON_ENOMEM to abort on memory allocation failure (returns ENOMEM if unset) + * KIT_MEMORY_CHECK_OVERFLOWS to check for memory overflows on realloc/free and abort if found + * + * @note This function should only be used if accurate counters are not required; preferably use kit_counters_initialize + */ +void +kit_memory_initialize(unsigned flags) +{ + SXEA1(!kit_memory_initialized, "Kit memory is already initialized"); + + if (flags != ~0U) { // If not being called by kit_counters_initialize + SXEA1(count_calloc + count_malloc == 0, "%s must be called before any memory is allocated", __func__); + kit_memory_flags = flags; + } + +#ifdef __linux__ + char proc_statm_path[PATH_MAX]; + int pid; + + pid = getpid(); 
+ snprintf(proc_statm_path, sizeof(proc_statm_path), "/proc/%d/statm", pid); + proc_statm_fd = open(proc_statm_path, O_RDONLY); +#endif + +#if SXE_DEBUG + const char *KIT_ALLOC_DIAGNOSTICS = getenv("KIT_ALLOC_DIAGNOSTICS"); + + if (KIT_ALLOC_DIAGNOSTICS && KIT_ALLOC_DIAGNOSTICS[0] && KIT_ALLOC_DIAGNOSTICS[0] != '0') + kit_memory_diagnostics = 1; +#endif + + kit_memory_initialized = true; +} + +/* Called only from kit_counters_initialize + */ +void +kit_memory_initialize_counters(void) +{ + SXEA1(kit_memory_initialized, "Kit memory is not already initialized"); + + kit_memory_counters.bytes = kit_counter_reg_with_combine_handler("memory.bytes", counter_bytes_combine_handler); + kit_memory_counters.calloc = kit_counter_reg("memory.calloc"); + kit_memory_counters.fail = kit_counter_reg("memory.fail"); + kit_memory_counters.free = kit_counter_reg("memory.free"); + kit_memory_counters.malloc = kit_counter_reg("memory.malloc"); + kit_memory_counters.realloc = kit_counter_reg("memory.realloc"); + + /* Switch over to using the counters + */ + kit_counter_add(kit_memory_counters.bytes, count_bytes); + kit_counter_add(kit_memory_counters.calloc, count_calloc); + kit_counter_add(kit_memory_counters.fail, count_fail); + kit_counter_add(kit_memory_counters.free, count_free); + kit_counter_add(kit_memory_counters.malloc, count_malloc); + kit_counter_add(kit_memory_counters.realloc, count_realloc); + + kit_memory_using_counters = true; +} + +/** + * Determine number of outstanding memory allocations + * + * @return The number of memory allocations less the number of frees + * + * @note This function can be called before initializing kit_counters or kit_memory (e.g. 
at the start of a test program) + */ +uint64_t +kit_memory_allocations(void) +{ + if (!kit_memory_using_counters) + return count_calloc + count_malloc - count_free; + + return kit_counter_get(KIT_COUNTER_MEMORY_CALLOC) + kit_counter_get(KIT_COUNTER_MEMORY_MALLOC) + - kit_counter_get(KIT_COUNTER_MEMORY_FREE); +} + +/** + * Determine the size of memory with guard words + * + * @param size The size allocated + * @param alignment The alignment used or 0 if not allocated with memalign + * + * @return The amount of memory actually allocated; size if !(flags & KIT_MEMORY_CHECK_OVERFLOWS), otherwise > size + */ +size_t +kit_memory_size(size_t size, size_t alignment) +{ + if (kit_memory_flags & KIT_MEMORY_CHECK_OVERFLOWS) { + alignment = alignment >= sizeof(size_t) ? alignment : sizeof(size_t); + size_t offset = sizeof(struct kit_memory_guard) >= alignment ? sizeof(struct kit_memory_guard) : alignment; + size = offset + size + sizeof(void *); + } + + return size; +} + +static void +guard_initialize(char **result_in_out, size_t size, size_t alignment) +{ + if (*result_in_out && kit_memory_flags & KIT_MEMORY_CHECK_OVERFLOWS) { + ((struct kit_memory_guard *)*result_in_out)->size = size; + ((struct kit_memory_guard *)*result_in_out)->stamp = *result_in_out; + *(void **)(*result_in_out + size - sizeof(void *)) = *result_in_out; + + if (alignment > sizeof(struct kit_memory_guard)) { // Need a guard right before the address returned + char *near_guard = *result_in_out + alignment - sizeof(struct kit_memory_guard); + memcpy(near_guard, *result_in_out, sizeof(struct kit_memory_guard)); + *result_in_out += alignment; + } + else + *result_in_out += sizeof(struct kit_memory_guard); + } +} + +/* Internal memory allocator that supports jemalloc mallocx flags + */ +static void * +memory_alloc(size_t size, size_t alignment, int flags) +{ + char *result; + + size = kit_memory_size(size, alignment); + + if ((result = MOCKERROR(kit_malloc_diag, NULL, ENOMEM, mallocx(size, flags)))) + 
guard_initialize(&result, size, alignment); + else + SXEA1(!(kit_memory_flags & KIT_MEMORY_ABORT_ON_ENOMEM) , ": failed to allocate %zu bytes", size); + + return result; +} + +static void +count_malloc_increment(bool failed) +{ + if (kit_memory_using_counters) { + kit_counter_incr(KIT_COUNTER_MEMORY_MALLOC); + + if (failed) + kit_counter_incr(KIT_COUNTER_MEMORY_FAIL); + } else { + count_malloc++; + + if (failed) + count_fail++; + } +} + +__attribute__((malloc)) void * +kit_malloc_diag(size_t size, const char *file, int line) +{ + char *result = memory_alloc(size, 0, 0); + count_malloc_increment(result == NULL); + KIT_ALLOC_LOG("%s: %d: %p = kit_malloc(%zu)", file, line, result, size); + return result; +} + +__attribute__((malloc)) void * +kit_memalign_diag(size_t alignment, size_t size, const char *file, int line) +{ + int lg_align = sxe_uint64_log2(alignment); + SXEA1(1ULL << lg_align == alignment, ": Alignment %zu is not a power of 2", alignment); + + void *result = memory_alloc(size, alignment, MALLOCX_LG_ALIGN(lg_align)); + count_malloc_increment(result == NULL); + KIT_ALLOC_LOG("%s: %d: %p = kit_memalign(%zu,%zu)", file, line, result, alignment, size); + return result; +} + +/** + * Allocate memory for num objects of size bytes each, requesting zeroed memory (MALLOCX_ZERO) + * + * @param num Number of objects + * @param size Size of each object in bytes + * @param file/line Caller's location, used only for diagnostic logging + * + * @return The memory, or NULL if num * size overflows a size_t (errno is set to ENOMEM) or the allocation fails + * + * @note If KIT_MEMORY_ABORT_ON_ENOMEM is set, aborts rather than returning NULL + */ +__attribute__((malloc)) void * +kit_calloc_diag(size_t num, size_t size, const char *file, int line) +{ + size_t total_size = num * size; + void *result = NULL; + + if (num && total_size / num != size) { // The product wrapped: dividing it back by num doesn't recover size + SXEA1(!(kit_memory_flags & KIT_MEMORY_ABORT_ON_ENOMEM) || result, ": %zu*%zu exceeds the maximum size %zu", num, size, (size_t)~0UL); + errno = ENOMEM; + } + else + result = memory_alloc(total_size, 0, MALLOCX_ZERO); + + SXEA1(!(kit_memory_flags & KIT_MEMORY_ABORT_ON_ENOMEM) || result, ": failed to allocate %zu %zu byte objects", num, size); + + if (kit_memory_using_counters) { + kit_counter_incr(KIT_COUNTER_MEMORY_CALLOC); + + if (result == NULL) + kit_counter_incr(KIT_COUNTER_MEMORY_FAIL); + } else { + count_calloc++; + + if (result == NULL) + count_fail++; + } + 
+ KIT_ALLOC_LOG("%s: %d: %p = kit_calloc(%zu, %zu)", file, line, result, num, size); + return result; +} + +static void +count_free_increment(void) +{ + if (kit_memory_using_counters) + kit_counter_incr(KIT_COUNTER_MEMORY_FREE); + else + count_free++; +} + +#define KIT_MEMORY_FREE ((void *)~0UL) // Internal size_out value to invalidate the fore guard before a free + +/** + * Check kit allocated memory for overflows + * + * @param ptr Pointer returned by a kit allocation + * @param size_out NULL, a pointer to store the size of the allocation or 0 if not bounds checking, or ~0UL (internal use) + * + * @return ptr if not checking overflows or a pointer to the guard structure + * + * @note Aborts if the guard stamps before or after the memory have been overwritten + */ +void * +kit_memory_check(void *ptr, size_t *size_out) +{ + if (!(kit_memory_flags & KIT_MEMORY_CHECK_OVERFLOWS)) { + if (size_out && size_out != KIT_MEMORY_FREE) + *size_out = 0; + + return ptr; + } + + struct kit_memory_guard *guard = (struct kit_memory_guard *)ptr - 1; + + if (guard->stamp != guard) { // This can happen if alignment > sizeof(kit_memory_guard) + SXEA1(((uintptr_t)guard->stamp & (2 * sizeof(struct kit_memory_guard) - 1)) == 0, + "Unaligned fore guard stamp at %p is not a multiple of %zu", guard->stamp, 2 * sizeof(struct kit_memory_guard)); + SXEA1(guard->stamp < (void *)guard, "Fore guard stamp %p must be less than unaligned guard location %p", guard->stamp, + guard); + guard = guard->stamp; // There should be another guard at the beginning of the malloced area + } + + SXEA1(guard->stamp == guard, "Fore guard stamp at %p corrupted", &guard->stamp); + SXEA1(*(void **)((char *)guard + guard->size - sizeof(void *)) == guard, "Rear guard stamp at %p corrupted", + (char *)guard + guard->size - sizeof(void *)); + + if (size_out == KIT_MEMORY_FREE) + guard->stamp = KIT_MEMORY_FREE; // Invalidate the fore guard (prior to freeing the memory) + else if (size_out) + *size_out = guard->size - 
((uintptr_t)ptr - (uintptr_t)guard) - sizeof(void *); + + return guard; +} + +void +kit_free_diag(void *ptr, const char *file, int line) +{ + if (ptr) { + KIT_ALLOC_LOG("%s: %d: kit_free(%p)", file, line, ptr); + SXEA1(!(((long)ptr) & (sizeof(void *) - 1)), "ungranular free(%p)", ptr); + count_free_increment(); + ptr = kit_memory_check(ptr, KIT_MEMORY_FREE); // Get back the actual memory allocated if overflow checking + dallocx(ptr, 0); + } +#if SXE_DEBUG + else if (kit_memory_diagnostics > 1) + SXEL6("%s: %d: kit_free((nil))", file, line); +#endif +} + +/*- + * Special case for realloc() since it can behave like malloc() or free() ! + */ +void * +kit_realloc_diag(void *ptr, size_t size, const char *file, int line) +{ + char *result = NULL; + void *optr = ptr; + size_t osize = size; + + if (!ptr) + result = memory_alloc(size, 0, 0); + else if (size) { + ptr = kit_memory_check(ptr, NULL); // Get back the actual memory allocated (even if bounds checking) + size_t alignment = (char *)optr - (char *)ptr; + size = kit_memory_size(size, alignment); // Keep alignment padding consistent + result = MOCKERROR(kit_realloc_diag, NULL, ENOMEM, rallocx(ptr, size, 0)); + + guard_initialize(&result, size, alignment); + } + else { + ptr = kit_memory_check(ptr, KIT_MEMORY_FREE); // Get back the actual memory allocated if overflow checking + dallocx(ptr, 0); + } + + if (result == NULL && (size || ptr == NULL)) { + SXEA1(!(kit_memory_flags & KIT_MEMORY_ABORT_ON_ENOMEM), ": failed to reallocate object to %zu bytes", size); + + if (kit_memory_using_counters) + kit_counter_incr(KIT_COUNTER_MEMORY_FAIL); + else + count_fail++; + } + + if (ptr == NULL && result != NULL) + count_malloc_increment(false); // Non-failure + else if (ptr && size == 0 && result == NULL) + count_free_increment(); + else if (kit_memory_using_counters) + kit_counter_incr(KIT_COUNTER_MEMORY_REALLOC); + else + count_realloc++; + + KIT_ALLOC_LOG("%s: %d: %p = kit_realloc(%p, %zu)", file, line, result, optr, osize); + 
return result; +} + +/** + * Reallocate memory to a new (presumably smaller) size, keeping the original memory if the reallocation fails + * + * @param ptr Memory to reallocate; if NULL, size must be 0 + * @param size New size in bytes; 0 frees the memory + * @param file/line Caller's location, passed through to kit_realloc_diag + * + * @return The reallocated memory, ptr itself if the reallocation failed, or NULL if ptr was NULL or size was 0 + */ +void * +kit_reduce_diag(void *ptr, size_t size, const char *file, int line) +{ + if (ptr == NULL) { + SXEA1(!size, "Cannot kit_reduce() NULL to size %zu", size); + return NULL; + } + + void *result = kit_realloc_diag(ptr, size, file, line); + + if (!result && size) { // Realloc failed (not a free) + if (kit_memory_using_counters) // Undo the realloc count taken by kit_realloc_diag; memory was not reallocated + kit_counter_decr(KIT_COUNTER_MEMORY_REALLOC); + else // kit-counters not in use yet, so the static counter was the one incremented + count_realloc--; + + return ptr; + } + + return result; +} + +__attribute__((malloc)) char * +kit_strdup_diag(const char *txt, const char *file, int line) +{ + size_t len = strlen(txt); + void *result = memory_alloc(len + 1, 0, 0); + + if (result) + memcpy(result, txt, len + 1); + + count_malloc_increment(result == NULL); + KIT_ALLOC_LOG("%s: %d: %p = kit_strdup(%p[%zu])", file, line, result, txt, len + 1); + return result; +} + +__attribute__((malloc)) char * +kit_strndup_diag(const char *txt, size_t size, const char *file, int line) +{ + size_t len = strnlen(txt, size); + char *result = memory_alloc(len + 1, 0, 0); + + if (result) { + memcpy(result, txt, len); + result[len] = '\0'; + } + + count_malloc_increment(result == NULL); + KIT_ALLOC_LOG("%s: %d: %p = kit_strndup(%p[%zu])", file, line, result, txt, len + 1); + return result; +} + +size_t +kit_allocated_bytes(void) +{ + static bool kit_mib_init = false; // Set to true once memory mib ids are initialized + static size_t kit_epoch_mib[1]; // Binary mib id of the "epoch" + static size_t kit_epoch_mib_len = 1; // Number of elements in the mib id + static size_t kit_allocated_mib[2]; // Binary mib id of "stats.allocated" + static size_t kit_allocated_mib_len = 2; // Number of elements in the mib id + size_t len, alloc; + uint64_t epoch; + + if (!kit_mib_init) { + // To optimize collection of je_malloc statistics, get binary MIB ids + // + SXEA1(!mallctlnametomib("epoch", kit_epoch_mib, &kit_epoch_mib_len) + && !mallctlnametomib("stats.allocated", kit_allocated_mib, &kit_allocated_mib_len), + "Failed to generated binary mib id for 
je_malloc's epoch or stats.allocated"); + kit_mib_init = true; + } + + epoch = 1; + len = sizeof(epoch); + mallctlbymib(kit_epoch_mib, kit_epoch_mib_len, &epoch, &len, &epoch, len); + + len = sizeof(alloc); + mallctlbymib(kit_allocated_mib, kit_allocated_mib_len, &alloc, &len, NULL, 0); + + if (alloc > kit_memory_allocated_max) + kit_memory_allocated_max = alloc; + + return alloc; +} + +uint64_t +kit_thread_allocated_bytes(void) +{ + static __thread uint64_t *allocatedp, *deallocatedp; + size_t len; + + if (!allocatedp) { + len = sizeof(allocatedp); + mallctl("thread.allocatedp", &allocatedp, &len, NULL, 0); + SXEA1(allocatedp, "Couldn't obtain thread.allocatedp, is jemalloc built with --enable-stats?"); + } + + if (!deallocatedp) { + len = sizeof(deallocatedp); + mallctl("thread.deallocatedp", &deallocatedp, &len, NULL, 0); + SXEA1(deallocatedp, "Couldn't obtain thread.deallocatedp, is jemalloc built with --enable-stats?"); + } + + return *allocatedp - *deallocatedp; +} + +bool +kit_memory_log_growth(__printflike(1, 2) int (*printer)(const char *format, ...)) +{ + static size_t jemalloc_allocated_max = 0; + size_t jemalloc_allocated_cur = kit_allocated_bytes(); + bool growth = false; + + if (jemalloc_allocated_cur > jemalloc_allocated_max) { + (*printer)("Maximum memory allocated via jemalloc %zu (previous maximum %zu)\n", jemalloc_allocated_cur, + jemalloc_allocated_max); + jemalloc_allocated_max = jemalloc_allocated_cur; + growth = true; + } + +#ifdef __GLIBC__ + static size_t glibc_allocated_max = 0; // High watermark of glibc bytes allocated +#if __GLIBC_MINOR__ >= 33 + struct mallinfo2 glibc_mallinfo = mallinfo2(); +#else + struct mallinfo glibc_mallinfo = mallinfo(); +#endif + + if ((size_t)glibc_mallinfo.uordblks > glibc_allocated_max) { + /* COVERAGE EXCLUSION - This code can't be reached as long as malloc is correctly revectored to jemalloc */ + (*printer)("Maximum memory allocated via glibc %zu (previous maximum %zu)\n", /* COVERAGE EXCLUSION - Can't 
happen */ + (size_t)glibc_mallinfo.uordblks, glibc_allocated_max); + glibc_allocated_max = (size_t)glibc_mallinfo.uordblks; /* COVERAGE EXCLUSION - Can't happen */ + growth = true; /* COVERAGE EXCLUSION - Can't happen */ + } +#endif + +#ifdef __linux__ + static size_t rss_max = 0; // High watermark of RSS pages allocated + long long rss_cur; + char buf[256]; // Buffer for /proc//statm contents (e.g. "14214701 8277571 1403 269 0 14138966 0") + ssize_t len; + const char *rss_str; + char *end_ptr; + + // If the /proc//statm is open, seek to start and read contents + if (proc_statm_fd >= 0 && lseek(proc_statm_fd, 0, SEEK_SET) != (off_t)-1 + && (len = read(proc_statm_fd, buf, sizeof(buf) - 1)) >= 0) { + buf[len] = '\0'; + + if ((rss_str = strchr(buf, ' '))) { // Find the RSS number in the string + rss_str++; + + // Try to convert to a number and, if a new maximum, log it + if ((rss_cur = strtoll(rss_str, &end_ptr, 10)) > 0 && *end_ptr == ' ' && (size_t)rss_cur > rss_max) { + (*printer)("Maximum memory allocated in RSS pages %zu (previous maximum %zu)\n", (size_t)rss_cur, rss_max); + rss_max = (size_t)rss_cur; + growth = true; + } + } + } +#endif + + return growth; +} + +struct printer_visitor { + int written; + __printflike(1, 2) int (*printer)(const char *format, ...); +}; + +static void +memory_stats_line(void *visitor_void, const char *line) +{ + struct printer_visitor *visitor = visitor_void; + + if (visitor->written < 0) + return; /* COVERAGE EXCLUSION - Test printer failure case */ + + int written = (*visitor->printer)("%s", line); + + if (written >= 0) + visitor->written += written; + else + visitor->written = written; /* COVERAGE EXCLUSION - Test printer failure case */ +} + +bool +kit_memory_log_stats(__printflike(1, 2) int (*printer)(const char *format, ...), const char *options) +{ + struct printer_visitor visitor; + + visitor.written = 0; + visitor.printer = printer; + malloc_stats_print(memory_stats_line, &visitor, options ?: "gblxe"); + return 
visitor.written > 0; +} diff --git a/lib-kit-alloc/kit-alloc.h b/lib-kit-alloc/kit-alloc.h new file mode 100644 index 0000000..b397fed --- /dev/null +++ b/lib-kit-alloc/kit-alloc.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#ifndef KIT_ALLOC_H +#define KIT_ALLOC_H + +#include +#include +#include + +#include "kit-counters.h" +#include "sxe-log.h" + +/*- + * The packaged version of jemalloc is built with --enable-fill. 
You can enable junk filling in your debug builds by defining: + * const char *malloc_conf = KIT_MALLOC_CONF_JUNK_FILL; + */ +#define KIT_MALLOC_CONF_JUNK_FILL "junk:true" + +#define KIT_MEMORY_ABORT_ON_ENOMEM 0x00000001 // Abort if an allocate call returns ENOMEM (out of memory) +#define KIT_MEMORY_CHECK_OVERFLOWS 0x00000002 // Add guard words around allocations and check them on realloc/free + +struct kit_memory_counters { + kit_counter_t bytes; + kit_counter_t calloc; + kit_counter_t fail; + kit_counter_t free; + kit_counter_t malloc; + kit_counter_t realloc; +}; + +extern struct kit_memory_counters kit_memory_counters; +extern size_t kit_memory_max_allocated; +extern int kit_memory_diagnostics; // Set > 0 to turn on kit memory log messages if in debug mode + +#define KIT_COUNTER_MEMORY_BYTES (kit_memory_counters.bytes) +#define KIT_COUNTER_MEMORY_CALLOC (kit_memory_counters.calloc) +#define KIT_COUNTER_MEMORY_FAIL (kit_memory_counters.fail) +#define KIT_COUNTER_MEMORY_FREE (kit_memory_counters.free) +#define KIT_COUNTER_MEMORY_MALLOC (kit_memory_counters.malloc) +#define KIT_COUNTER_MEMORY_REALLOC (kit_memory_counters.realloc) + +// Use bin/kit-alloc-analyze to parse kit-alloc diagnostics lines + +#define KIT_ALLOC_SET_LOG(n) do { kit_memory_diagnostics = (n); } while (0) // Turn kit memory log messages on/off in debug + +#define kit_malloc(size) kit_malloc_diag(size, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ +#define kit_memalign(al, size) kit_memalign_diag(al, size, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ +#define kit_reduce(ptr, size) kit_reduce_diag(ptr, size, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ +#define kit_strdup(txt) kit_strdup_diag(txt, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ +#define kit_calloc(num, size) kit_calloc_diag(num, size, __FILE__, __LINE__) /* 
CONVENTION EXCLUSION: these are supposed to look like functions */ +#define kit_realloc(ptr, size) kit_realloc_diag(ptr, size, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ +#define kit_free(ptr) kit_free_diag(ptr, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ +#define kit_strndup(txt, size) kit_strndup_diag(txt, size, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ + +extern void kit_memory_set_flags(unsigned flags); +extern void kit_memory_set_assert_on_enomem(bool assert_on_enomem); +extern void kit_memory_initialize(unsigned flags); +extern size_t kit_allocated_bytes(void); +extern uint64_t kit_thread_allocated_bytes(void); +extern uint64_t kit_memory_allocations(void); +extern void *kit_memory_check(void *ptr, size_t *size_out); +extern size_t kit_memory_size(size_t size_alloced, size_t alignment_or_0); +extern __attribute__((malloc)) void *kit_malloc_diag(size_t size, const char *file, int line); +extern __attribute__((malloc)) void *kit_memalign_diag(size_t alignment, size_t size, const char *file, int line); +extern void *kit_reduce_diag(void *ptr, size_t size, const char *file, int line); +extern __attribute__((malloc)) char *kit_strdup_diag(const char *txt , const char *file, int line); +extern __attribute__((malloc))void *kit_calloc_diag(size_t num, size_t size , const char *file, int line); +extern void *kit_realloc_diag(void *ptr, size_t size, const char *file, int line); +extern void kit_free_diag(void *ptr, const char *file, int line); +extern __attribute__((malloc)) char *kit_strndup_diag(const char *txt, size_t size , const char *file, int line); +extern bool kit_memory_log_growth(__printflike(1, 2) int (*printer)(const char *format, ...)); +extern bool kit_memory_log_stats(__printflike(1, 2) int (*printer)(const char *format, ...), const char *options); + +/* The following functions are for DPT-2036. 
Their use should be thoughtful and not just part of a kit alloc pattern. + * They should be used in _init and _fini functions, but not in _new and _free. + */ + +static inline void +kit_free_safe(void **mem_ptr) +{ + SXEA6(mem_ptr, "A valid address is required"); + kit_free(*mem_ptr); + *mem_ptr = NULL; +} + +static inline void * +kit_malloc_safe(const void *mem_ptr, size_t size) +{ + SXEL2A6(mem_ptr == NULL, "Internal error: Memory location for allocation is not NULL."); + return kit_malloc(size); +} + +static inline void * +kit_calloc_safe(const void *mem_ptr, size_t num, size_t size) +{ + SXEL2A6(mem_ptr == NULL, "Internal error: Memory location for (c)allocation is not NULL."); + return kit_calloc(num, size); +} + +/* The following macro is useful for finding allocations and frees in third party libraries that don't use the kit interface + */ +extern __thread uint64_t kit_thread_allocated_last; + +#define kit_memory_probe() do { \ + if (kit_thread_allocated_bytes() != kit_thread_allocated_last) { \ + SXEL7(": Thread memory %s by %"PRIu64" to %"PRIu64" (%s:%d)", \ + kit_thread_allocated_bytes() > kit_thread_allocated_last ? "grew" : "shrank", \ + kit_thread_allocated_bytes() > kit_thread_allocated_last ? 
kit_thread_allocated_bytes() - kit_thread_allocated_last \ + : kit_thread_allocated_last - kit_thread_allocated_bytes(), \ + kit_thread_allocated_bytes(), __FILE__, __LINE__); \ + kit_thread_allocated_last = kit_thread_allocated_bytes(); \ + } \ +} while (0) + +#endif diff --git a/lib-kit/kit-counters.c b/lib-kit-alloc/kit-counters.c similarity index 86% rename from lib-kit/kit-counters.c rename to lib-kit-alloc/kit-counters.c index 3c2eff1..546c353 100644 --- a/lib-kit/kit-counters.c +++ b/lib-kit-alloc/kit-counters.c @@ -41,7 +41,7 @@ #include #include -#define COUNTER_ISVALID(c) (((c) != INVALID_COUNTER) && ((c) <= num_counters)) +#define COUNTER_ISVALID(c) (((c) != KIT_COUNTERS_INVALID) && ((c) <= num_counters)) // Counters states #define COUNTER_DYNAMIC 1 @@ -51,14 +51,14 @@ static struct combine_handler { kit_counter_t counter; unsigned long long (*handler)(int); -} combine_handlers[MAXCOUNTERS]; +} combine_handlers[KIT_COUNTERS_MAX]; static bool initialized = false; static bool thread0_initialized = false; static bool allow_shared = false; -static kit_mibfn_t mibfns[MAXCOUNTERS]; -static kit_counter_t sorted_index[MAXCOUNTERS]; -static char *counter_txt[MAXCOUNTERS]; +static kit_mibfn_t mibfns[KIT_COUNTERS_MAX]; +static kit_counter_t sorted_index[KIT_COUNTERS_MAX]; +static const char *counter_txt[KIT_COUNTERS_MAX]; static unsigned max_counters; static unsigned num_counters; static unsigned num_handlers; @@ -100,13 +100,10 @@ add_counter(const char *txt, unsigned long long (*combine_handler)(int), kit_mib { kit_counter_t counter; - if (!kit_memory_is_initialized()) // Make sure memory counters are initialized before calling kit-alloc functions - kit_memory_init_internal(false); // Do a soft initialize on the counters - counter = ++num_counters; - SXEA1(counter < MAXCOUNTERS, "Counter %d exceeds MAXCOUNTERS (%d).", counter, MAXCOUNTERS); + SXEA1(counter < KIT_COUNTERS_MAX, "Counter %d exceeds KIT_COUNTERS_MAX (%d).", counter, KIT_COUNTERS_MAX); 
SXEA1(!counter_txt[counter], "Adding counter %d with value '%s' when it already has a value '%s'.", counter, txt, counter_txt[counter]); - SXEA1(counter_txt[counter] = kit_strdup(txt), "Failed to allocate %zu bytes for counter text.", strlen(txt)); + counter_txt[counter] = txt; add_to_sorted_index(counter, counter_txt[counter]); if (combine_handler) { @@ -143,24 +140,47 @@ kit_counter_txt(kit_counter_t c) return COUNTER_ISVALID(c) ? counter_txt[c] : NULL; } -/* Assign and return a new counter */ +/** + * Register and return a new counter + * + * @param txt Name of the counter (e.g. memory.malloc) + * + * @note The txt is not duplicated, so it must be static or in memory reserved by the caller for the lifetime of the counter + */ kit_counter_t -kit_counter_new(const char *txt) +kit_counter_reg(const char *txt) { + SXEA1(txt, "Can't register a counter with no text."); return add_counter(txt, NULL, NULL); } -/* Assign and return a new counter, specifying a combine handler */ +/** + * Register and return a new counter, specifying a combine handler + * + * @param txt Name of the counter (e.g. memory.bytes) + * @param combine_handler Pointer to a function that combines the per thread values (e.g. takes the maximum) + * + * @note The txt is not duplicated, so it must be static or in memory reserved by the caller for the lifetime of the counter + */ kit_counter_t -kit_counter_new_with_combine_handler(const char *txt, unsigned long long (*combine_handler)(int)) +kit_counter_reg_with_combine_handler(const char *txt, unsigned long long (*combine_handler)(int)) { + SXEA1(txt, "Can't register a counter with no text."); return add_counter(txt, combine_handler, NULL); } -/* Assign and return a new counter, specifying a mib function */ +/** + * Register and return a new counter, specifying a mib function + * + * @param txt Name of the counter (e.g. 
memory.bytes) + * @param mibfn Pointer to a function that allows special handling of the mib text + * + * @note The txt is not duplicated, so it must be static or in memory reserved by the caller for the lifetime of the counter + */ kit_counter_t -kit_counter_new_with_mibfn(const char *txt, kit_mibfn_t mibfn) +kit_counter_reg_with_mibfn(const char *txt, kit_mibfn_t mibfn) { + SXEA1(txt, "Can't register a counter with no text."); return add_counter(txt, NULL, mibfn); } @@ -189,7 +209,7 @@ kit_counter_incr(kit_counter_t c) return; if (kit_counters_are_shared()) { - if (c == INVALID_COUNTER) // Make sure unitialized shared counters don't slow us down + if (c == KIT_COUNTERS_INVALID) // Make sure unitialized shared counters don't slow us down shared_counters.val[c]++; else __atomic_add_fetch(&shared_counters.val[c], 1, __ATOMIC_SEQ_CST); @@ -205,7 +225,7 @@ kit_counter_decr(kit_counter_t c) return; if (kit_counters_are_shared()) { - if (c == INVALID_COUNTER) // Make sure unitialized shared counters don't slow us down + if (c == KIT_COUNTERS_INVALID) // Make sure unitialized shared counters don't slow us down shared_counters.val[c]--; else __atomic_add_fetch(&shared_counters.val[c], -1LL, __ATOMIC_SEQ_CST); @@ -217,11 +237,9 @@ kit_counter_decr(kit_counter_t c) /** * Initialize counters * - * @params counts Maximum number of counters supported; defaults to MAXCOUNTERS and currently aborts if greater + * @params counts Maximum number of counters supported; defaults to KIT_COUNTERS_MAX and currently aborts if greater * @params threads Maximum number of threads supported; more can be requested with kit_counters_prepare_dynamic_threads * @params allow_sharing True (default) to allow shared counters (which are slower) to be used after initialization - * - * @notes Counters may be used at startup by the main thread before this function is called. 
*/ void kit_counters_initialize(unsigned counts, unsigned threads, bool allow_sharing) @@ -229,9 +247,11 @@ kit_counters_initialize(unsigned counts, unsigned threads, bool allow_sharing) unsigned i; SXEE6("(counts=%u,threads=%u,allow_sharing=%s)", counts, threads, allow_sharing ? "true" : "false"); - SXEA1(counts <= MAXCOUNTERS, "Currently, counts cannot be greater than %u", MAXCOUNTERS); - SXEA1(threads, "At least one counter slot is required"); - SXEA1(!initialized, "%s(): Already initialized!", __FUNCTION__); + SXEA1(counts <= KIT_COUNTERS_MAX, "Currently, counts cannot be greater than %u", KIT_COUNTERS_MAX); + SXEA1(threads, "At least one counter slot is required"); + SXEA1(!initialized, "Already initialized!"); + + kit_memory_initialize(~0U); // Initialize memory with default flags or flags set with kit_memory_set_flags if (!thread0_initialized) { // If no counter has been touched SXEA6(!thread_counters, "Per thread counters are set without initializing the main thread"); @@ -239,7 +259,7 @@ kit_counters_initialize(unsigned counts, unsigned threads, bool allow_sharing) thread0_initialized = true; } - SXEA6(!all_counters && !counter_state, "%s(): Partially initialized!", __FUNCTION__); + SXEA6(!all_counters && !counter_state, "Partially initialized!"); pthread_spin_init(&counter_lock, PTHREAD_PROCESS_PRIVATE); initialized = true; max_counters = counts; @@ -262,6 +282,7 @@ kit_counters_initialize(unsigned counts, unsigned threads, bool allow_sharing) SXEA1(all_counters[i] = kit_calloc(1, sizeof(*all_counters[i])), "Failed to allocate %zu bytes for a thread counter block", maxthreads * sizeof(*all_counters[i])); + kit_memory_initialize_counters(); // Tell the memory system to switch over to using counters SXER6("return"); } @@ -392,7 +413,7 @@ kit_counter_add(kit_counter_t c, unsigned long long value) return; if (kit_counters_are_shared()) { - if (c == INVALID_COUNTER) // Make sure unitialized shared counters don't slow us down + if (c == KIT_COUNTERS_INVALID) // 
Make sure unitialized shared counters don't slow us down shared_counters.val[c] += value; else __atomic_add_fetch(&shared_counters.val[c], value, __ATOMIC_SEQ_CST); @@ -404,7 +425,7 @@ kit_counter_add(kit_counter_t c, unsigned long long value) unsigned long long kit_counter_get(kit_counter_t c) { - return c == INVALID_COUNTER ? 0 : kit_counter_get_data(c, -1); + return c == KIT_COUNTERS_INVALID ? 0 : kit_counter_get_data(c, -1); } void @@ -446,8 +467,8 @@ void kit_counters_prepare_dynamic_threads(unsigned count) { struct kit_counters **ncounters, **ocounters; - uint8_t *nstate, nthreads, *ostate; - unsigned done, i; + unsigned done, i, nthreads; + uint8_t *nstate, *ostate; SXEA6(initialized, "Counters not yet initialized"); @@ -514,7 +535,7 @@ kit_mibintree(const char *tree, const char *mib) { size_t len = strlen(tree); - return memcmp(tree, mib, len) == 0 && (len == 0 || mib[len] == '\0' || mib[len] == '.'); + return strncmp(tree, mib, len) == 0 && (len == 0 || mib[len] == '\0' || mib[len] == '.'); } void diff --git a/lib-kit/kit-counters.h b/lib-kit-alloc/kit-counters.h similarity index 86% rename from lib-kit/kit-counters.h rename to lib-kit-alloc/kit-counters.h index 0f08fc2..a7866ec 100644 --- a/lib-kit/kit-counters.h +++ b/lib-kit-alloc/kit-counters.h @@ -24,10 +24,12 @@ #ifndef KIT_COUNTERS_H #define KIT_COUNTERS_H -#define MAXCOUNTERS 600 -#define INVALID_COUNTER 0 // Uninitialized counters hopefully have this value -#define COUNTER_FLAG_NONE 0x00 -#define COUNTER_FLAG_SUMMARIZE 0x01 +#include + +#define KIT_COUNTERS_MAX 600 +#define KIT_COUNTERS_INVALID 0 // Uninitialized counters hopefully have this value +#define KIT_COUNTERS_FLAG_NONE 0x00 +#define KIT_COUNTERS_FLAG_SUMMARIZE 0x01 /* Special values that can be passed to kit_counter_get_data as threadnum */ @@ -38,7 +40,7 @@ #include struct kit_counters { - unsigned long long val[MAXCOUNTERS]; + unsigned long long val[KIT_COUNTERS_MAX]; }; typedef unsigned kit_counter_t; diff --git 
a/lib-kit-alloc/kit-test.h b/lib-kit-alloc/kit-test.h new file mode 100644 index 0000000..f0c023e --- /dev/null +++ b/lib-kit-alloc/kit-test.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + +/* Boiler-plate code for libkit/libtap based test programs + */ +#ifndef KIT_TEST_H +#define KIT_TEST_H 1 + +#include +#include +#include + +#define KIT_TEST_SUCCESS + +/* If a test program is built with the SXE_DEBUG flag set, enable jemalloc junk filling to catch bugs + */ +#if SXE_DEBUG +const char *malloc_conf = KIT_MALLOC_CONF_JUNK_FILL; +#endif + +/** + * Plan a number of tap tests, run under memory leak checking + * + * @param num_tests Number of tests + * + * @note Must be followed by a single matching kit_test_exit call + */ +#define kit_test_plan(num_tests) \ +{ \ + tap_plan((num_tests) + 1, TAP_FLAG_LINE_ON_OK, NULL); \ + uint64_t kit_start_allocations = kit_memory_allocations(); + +/** + * Test intermediate number of allocations + * + * @param exp_alloc Expected number of unfreed allocations at the current point in the test + * + * @note This counts as one of your tap tests + */ +#define kit_allocation_test(exp_alloc) \ + is(kit_memory_allocations(), kit_start_allocations + (exp_alloc), "Got expected number of allocations") + +/** + * Complete tap tests run under memory leak checking + * + * @param exp_alloc Expected number of unfreed allocations (normally 0) + * + * @note Must follow a single matching kit_test_plan call + */ +#define kit_test_exit(exp_alloc) \ + is(kit_memory_allocations() - kit_start_allocations, (exp_alloc), "All memory allocations were freed after tests"); \ + return exit_status(); \ +} + +#endif diff --git a/lib-kit-alloc/test/test-kit-alloc-no-counters.c b/lib-kit-alloc/test/test-kit-alloc-no-counters.c new file mode 100644 index 0000000..2a0dfd2 --- /dev/null +++ b/lib-kit-alloc/test/test-kit-alloc-no-counters.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. 
and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" + +int +main(void) +{ + void *mem; + + plan_tests(6); + kit_memory_initialize(0); + + MOCKFAIL_START_TESTS(3, kit_malloc_diag); + is(kit_malloc(16), NULL, "Test malloc failure"); + is(kit_calloc(1, 16), NULL, "Test calloc failure"); + is(kit_realloc(NULL, 16), NULL, "Test realloc failure"); + MOCKFAIL_END_TESTS(); + +#if SXE_DEBUG || SXE_COVERAGE +# define EXP_ALLOCS 2 +#else +# define EXP_ALLOCS 0 +#endif + + is(kit_memory_allocations(), EXP_ALLOCS, "Failed allocations count as allocations"); + ok(mem = kit_memalign_diag(16, 16, "file", 1), "Allocated memory using kit_memalign_diag SSL callback function"); + kit_free(mem); + is(kit_memory_allocations(), EXP_ALLOCS, "kit_memalign and kit_free balance eachother out"); + + return exit_status(); +} diff --git a/lib-kit/test/test-kit-alloc.c b/lib-kit-alloc/test/test-kit-alloc.c similarity index 52% rename from lib-kit/test/test-kit-alloc.c rename to lib-kit-alloc/test/test-kit-alloc.c index 97050d1..63fecb2 100644 --- a/lib-kit/test/test-kit-alloc.c +++ b/lib-kit-alloc/test/test-kit-alloc.c @@ -21,16 +21,22 @@ * SPDX-License-Identifier: MIT */ -#include -#include #include #include #include "kit-alloc.h" +#include "kit-mockfail.h" +#include "kit-test.h" // To enable junk filling when build with SXE_DEBUG == 1 +#include "sxe-util.h" + +unsigned long long initial_mallocs = ~0ULL; /* Sentinel (all ones): baseline not yet captured by check_counters() */ static void check_counters(const char *why, int m, int c, int r, int f, int fail) { + initial_mallocs = initial_mallocs == ~0ULL ? 
kit_counter_get(KIT_COUNTER_MEMORY_MALLOC) : initial_mallocs; + m += initial_mallocs; + is(kit_counter_get(KIT_COUNTER_MEMORY_MALLOC), m, "%s, the KIT_COUNTER_MEMORY_MALLOC value is %d", why, m); is(kit_counter_get(KIT_COUNTER_MEMORY_CALLOC), c, "%s, the KIT_COUNTER_MEMORY_CALLOC value is %d", why, c); is(kit_counter_get(KIT_COUNTER_MEMORY_REALLOC), r, "%s, the KIT_COUNTER_MEMORY_REALLOC value is %d", why, r); @@ -108,20 +114,24 @@ main(void) char *ptr1, *ptr2; int failures; - plan_tests(77); + tap_plan(103, TAP_FLAG_LINE_ON_OK, NULL); - /* Initialize memory before counters. test-kit-counters tests the opposite order - */ - kit_memory_initialize(false); // Initialize memory with no aborts; this will call kit_memory_init_internal - ok(kit_memory_is_initialized(), "Memory is initialized"); - ok(kit_counter_get(KIT_COUNTER_MEMORY_BYTES), "Memory was allocated and tracked before counters were initialized"); - - kit_counters_initialize(MAXCOUNTERS, 1, false); + // KIT_ALLOC_SET_LOG(1); // Turn off when done + is(kit_memory_allocations(), 0, "Expected no allocations before initializing counters"); + kit_memory_set_flags(KIT_MEMORY_CHECK_OVERFLOWS); // Need overflow checks to ensure coverage + kit_counters_initialize(KIT_COUNTERS_MAX, 1, false); // This calls kit_memory_initialize(~0U) + kit_memory_set_assert_on_enomem(true); + is(kit_memory_allocations(), 2, "Expected number of allocations for memory counters"); check_counters("After initializing the counters", 0, 0, 0, 0, 0); alloc1 = kit_allocated_bytes(); talloc1 = kit_thread_allocated_bytes(); ptr1 = kit_malloc(100); +#if SXE_DEBUG + is(*(uint64_t *)ptr1, 0xa5a5a5a5a5a5a5a5ULL, "Malloc junk fills in a debug test"); +#else + ok(*(uint64_t *)ptr1 != 0xa5a5a5a5a5a5a5a5ULL, "Malloc doesn't junk fill in a release test"); +#endif check_counters("After one malloc", 1, 0, 0, 0, 0); alloc2 = kit_allocated_bytes(); talloc2 = kit_thread_allocated_bytes(); @@ -158,8 +168,10 @@ main(void) kit_free(NULL); check_counters("After a 
free(NULL)", 3, 1, 1, 3, 0); + kit_memory_set_assert_on_enomem(false); // Failures tested after this point, so don't abort + failures = 0; - MOCKFAIL_START_TESTS(1, KIT_ALLOC_MANGLE(kit_reduce)); + MOCKFAIL_START_TESTS(1, kit_realloc_diag); is(kit_reduce(ptr1, 1), ptr1, "When kit_reduce() fails to realloc(), it returns the same pointer"); failures++; MOCKFAIL_END_TESTS(); @@ -169,19 +181,23 @@ main(void) ok(!kit_reduce(NULL, 0), "kit_reduce(NULL, 0) is a no-op"); + ok(ptr1 = kit_strndup("hello, world.", 5), "Duplicated up to 5 bytes of 'hello, world'"); + is_eq(ptr1, "hello", "Duplicated string is just 'hello'"); + kit_free(ptr1); + diag("Checking counter gatherer"); memset(&cg, 0xa5, sizeof(cg)); cg.wtf = 0; kit_counters_mib_text("memory", &cg, counter_callback, -1, 0); - ok(cg.bytes > 200, "Allocated more than 200 bytes"); - is(cg.malloc, 3, "Malloc says 3"); - is(cg.calloc, 1, "Calloc says 1"); - is(cg.realloc, 1, "Realloc says 1"); - is(cg.free, 4, "Free says 4"); - is(cg.fail, failures, "Fail says %d", failures); - is(cg.wtf, 0, "WTF says 0"); - - void *mem = malloc(1); // Force some glibc memory growth + ok(cg.bytes > 200, "Allocated more than 200 bytes"); + is(cg.malloc - initial_mallocs, 4, "Malloc says 4 more than the initial value"); + is(cg.calloc, 1, "Calloc says 1"); + is(cg.realloc, 1, "Realloc says 1"); + is(cg.free, 5, "Free says 5"); + is(cg.fail, failures, "Fail says %d", failures); + is(cg.wtf, 0, "WTF says 0"); + + void *mem = malloc(1); // CONVENTION EXCLUSION: Force some glibc memory growth ok(kit_memory_log_growth(test_printf), "Logged growth in allocated memory"); is_strncmp(buffer, "Maximum memory allocated via jemalloc", sizeof("Maximum memory allocated via jemalloc") - 1, "Expected start of output found"); @@ -189,9 +205,89 @@ main(void) ok(kit_memory_log_stats(test_printf, NULL), "Created a statistics file"); is_strncmp(buffer, "___ Begin jemalloc statistics ___", sizeof("___ Begin jemalloc statistics ___") - 1, "Expected start of output 
found"); - free(mem); + free(mem); // CONVENTION EXCLUSION: Free the memory allocated with glibc above + + diag("Test kit-alloc-analyze"); + { +#if SXE_DEBUG + KIT_ALLOC_SET_LOG(1); // Enable kit-alloc logging in debug builds +#endif + mem = kit_memalign(16, 16); + ptr1 = tap_shell("../../bin/kit-alloc-analyze test-kit-alloc.t.out", NULL); + +#if SXE_DEBUG + is_strncmp(ptr1, "Unmatched allocs:\n", sizeof("Unmatched allocs:\n") - 1, "There was an unmatched allocation"); + is_strstr( ptr1, "kit_memalign(16,16)", "It was a call to kit_memalign"); +#else + is_eq(ptr1, "kit_alloc debug logging may not be enabled\n", "kit-alloc-analyze requires a debug build"); + skip(1, "Non debug build"); +#endif + free(ptr1); // CONVENTION EXCLUSION: OK to use in test code + + kit_free(mem); + ptr1 = tap_shell("../../bin/kit-alloc-analyze test-kit-alloc.t.out", NULL); + +#if SXE_DEBUG + is_eq(ptr1, "", "There were no unmatched allocations"); +#else + is_eq(ptr1, "kit_alloc debug logging may not be enabled\n", "kit-alloc-analyze requires a debug build"); +#endif + free(ptr1); // CONVENTION EXCLUSION: OK to use in test code + } - is(kit_counter_get_data(INVALID_COUNTER, -1), 0, "The invalid counter has not been touched"); + diag("Test zero length allocations, calloc overflows, and error cases"); + { + ok(mem = kit_memalign(16, 0), "Allocated 0 bytes of aligned memory"); + kit_free(mem); + ok(mem = kit_calloc(0, 0), "Allocated 0 bytes of zeroed memory"); + kit_free(mem); + ok(mem = kit_calloc(0, 1), "Allocated 0 x 1 bytes of zeroed memory"); + kit_free(mem); + is(kit_calloc(2, 0x8000000000000000ULL), NULL, "Can't allocate 2 * 0x8000000000000000 bytes"); + is(errno, ENOMEM, "Errno '%s' is the expected 'Out of memory'", strerror(errno)); + + failures = kit_counter_get(KIT_COUNTER_MEMORY_FAIL); + MOCKFAIL_START_TESTS(2, kit_malloc_diag); + is(kit_malloc(16), NULL, "Test malloc failure"); + is(kit_counter_get(KIT_COUNTER_MEMORY_FAIL), failures + 1, "Expected the failure count to increase"); 
+ MOCKFAIL_END_TESTS(); + } + + diag("Test memory guards"); + { + ok(ptr1 = kit_memalign(32, 32), "Allocated memory at 32 byte alignment"); + is((uintptr_t)ptr1 & 0x1F, 0, "Memory is actually 32 byte aligned"); + ok(ptr1 - (char *)kit_memory_check(ptr1, &alloc1) == 32, "Expected a double guard before 32 byte aligned memory"); + is(alloc1, 32, "Expected the correct allocated size to be set"); + + ok(ptr2 = kit_realloc(ptr1, 64), "Reallocated memory (%s aligned)", + ((uintptr_t)ptr2 & 0x1F) == 0 ? "still" : "no longer"); + ok(ptr2 - (char *)kit_memory_check(ptr2, &alloc1) == 32, "Expected a double guard before 64 byte reallocated memory"); + is(alloc1, 64, "Expected the new correct allocated size to be set"); + kit_free(ptr2); + } + + diag("Safe kit functions tests"); + { + mem = NULL; + mem = kit_malloc_safe(mem, 32); + ok(mem != NULL, "Allocated 32 bytes using safe malloc"); + kit_free_safe(&mem); + ok(mem == NULL, "Free'd 32 bytes using safe free"); + + mem = kit_calloc_safe(mem, 32, 2); + ok(mem != NULL, "Allocated 64 bytes using safe calloc"); + kit_free_safe(&mem); + ok(mem == NULL, "Free'd 64 bytes using safe free"); + } + + diag("Test kit_strdup"); + { + ok(mem = kit_strdup("hello, world"), "Duplicated a string"); + is_eq(mem, "hello, world", "It's correct"); + kit_free(mem); + } + is(kit_counter_get_data(KIT_COUNTERS_INVALID, -1), 0, "The invalid counter has not been touched"); return exit_status(); } diff --git a/lib-kit/test/test-kit-counters-not-shared.c b/lib-kit-alloc/test/test-kit-counters-not-shared.c similarity index 72% rename from lib-kit/test/test-kit-counters-not-shared.c rename to lib-kit-alloc/test/test-kit-counters-not-shared.c index f9ee4b9..fbee3c6 100644 --- a/lib-kit/test/test-kit-counters-not-shared.c +++ b/lib-kit-alloc/test/test-kit-counters-not-shared.c @@ -25,8 +25,11 @@ #include #include +#include "kit-alloc.h" #include "kit-counters.h" +#include "kit-mockfail.h" +kit_counter_t KIT_EARLY; kit_counter_t KIT_COUNT; static void @@ 
-88,12 +91,17 @@ int main(void) { pthread_t thr; - void *ret; + void *ret; + size_t size; - plan_tests(6); + plan_tests(10); - kit_counters_initialize(MAXCOUNTERS, 2, false); /* Don't allow shared counters to be used */ - KIT_COUNT = kit_counter_new("kit.counter"); + KIT_EARLY = kit_counter_reg("kit.early"); + kit_counter_incr(KIT_EARLY); + is(kit_counter_get(KIT_EARLY), 1, "kit.early works before counters are initialized"); + + kit_counters_initialize(KIT_COUNTERS_MAX, 2, false); // Don't allow shared counters to be used + KIT_COUNT = kit_counter_reg("kit.counter"); ok(kit_counter_isvalid(KIT_COUNT), "Created kit.counter"); kit_counter_incr(KIT_COUNT); @@ -108,7 +116,24 @@ main(void) pthread_join(thr, &ret); is(kit_counter_get(KIT_COUNT), 9, "Dynamic thread-specific cleanup was able to adjust counters"); - is(kit_num_counters(), 7, "Number of counters is as expected (1 + 6 memory counters)"); + is(kit_num_counters(), 8, "Number of counters is as expected (2 + 6 memory counters)"); + + diag("Test memory check function"); // Done here because guards are always on it test-kit-alloc.t + { + ret = kit_malloc(32); + char *guarded = kit_memory_check(ret, &size); + +#if SXE_DEBUG + ok((char *)ret - guarded == 16, "Expected a guard before memory returned by malloc"); + is(size, 32, "Expected the correct size to be set"); +#else + is(ret, guarded, "Expected no guards before memory returned by malloc"); + is(size, 0, "Expected no size to be set when not checking overflows"); +#endif + + ok(ret = kit_malloc(0), "Zero length allocation returns a valid pointer"); + kit_free(ret); + } return exit_status(); } diff --git a/lib-kit/test/test-kit-counters-shared.c b/lib-kit-alloc/test/test-kit-counters-shared.c similarity index 93% rename from lib-kit/test/test-kit-counters-shared.c rename to lib-kit-alloc/test/test-kit-counters-shared.c index 96742dc..3a7d377 100644 --- a/lib-kit/test/test-kit-counters-shared.c +++ b/lib-kit-alloc/test/test-kit-counters-shared.c @@ -21,14 +21,14 @@ 
* SPDX-License-Identifier: MIT */ -#include +#include #include #include -#include -#include "kit.h" #include "kit-alloc.h" #include "kit-counters.h" +#include "kit-time.h" +#include "sxe-log.h" #define ERROR ((void *)1) @@ -224,9 +224,9 @@ unmanaged_thread(void *v) kit_counter_zero(my->c3); // Make sure diddling the shared invalid counter is done non-atomically (tested by coverage) - kit_counter_incr(INVALID_COUNTER); - kit_counter_decr(INVALID_COUNTER); - kit_counter_add( INVALID_COUNTER, 0); + kit_counter_incr(KIT_COUNTERS_INVALID); + kit_counter_decr(KIT_COUNTERS_INVALID); + kit_counter_add( KIT_COUNTERS_INVALID, 0); return NULL; } @@ -243,25 +243,22 @@ main(void) /* Initialize counters before memory. test-kit-alloc tests the opposite order */ - kit_counters_initialize(MAXCOUNTERS, 2, true); // Allow shared counters to be used - my.c3 = kit_counter_new("hi.there"); - ok(kit_counter_isvalid(my.c3), "Created a hi.there counter"); + kit_counters_initialize(KIT_COUNTERS_MAX, 2, true); // Allow shared counters to be used + my.c3 = kit_counter_reg("hi.there"); + ok(kit_counter_isvalid(my.c3), "Created a hi.there counter"); kit_counter_incr(my.c3); - is(kit_counter_get(my.c3), 1, "Set hi.there => 1"); - ok(kit_counter_get_data(INVALID_COUNTER, -1), "Expect some incrementing of the invalid counter due to early memory calls"); - kit_counter_zero(INVALID_COUNTER); - - kit_memory_initialize(false); // Call after soft initialization + is(kit_counter_get(my.c3), 1, "Set hi.there => 1"); + is(0, kit_counter_get_data(KIT_COUNTERS_INVALID, -1), "Expect no incrementing of the invalid counter"); kit_counter_zero(my.c3); is(kit_counter_get(my.c3), 0, "Set hi.there => 0"); - my.c1 = kit_counter_new_with_combine_handler("hello.world", my_combine_handler); + my.c1 = kit_counter_reg_with_combine_handler("hello.world", my_combine_handler); ok(kit_counter_isvalid(my.c1), "Created a hello.world counter"); - my.c2 = kit_counter_new("hello.city"); + my.c2 = kit_counter_reg("hello.city"); 
ok(kit_counter_isvalid(my.c2), "Created a hello.city counter"); ok(kit_counter_isvalid(my.c3), "The hi.there counter is still valid and available"); - my.c4 = kit_counter_new_with_mibfn("this.path", mibfn_thispath); + my.c4 = kit_counter_reg_with_mibfn("this.path", mibfn_thispath); ok(kit_counter_isvalid(my.c4), "Created a this.path mibfn counter"); kit_counter_add(my.c4, 999); @@ -440,8 +437,8 @@ main(void) is(kit_counter_get(non_existent_counter), 0, "Nonexistent counter can't be added to"); } - ok(kit_counters_usable(), "Counters are usable in the main thread"); - is(kit_num_counters(), 10, "Number of counters is as expected (4 + 6 memory counters)"); - is(kit_counter_get_data(INVALID_COUNTER, -1), 0, "The invalid counter has not been touched"); + ok(kit_counters_usable(), "Counters are usable in the main thread"); + is(kit_num_counters(), 10, "Number of counters is as expected (4 + 6 memory counters)"); + is(kit_counter_get_data(KIT_COUNTERS_INVALID, -1), 0, "The invalid counter has not been touched"); return exit_status(); } diff --git a/lib-kit-mock/GNUmakefile b/lib-kit-mock/GNUmakefile new file mode 100644 index 0000000..25e4b41 --- /dev/null +++ b/lib-kit-mock/GNUmakefile @@ -0,0 +1,6 @@ +# Makefile for the lib-kit-mock library + +CFLAGS += -DMOCK -DMOCK_IMPL +LIBRARIES = kit-mock + +include ../dependencies.mak diff --git a/lib-kit-mock/kit-mock.c b/lib-kit-mock/kit-mock.c new file mode 100644 index 0000000..921d850 --- /dev/null +++ b/lib-kit-mock/kit-mock.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif + +#include "kit-mock.h" + +#ifdef WINDOWS_NT +#include +#endif + +#define MOCK_CDECL +#define MOCK_DEF(type, scope, function, parameters) type (MOCK_ ## scope * mock_ ## function) parameters = function + +/* Declarations of the mock function table. For Windows, CRT functions have CDECL, OS (e.g. 
WinSock) APIs have STDCALL + * Return Type CRT/OS Function Parameter Types + */ +MOCK_DEF(MOCK_SOCKET, STDCALL, accept, (MOCK_SOCKET, struct sockaddr *, MOCK_SOCKLEN_T *)); +MOCK_DEF(MOCK_SOCKET, STDCALL, accept4, (MOCK_SOCKET, struct sockaddr *, MOCK_SOCKLEN_T *, int)); +MOCK_DEF(int, STDCALL, bind, (MOCK_SOCKET, const struct sockaddr *, MOCK_SOCKLEN_T)); +MOCK_DEF(int, STDCALL, getaddrinfo, (const char *, const char *, const struct addrinfo *, struct addrinfo **)); +MOCK_DEF(void *, CDECL, calloc, (size_t, size_t)); // CONVENTION EXCLUSION: OK to mock glibc allocation +MOCK_DEF(int, CDECL, close, (int)); +MOCK_DEF(int, STDCALL, connect, (MOCK_SOCKET, const struct sockaddr *, MOCK_SOCKLEN_T)); +MOCK_DEF(FILE *, CDECL, fopen, (const char * file, const char * mode)); +MOCK_DEF(int, CDECL, fputs, (const char * string, FILE * file)); +MOCK_DEF(int, STDCALL, getsockopt, (MOCK_SOCKET, int, int, MOCK_SOCKET_VOID *, MOCK_SOCKLEN_T * __restrict)); +MOCK_DEF(int, CDECL, gettimeofday,(struct timeval * __restrict tm, __timezone_ptr_t __restrict tz)); +MOCK_DEF(off_t, CDECL, lseek, (int fd, off_t offset, int whence)); +MOCK_DEF(int, STDCALL, listen, (MOCK_SOCKET, int)); +MOCK_DEF(void *, CDECL, malloc, (size_t)); // CONVENTION EXCLUSION: OK to mock glibc allocation +MOCK_DEF(MOCK_SSIZE_T, STDCALL, recvfrom, (MOCK_SOCKET, MOCK_SOCKET_VOID *, MOCK_SOCKET_SSIZE_T, int, struct sockaddr *, MOCK_SOCKLEN_T *)); +MOCK_DEF(MOCK_SSIZE_T, STDCALL, send, (MOCK_SOCKET, const MOCK_SOCKET_VOID *, MOCK_SOCKET_SSIZE_T, int)); +MOCK_DEF(MOCK_SSIZE_T, STDCALL, sendto, (MOCK_SOCKET, const MOCK_SOCKET_VOID *, MOCK_SOCKET_SSIZE_T, int, const struct sockaddr *, MOCK_SOCKLEN_T)); +MOCK_DEF(MOCK_SOCKET, STDCALL, socket, (int, int, int)); +MOCK_DEF(MOCK_SSIZE_T, CDECL, recv, (int, void *, MOCK_SIZE_T, int)); +MOCK_DEF(MOCK_SSIZE_T, CDECL, write, (int, const void *, MOCK_SIZE_T)); +MOCK_DEF(MOCK_SSIZE_T, CDECL, readv, (int, const struct iovec *, int)); +MOCK_DEF(MOCK_SSIZE_T, CDECL, writev, (int, const 
struct iovec *, int)); +MOCK_DEF(int, CDECL, clock_gettime,(clockid_t clk_id, struct timespec *tp)); + +#ifdef WINDOWS_NT +MOCK_DEF(DWORD, STDCALL, timeGetTime, (void)); +MOCK_DEF(int, CDECL, mkdir, (const char * pathname)); +#else +# if defined(__APPLE__) +MOCK_DEF(int , STDCALL, sendfile, (int, int, off_t, off_t *, struct sf_hdtr *, int)); +# elif defined(__FreeBSD__) +MOCK_DEF(int , STDCALL, sendfile, (int, int, off_t, size_t, struct sf_hdtr *, off_t *, int)); +# else +MOCK_DEF(MOCK_SSIZE_T, STDCALL, sendfile, (int, int, off_t *, size_t)); +# endif +MOCK_DEF(int, CDECL, mkdir, (const char * pathname, mode_t mode)); +MOCK_DEF(void, CDECL, openlog, (const char * ident, int option, int facility)); +MOCK_DEF(void, CDECL, syslog, (int priority, const char * format, ...)); +#endif + +/* The following mock was removed because the function that it mocks cannot be linked statically with the debian version of glibc. + * If you need to mock this function, put it in a separate file so that only the program that uses it requires dynamic linking. + */ +#ifdef DYNAMIC_LINKING_REQUIRED +MOCK_DEF(struct hostent *, STDCALL, gethostbyname, (const char *)); +#endif diff --git a/lib-kit-mock/kit-mock.h b/lib-kit-mock/kit-mock.h new file mode 100644 index 0000000..abdc9da --- /dev/null +++ b/lib-kit-mock/kit-mock.h @@ -0,0 +1,187 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __KIT_MOCK_H__
+#define __KIT_MOCK_H__
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef WINDOWS_NT
+#   include
+#else
+#   include
+#   include
+#   if defined(__APPLE__) || defined(__FreeBSD__)
+#       include
+#   else
+#       include
+#   endif
+#endif
+
+/* CONVENTION EXCLUSION: system functions mocked using #define */
+
+#ifdef MOCK
+
+#define MOCK_SET_HOOK(func, test) mock_ ## func = (test)    /* Point the mock_<func> pointer at 'test' */
+#define MOCK_SKIP_START(numtests)                            /* Expands to nothing when mocks are compiled in */
+#define MOCK_SKIP_END
+
+#ifdef WINDOWS_NT
+#define MOCK_SIZE_T         unsigned
+#define MOCK_STDCALL        __stdcall
+#define MOCK_SOCKET         SOCKET
+#define MOCK_SOCKET_VOID    char
+#define MOCK_SOCKET_SSIZE_T int
+#define MOCK_SSIZE_T        int
+#define MOCK_SOCKLEN_T      int
+
+#else /* UNIX */
+#define MOCK_STDCALL
+typedef size_t    MOCK_SIZE_T;
+typedef int       MOCK_SOCKET;
+typedef void      MOCK_SOCKET_VOID;
+typedef size_t    MOCK_SOCKET_SSIZE_T;
+typedef ssize_t   MOCK_SSIZE_T;
+typedef socklen_t MOCK_SOCKLEN_T;
+
+# ifdef __APPLE__
+    typedef void *__timezone_ptr_t;
+# elif defined(__FreeBSD__)
+    typedef struct timezone *__timezone_ptr_t;
+# elif __GNUC__ >= 9
+    typedef void *__restrict __timezone_ptr_t;
+# endif
+
+#endif
+
+/* External definitions of the mock function table
+ *  - MOCK_STDCALL signifies that Windows implements this function in an OS API, not the C runtime
+ */
+extern MOCK_SOCKET  (MOCK_STDCALL * mock_accept)       (MOCK_SOCKET, struct sockaddr *, MOCK_SOCKLEN_T *);
+extern MOCK_SOCKET  (MOCK_STDCALL * mock_accept4)      (MOCK_SOCKET, struct sockaddr *, MOCK_SOCKLEN_T *, int);
+extern int          (MOCK_STDCALL * mock_bind)         (MOCK_SOCKET, const struct sockaddr *, MOCK_SOCKLEN_T);
+extern int          (MOCK_STDCALL * mock_getaddrinfo)  (const char *, const char *, const struct addrinfo *, struct addrinfo **);
+extern void *       (             * mock_calloc)       (size_t, size_t);
+extern int          (             * mock_close)        (int);
+extern int          (MOCK_STDCALL * mock_connect)      (MOCK_SOCKET, const struct sockaddr *, MOCK_SOCKLEN_T);
+extern FILE *       (             * mock_fopen)        (const char * file, const char * mode);
+extern int          (             * mock_fputs)        (const char * string, FILE * file);
+extern int          (MOCK_STDCALL * mock_getsockopt)   (MOCK_SOCKET, int, int, MOCK_SOCKET_VOID *, MOCK_SOCKLEN_T * __restrict);
+extern int          (             * mock_gettimeofday) (struct timeval * __restrict, __timezone_ptr_t);
+extern int          (MOCK_STDCALL * mock_listen)       (MOCK_SOCKET, int);
+extern off_t        (             * mock_lseek)        (int fd, off_t offset, int whence);
+extern void *       (             * mock_malloc)       (size_t);
+extern MOCK_SSIZE_T (MOCK_STDCALL * mock_recvfrom)     (MOCK_SOCKET, MOCK_SOCKET_VOID *, MOCK_SOCKET_SSIZE_T, int, struct sockaddr *, MOCK_SOCKLEN_T *);
+extern MOCK_SSIZE_T (MOCK_STDCALL * mock_send)         (MOCK_SOCKET, const MOCK_SOCKET_VOID *, MOCK_SOCKET_SSIZE_T, int);
+extern MOCK_SSIZE_T (MOCK_STDCALL * mock_sendto)       (MOCK_SOCKET, const MOCK_SOCKET_VOID *, MOCK_SOCKET_SSIZE_T, int,
+                                                        const struct sockaddr *, MOCK_SOCKLEN_T);
+extern MOCK_SOCKET  (MOCK_STDCALL * mock_socket)       (int, int, int);
+extern MOCK_SSIZE_T (             * mock_recv)         (int, void *, MOCK_SIZE_T, int);
+extern MOCK_SSIZE_T (             * mock_write)        (int, const void *, MOCK_SIZE_T);
+extern MOCK_SSIZE_T (             * mock_readv)        (int, const struct iovec *, int);
+extern MOCK_SSIZE_T (             * mock_writev)       (int, const struct iovec *, int);
+extern int          (             * mock_clock_gettime)(clockid_t clk_id, struct timespec *tp);
+
+#ifdef WINDOWS_NT
+extern DWORD        (MOCK_STDCALL * mock_timeGetTime)  (void);
+extern int          (             * mock_mkdir)        (const char * pathname);
+#else /* UNIX */
+# if defined(__APPLE__)
+extern int          (MOCK_STDCALL * mock_sendfile)     (int, int, off_t, off_t *, struct sf_hdtr *, int);
+# elif defined(__FreeBSD__)
+extern int          (MOCK_STDCALL * mock_sendfile)     (int, int, off_t, size_t, struct sf_hdtr *, off_t *, int);
+# else /* not __APPLE__ and not __FreeBSD__ */
+extern MOCK_SSIZE_T (MOCK_STDCALL * mock_sendfile)     (int, int, off_t *, size_t);
+# endif
+extern int          (             * mock_mkdir)        (const char * pathname, mode_t mode);
+extern void         (             * mock_openlog)      (const char * ident, int option, int facility);
+extern void         (             * mock_syslog)       (int priority, const char *format, ...);
+#endif
+
+#ifndef MOCK_IMPL
+
+#define accept(fd, addr, len)                        (*mock_accept)       ((fd), (addr), (len))
+#define accept4(fd, addr, len, flags)                (*mock_accept4)      ((fd), (addr), (len), (flags))
+#define bind(fd, addr, len)                          (*mock_bind)         ((fd), (addr), (len))
+#define getaddrinfo(node, service, hints, res)       (*mock_getaddrinfo)  ((node), (service), (hints), (res))
+#define calloc(num, size)                            (*mock_calloc)       ((num), (size))
+#define close(fd)                                    (*mock_close)        (fd)
+#define connect(fd, addr, len)                       (*mock_connect)      ((fd), (addr), (len))
+#define fopen(file, mode)                            (*mock_fopen)        ((file), (mode))
+#define fputs(string, file)                          (*mock_fputs)        ((string), (file))
+#define getsockopt(fd, lev, oname, oval, olen)       (*mock_getsockopt)   ((fd), (lev), (oname), (oval), (olen))
+#define gettimeofday(tm,tz)                          (*mock_gettimeofday) ((tm), (tz))
+#define listen(fd, backlog)                          (*mock_listen)       ((fd), (backlog))
+#define lseek(fd, offset, whence)                    (*mock_lseek)        ((fd), (offset), (whence))
+#define malloc(size)                                 (*mock_malloc)       (size)
+#define send(fd, buf, len, flags)                    (*mock_send)         ((fd), (buf), (len), (flags))
+#ifdef WINDOWS_NT
+#define timeGetTime()                                (*mock_timeGetTime)  ()
+#define mkdir(pathname)                              (*mock_mkdir)        (pathname)
+#else
+# if defined(__APPLE__)
+#  define sendfile(in_fd, out_fd, off, len, hdr, _r) (*mock_sendfile)     ((in_fd), (out_fd), (off), (len), (hdr), (_r))
+# elif defined(__FreeBSD__)
+#  define sendfile(in_fd, out_fd, off, nbytes, hdr, sbytes, flags) (*mock_sendfile) ((in_fd), (out_fd), (off), (nbytes), (hdr), (sbytes), (flags))
+# else
+#  define sendfile(out_fd, in_fd, offset, count)     (*mock_sendfile)     ((out_fd), (in_fd), (offset), (count))
+# endif
+#define mkdir(pathname, mode)                        (*mock_mkdir)        ((pathname), (mode))
+#define openlog(ident, option, facility)             (*mock_openlog)      ((ident), (option), (facility))
+#define syslog(priority, ...)                        (*mock_syslog)       ((priority), __VA_ARGS__)
+#endif
+#define sendto(fd, buf, len, flags, to, tolen)       (*mock_sendto)       ((fd), (buf), (len), (flags), (to), (tolen))
+#define socket(dom, typ, pro)                        (*mock_socket)       ((dom), (typ), (pro))
+#define recv(fd, buf, len, flags)                    (*mock_recv)         ((fd), (buf), (len), (flags))
+#define recvfrom(fd, buf, len, flags, to, tolen)     (*mock_recvfrom)     ((fd), (buf), (len), (flags), (to), (tolen))
+#define write(fd, buf, len)                          (*mock_write)        ((fd), (buf), (len))
+#define readv(fd, iov, iovcnt)                       (*mock_readv)        ((fd), (iov), (iovcnt))
+#define writev(fd, iov, iovcnt)                      (*mock_writev)       ((fd), (iov), (iovcnt))
+#define clock_gettime(clk_id, tp)                    (*mock_clock_gettime)((clk_id), (tp))
+
+#endif /* !MOCK_IMPL */
+
+#else /* !defined(MOCK) */
+
+#define MOCK_SET_HOOK(func, test)  ((void)(test))    /* No mock table: just consume 'test' (parenthesized, as in the MOCK variant) */
+#define MOCK_SKIP_START(num_tests) skip_start(1, (num_tests), "- this test requires mock functions")
+#define MOCK_SKIP_END              skip_end
+
+#endif /* !defined(MOCK) */
+
+/* The following mock was removed because the function that it mocks cannot be linked statically with the debian version of glibc.
+ */
+#ifdef DYNAMIC_LINKING_REQUIRED
+#define gethostbyname(name) (*mock_gethostbyname)((name))
+extern struct hostent * (MOCK_STDCALL * mock_gethostbyname)(const char *);
+#endif
+
+#endif /* __KIT_MOCK_H__ */
diff --git a/lib-kit-mock/kit-mockfail.c b/lib-kit-mock/kit-mockfail.c
new file mode 100644
index 0000000..c0e1fc3
--- /dev/null
+++ b/lib-kit-mock/kit-mockfail.c
@@ -0,0 +1,5 @@
+#include "kit-mockfail.h"
+
+const void *kit_mockfail_failaddr;
+unsigned kit_mockfail_failfreq;
+unsigned kit_mockfail_failnum;
diff --git a/lib-kit-mock/kit-mockfail.h b/lib-kit-mock/kit-mockfail.h
new file mode 100644
index 0000000..cd0c96c
--- /dev/null
+++ b/lib-kit-mock/kit-mockfail.h
@@ -0,0 +1,65 @@
+#ifndef KIT_MOCKFAIL_H
+#define KIT_MOCKFAIL_H
+
+#include
+
+#if !SXE_DEBUG && !SXE_COVERAGE    // In the release build, remove all mock scaffolding
+
+#define MOCKFAIL(addr, ret, expr)         (expr)
+#define MOCKERROR(addr, ret, error, expr) (expr)
+#define MOCKFAIL_START_TESTS(n, addr)     skip_start(1, (n), "MOCKFAIL: Skipping %d test%s for release build", (n), (n) == 1 ? "" : "s")
+#define MOCKFAIL_SET_FREQ(n)
+#define MOCKFAIL_SET_SKIP(n)
+#define MOCKFAIL_END_TESTS()              skip_end
+
+#else    // In the debug and coverage builds, mock failures can be triggered
+// When addr is the armed address and the countdown reaches 0: reset the countdown and yield ret (MOCKERROR also sets errno); otherwise evaluate expr
+#define MOCKFAIL(addr, ret, expr)         (((addr) == kit_mockfail_failaddr && !--kit_mockfail_failnum) ? ((kit_mockfail_failnum = kit_mockfail_failfreq), (ret)) : (expr))
+#define MOCKERROR(addr, ret, error, expr) (((addr) == kit_mockfail_failaddr && !--kit_mockfail_failnum) ? ((kit_mockfail_failnum = kit_mockfail_failfreq), (errno = (error)), (ret)) : (expr))
+#define MOCKFAIL_START_TESTS(n, addr)     do { kit_mockfail_failaddr = (addr); kit_mockfail_failfreq = kit_mockfail_failnum = 1; } while (0)
+#define MOCKFAIL_SET_FREQ(n)              do { kit_mockfail_failfreq = kit_mockfail_failnum = (n); } while (0)    // Fail every n
+#define MOCKFAIL_SET_SKIP(n)              do { kit_mockfail_failnum = (n) + 1; } while (0)                        // Skip n before failing
+#define MOCKFAIL_END_TESTS()              do { kit_mockfail_failaddr = NULL; } while (0)
+
+extern const void *kit_mockfail_failaddr;
+extern unsigned kit_mockfail_failfreq;
+extern unsigned kit_mockfail_failnum;
+#endif
+
+/*
+ * We can change a piece of code such as
+ *
+ * if (almost_impossible_failure(args) == hard_to_make_fail) {
+ * really_hard_to_test();
+ * }
+ *
+ * to
+ *
+ * #include "kit-mockfail.h"
+ *
+ * if (MOCKFAIL(UNIQUE_ADDR, hard_to_make_fail, almost_impossible_failure(args)) == hard_to_make_fail) {
+ * really_hard_to_test();
+ * }
+ *
+ * and test with:
+ *
+ * #include "kit-mockfail.h"
+ *
+ * MOCKFAIL_START_TESTS(3, UNIQUE_ADDR);
+ * ok(!some_caller_of_almost_impossible_failure(), "caller fails because almost_impossible_failure() occurred");
+ * ok(!another_caller(), "caller fails because almost_impossible_failure() occurred");
+ * MOCKFAIL_SET_FREQ(3);
+ * ok(some_caller_of_almost_impossible_failure(), "caller succeeds because almost_impossible_failure() only fails every third time now");
+ * MOCKFAIL_END_TESTS();
+ *
+ * UNIQUE_ADDR can be a global function name or the address of a global variable. It must be a global so that the test can
+ * use it. 
If you are mocking more than one failure in a function or the function is static, you can define tags in the .h: + * + * #if defined(SXE_DEBUG) || defined(SXE_COVERAGE) // Define unique tags for mockfails + * # define APPLICATION_CLONE ((const char *)application_register_resolver + 0) + * # define APPLICATION_CLONE_DOMAINLISTS ((const char *)application_register_resolver + 1) + * # define APPLICATION_MOREDOMAINLISTS ((const char *)application_register_resolver + 2) + * #endif + */ + +#endif diff --git a/lib-kit-mock/kit-process.c b/lib-kit-mock/kit-process.c new file mode 100644 index 0000000..0aed6d7 --- /dev/null +++ b/lib-kit-mock/kit-process.c @@ -0,0 +1,105 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+/* Emulate Windows spawn and cwait functions on UNIX
+ */
+#ifndef _WIN32
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "kit-process.h"
+/* Run 'command' with argv = { arg0, varargs... } (the varargs end with a NULL); return the child's pid, or -1 on failure */
+intptr_t
+kit_spawnl(int mode, const char * command, const char * arg0, ...)
+{
+    int          pid, err;
+    int          i, nargs;
+    const char **argv;
+
+    assert(mode == P_NOWAIT);    /* Only the P_NOWAIT mode is supported */
+
+    /* Count the number of arguments */
+    {
+        va_list va;
+
+        nargs = 2;    /* arg0, NULL */
+        va_start(va, arg0);
+        while (va_arg(va, const char *) != 0)
+            nargs++;
+        va_end(va);
+    }
+
+    /* Construct an argv array */
+    {
+        va_list     va;
+        const char *arg;
+
+        argv = alloca(nargs * sizeof(const char *));
+        assert(argv);
+
+        argv[0] = arg0;
+        va_start(va, arg0);
+        for (i = 1; (arg = va_arg(va, const char *)) != 0; i++) {
+            argv[i] = arg;
+        }
+        va_end(va);
+        argv[i] = NULL;
+    }
+
+#ifdef __APPLE__
+    err = posix_spawn(&pid, command, NULL, NULL, (char * const *)(intptr_t)argv, environ);    /* Path is 'command', as for execv() below */
+    if (err == 0) {
+        return (intptr_t)pid;
+    }
+
+    fprintf(stderr, "Failed to execute %s: %s\n", command, strerror(err));
+#else
+
+    /* On error or if parent, return -1 or pid
+     */
+    if ((pid = fork()) != 0) {
+        return (intptr_t)pid;
+    }
+
+    execv(command, (char * const *)(intptr_t)argv);
+    err = errno;
+    fprintf(stderr, "Failed to execute %s: %s\n", command, strerror(err));
+    _exit(1);    /* In the forked child: don't run the parent's atexit handlers or flush its inherited stdio buffers */
+#endif
+
+    return (intptr_t)-1;    /* Only reached when posix_spawn() failed (__APPLE__ build) */
+}
+/* Wait for child 'process_id', storing its wait status in *status; returns whatever waitpid() returns */
+intptr_t
+kit_cwait(int * status, intptr_t process_id, int action)
+{
+    assert(action == WAIT_CHILD);    /* Only the WAIT_CHILD action is supported */
+
+    return (intptr_t)waitpid((pid_t)process_id, status, 0);
+}
+
+#endif // !_WIN32
diff --git a/lib-kit-mock/kit-process.h b/lib-kit-mock/kit-process.h
new file mode 100644
index 0000000..e8161a9
--- /dev/null
+++ b/lib-kit-mock/kit-process.h
@@ -0,0 +1,53 @@
+/* Copyright (c) 2010 Sophos Group. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef KIT_PROCESS_H
+#define KIT_PROCESS_H
+
+#ifdef _WIN32    // Same test as kit-process.c, which only builds the emulation when _WIN32 is not defined
+
+#include
+#define kit_spawnl(mode, command, arg0, ...)  spawnl(mode, command, arg0, __VA_ARGS__)
+#define kit_cwait(status, process_id, action) cwait(status, process_id, action)
+
+#else // Emulate Windows spawn and cwait functions on UNIX
+
+/* The emulation in kit-process.c asserts that the spawn mode is P_NOWAIT and that the wait action is WAIT_CHILD
+ */
+#include /* For intptr_t */
+
+#define P_NOWAIT   1
+#define WAIT_CHILD 0
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern intptr_t kit_cwait(int * status, intptr_t process_id, int action);                     // Returns waitpid()'s result
+extern intptr_t kit_spawnl(int mode, const char * command, const char * arg0, ...);           // Varargs end with a NULL
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // !_WIN32
+
+#endif // KIT_PROCESS_H
diff --git a/lib-kit-mock/test/test-kit-process.c b/lib-kit-mock/test/test-kit-process.c
new file mode 100644
index 0000000..2613ef7
--- /dev/null
+++ b/lib-kit-mock/test/test-kit-process.c
@@ -0,0 +1,67 @@
+/* Copyright (c) 2016 by Jim Belton
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE. 
+ */
+
+#include
+#include
+
+#include "kit-process.h"
+
+int
+main(int argc, char ** argv)
+{
+    intptr_t child_pid;
+    intptr_t dead_pid;
+    int      status;
+
+    /* The spawned copy of this program is recognised by its single "child process" argument: it exits with the low byte
+     * of its PID as the status and neither plans nor runs any tests. Any other invocation is the test driver.
+     */
+    if (argc == 2 && strcmp(argv[1], "child process") == 0) {
+        int pid = getpid();
+        return pid & 0xFF;
+    }
+
+    plan_tests(2);    /* Each of the two spawn attempts below reports exactly one result */
+
+    if ((child_pid = kit_spawnl(P_NOWAIT, argv[0], argv[0], "child process", NULL)) < 0) {
+        fail("Failed to spawn a child process");
+    }
+    else if ((dead_pid = kit_cwait(&status, child_pid, WAIT_CHILD)) < 0) {
+        fail("Expect cwait to return child pid %p, got %p", (void *)child_pid, (void *)dead_pid);
+    }
+    else {
+        fprintf(stderr, "Status %x\n", status);
+        is(status >> 8, child_pid & 0xFF, "Expected the high byte of the status to be the low byte of child pid");
+    }
+
+    if ((child_pid = kit_spawnl(P_NOWAIT, "gorporola", "gorporola", "child process", NULL)) < 0) {    /* No such program */
+        fail("Failed to spawn a child process");
+    }
+    else if ((dead_pid = kit_cwait(&status, child_pid, WAIT_CHILD)) < 0) {
+        fail("Expect cwait to return child pid %p, got %p", (void *)child_pid, (void *)dead_pid);
+    }
+    else {
+        fprintf(stderr, "Status %x\n", status);
+        is(status >> 8, 1, "Expected the high byte of the status to be 1 (exit value on error from kit_spawnl)");
+    }
+
+    return exit_status();
+}
diff --git a/lib-kit/GNUmakefile b/lib-kit/GNUmakefile index e2750a5..4b08bd8 100644 --- a/lib-kit/GNUmakefile +++ b/lib-kit/GNUmakefile @@ -1,34 +1,9 @@ LIBRARIES = kit -ifdef ZEUS_TOP.dir -# Use the ZEUS libsxe -ifdef LIBSXE_DEV_ROOT -SXE.dir = $(LIBSXE_DEV_ROOT) -else -SXE.dir = $(ZEUS_TOP.dir)/libsxe -endif -else -SXE.dir = $(TOP.dir)/libsxe -endif - include ../dependencies.mak -ifdef ZEUS_TOP.dir -IFLAGS_TEST := $(filter-out -I$(TOP.dir)/libsxe/lib-tap/$(DST.dir), $(IFLAGS_TEST)) -endif - -# List of the libraries in linker order. 
-LIB_DEPENDENCIES = jemalloc - MAKE_ALLOW_LOWERCASE_TYPEDEF = 1 -IFLAGS += $(if $(findstring port,$(LIB_DEPENDENCIES)),$(CC_INC)$(COM.dir)/lib-port/$(OS_class),) \ - -I$(SXE.dir)/$(DST.dir)/include -I$(TOP.dir)/libjemalloc/$(DST.dir) LINK_FLAGS += $(TOP.dir)/libkit/lib-kit/$(DST.dir)/kit$(EXT.lib) -LINK_FLAGS += $(TOP.dir)/libjemalloc/$(DST.dir)/jemalloc$(EXT.lib) -LINK_FLAGS += $(SXE.dir)/$(DST.dir)/libsxe$(EXT.lib) -LINK_FLAGS += -lrt -rdynamic -pthread -ldl -pie -z noexecstack -CFLAGS += -D_GNU_SOURCE=1 -D_FORTIFY_SOURCE=2 # man 7 feature_test_macros -CFLAGS += -pthread CFLAGS_WFORMAT = -Wformat=2 CFLAGS_MARCH = diff --git a/lib-kit/kit-alloc.c b/lib-kit/kit-alloc.c deleted file mode 100644 index ad14e3f..0000000 --- a/lib-kit/kit-alloc.c +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS - * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR - * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER - * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * SPDX-License-Identifier: MIT - */ - -#include "kit-alloc-private.h" - -#include -#include -#include -#include -#include -#include - -#ifdef __linux__ -#include -#endif - -#ifdef __GLIBC__ -#pragma GCC diagnostic ignored "-Waggregate-return" // To allow use of glibc mallinfo -#endif - -/*- - * As of December 2012, the stock malloc implementation in linux is not - * clever enough to handle the allocations requirements of opendnscache. - * - * This can be demonstrated by monitoring the following data: - * - * VSZ of the process - * ps -axwwovsz,comm | sed -n 's, opednscache,,p' - * - * # Inflight allocations & total - * my %stats = map { m{^"(\S+)\s(.+)"$} } `dig +short txt stats.opendns.com \@127.0.0.1`; - * my $inflight = $stats{'memory.calloc'} + - * $stats{'memory.malloc'} - - * $stats{'memory.free'}; - * my $total = $stats{'memory.bytes'}; - * - * On a busy resolver running with -c 800000000, you will see $inflight - * settle at around 8,044,000 allocations and 1565 MB total. It will - * increase as pref files are loaded and then settle back to these numbers - * afterwards. - * - * When it is settled, using jemalloc, the process VSZ starts at about - * 2000 MB and increases to around 2450 MB after about 10 minutes. - * It stabilizes at this value. - * - * Using the stock system malloc, the process VSZ starts at about - * 2200 MB and increases to around 3219 MB in the first 10 minutes. - * After that, on a machine with 3GB of RAM, it starts to swap and - * takes so long to load a prefs file that there's another one there - * before it's done. - */ - -#if SXE_DEBUG -int kit_alloc_diagnostics; -#define KIT_ALLOC_LOG(...) do { if (kit_alloc_diagnostics) SXEL6(__VA_ARGS__); } while (0) -#else -#define KIT_ALLOC_LOG(...) 
do { } while (0) -#endif - -// The level to which kit-memory has been initialized -enum kit_memory_init_level { - KIT_MEMORY_INIT_NONE, // Uninitialized - KIT_MEMORY_INIT_SOFT, // Internally initialized by kit_counters with minimal defaults; primarily for memory leak checks - KIT_MEMORY_INIT_HARD // Initialized by the user of libkit; this can only be done once -} kit_memory_init_level = KIT_MEMORY_INIT_NONE; - -struct kit_memory_counters kit_memory_counters; // Global counters -size_t kit_memory_allocated_max = 0; // Tracks the high watermark ever allocated with jemalloc -static bool kit_memory_assert_on_enomem = false; // By default, return NULL on failure to allocate memory - -#ifdef __linux__ -static int proc_statm_fd = -1; // Open file descriptor on /proc//statm; must be opened before calling chroot -#endif - -static unsigned long long -counter_bytes_combine_handler(int threadnum) -{ - return threadnum <= 0 ? kit_allocated_bytes() : 0; -} - -/** - * Initialize the memory interface - * - * @param hard True if this is an external call to initialize kit-memory - * - * @note This function should only be called internally (by kit-memory and kit-counters). - */ -void -kit_memory_init_internal(bool hard) -{ - enum kit_memory_init_level old_level = kit_memory_init_level; - - SXEA1(kit_memory_init_level != KIT_MEMORY_INIT_HARD, "Kit memory is already initialized"); - - // Set this immediately to prevent infinite recursion via kit_counters_add - kit_memory_init_level = hard ? 
KIT_MEMORY_INIT_HARD : KIT_MEMORY_INIT_SOFT; - - if (old_level != KIT_MEMORY_INIT_NONE) // If kit-memory was already initialized, return - return; - - kit_memory_counters.bytes = kit_counter_new_with_combine_handler("memory.bytes", counter_bytes_combine_handler); - kit_memory_counters.calloc = kit_counter_new("memory.calloc"); - kit_memory_counters.fail = kit_counter_new("memory.fail"); - kit_memory_counters.free = kit_counter_new("memory.free"); - kit_memory_counters.malloc = kit_counter_new("memory.malloc"); - kit_memory_counters.realloc = kit_counter_new("memory.realloc"); - -#ifdef __linux__ - char proc_statm_path[PATH_MAX]; - int pid; - - pid = getpid(); - snprintf(proc_statm_path, sizeof(proc_statm_path), "/proc/%d/statm", pid); - proc_statm_fd = open(proc_statm_path, O_RDONLY); -#endif - -#if SXE_DEBUG - const char *KIT_ALLOC_DIAGNOSTICS = getenv("KIT_ALLOC_DIAGNOSTICS"); - - if (KIT_ALLOC_DIAGNOSTICS && KIT_ALLOC_DIAGNOSTICS[0] && KIT_ALLOC_DIAGNOSTICS[0] != '0') - kit_alloc_diagnostics = 1; -#endif -} - -/** - * Initialize the kit memory management interface; this should done once, normally on an application wide basis - * - * @param assert_on_enomem true to enable assertions on memory allocation failure, false (default) to make sure no one changes - */ -void -kit_memory_initialize(bool assert_on_enomem) -{ - kit_memory_assert_on_enomem = assert_on_enomem; - kit_memory_init_internal(true); -} - -bool -kit_memory_is_initialized(void) -{ - return kit_memory_init_level != KIT_MEMORY_INIT_NONE; -} - -__attribute__((malloc)) void * -KIT_ALLOC_MANGLE(kit_malloc)(size_t size KIT_ALLOC_SOURCE_PROTO) -{ - void *result = je_malloc(size); - - SXEA1(!kit_memory_assert_on_enomem || result, "%s: failed to allocate %zu bytes of memory", __FUNCTION__, size); - kit_counter_incr(KIT_COUNTER_MEMORY_MALLOC); - - if (result == NULL) - kit_counter_incr(KIT_COUNTER_MEMORY_FAIL); /* COVERAGE EXCLUSION: todo: mock malloc() to create failure */ - - KIT_ALLOC_LOG("%s: %d: %p = 
kit_malloc(%zu)", file, line, result, size); - return result; -} - -__attribute__((malloc)) void * -KIT_ALLOC_MANGLE(kit_calloc)(size_t num, size_t size KIT_ALLOC_SOURCE_PROTO) -{ - void *result = je_calloc(num, size); - - SXEA1(!kit_memory_assert_on_enomem || result, "%s: failed to allocate %zu %zu byte objects", __FUNCTION__, num, size); - kit_counter_incr(KIT_COUNTER_MEMORY_CALLOC); - - if (result == NULL) - kit_counter_incr(KIT_COUNTER_MEMORY_FAIL); /* COVERAGE EXCLUSION: todo: mock calloc() to create failure */ - - KIT_ALLOC_LOG("%s: %d: %p = kit_calloc(%zu, %zu)", file, line, result, num, size); - return result; -} - -void -KIT_ALLOC_MANGLE(kit_free)(void *ptr KIT_ALLOC_SOURCE_PROTO) -{ - if (ptr) { - KIT_ALLOC_LOG("%s: %d: kit_free(%p)", file, line, ptr); - kit_counter_incr(KIT_COUNTER_MEMORY_FREE); - SXEA6(!(((long)ptr) & 7), "ungranular free(%p)", ptr); - } -#if SXE_DEBUG - else if (kit_alloc_diagnostics > 1) - SXEL6("%s: %d: kit_free((nil))", file, line); -#endif - - je_free(ptr); -} - -/*- - * Special case for realloc() since it can behave like malloc() or free() ! 
- */ -void * -KIT_ALLOC_MANGLE(kit_realloc)(void *ptr, size_t size KIT_ALLOC_SOURCE_PROTO) -{ - void *result = je_realloc(ptr, size); - - if (result == NULL && (size || ptr == NULL)) { - SXEA1(!kit_memory_assert_on_enomem, "%s: failed to reallocate object to %zu bytes", __FUNCTION__, size); - kit_counter_incr(KIT_COUNTER_MEMORY_FAIL); /* COVERAGE EXCLUSION: todo: mock realloc() to create failure */ - } - - if (ptr == NULL && result != NULL) - kit_counter_incr(KIT_COUNTER_MEMORY_MALLOC); - else if (size == 0 && result == NULL) - kit_counter_incr(KIT_COUNTER_MEMORY_FREE); - else - kit_counter_incr(KIT_COUNTER_MEMORY_REALLOC); - - KIT_ALLOC_LOG("%s: %d: %p = kit_realloc(%p, %zu)", file, line, result, ptr, size); - return result; -} - -void * -KIT_ALLOC_MANGLE(kit_reduce)(void *ptr, size_t size KIT_ALLOC_SOURCE_PROTO) -{ - void *result; - - /*- - * We're aiming for two things here: - * - Don't turn realloc(NULL, 0) into malloc(0) - * - If realloc() fails, return the original - */ - if (ptr) { - if ((result = MOCKFAIL(KIT_ALLOC_MANGLE(kit_reduce), NULL, je_realloc(ptr, size))) == NULL) { - if (size == 0) - kit_counter_incr(KIT_COUNTER_MEMORY_FREE); - else { - kit_counter_incr(KIT_COUNTER_MEMORY_FAIL); - result = ptr; - } - } - - if (result != ptr) - KIT_ALLOC_LOG("%s: %d: %p = kit_realloc(%p, %zu)", file, line, result, ptr, size); - } else { - SXEA1(!size, "Cannot kit_reduce() NULL to size %zu", size); - result = NULL; - } - - return result; -} - -__attribute__((malloc)) char * -KIT_ALLOC_MANGLE(kit_strdup)(const char *txt KIT_ALLOC_SOURCE_PROTO) -{ - size_t len = strlen(txt); - void *result = je_malloc(len + 1); - - if (result == NULL) { - SXEA1(!kit_memory_assert_on_enomem, "%s: failed to allocate %zu bytes of memory", __FUNCTION__, len + 1); - kit_counter_incr(KIT_COUNTER_MEMORY_FAIL); /* COVERAGE EXCLUSION: todo: mock malloc() to create failure */ - } - - kit_counter_incr(KIT_COUNTER_MEMORY_MALLOC); - - if (result) - strcpy(result, txt); - - KIT_ALLOC_LOG("%s: %d: 
%p = kit_strdup(%p[%zu])", file, line, result, txt, len + 1); - return result; -} - -#if SXE_DEBUG - -/*- - * The debug version of jemalloc is built with --enable-debug and - * --enable-fill. We enable junk and redzone by default so that we - * detect memory underflows and some overflows. - * - * It's worth noting that this triggers an assert(): - * char *ptr = je_malloc(8); - * ptr[8] = 'x'; - * je_free(ptr); - * - * But this doesn't, due to the rounding that jemalloc does: - * char *ptr = je_malloc(3); - * ptr[3] = 'x'; - * je_free(ptr); - */ -const char *je_malloc_conf = "junk:true,redzone:true"; - -#else // For the non-debug build, the following diag wrappers are needed for openssl hooks - -void * -kit_malloc_diag(size_t size, const char *file, int line) -{ - SXE_UNUSED_PARAMETER(file); - SXE_UNUSED_PARAMETER(line); - return kit_malloc(size); -} - -void * -kit_realloc_diag(void *ptr, size_t size, const char *file, int line) -{ - SXE_UNUSED_PARAMETER(file); - SXE_UNUSED_PARAMETER(line); - return kit_realloc(ptr, size); -} - -void -kit_free_diag(void *ptr, const char *file, int line) -{ - SXE_UNUSED_PARAMETER(file); - SXE_UNUSED_PARAMETER(line); - kit_free(ptr); -} - -#endif - -size_t -kit_allocated_bytes(void) -{ - static bool kit_mib_init = false; // Set to true once memory mib ids are initialized - static size_t kit_epoch_mib[1]; // Binary mib id of the "epoch" - static size_t kit_epoch_mib_len = 1; // Number of elements in the mib id - static size_t kit_allocated_mib[2]; // Binary mib id of "stats.allocated" - static size_t kit_allocated_mib_len = 2; // Number of elements in the mib id - size_t len, alloc; - uint64_t epoch; - - if (!kit_mib_init) { - // To optimize collection of je_malloc statistics, get binary MIB ids - // - SXEA1(!je_mallctlnametomib("epoch", kit_epoch_mib, &kit_epoch_mib_len) - && !je_mallctlnametomib("stats.allocated", kit_allocated_mib, &kit_allocated_mib_len), - "Failed to generated binary mib id for je_malloc's epoch or 
stats.allocated"); - kit_mib_init = true; - } - - epoch = 1; - len = sizeof(epoch); - je_mallctlbymib(kit_epoch_mib, kit_epoch_mib_len, &epoch, &len, &epoch, len); - - len = sizeof(alloc); - je_mallctlbymib(kit_allocated_mib, kit_allocated_mib_len, &alloc, &len, NULL, 0); - - if (alloc > kit_memory_allocated_max) - kit_memory_allocated_max = alloc; - - return alloc; -} - -uint64_t -kit_thread_allocated_bytes(void) -{ - static __thread uint64_t *allocatedp, *deallocatedp; - size_t len; - - len = sizeof(allocatedp); - if (!allocatedp) { - je_mallctl("thread.allocatedp", &allocatedp, &len, NULL, 0); - SXEA1(allocatedp, "Couldn't obtain thread.allocatedp, is jemalloc built with --enable-stats?"); - } - - len = sizeof(deallocatedp); - if (!deallocatedp) { - je_mallctl("thread.deallocatedp", &deallocatedp, &len, NULL, 0); - SXEA1(deallocatedp, "Couldn't obtain thread.deallocatedp, is jemalloc built with --enable-stats?"); - } - - return *allocatedp - *deallocatedp; -} - -bool -kit_memory_log_growth(__printflike(1, 2) int (*printer)(const char *format, ...)) -{ - static size_t jemalloc_allocated_max = 0; - size_t jemalloc_allocated_cur = kit_allocated_bytes(); - bool growth = false; - - if (jemalloc_allocated_cur > jemalloc_allocated_max) { - (*printer)("Maximum memory allocated via jemalloc %zu (previous maximum %zu)\n", jemalloc_allocated_cur, - jemalloc_allocated_max); - jemalloc_allocated_max = jemalloc_allocated_cur; - growth = true; - } - -#ifdef __GLIBC__ - static size_t glibc_allocated_max = 0; // High watermark of glibc bytes allocated - struct mallinfo glibc_mallinfo; - - glibc_mallinfo = mallinfo(); - - if ((size_t)glibc_mallinfo.uordblks > glibc_allocated_max) { - (*printer)("Maximum memory allocated via glibc %zu (previous maximum %zu)\n", (size_t)glibc_mallinfo.uordblks, - glibc_allocated_max); - glibc_allocated_max = (size_t)glibc_mallinfo.uordblks; - growth = true; - } -#endif - -#ifdef __linux__ - static size_t rss_max = 0; // High watermark of RSS pages 
allocated - long long rss_cur; - char buf[256]; // Buffer for /proc//statm contents (e.g. "14214701 8277571 1403 269 0 14138966 0") - ssize_t len; - char *rss_str; - char *end_ptr; - - // If the /proc//statm is open, seek to start and read contents - if (proc_statm_fd >= 0 && lseek(proc_statm_fd, 0, SEEK_SET) != (off_t)-1 - && (len = read(proc_statm_fd, buf, sizeof(buf) - 1)) >= 0) { - buf[len] = '\0'; - - if ((rss_str = strchr(buf, ' '))) { // Find the RSS number in the string - rss_str++; - - // Try to convert to a number and, if a new maximum, log it - if ((rss_cur = strtoll(rss_str, &end_ptr, 10)) > 0 && *end_ptr == ' ' && (size_t)rss_cur > rss_max) { - (*printer)("Maximum memory allocated in RSS pages %zu (previous maximum %zu)\n", (size_t)rss_cur, rss_max); - rss_max = (size_t)rss_cur; - growth = true; - } - } - } -#endif - - return growth; -} - -struct printer_visitor { - int written; - __printflike(1, 2) int (*printer)(const char *format, ...); -}; - -static void -memory_stats_line(void *visitor_void, const char *line) -{ - struct printer_visitor *visitor = visitor_void; - - if (visitor->written < 0) - return; /* COVERAGE EXCLUSION - Test printer failure case */ - - int written = (*visitor->printer)("%s", line); - - if (written >= 0) - visitor->written += written; - else - visitor->written = written; /* COVERAGE EXCLUSION - Test printer failure case */ -} - -bool -kit_memory_log_stats(__printflike(1, 2) int (*printer)(const char *format, ...), const char *options) -{ - struct printer_visitor visitor; - - visitor.written = 0; - visitor.printer = printer; - je_malloc_stats_print(memory_stats_line, &visitor, options ?: "gblxe"); - return visitor.written > 0; -} diff --git a/lib-kit/kit-alloc.h b/lib-kit/kit-alloc.h deleted file mode 100644 index a408ead..0000000 --- a/lib-kit/kit-alloc.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2022 Cisco Systems, Inc. 
and its affiliates - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - * the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS - * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR - * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER - * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * SPDX-License-Identifier: MIT - */ - -#ifndef KIT_ALLOC_H -#define KIT_ALLOC_H - -#include -#include -#include - -#include "kit-counters.h" -#include "sxe-log.h" - -struct kit_memory_counters { - kit_counter_t bytes; - kit_counter_t calloc; - kit_counter_t fail; - kit_counter_t free; - kit_counter_t malloc; - kit_counter_t realloc; -}; - -extern struct kit_memory_counters kit_memory_counters; -extern size_t kit_memory_max_allocated; - -#define KIT_COUNTER_MEMORY_BYTES (kit_memory_counters.bytes) -#define KIT_COUNTER_MEMORY_CALLOC (kit_memory_counters.calloc) -#define KIT_COUNTER_MEMORY_FAIL (kit_memory_counters.fail) -#define KIT_COUNTER_MEMORY_FREE (kit_memory_counters.free) -#define KIT_COUNTER_MEMORY_MALLOC (kit_memory_counters.malloc) -#define KIT_COUNTER_MEMORY_REALLOC (kit_memory_counters.realloc) - -#if SXE_DEBUG -/* Use bin/kit-alloc-analyze to parse kit_alloc_diagnostics lines */ -extern int kit_alloc_diagnostics; -#define KIT_ALLOC_SET_LOG(n) do { kit_alloc_diagnostics = (n); } while (0) // Turn kit_alloc log messages on/off in debug -#define KIT_ALLOC_SOURCE_PROTO , const char *file, int line -#define KIT_ALLOC_MANGLE(name) name ## _diag - -#define kit_malloc(size) kit_malloc_diag(size, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ -#define kit_reduce(ptr, size) kit_reduce_diag(ptr, size, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ -#define kit_strdup(txt) kit_strdup_diag(txt, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ -#define kit_calloc(num, size) kit_calloc_diag(num, size, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ -#define kit_realloc(ptr, size) kit_realloc_diag(ptr, size, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are supposed to look like functions */ -#define kit_free(ptr) kit_free_diag(ptr, __FILE__, __LINE__) /* CONVENTION EXCLUSION: these are 
supposed to look like functions */ - -#else -#define KIT_ALLOC_SET_LOG(n) do { } while (0) -#define KIT_ALLOC_SOURCE_PROTO -#define KIT_ALLOC_MANGLE(name) name -#define KIT_ALLOC_SUFFIX - -extern void *kit_malloc_diag(size_t size, const char *file, int line); -extern void *kit_realloc_diag(void *ptr, size_t size, const char *file, int line); -extern void kit_free_diag(void *ptr, const char *file, int line); -#endif - -extern void kit_memory_initialize(bool assert_on_enomem); -extern bool kit_memory_is_initialized(void); -extern size_t kit_allocated_bytes(void); -extern uint64_t kit_thread_allocated_bytes(void); -extern __attribute__((malloc)) void *KIT_ALLOC_MANGLE(kit_malloc)(size_t size KIT_ALLOC_SOURCE_PROTO); -extern void *KIT_ALLOC_MANGLE(kit_reduce)(void *ptr, size_t size KIT_ALLOC_SOURCE_PROTO); -extern __attribute__((malloc)) char *KIT_ALLOC_MANGLE(kit_strdup)(const char *txt KIT_ALLOC_SOURCE_PROTO); -extern __attribute__((malloc)) void *KIT_ALLOC_MANGLE(kit_calloc)(size_t num, size_t size KIT_ALLOC_SOURCE_PROTO); -extern void *KIT_ALLOC_MANGLE(kit_realloc)(void *ptr, size_t size KIT_ALLOC_SOURCE_PROTO); -extern void KIT_ALLOC_MANGLE(kit_free)(void *ptr KIT_ALLOC_SOURCE_PROTO); -extern bool kit_memory_log_growth(__printflike(1, 2) int (*printer)(const char *format, ...)); -extern bool kit_memory_log_stats(__printflike(1, 2) int (*printer)(const char *format, ...), const char *options); - -#endif diff --git a/lib-kit/kit-arc4random.c b/lib-kit/kit-arc4random.c index b0dd7f4..af80a42 100644 --- a/lib-kit/kit-arc4random.c +++ b/lib-kit/kit-arc4random.c @@ -47,7 +47,7 @@ static __thread size_t rs_have; /* valid bytes at end of rs_buf */ static __thread size_t rs_count; /* bytes till reseed */ static inline void -_rs_init(uint8_t *buf, size_t n) +_rs_init(const uint8_t *buf, size_t n) { if (n >= KEYSZ + IVSZ) { chacha_keysetup(&rs, buf, KEYSZ * 8, 0); @@ -56,7 +56,7 @@ _rs_init(uint8_t *buf, size_t n) } static inline void -_rs_rekey(uint8_t *dat, size_t datlen) 
+_rs_rekey(const uint8_t *dat, size_t datlen) { /* fill rs_buf with the keystream */ chacha_encrypt_bytes(&rs, rs_buf, rs_buf, RSBUFSZ); @@ -163,7 +163,7 @@ kit_arc4random_uniform(uint32_t upper_bound) SXEA1(upper_bound >= 2, "Invalid upper_bound value %u", upper_bound); /* 2**32 % x == (2**32 - x) % x */ - min = -upper_bound % upper_bound; + min = (uint32_t)(-(int32_t)upper_bound) % upper_bound; /* * This could theoretically loop forever but each retry has diff --git a/lib-kit/kit-base-encode.c b/lib-kit/kit-base-encode.c index 8669ec2..5d96a20 100644 --- a/lib-kit/kit-base-encode.c +++ b/lib-kit/kit-base-encode.c @@ -28,7 +28,7 @@ #include "kit.h" #include -#if SXE_DEBUG +#if SXE_DEBUG || defined MAK_CHECKED #include #endif @@ -268,14 +268,14 @@ decode(uint8_t *out, size_t *olen, const char *in, size_t *ilen, unsigned flags, *olen = 0; buf = 0; while (!done && *ilen < maxilen) - switch (c = cfg->txtmap[(int)in[*ilen]]) { + switch (c = cfg->txtmap[(uint8_t)in[*ilen]]) { case WS: if (flags & KIT_BASE_DECODE_SKIP_WHITESPACE) { (*ilen)++; continue; } - /* FALLTHRU */ + __FALLTHROUGH; case IN: done = 1; break; diff --git a/lib-kit/kit-bits.c b/lib-kit/kit-bits.c index ac4f1a6..3b0544d 100644 --- a/lib-kit/kit-bits.c +++ b/lib-kit/kit-bits.c @@ -57,7 +57,7 @@ kit_bits_equal(const void *s1, const void *s2, size_t num_bits) } bool -kit_bits_isset_any(void *bits, size_t num_bits) +kit_bits_isset_any(const void *bits, size_t num_bits) { size_t byte, whole_bytes = num_bits / 8; diff --git a/lib-kit/kit-bool.c b/lib-kit/kit-bool.c new file mode 100644 index 0000000..dc04821 --- /dev/null +++ b/lib-kit/kit-bool.c @@ -0,0 +1,19 @@ +#include "kit-bool.h" + +bool +kit_bool_from_strn(bool *val, const char *txt, unsigned len) +{ + if ((len == 1 && strncasecmp(txt, "1", len) == 0) + || (len == 3 && strncasecmp(txt, "yes", len) == 0) + || (len == 4 && strncasecmp(txt, "true", len) == 0)) + return *val = true; + + if ((len == 1 && strncasecmp(txt, "0", len) == 0) + || (len == 2 && 
strncasecmp(txt, "no", len) == 0) + || (len == 5 && strncasecmp(txt, "false", len) == 0)) { + *val = false; + return true; + } + + return false; +} diff --git a/lib-kit/kit-bool.h b/lib-kit/kit-bool.h index d1de694..ffa8bc2 100644 --- a/lib-kit/kit-bool.h +++ b/lib-kit/kit-bool.h @@ -22,9 +22,18 @@ */ #include +#include + +#include "kit-bool-proto.h" static inline const char * kit_bool_to_str(bool flag) { return flag ? "true" : "false"; } + +static inline bool +kit_bool_from_str(bool *val, const char *txt) +{ + return kit_bool_from_strn(val, txt, (unsigned)strlen(txt)); +} diff --git a/lib-kit/kit-deviceid.c b/lib-kit/kit-deviceid.c index 8e790f8..c7fec67 100644 --- a/lib-kit/kit-deviceid.c +++ b/lib-kit/kit-deviceid.c @@ -21,8 +21,6 @@ * SPDX-License-Identifier: MIT */ -#include - #include "kit.h" #include "sxe-log.h" #include "sxe-util.h" @@ -75,3 +73,4 @@ kit_deviceid_cmp(const struct kit_deviceid *deviceid1, const struct kit_deviceid { return deviceid1 == NULL ? (deviceid2 == NULL ? 0 : -1) : deviceid2 == NULL ? 
1 : memcmp(deviceid1, deviceid2, sizeof(struct kit_deviceid)); } + diff --git a/lib-kit/kit-fsevent-inotify.c b/lib-kit/kit-fsevent-inotify.c index d8b067c..fa46f7e 100644 --- a/lib-kit/kit-fsevent-inotify.c +++ b/lib-kit/kit-fsevent-inotify.c @@ -23,29 +23,33 @@ #ifdef __linux__ -#include "kit-fsevent.h" +#include +#include #include +#include "kit-fsevent.h" + void kit_fsevent_init(struct kit_fsevent *me) { - SXEA1((me->fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC)) != -1, "Couldn't inotify_init()"); + SXEA1((me->fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC)) != -1, "Couldn't inotify_init; error '%s'", strerror(errno)); } void kit_fsevent_fini(struct kit_fsevent *me) { close(me->fd); + me->fd = -1; } int -kit_fsevent_add_watch(struct kit_fsevent *me, const char *mon, int how) +kit_fsevent_add_watch(const struct kit_fsevent *me, const char *mon, int how) { return inotify_add_watch(me->fd, mon, how); } void -kit_fsevent_rm_watch(struct kit_fsevent *me, int fd) +kit_fsevent_rm_watch(const struct kit_fsevent *me, int fd) { inotify_rm_watch(me->fd, fd); } @@ -57,7 +61,7 @@ kit_fsevent_iterator_init(struct kit_fsevent_iterator *me) } kit_fsevent_ev_t * -kit_fsevent_read(struct kit_fsevent *me, struct kit_fsevent_iterator *iter) +kit_fsevent_read(const struct kit_fsevent *me, struct kit_fsevent_iterator *iter) { kit_fsevent_ev_t *ev; diff --git a/lib-kit/kit-fsevent.h b/lib-kit/kit-fsevent.h index 843dbcd..0c7e01f 100644 --- a/lib-kit/kit-fsevent.h +++ b/lib-kit/kit-fsevent.h @@ -79,11 +79,6 @@ struct kit_fsevent_iterator { ssize_t pos, len; }; -void kit_fsevent_init(struct kit_fsevent *me); -void kit_fsevent_fini(struct kit_fsevent *me); -int kit_fsevent_add_watch(struct kit_fsevent *me, const char *mon, int how); -void kit_fsevent_rm_watch(struct kit_fsevent *me, int fd); -void kit_fsevent_iterator_init(struct kit_fsevent_iterator *me); -kit_fsevent_ev_t *kit_fsevent_read(struct kit_fsevent *me, struct kit_fsevent_iterator *iter); +#include "kit-fsevent-inotify-proto.h" 
#endif diff --git a/lib-kit/kit-graphitelog.c b/lib-kit/kit-graphitelog.c index ac3aa47..60dd705 100644 --- a/lib-kit/kit-graphitelog.c +++ b/lib-kit/kit-graphitelog.c @@ -36,10 +36,11 @@ #include "kit-safe-rw.h" #include "kit-graphitelog.h" -static __thread int graphitelog_fd = -1; +static __thread int graphitelog_fd = -1; static volatile unsigned graphitelog_json_limit; -static volatile unsigned graphitelog_interval = 0; -static volatile bool timetodie = false; +static volatile unsigned graphitelog_interval; +static volatile unsigned graphitelog_timeout_ms = -1; +static volatile bool timetodie; struct kit_graphitelog_buffer { char buf[INT16_MAX]; @@ -60,7 +61,7 @@ kit_graphitelog_complete(struct kit_graphitelog_buffer *buffer) SXEL3("graphitelog buffer overflow - graphite data has been truncated and is invalid"); /* COVERAGE EXCLUSION: Not possible to overflow with current stats */ buffer->json_complete = 1; - kit_safe_write(graphitelog_fd, buffer->buf, (size_t)buffer->pos, -1); + kit_safe_write(graphitelog_fd, buffer->buf, (size_t)buffer->pos, graphitelog_timeout_ms); } } @@ -92,63 +93,74 @@ kit_graphitelog_counter_callback(void *v, const char *key, const char *value) * * @param json_limit Maximum number of counters in a single json line * @param interval Seconds between outputting counters to the graphite log + * @param interval Milliseconds to wait on poll() while writing for the graphite log + */ void -kit_graphitelog_update_set_options(unsigned json_limit, unsigned interval) +kit_graphitelog_update_set_options(unsigned json_limit, unsigned interval, unsigned timeout_ms) { + SXEL6("(json_limit=%u,interval=%u, timeout_ms=%u)", json_limit, interval, timeout_ms); graphitelog_json_limit = json_limit; - graphitelog_interval = interval; - + graphitelog_interval = interval; + graphitelog_timeout_ms = timeout_ms; } /** * Launch the graphite logging thread * - * @param arg Pointer to a struct kit_graphitelog_thread containing the log file - * descriptor and counter 
slot. + * @param arg Pointer to a struct kit_graphitelog_thread containing the log file descriptor and counter slot. */ void * kit_graphitelog_start_routine(void *arg) { - struct kit_graphitelog_thread *thr = arg; - struct kit_graphitelog_buffer buffer; - uint64_t now_usec, sleep_ms; - bool bedtime, exittime; + uint64_t interval_ns, sleep_ns, wall_ns; + const struct kit_graphitelog_thread *thr = arg; + struct kit_graphitelog_buffer buffer; + struct timespec wall_time, delay_time; - SXEL4("kit_graphitelog_start_routine(): thread started"); + SXEL4("(): thread started"); kit_counters_init_thread(thr->counter_slot); - graphitelog_fd = thr->fd; + if (thr->started) + thr->started(); + delay_time.tv_sec = 0; + graphitelog_fd = thr->fd; + sleep_ns = 0; // Shut gcc up SXEL6("Graphitelog is %s", graphitelog_fd >= 0 ? "enabled" : "disabled"); - for (exittime = false; !exittime; ) { - if (timetodie) - exittime = true; /* This will be our last time through! */ - - SXEA1(graphitelog_interval, "No configuration acquired; cannot run graphitelog thread"); - time(&buffer.now); + for (;;) { + SXEA1(graphitelog_interval, "No configuration acquired; cannot run graphitelog thread"); + SXEA1(clock_gettime(CLOCK_REALTIME, &wall_time) == 0, "Can't get the wall clock time"); + buffer.now = wall_time.tv_sec; if (graphitelog_fd >= 0) { buffer.counter = 0; - kit_counters_mib_text("", &buffer, kit_graphitelog_counter_callback, -1, COUNTER_FLAG_NONE); + kit_counters_mib_text("", &buffer, kit_graphitelog_counter_callback, -1, KIT_COUNTERS_FLAG_NONE); kit_graphitelog_complete(&buffer); } - for (bedtime = true; !timetodie && bedtime; ) { - now_usec = kit_time_nsec() / 1000; + while (!timetodie) { + interval_ns = graphitelog_interval * 1000000000ULL; + SXEA1(clock_gettime(CLOCK_REALTIME, &wall_time) == 0, "Can't get the wall clock time"); + wall_ns = wall_time.tv_sec * 1000000000ULL + wall_time.tv_nsec; + sleep_ns = interval_ns - (wall_ns + interval_ns / 2) % interval_ns; - /* Aim to wake up at the 
next half-interval */ - sleep_ms = graphitelog_interval * 1000000 - - (now_usec + graphitelog_interval * 500000) % (graphitelog_interval * 1000000); + if (sleep_ns < 1000000000ULL) + break; - if (sleep_ms > 1000000) - /* Sleep for 3/4 second at a time so that we wake up reasonably quickly when it's time to die */ - sleep_ms = 750000; - else - bedtime = false; - usleep(sleep_ms); + /* Sleep for at most 3/4 second at a time so that we wake up reasonably quickly when it's time to die + */ + delay_time.tv_nsec = 750000000; + nanosleep(&delay_time, NULL); } + + if (timetodie) + break; + + delay_time.tv_nsec = sleep_ns; + nanosleep(&delay_time, NULL); } + SXEL4(": thread exiting"); return NULL; } @@ -162,4 +174,3 @@ kit_graphitelog_terminate(void) { timetodie = true; } - diff --git a/lib-kit/kit-graphitelog.h b/lib-kit/kit-graphitelog.h index 84a17e4..1f33cfb 100644 --- a/lib-kit/kit-graphitelog.h +++ b/lib-kit/kit-graphitelog.h @@ -25,6 +25,7 @@ #define GRAPHITELOG_H struct kit_graphitelog_thread { + void (*started)(void); unsigned counter_slot; int fd; }; diff --git a/lib-kit/kit-guid.c b/lib-kit/kit-guid.c index 11c6092..6cfc8b1 100644 --- a/lib-kit/kit-guid.c +++ b/lib-kit/kit-guid.c @@ -21,7 +21,6 @@ * SPDX-License-Identifier: MIT */ -#include #include #include @@ -85,3 +84,4 @@ kit_guid_cmp(const struct kit_guid *guid1, const struct kit_guid *guid2) return guid1 == NULL ? (guid2 == NULL ? 0 : -1) : guid2 == NULL ? 
1 : memcmp(guid1, guid2, sizeof(struct kit_guid)); } + diff --git a/lib-kit/kit-hostname.c b/lib-kit/kit-hostname.c index efa5efc..5028904 100644 --- a/lib-kit/kit-hostname.c +++ b/lib-kit/kit-hostname.c @@ -21,41 +21,39 @@ * SPDX-License-Identifier: MIT */ -#include "kit.h" - +#include #include #include #include #include +#include "kit.h" +#include "kit-mockfail.h" +#include "kit-time.h" + #define HOSTNAME_LOOKUP_INTERVAL 60 /* Lookup the hostname every 60 seconds from each thread */ +/** + * Get the hostname (efficiently if the cuurent thread updates the cached kit-time with kit_time_cached_update) + * + * @note Once a non-zero value is returned by kit_time_cached_sec, this function will not update the hostname again until the + * cached seconds value increases by 60s. This means less system calls are made but changes will take a minute to notice. + */ const char * kit_hostname(void) { - static __thread char hostname[MAXHOSTNAMELEN]; - static __thread int32_t then = -1; - int32_t now; + static __thread char hostname[MAXHOSTNAMELEN]; + static __thread uint32_t then = 0; + uint32_t now; now = kit_time_cached_sec(); - if (now == 0 || now > then + HOSTNAME_LOOKUP_INTERVAL) { - if (gethostname(hostname, sizeof(hostname)) != 0) - snprintf(hostname, sizeof(hostname), "Amnesiac"); /* COVERAGE EXCLUSION: todo: Make gethostname() fail */ - then = now; - } - return hostname; -} + if (then == 0 || now > then + HOSTNAME_LOOKUP_INTERVAL) { + if (MOCKERROR(kit_hostname, -1, EFAULT, gethostname(hostname, sizeof(hostname))) != 0) + snprintf(hostname, sizeof(hostname), "Amnesiac"); -const char * -kit_short_hostname(void) -{ - const char *hostname; - char *dot; - - hostname = kit_hostname(); - if ((dot = strchr(hostname, '.')) != NULL && (dot = strchr(dot + 1, '.')) != NULL) - *dot = '\0'; /* COVERAGE EXCLUSION: todo: Some hostnames contain two '.'s, some don't */ + then = now; + } return hostname; } diff --git a/lib-kit/kit-infolog.c b/lib-kit/kit-infolog.c index 9a1bf98..f234cf1 
100644 --- a/lib-kit/kit-infolog.c +++ b/lib-kit/kit-infolog.c @@ -21,8 +21,6 @@ * SPDX-License-Identifier: MIT */ -#include -#include #include #include #include @@ -33,6 +31,8 @@ #endif #include "kit-infolog.h" +#include "kit-safe-rw.h" +#include "kit-time.h" #define DELAY_BETWEEN_IDENTICAL_LOG_ENTRIES 1 #define ALLOWED_BURST_FOR_IDENTICAL_LOG_ENTRIES 10U @@ -42,20 +42,20 @@ unsigned kit_infolog_flags; __printflike(1, 2) int kit_infolog_printf(const char *format, ...) { - static __thread char buf[KIT_INFOLOG_MAX_LINE]; - static __thread char previous_buf[KIT_INFOLOG_MAX_LINE]; - static __thread uint32_t last_log_ts = 0U; - static __thread unsigned burst_counter = 0U; + static __thread char buf[KIT_INFOLOG_MAX_LINE]; + static __thread char previous_buf[KIT_INFOLOG_MAX_LINE]; + static __thread uint32_t last_log_ts = 0; + static __thread unsigned burst_counter = 0; uint32_t now = kit_time_sec(); - int i, len; - va_list ap; + int i, len; + va_list ap; #ifdef __APPLE__ pid_t thread_id = syscall(SYS_thread_selfid); #elif defined(__FreeBSD__) pthread_t thread_id = pthread_self(); #else /* __linux__ */ - pid_t thread_id = syscall(SYS_gettid); + pid_t thread_id = gettid(); #endif len = snprintf(buf, sizeof(buf), "%ld ", (long)thread_id); diff --git a/lib-kit/kit-queue.h b/lib-kit/kit-queue.h index 2f4113a..576fc97 100644 --- a/lib-kit/kit-queue.h +++ b/lib-kit/kit-queue.h @@ -38,9 +38,12 @@ #include #if SXE_DEBUG +#define QUEUE_MACRO_DEBUG_TRACE #define QUEUE_MACRO_DEBUG_TRASH +#define panic(...) SXEA1(0, __VA_ARGS__) #endif + /* * This file defines four types of data structures: singly-linked lists, * singly-linked tail queues, lists and tail queues. 
@@ -137,6 +140,13 @@ struct qm_trace { #define TRACEBUF struct qm_trace trace; #define TRACEBUF_INITIALIZER { __LINE__, 0, __FILE__, NULL } , +#define QMD_TRACE_INIT(head) do { \ + (head)->trace.prevline = 0; \ + (head)->trace.prevfile = NULL; \ + (head)->trace.lastline = __LINE__; \ + (head)->trace.lastfile = __FILE__; \ +} while (0) + #define QMD_TRACE_HEAD(head) do { \ (head)->trace.prevline = (head)->trace.lastline; \ (head)->trace.prevfile = (head)->trace.lastfile; \ @@ -154,6 +164,7 @@ struct qm_trace { #else /* !QUEUE_MACRO_DEBUG_TRACE */ #define QMD_TRACE_ELEM(elem) #define QMD_TRACE_HEAD(head) +#define QMD_TRACE_INIT(head) #define TRACEBUF #define TRACEBUF_INITIALIZER #endif /* QUEUE_MACRO_DEBUG_TRACE */ @@ -210,7 +221,7 @@ struct { \ /* * Singly-linked List functions. */ -#if (defined(_KERNEL) && defined(INVARIANTS)) +#if (defined(_KERNEL) && defined(INVARIANTS)) || SXE_DEBUG #define QMD_SLIST_CHECK_PREVPTR(prevp, elm) do { \ if (*(prevp) != (elm)) \ panic("Bad prevptr *(%p) == %p != %p", \ @@ -478,7 +489,7 @@ struct { \ * List functions. */ -#if (defined(_KERNEL) && defined(INVARIANTS)) +#if (defined(_KERNEL) && defined(INVARIANTS)) || SXE_DEBUG /* * QMD_LIST_CHECK_HEAD(LIST_HEAD *head, LIST_ENTRY NAME) * @@ -657,7 +668,7 @@ struct { \ /* * Tail queue functions. 
*/ -#if (defined(_KERNEL) && defined(INVARIANTS)) +#if (defined(_KERNEL) && defined(INVARIANTS)) || SXE_DEBUG /* * QMD_TAILQ_CHECK_HEAD(TAILQ_HEAD *head, TAILQ_ENTRY NAME) * @@ -768,7 +779,7 @@ struct { \ #define TAILQ_INIT(head) do { \ TAILQ_FIRST((head)) = NULL; \ (head)->tqh_last = &TAILQ_FIRST((head)); \ - QMD_TRACE_HEAD(head); \ + QMD_TRACE_INIT(head); \ } while (0) #define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ diff --git a/lib-kit/kit-safe-rw.c b/lib-kit/kit-safe-rw.c index dab8d03..561ba27 100644 --- a/lib-kit/kit-safe-rw.c +++ b/lib-kit/kit-safe-rw.c @@ -53,7 +53,7 @@ kit_safe_write(const int fd, const void *const buf_, size_t count, const int tim } SXE_EARLY_OUT: - result = (ssize_t)(buf - (const char *)buf_); + result = buf - (const char *)buf_; SXEL7("kit_safe_write(fd=%d, buf=%p, count=%zu, timeout=%d){} // %zd", fd, buf_, count, timeout, result); return result; @@ -81,7 +81,7 @@ kit_safe_read(const int fd, void *const buf_, size_t count) buf += readnb; } while (count > 0); - result = (ssize_t)(buf - (unsigned char *)buf_); + result = buf - (unsigned char *)buf_; SXE_EARLY_OUT: SXER7("return %zd", result); diff --git a/lib-kit/kit-sortedarray.c b/lib-kit/kit-sortedarray.c index 6dab3a8..1034428 100644 --- a/lib-kit/kit-sortedarray.c +++ b/lib-kit/kit-sortedarray.c @@ -21,29 +21,32 @@ * SPDX-License-Identifier: MIT */ -#include #include #include -#include -#include "kit.h" #include "kit-alloc.h" +#include "kit-mockfail.h" +#include "sxe-log.h" +#include "kit-sortedarray.h" /** - * Add an element to a sorted array + * Add an element to a sorted array with flags * * @param type Object definining the type of elements of the array * @param array Pointer to the address of the array in malloced storage; *array == NULL to allocate first time * @param count Pointer to the count of array elements * @param alloc Pointer to the number of array slots allocated; used as the initial allocation if *array == NULL * @param element Address of the element to 
insert - * @param flags KIT_SORTEDARRAY_DEFAULT or a combination of KIT_SORTEDARRAY_ALLOW_INSERTS | KIT_SORTEDARRAY_ALLOW_GROWTH - * | KIT_SORTEDARRAY_ZERO_COPY + * @param flags KIT_SORTEDARRAY_DEFAULT or one or more of KIT_SORTEDARRAY_ALLOW_INSERTS | KIT_SORTEDARRAY_ALLOW_GROWTH + * | KIT_SORTEDARRAY_ZERO_COPY * - * @return Pointer to inserted element (uninitialized if KIT_SORTEDARRAY_ZERO_COPY passed), or NULL on a duplicate or error + * @return Pointer to inserted element (uninitialized if KIT_SORTEDARRAY_ZERO_COPY in flags), or NULL on a duplicate or error + * + * @note If KIT_SORTEDARRAY_ZERO_COPY is set in flags, caller should assign the returned pointer a value immediately to avoid + * a possible unsorted array */ void * -kit_sortedarray_add(const struct kit_sortedelement_class *type, void **array, unsigned *count, unsigned *alloc, +kit_sortedarray_add(const struct kit_sortedarray_class *type, void **array, unsigned *count, unsigned *alloc, const void *element, unsigned flags) { unsigned pos; @@ -61,13 +64,12 @@ kit_sortedarray_add(const struct kit_sortedelement_class *type, void **array, un key = (const uint8_t *)element + type->keyoffset; if ((cmp = type->cmp(slot + type->keyoffset, key)) == 0) // Already exists - return false; + return NULL; if (cmp > 0) { // Out of order // This (actually the memmove() below) is expensive when building large pref blocks if (flags & KIT_SORTEDARRAY_ALLOW_INSERTS) { - pos = kit_sortedarray_find(type, *array, *count, key, &match); - slot = (uint8_t *)*array + pos * type->size; + pos = kit_sortedarray_find(type, *array, *count, key, &match); if (match) // Already exists return NULL; @@ -89,9 +91,9 @@ kit_sortedarray_add(const struct kit_sortedelement_class *type, void **array, un // First time through, allocate the array; if more space is needed, reallocate if (!*array || more) { - if (!(new_array = MOCKFAIL(kit_sortedarray_add, NULL, kit_realloc(*array, (*alloc + more) * type->size)))) { + if (!(new_array = 
MOCKERROR(kit_sortedarray_add, NULL, ENOMEM, kit_realloc(*array, (*alloc + more) * type->size)))) { SXEL2("Failed to allocate array of %u %zu byte elements", *alloc + more, type->size); - return false; + return NULL; } *array = new_array; @@ -106,11 +108,43 @@ kit_sortedarray_add(const struct kit_sortedelement_class *type, void **array, un if (!(flags & KIT_SORTEDARRAY_ZERO_COPY)) memcpy(slot, element, type->size); +#if SXE_DEBUG + /** + * In the case of KIT_SORTEDARRAY_ZERO_COPY - + * corrupt memory intentionally, forcing caller + * to populate the returned memory address + */ + else + memset(slot, 0xa5, type->size); +#endif (*count)++; return slot; } +/** + * Add an element to a sorted array using flags from the kit_sortedarray_class + * + * @param type Object definining the type of elements of the array + * @param array Pointer to the address of the array in malloced storage; *array == NULL to allocate first time + * @param count Pointer to the count of array elements + * @param alloc Pointer to the number of array slots allocated; used as the initial allocation if *array == NULL + * @param element Address of the element to insert + * + * @return Pointer to inserted element (uninitialized if type has flag KIT_SORTEDARRAY_ZERO_COPY), or NULL on a duplicate or + * error + * + * @note Affected by type->flags KIT_SORTEDARRAY_ALLOW_INSERTS, KIT_SORTEDARRAY_ALLOW_GROWTH, and KIT_SORTEDARRAY_ZERO_COPY. If + * KIT_SORTEDARRAY_ZERO_COPY is set caller SHOULD assign the returned pointer a value immediately to avoid a possible + * unsorted array. 
+ */ +void * +kit_sortedarray_add_element(const struct kit_sortedarray_class *type, void **array, unsigned *count, unsigned *alloc, + const void *element) +{ + return kit_sortedarray_add(type, array, count, alloc, element, type->flags); +} + /** * Search a sorted array for a key, returning the closest match * @@ -120,15 +154,15 @@ kit_sortedarray_add(const struct kit_sortedelement_class *type, void **array, un * @param key The key to search for * @param match_out Pointer to a bool set to true if there was an exact match and to false otherwise * - * @return Index of the element found on exact match, the index of the first element whose key is greater, or count if there is - * no greater element. + * @return Index of the element found on exact match, the index of the first element whose key is greater, count if there is + * no greater element, or ~0U if the compare function supports failures and returns one. */ unsigned -kit_sortedarray_find(const struct kit_sortedelement_class *type, const void *array, unsigned count, const void *key, +kit_sortedarray_find(const struct kit_sortedarray_class *type, const void *array, unsigned count, const void *key, bool *match_out) { unsigned i, lim, pos; - int cmp; + int cmp; *match_out = false; @@ -142,31 +176,148 @@ kit_sortedarray_find(const struct kit_sortedelement_class *type, const void *arr break; } + if (cmp == INT_MAX && (type->flags & KIT_SORTEDARRAY_CMP_CAN_FAIL)) { + SXEL2("(me=?, count=%u, key=?) 
// return ~0U due to comparison failure", count); + return ~0U; + } + if (cmp > 0) { pos = i + 1; lim--; } } - SXEA6(pos == count || type->cmp(key, (const uint8_t *)array + type->size * pos + type->keyoffset) <= 0, - "Unexpected pos %u looking for %s, landed on %s", pos, (*type->fmt)(key), - (*type->fmt)((const uint8_t *)array + type->size * pos + type->keyoffset)); + if (type->fmt) { + SXEA6(pos == count || type->cmp(key, (const uint8_t *)array + type->size * pos + type->keyoffset) <= 0, + "Unexpected pos %u looking for %s, landed on %s", pos, (*type->fmt)(key), + (*type->fmt)((const uint8_t *)array + type->size * pos + type->keyoffset)); - SXEL7("%s(me=?, count=%u, key=%s) // return %u, val %s, prev %s, next %s", __FUNCTION__, count, (*type->fmt)(key), pos, - pos < count ? type->fmt((const uint8_t *)array + type->size * pos + type->keyoffset) : "NOT FOUND", - pos > 0 ? type->fmt((const uint8_t *)array + type->size * (pos - 1) + type->keyoffset) : "NONE", - pos + 1 < count ? type->fmt((const uint8_t *)array + type->size * (pos + 1) + type->keyoffset) : "NONE"); + SXEL7("(me=?, count=%u, key=%s) // return %u, val %s, prev %s, next %s", count, (*type->fmt)(key), pos, + pos < count ? type->fmt((const uint8_t *)array + type->size * pos + type->keyoffset) : "NOT FOUND", + pos > 0 ? type->fmt((const uint8_t *)array + type->size * (pos - 1) + type->keyoffset) : "NONE", + pos + 1 < count ? type->fmt((const uint8_t *)array + type->size * (pos + 1) + type->keyoffset) : "NONE"); + } + else + SXEL7("(me=?, count=%u, key=?) // return %u", count, pos); return pos; } const void * -kit_sortedarray_get(const struct kit_sortedelement_class *class, const void *array, unsigned count, const void *key) +kit_sortedarray_get(const struct kit_sortedarray_class *class, const void *array, unsigned count, const void *key) { - bool match; + bool match = false; SXEA6(array || count == 0, "kit_sortedarray_get called with a NULL array and count %u", count); unsigned pos = array ? 
kit_sortedarray_find(class, array, count, key, &match) : count; return match ? (const uint8_t *)array + class->size * pos : NULL; } + +#define SORTEDARRAY_ELEM(type, array, idx) ((const char *)(array) + (idx) * (type)->size) +#define SORTEDARRAY_KEY( type, array, idx) (SORTEDARRAY_ELEM(type, array, idx) + (type)->keyoffset) + +/** + * Visit every element of the left array that is in the right array + * + * @param type An object defining the type of element in the array, including the visit function + * @param left The left side array + * @param left_count The number of elements in the left side array + * @param right The right side array + * @param right_count The number of elements in the right side array + * + * @return true if the arrays were entirely intersected, false if the intersection was terminated by the type's visit function + * or due to an error + */ +bool +kit_sortedarray_intersect(struct kit_sortedarray_class *type, const void *left, unsigned left_count, const void *right, + unsigned right_count) +{ + unsigned idx, median; + bool match; + + if (left_count == 0 || right_count == 0) + return true; + + if (left_count == 1) { + if (kit_sortedarray_find(type, right, right_count, (const char *)left + type->keyoffset, &match) == ~0U + && (type->flags & KIT_SORTEDARRAY_CMP_CAN_FAIL)) + return false; + + if (match && !type->visit(type->value, left)) // Visit element + return false; + + return true; + } + + median = left_count / 2; + + if ((idx = kit_sortedarray_find(type, right, right_count, SORTEDARRAY_KEY(type, left, median), &match)) == ~0U + && (type->flags & KIT_SORTEDARRAY_CMP_CAN_FAIL)) + return false; + + if (median > 0 && idx > 0) // If median is not the 1st element in left and there are elements in right before the match + if (!kit_sortedarray_intersect(type, left, median, right, idx)) + return false; + + if (match) { + if (!type->visit(type->value, SORTEDARRAY_ELEM(type, left, median))) // Visit element + return false; + + median++; // Move past 
the median in the left array + idx++; // Move past the match in the right array + + if (median == left_count - 1) { // There's exactly one left element after the (previous) median + if (idx < right_count) { // There's at least one right element after the match + if (kit_sortedarray_find(type, SORTEDARRAY_ELEM(type, right, idx), right_count - idx, + SORTEDARRAY_KEY(type, left, median), &match) == ~0U + && (type->flags & KIT_SORTEDARRAY_CMP_CAN_FAIL)) + return false; + + if (match && !type->visit(type->value, SORTEDARRAY_ELEM(type, left, median))) // Visit element + return false; + } + + return true; + } + } + else + median++; // Move past the median in the left array + + if (idx < right_count) + return kit_sortedarray_intersect(type, SORTEDARRAY_KEY(type, left, median), left_count - median, + SORTEDARRAY_KEY(type, right, idx), right_count - idx); + + return true; +} + + +/** + * Delete key element from sorted array, returning true if the key is found and removed and decrementing count. + * + * @param type Defines the type of the elements + * @param array The array to search + * @param count Number of elements in the array + * @param key The key to delete + * + * @return Boolean value equaling true if the key is found and deleted successfully, false otherwise. + */ +bool +kit_sortedarray_delete(const struct kit_sortedarray_class *type, void *array, unsigned *count, const void *key) +{ + bool match; + unsigned pos; + + if (!array || *count == 0) + return false; + + pos = kit_sortedarray_find(type, array, *count, key, &match); + + if (!match) + return false; + + memmove((uint8_t *)array + pos * type->size, (uint8_t *)array + (pos + 1) * type->size, (size_t)(*count - pos - 1) * type->size); + (*count)--; + + return true; +} diff --git a/lib-kit/kit-sortedarray.h b/lib-kit/kit-sortedarray.h new file mode 100644 index 0000000..4f8464e --- /dev/null +++ b/lib-kit/kit-sortedarray.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. 
and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#ifndef KIT_SORTEDARRAY_H +#define KIT_SORTEDARRAY_H + +#include +#include +#include + +#define KIT_SORTEDARRAY_DEFAULT 0 // No special behaviours +#define KIT_SORTEDARRAY_ALLOW_INSERTS 0x01 // Elements can be added to a sorted array out of order (expensive!) 
+#define KIT_SORTEDARRAY_ALLOW_GROWTH 0x02 // Sorted array is allowed to grow dynamically +#define KIT_SORTEDARRAY_ZERO_COPY 0x04 // Don't copy the key into the array, just return a pointer to the element +#define KIT_SORTEDARRAY_CMP_CAN_FAIL 0x08 // Comparing two values can fail, returning INT_MAX + +#define kit_sortedelement_class kit_sortedarray_class // For backward compatibility + +struct kit_sortedarray_class { + size_t size; // Sizeof the element (including padding if not packed) + size_t keyoffset; // Offset of the key within the element + int (*cmp)(const void *, const void *); // Comparitor for element keys + const char *(*fmt)(const void *); // Formatter for element keys that returns the LRU of 4 static buffers + bool (*visit)(void *, const void *); // Visitor for matching elements in intersections, false on error + void *value; // Arbitrary value passed as first parameter to visit + unsigned flags; // Flags as defined above +}; + +#include "kit-sortedarray-proto.h" + +static inline const char * +kit_sortedarray_element_to_str(const struct kit_sortedarray_class *type, const void *array, unsigned pos) +{ + return type->fmt((const uint8_t *)array + type->size * pos + type->keyoffset); +} + +#endif diff --git a/lib-kit/kit-strto.c b/lib-kit/kit-strto.c index 1689e8e..31ce1ff 100644 --- a/lib-kit/kit-strto.c +++ b/lib-kit/kit-strto.c @@ -162,3 +162,26 @@ kit_strtod(const char *str, char **endptr) return ret; } + +/** + * @note Sets errno to 0 on success or an error code on failure + */ +uint32_t +kit_strtou32(const char *str, char **endptr, int base) +{ + unsigned long ret; + + errno = 0; + + ret = strtoul(str, endptr, base); + if ((ret == 0) && (errno == 0)) { + check_zero_result(str, endptr, base); + } + + if (ret != (uint32_t)ret) { + errno = ERANGE; + return UINT32_MAX; + } + + return (uint32_t)ret; +} diff --git a/lib-kit/kit-tensor-private.h b/lib-kit/kit-tensor-private.h new file mode 100644 index 0000000..2fa52fa --- /dev/null +++ 
b/lib-kit/kit-tensor-private.h @@ -0,0 +1,9 @@ +#ifndef KIT_TENSOR_PRIVATE_H +#define KIT_TENSOR_PRIVATE_H + +#if defined(SXE_DEBUG) || defined(SXE_COVERAGE) // Define unique tags for mockfails +# define KIT_TENSOR_MAKE_BEGIN ((const char *)kit_tensor_init + 0) +# define KIT_TENSOR_MATMUL ((const char *)kit_tensor_init + 1) +#endif + +#endif diff --git a/lib-kit/kit-tensor.c b/lib-kit/kit-tensor.c new file mode 100644 index 0000000..5d0cd35 --- /dev/null +++ b/lib-kit/kit-tensor.c @@ -0,0 +1,402 @@ +/* + * Copyright (c) 2024 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "kit-tensor.h" +#include "kit-tensor-private.h" +#include "sxe-log.h" + +bool +kit_tensor_make_begin(struct kit_tensor *tensor, const char *dim_line, size_t *num_values) +{ + unsigned consumed; + + *num_values = 0; + + if (memcmp(dim_line, "DIMS:", 5) != 0) { + SXEL2(": Dimension line should begin with 'DIMS:', not '%.5s'", dim_line); + return false; + } + + for (dim_line += 5, tensor->num_dims = 0; tensor->num_dims < 2; tensor->num_dims++) { + if (sscanf(dim_line, "%u%n", &tensor->dimension[tensor->num_dims], &consumed) != 1) { + SXEL2(": Unsigned integer expected in dimension line not '%.10s'", dim_line); + return false; + } + + dim_line += consumed; + + if (*dim_line == ':') + dim_line++; + else if (*dim_line == '\0') + break; + else { + SXEL2(": Expected ':' or EOL in dimension line after value, not '%c'", *dim_line); + return false; + } + } + + tensor->num_dims++; + + if (!(tensor->value = MOCKFAIL(KIT_TENSOR_MAKE_BEGIN, NULL, kit_malloc(sizeof(*tensor->value) * kit_tensor_num_values(tensor))))) { + SXEL2(": Failed to allocate %zu bytes for tensor values", sizeof(*tensor->value) * kit_tensor_num_values(tensor)); + return false; + } + + return true; +} + +bool +kit_tensor_make_add_values(struct kit_tensor *tensor, const char *values_line, size_t *num_values) +{ + unsigned consumed; + + while (*values_line) { + if (*num_values >= kit_tensor_num_values(tensor)) { + SXEL2(": Array is full but line still contains '%s'", values_line); + return false; + } + + if (sscanf(values_line, "%f%n", &tensor->value[*num_values], &consumed) != 1) { + SXEL2(": Float expected in value line not '%.20s'", values_line); + return false; + } + + (*num_values)++; + values_line += consumed; + + if (*values_line == ',') + values_line++; + else if (*values_line == '\0') + break; + else { + SXEL2(": Expected ',' or EOL in line after value, not '%c'", 
*values_line); + return false; + } + } + + return true; +} + +bool +kit_tensor_make_end(const struct kit_tensor *tensor, size_t num_values) +{ + if (num_values != kit_tensor_num_values(tensor)) { + SXEL2(": Failed to end tensor construction; got %zu values, expected %zu", kit_tensor_num_values(tensor), num_values); + return false; + } + + return true; +} + +void +kit_tensor_fini(struct kit_tensor *a) +{ + a->num_dims = 0; + memset(a->dimension, '\0', sizeof(a->dimension) ); + kit_free(a->value); +} + +void +kit_tensor_init(struct kit_tensor *a) +{ + a->num_dims = 0; + for (int i = 0; i < KIT_TENSOR_MAX_DIMS; i++) + a->dimension[i] = 0; +} + +void +kit_array_inner_product(const float *a, int askip, const float *b, int bskip, int len, float *c) +{ + *c = 0; + for (int i = 0; i < len; i++) + *c += a[i * askip] * b[i * bskip]; +} + +// kit_tensor_matmul function assumes that output tensor c is allocated and initialized +bool +kit_tensor_matmul(const struct kit_tensor *a, const struct kit_tensor *b, struct kit_tensor *c) +{ + bool ret = false; + unsigned dim, index; + + SXEA1(c->sz, "Tensor size %d has no value", c->sz); + SXEA1(c->b0, "Tensor offset b0 %d has no value", c->b0); + SXEA1(c->b1, "Tensor offset b1 %d has no value", c->b1); + + dim = a->dimension[2]; + if ((dim != b->dimension[1]) || + (a->dimension[0] != b->dimension[0]) || + (kit_tensor_matmul_sz(a, b) != kit_tensor_sz(c))) + goto OUT; + + c->num_dims = 2; + c->dimension[0] = a->dimension[0]; + c->dimension[1] = a->dimension[1]; + c->dimension[2] = b->dimension[2]; + + index = 0; + for (unsigned k = 0; k < a->dimension[0]; k++) + for (unsigned m = 0; m < a->dimension[1]; m++) + for (unsigned n = 0; n < b->dimension[2]; n++) { + kit_array_inner_product(a->value + k * a->dimension[1] * a->dimension[2] + m * a->dimension[2], 1, b->value + k * b->dimension[1] * b->dimension[2] + n, b->dimension[2], dim, &c->value[index]); + index += 1; + } + + ret = true; +OUT: + return ret; +} + +// kit_tensor_embedding 
function assumes that output tensor out is allocated and initialized +bool +kit_tensor_embedding(const struct kit_tensor *a, const unsigned *indices, unsigned n, struct kit_tensor *out) +{ + bool ret = false; + unsigned wrap; + + if (a->num_dims == 2) { + wrap = a->dimension[2]; + out->dimension[0] = 1; + out->dimension[1] = n; + out->dimension[2] = a->dimension[2]; + } else if (a->num_dims == 3) { + wrap = a->dimension[1] * a->dimension[2]; + out->dimension[0] = n; + out->dimension[1] = a->dimension[1]; + out->dimension[2] = a->dimension[2]; + } else + goto OUT; + + out->num_dims = a->num_dims; + + SXEA1(out->sz, "Tensor size %d has no value", out->sz); + SXEA1(out->b0, "Tensor offset b0 %d has no value", out->b0); + SXEA1(out->b1, "Tensor offset b1 %d has no value", out->b1); + + for (unsigned i = 0; i < n; i++) { + memcpy(&out->value[i * wrap], &a->value[wrap * indices[i]], sizeof(*a->value) * wrap); + } + + ret = true; +OUT: + return ret; +} + +// kit_tensor_permute function assumes that output tensor b is allocated and initialized +bool +kit_tensor_permute(const struct kit_tensor *a, const unsigned dims[KIT_TENSOR_MAX_DIMS], struct kit_tensor *b) +{ + bool ret = false; + unsigned index, idx; + unsigned base[KIT_TENSOR_MAX_DIMS] = {0}; + + if (kit_tensor_sz(a) != kit_tensor_sz(b)) + goto OUT; + + base[0] = kit_tensor_sz(a) / a->dimension[0]; + base[1] = base[0] / a->dimension[1]; + base[2] = base[1] / a->dimension[2]; + b->dimension[0] = a->dimension[dims[0]]; + b->dimension[1] = a->dimension[dims[1]]; + b->dimension[2] = a->dimension[dims[2]]; + + SXEA1(b->sz, "Tensor size %d has no value", b->sz); + SXEA1(b->b0, "Tensor offset b0 %d has no value", b->b0); + SXEA1(b->b1, "Tensor offset b1 %d has no value", b->b1); + + index = 0; + for (unsigned k = 0; k < b->dimension[0]; k++) + for (unsigned m = 0; m < b->dimension[1]; m++) + for (unsigned n = 0; n < b->dimension[2]; n++) { + idx = base[dims[0]] * k + base[dims[1]] * m + base[dims[2]] * n; + b->value[index] = 
a->value[idx]; + index++; + } + + ret = true; +OUT: + return ret; +} + +bool +kit_tensor_transpose(const struct kit_tensor *a, unsigned i, unsigned j, struct kit_tensor *b) +{ + unsigned dims[KIT_TENSOR_MAX_DIMS] = {0, 1, 2}, tmp; + + tmp = dims[i]; + dims[i] = dims[j]; + dims[j] = tmp; + return kit_tensor_permute(a, dims, b); +} + +bool +kit_tensor_conv1d(const struct kit_tensor *a, const struct kit_tensor *b, const unsigned stride, struct kit_tensor *c) +{ + bool ret = false; + unsigned kernel = a->dimension[2], unfolds, foldloc, index; + + SXEA6(kernel, "A valid tensor is required"); + + if (a->dimension[0] != b->dimension[1]) + goto OUT; + + if (a->dimension[0] != a->dimension[1]) + goto OUT; + + if (a->dimension[0] != c->dimension[1]) + goto OUT; + + unfolds = 0; + for (int i = 0; i + kernel <= b->dimension[2]; i+= stride) + unfolds++; + + if (unfolds != c->dimension[2]) + goto OUT; /* COVERAGE EXCLUSION - Not sure how to test */ + + if (b->dimension[0] != c->dimension[0]) + goto OUT; + + index = 0; + + for (unsigned batch_item = 0; batch_item < b->dimension[0]; batch_item++) + for (unsigned k = 0; k < a->dimension[0]; k++) { + for (foldloc = 0; foldloc + a->dimension[2] <= b->dimension[2]; foldloc += stride) { + c->value[index] = 0; + for (unsigned m = 0; m < a->dimension[1]; m++) + for (unsigned n = 0; n < a->dimension[2]; n++) + c->value[index] += a->value[kit_dotp2(a->b0, a->b1, k, m) + n] * b->value[kit_dotp2(b->b0, b->b1, batch_item, m) + foldloc + n]; + index++; + } + } + ret = true; +OUT: + return ret; +} + +bool +kit_tensor_batchnorm1d_affine(const struct kit_tensor *bn, const struct kit_tensor *x, struct kit_tensor *c) +{ + bool ret = false; + unsigned index; + + if (!(bn->dimension[2] == 4 && bn->dimension[1] == x->dimension[1] && bn->dimension[0] == 1)) + goto OUT; + + if (!(x->dimension[0] == c->dimension[0] && x->dimension[1] == c->dimension[1] && x->dimension[2] == c->dimension[2])) + goto OUT; + + index = 0; + + for (unsigned k = 0; k < 
x->dimension[0]; k++) + for (unsigned m = 0; m < x->dimension[1]; m++) + for (unsigned n = 0; n < x->dimension[2]; n++) { + c->value[index] = (bn->value[kit_dotp2(bn->b0, bn->b1, 0, m) + 2] * + (x->value[kit_dotp2(x->b0, x->b1, k, m) + n] - bn->value[kit_dotp2(bn->b0, bn->b1, 0, m)]) / + sqrtf(bn->value[kit_dotp2(bn->b0, bn->b1, 0, m) + 1])) + bn->value[kit_dotp2(bn->b0, bn->b1, 0, m) + 3]; + index++; + } + + ret = true; +OUT: + return ret; +} + +bool +kit_tensor_apply(struct kit_tensor *a, float (*cb)(float x)) +{ + for (unsigned i = 0; i < kit_tensor_sz(a); i++) + a->value[i] = cb(a->value[i]); + + return true; +} + +float +kit_relu(float x) +{ + return x > 0 ? x : 0; +} + +bool +kit_tensor_relu(struct kit_tensor *a) +{ + return kit_tensor_apply(a, kit_relu); +} + +bool +kit_tensor_flatten(struct kit_tensor *a) +{ + for (unsigned i = 0; i < KIT_TENSOR_MAX_DIMS - 1; i++) { + a->dimension[KIT_TENSOR_MAX_DIMS - 1] *= a->dimension[i]; + a->dimension[i] = 1; + } + return true; +} + +bool +kit_tensor_sum(const struct kit_tensor *a, const struct kit_tensor *b, struct kit_tensor *c) +{ + bool ret = false; + + if (a->dimension[0] != b->dimension[0] || + a->dimension[1] != b->dimension[1] || + a->dimension[2] != b->dimension[2] || + b->dimension[0] != c->dimension[0] || + b->dimension[1] != c->dimension[1] || + b->dimension[2] != c->dimension[2]) + goto OUT; + + for (unsigned i = 0; i < kit_tensor_sz(a); i++) + c->value[i] = a->value[i] + b->value[i]; + ret = true; + +OUT: + return ret; +} + +void +kit_tensor_zeros(struct kit_tensor *a) +{ + memset(a->value, 0, a->sz * sizeof(*a->value)); +} + +bool +kit_tensor_dimset(struct kit_tensor *a, const unsigned dims[KIT_TENSOR_MAX_DIMS]) +{ + a->dimension[0] = dims[0]; + a->dimension[1] = dims[1]; + a->dimension[2] = dims[2]; + a->num_dims = a->dimension[0] > 1 ? 3 : (a->dimension[1] > 1 ? 
2 : 1); + + // Set tensor size and offsets + a->sz = a->dimension[0] * a->dimension[1] * a->dimension[2]; + a->b0 = a->dimension[1] * a->dimension[2]; + a->b1 = a->dimension[2]; + + return true; +} diff --git a/lib-kit/kit-tensor.h b/lib-kit/kit-tensor.h new file mode 100644 index 0000000..0803447 --- /dev/null +++ b/lib-kit/kit-tensor.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2024 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + +#include + +#define KIT_TENSOR_MAX_DIMS 3 + +struct kit_tensor +{ + unsigned num_dims; // 1, 2, or 3 + unsigned dimension[KIT_TENSOR_MAX_DIMS]; + float *value; + unsigned sz; + unsigned b0; // Dimension 0 offset in linearized tensor + unsigned b1; // Dimension 1 offset in linearized tensor +}; + +#include "kit-tensor-proto.h" + +static inline size_t +kit_tensor_num_values(const struct kit_tensor *tensor) +{ + return (size_t)tensor->dimension[0] * (tensor->num_dims > 1 ? tensor->dimension[1] : 1) + * (tensor->num_dims > 2 ? tensor->dimension[2] : 1); + // I wonder if this calculation should be the following: + // return (size_t)tensor->dimension[0] * tensor->dimension[1] * tensor->dimension[2]; + // the reason is that if any dimension is zero, the tensor should be "0" shaped. +} + +static inline unsigned +kit_dotp2(const unsigned b0, const unsigned b1, const unsigned k0, const unsigned k1) +{ + return b0 * k0 + b1 * k1; +} + +static inline unsigned +kit_tensor_matmul_sz(const struct kit_tensor *a, const struct kit_tensor *b) +{ + return a->dimension[0] * a->dimension[1] * b->dimension[2]; +} + +static inline unsigned +kit_tensor_sz(const struct kit_tensor *a) +{ + unsigned ret = a->sz; + if (a->sz == 0) + ret = a->dimension[0] * a->dimension[1] * a->dimension[2]; + return ret; +} + +static inline float +kit_tensor_get_k_m_n(const struct kit_tensor *a, const unsigned k, const unsigned m, const unsigned n) +{ + return a->value[a->b0 * k + a->b1 * m + n]; +} \ No newline at end of file diff --git a/lib-kit/kit-timezone.c b/lib-kit/kit-timezone.c new file mode 100644 index 0000000..7350fbb --- /dev/null +++ b/lib-kit/kit-timezone.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2024 Cisco Systems, Inc. 
and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + +/* A thread-safe cache for tzcode timezone objects + */ + + +#include +#include +#include +#include +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "kit-timezone.h" +#include "sxe-dict.h" +#include "sxe-log.h" +#include "sxe-util.h" +#include "tzcode.h" +#include "tzfile.h" + +struct kit_timezone { + pthread_mutex_t lock; // Lock the definition of the timezone + char *filename; // Name of the file with leading : + size_t name_len; // Length of the filename + time_t time_checked; // Time the file was last checked + struct timespec mtim; // Modification time when last loaded + timezone_t tzdata; // Timezone specification data +}; + +static struct sxe_dict *timezone_cache = NULL; +static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER; // Control access to the cache +static unsigned cache_seconds; + +/* Wrap kit_malloc to provide a signature consistant with malloc + */ +static void * +kit_timezone_alloc_memory(size_t size) { + return kit_malloc(size); +} + +/* Wrap kit_free to provide a signature consistant with free + */ +static void +kit_timezone_free_memory(void *mem) { + return kit_free(mem); +} + +/** + * Initialize the timezone cache + * + * @param seconds Number of seconds before checking whether a zoneinfo file has been changed or removed + */ +void +kit_timezone_initialize(unsigned seconds) +{ + SXEA1(!timezone_cache, "Already initialized"); + tz_malloc = kit_timezone_alloc_memory; + tz_free = kit_timezone_free_memory; + cache_seconds = seconds; + SXEA1(timezone_cache = kit_malloc(sizeof(struct sxe_dict)), "Failed to allocate timezone cache"); + SXEA1(sxe_dict_init(timezone_cache, 1, 100, 2, SXE_DICT_FLAG_KEYS_NOCOPY), "Failed to initialize timezone cache"); +} + +/** + * Unlock a timezone + * + * @param me Pointer to a locked timezone + */ +void +kit_timezone_unlock(const struct kit_timezone *me) +{ + SXEA1(pthread_mutex_unlock(&SXE_CAST_NOCONST(struct kit_timezone *, me)->lock) == 
0, "Failed to unlock the timezone"); +} + +/** + * Given a timezone, reload its tzdata if needed and lock it + * + * @param me A pointer to a timezone + * + * @return The pointer to the timezone or NULL if the timezone is no longer a valid or on failure to load + */ +const struct kit_timezone * +kit_timezone_lock(const struct kit_timezone *me) +{ + struct stat status; + struct kit_timezone *zone; + const char *name = &me->filename[1]; + time_t now; + bool found; + char path[PATH_MAX]; + + zone = SXE_CAST_NOCONST(struct kit_timezone *, me); + SXEA1(pthread_mutex_lock(&zone->lock) == 0, "Failed to lock the timezone"); + now = time(NULL); // Must do this inside the lock + + if (cache_seconds && now - zone->time_checked <= cache_seconds) { // Don't hit the disk too often + if (zone->tzdata) + return zone; + + kit_timezone_unlock(zone); + return NULL; + } + + if (name[0] != '/') { + snprintf(path, sizeof(path), "%s/%s", TZDIR, name); + name = path; + } + + found = stat(name, &status) >= 0; + zone->time_checked = now; + + if (found && memcmp(&status.st_mtim, &zone->mtim, sizeof(status.st_mtim)) == 0) // If the file is unchanged + return zone; + + if (zone->tzdata) { // Discard the cached tzdata if any + tz_tzfree(zone->tzdata); + zone->tzdata = NULL; + } + + memcpy(&zone->mtim, &status.st_mtim, sizeof(zone->mtim)); + + if (!found) { // If the file is gone, return NULL + kit_timezone_unlock(zone); + return NULL; + } + + if (!(zone->tzdata = tz_tzalloc(zone->filename))) { // Attempt to reload the tzdata for the timezone + kit_timezone_unlock(zone); + return NULL; + } + + return me; +} + +/** + * Given a timezone name (without a leading colon), get the timezone from the cache, loading it if needed + * + * @param name A file name which, if relative, will be looked for in /usr/share/zoneinfo + * @param len Length of the filename or 0 to have it computed with strlen + * + * @return A pointer to the timezone or NULL if the name is not a valid timezone name or on failure to add 
or load + */ +const struct kit_timezone * +kit_timezone_load(const char *name, size_t len) +{ + struct stat status; + const void *value, **value_ptr; + struct kit_timezone *zone = NULL; + char *filename = NULL; + time_t now; + char path[PATH_MAX]; + + SXEA1(timezone_cache, ": kit_timezone is not initialized"); + SXEA1(pthread_mutex_lock(&cache_lock) == 0, "Failed to lock the timezone cache"); + + if ((value = sxe_dict_find(timezone_cache, name, len))) { // Already in the cache + if (((const struct kit_timezone *)value)->tzdata) + goto EXIT; + + zone = SXE_CAST_NOCONST(struct kit_timezone *, value); + value = NULL; // Only set once there is tzdata + now = time(NULL); + + if (cache_seconds && now - zone->time_checked <= cache_seconds) // Don't hit the disk too often + goto EXIT; + + zone->time_checked = now; + } + else { + len = len ?: strlen(name); + SXEL7(": Cache miss on timezone '%.*s'", (int)len, name); + + if (!(filename = kit_malloc(len + 2))) // + 2 is room for the leading ':' and the trailing '\0' + goto ERROR; /* COVERAGE EXCLUSION: Out of memory */ + + filename[0] = ':'; + memcpy(filename + 1, name, len); + filename[len + 1] = '\0'; + + if (!(zone = kit_malloc(sizeof(struct kit_timezone))) + || !(value_ptr = MOCKERROR(kit_timezone_load, NULL, ENOMEM, sxe_dict_add(timezone_cache, filename + 1, len)))) + goto ERROR; + + SXEA1(!*value_ptr, "Entry not found, but was there when added"); + pthread_mutex_init(&zone->lock, NULL); + zone->filename = filename; + zone->name_len = len; + zone->time_checked = time(NULL); + *value_ptr = zone; + } + + if (name[0] != '/') { + snprintf(path, sizeof(path), "%s/%s", TZDIR, zone->filename + 1); + name = path; + } + + if (stat(name, &status) >= 0) { + memcpy(&zone->mtim, &status.st_mtim, sizeof(zone->mtim)); + + if ((zone->tzdata = tz_tzalloc(zone->filename))) + value = zone; + } else { + memset(&zone->mtim, 0, sizeof(zone->mtim)); + zone->tzdata = NULL; + } + + goto EXIT; + +ERROR: + kit_free(zone); + kit_free(filename); + 
+EXIT: + SXEA1(pthread_mutex_unlock(&cache_lock) == 0, "Failed to unlock the timezone cache"); + return value; +} + +const char * +kit_timezone_get_name(const struct kit_timezone *me, size_t *len_out) +{ + if (len_out) + *len_out = me->name_len; + + return me->filename + 1; // Don't include the leading ':' +} + +/** + * Using a timezone's definition, convert a UTC time_t to a local time struct tm + * + * @return A pointer to the tm structure or NULL on error + */ +struct tm * +kit_timezone_time_to_localtime(const struct kit_timezone *me, time_t timestamp, struct tm *tm_out) +{ + if (!me || !me->tzdata) + return NULL; + + return tz_localtime_rz(me->tzdata, ×tamp, tm_out); +} + +static bool +kit_timezone_free(const void *key, size_t len, const void **value, void *user) +{ + SXE_UNUSED_PARAMETER(key); + SXE_UNUSED_PARAMETER(len); + SXE_UNUSED_PARAMETER(user); + struct kit_timezone *zone = SXE_CAST_NOCONST(struct kit_timezone *, *value); + + tz_tzfree(zone->tzdata); + kit_free(zone->filename); + kit_free(zone); + return true; +} + +void +kit_timezone_finalize(void) +{ + SXEA1(timezone_cache, ": not already initialized"); + sxe_dict_forEach(timezone_cache, kit_timezone_free, NULL); + sxe_dict_fini(timezone_cache); + kit_free(timezone_cache); + timezone_cache = NULL; +} diff --git a/lib-kit/kit-timezone.h b/lib-kit/kit-timezone.h new file mode 100644 index 0000000..be2816e --- /dev/null +++ b/lib-kit/kit-timezone.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024 Cisco Systems, Inc. 
and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + +#ifndef KIT_TIMEZONE_H +#define KIT_TIMEZONE_H + +#include + +struct kit_timezone; + +#include "kit-timezone-proto.h" + +#endif diff --git a/lib-kit/kit-udp.c b/lib-kit/kit-udp.c index 77c9e10..47fe5b1 100644 --- a/lib-kit/kit-udp.c +++ b/lib-kit/kit-udp.c @@ -28,10 +28,10 @@ #endif #include #include -#include -#include #include "kit.h" +#include "kit-mock.h" +#include "sxe-log.h" /** * Create a UDP socket and optionally set socket options for extra information @@ -97,16 +97,17 @@ kit_recvfrom(int fd, void *buffer, size_t buffer_len, int flags, struct sockaddr *dest_address, socklen_t *dest_address_len, unsigned long *delay_in_msec, struct kit_udp_ttltos *ttltos) { - uint8_t control[CMSG_SPACE(sizeof(struct timeval)) + CMSG_SPACE(sizeof(int)) * 2 + - CMSG_SPACE(sizeof(struct sockaddr_in6))]; /* SO_TIMESTAMP + IP_TTL + IP_TOS + IP_ORIGDSTADDR */ - ssize_t size; - struct msghdr header; - struct iovec io_vector[1]; - struct cmsghdr *cmsghdr; - struct timeval current; - struct timeval *timestamp = NULL; - int rerrno, *valp; - bool dest_addr_found = false; + uint8_t control[CMSG_SPACE(sizeof(struct timeval)) + CMSG_SPACE(sizeof(int)) * 2 + + CMSG_SPACE(sizeof(struct sockaddr_in6))]; /* SO_TIMESTAMP + IP_TTL + IP_TOS + IP_ORIGDSTADDR */ + ssize_t size; + struct msghdr header; + struct iovec io_vector[1]; + struct cmsghdr *cmsghdr; + struct timeval current; + const struct timeval *timestamp; + int rerrno; + const int *valp; + bool dest_addr_found = false; io_vector[0].iov_base = buffer; io_vector[0].iov_len = buffer_len; @@ -137,7 +138,7 @@ kit_recvfrom(int fd, void *buffer, size_t buffer_len, int flags, SXEL3("UDP message received from fd %d control data includes an SOL_SOCKET cmsg that is not an" /* COVERAGE EXCLUSION: Can't happen */ " SO_TIMESTAMP (got type %d)", fd, cmsghdr->cmsg_type); else if (delay_in_msec) { - timestamp = (struct timeval *)CMSG_DATA(cmsghdr); + timestamp = (const struct timeval *)CMSG_DATA(cmsghdr); 
SXEA1(gettimeofday(&current, 0) >= 0, "Kernel won't give use the timeofday"); if (current.tv_sec < timestamp->tv_sec @@ -155,7 +156,7 @@ kit_recvfrom(int fd, void *buffer, size_t buffer_len, int flags, switch (cmsghdr->cmsg_type) { case IP_ORIGDSTADDR: if (dest_address && dest_address_len) { - struct sockaddr *addr = (struct sockaddr *)CMSG_DATA(cmsghdr); + const struct sockaddr *addr = (const struct sockaddr *)CMSG_DATA(cmsghdr); if (addr->sa_family == AF_INET6) { SXEA1(*dest_address_len >= sizeof(struct sockaddr_in6), "Provided dest_address_len < sizeof sockaddr_in6"); /* COVERAGE EXCLUSION: IPv6 testing is hard */ *dest_address_len = sizeof(struct sockaddr_in6); /* COVERAGE EXCLUSION: IPv6 testing is hard */ @@ -172,16 +173,16 @@ kit_recvfrom(int fd, void *buffer, size_t buffer_len, int flags, case IP_TTL: if (ttltos) { - valp = (int *)CMSG_DATA(cmsghdr); // Required to avoid breaking strict-aliasing rules - ttltos->ttl = *valp; + valp = (const int *)CMSG_DATA(cmsghdr); // Required to avoid breaking strict-aliasing rules + ttltos->ttl = (uint8_t)*valp; // See ip(7): "the time-to-live field of the packet" } break; case IP_TOS: if (ttltos) { - valp = (int *)CMSG_DATA(cmsghdr); // Required to avoid breaking strict-aliasing rules - ttltos->tos = *valp; + valp = (const int *)CMSG_DATA(cmsghdr); // Required to avoid breaking strict-aliasing rules + ttltos->tos = (uint8_t)*valp; // See ip(7): "it contains a byte" } break; diff --git a/lib-kit/kit.h b/lib-kit/kit.h index 7bae4fb..1048ab7 100644 --- a/lib-kit/kit.h +++ b/lib-kit/kit.h @@ -29,8 +29,13 @@ #if __FreeBSD__ #include #endif + #include #include +#include + +#include "kit-sortedarray.h" // For backward compatibility +#include "kit-time.h" // For backward compatibility #define KIT_UNSIGNED_MAX (~0U) @@ -46,11 +51,6 @@ #define KIT_DEVICEID_SIZE 8 #define KIT_DEVICEID_STR_LEN (2 * KIT_DEVICEID_SIZE) -#define KIT_SORTEDARRAY_DEFAULT 0 // No special behaviours -#define KIT_SORTEDARRAY_ALLOW_INSERTS 0x01 // Elements can be
added to a sorted array out of order (expensive!) -#define KIT_SORTEDARRAY_ALLOW_GROWTH 0x02 // Sorted array is allowed to grow dynamically -#define KIT_SORTEDARRAY_ZERO_COPY 0x04 // Don't copy the key into the array, just return a pointer to the element - #define KIT_UDP_DELAY 0x01 // Get the delay (RQT) in msec for all packets received on this socket #define KIT_UDP_TTLTOS 0x02 // Get the TTL and TOS fields for all packets received on this socket #define KIT_UDP_DST_ADDR 0x04 // Get the original destination address for packets received on this socket @@ -73,13 +73,6 @@ struct kit_deviceid { uint8_t bytes[KIT_DEVICEID_SIZE]; }; -struct kit_sortedelement_class { - size_t size; // Sizeof the element (including padding if not packed) - size_t keyoffset; // Offset of the key within the element - int (*cmp)(const void *, const void *); // Comparitor for element keys - const char *(*fmt)(const void *); // Formatter for element keys; return the LRU of 4 static buffers -}; - struct kit_udp_ttltos { uint8_t ttl; uint8_t tos; @@ -98,20 +91,26 @@ typedef void *(*kit_realloc_ptr_t)(void *, size_t); extern const struct kit_guid kit_guid_nil; // The nil GUID (All bytes are 0) extern const struct kit_deviceid kit_deviceid_nil; // The nil DEVICEID (All bytes are 0) +/* Check if a device ID is null (all bytes are zero) */ +static inline bool +kit_deviceid_is_null(const struct kit_deviceid *deviceid) +{ + return memcmp(deviceid, &kit_deviceid_nil, sizeof(struct kit_deviceid)) == 0; +} + +/* Check if a GUID is null (all bytes are zero) */ +static inline bool +kit_guid_is_null(const struct kit_guid *guid) +{ + return memcmp(guid, &kit_guid_nil, sizeof(struct kit_guid)) == 0; +} + #include "kit-base-encode-proto.h" #include "kit-basename-proto.h" #include "kit-guid-proto.h" #include "kit-deviceid-proto.h" #include "kit-hostname-proto.h" -#include "kit-sortedarray-proto.h" -#include "kit-time-proto.h" #include "kit-strto-proto.h" #include "kit-udp-proto.h" -static inline uint32_t 
-kit_time_ms(void) -{ - return kit_time_nsec() / 1000000ULL; -} - #endif diff --git a/lib-kit/test/test-kit-bool.c b/lib-kit/test/test-kit-bool.c new file mode 100644 index 0000000..f9a19fc --- /dev/null +++ b/lib-kit/test/test-kit-bool.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + +#include +#include + +#include "kit-bool.h" + +int +main(int argc, char **argv) +{ + const char *truetxt[] = { "true", "1", "yes" }; + const char *falsetxt[] = { "false", "0", "no" }; + unsigned i; + bool val; + + SXE_UNUSED_PARAMETER(argc); + SXE_UNUSED_PARAMETER(argv); + + plan_tests(14); + + for (i = 0; i < sizeof(truetxt) / sizeof(*truetxt); i++) { + ok(kit_bool_from_str(&val, truetxt[i]), "Converted '%s' to a bool", truetxt[i]); + ok(val, "Converted value is 'true'"); + } + + for (i = 0; i < sizeof(falsetxt) / sizeof(*falsetxt); i++) { + ok(kit_bool_from_str(&val, falsetxt[i]), "Converted '%s' to a bool", falsetxt[i]); + ok(!val, "Converted value is 'false'"); + } + + ok(!kit_bool_from_str(&val, ""), "Failed to convert '' to a bool"); + ok(!kit_bool_from_str(&val, "junk"), "Failed to convert 'junk' to a bool"); + + return exit_status(); +} diff --git a/lib-kit/test/test-kit-deviceid.c b/lib-kit/test/test-kit-deviceid.c index 65ad5fe..79d3782 100644 --- a/lib-kit/test/test-kit-deviceid.c +++ b/lib-kit/test/test-kit-deviceid.c @@ -29,7 +29,7 @@ int main(void) { - plan_tests(12); + plan_tests(15); struct kit_deviceid deviceid = { .bytes = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07} @@ -58,5 +58,12 @@ main(void) ok(kit_deviceid_cmp(&deviceid, &deviceid) == 0, "Deviceid compare for same pointer"); ok(kit_deviceid_cmp(&deviceid, NULL) != 0, "Deviceid compare for NULL pointer"); + /* Test kit_deviceid_is_null */ + ok(kit_deviceid_is_null(&kit_deviceid_nil), "kit_deviceid_is_null returns true for nil device ID"); + ok(!kit_deviceid_is_null(&deviceid), "kit_deviceid_is_null returns false for non-nil device ID"); + + struct kit_deviceid deviceid3 = { .bytes = {0} }; + ok(kit_deviceid_is_null(&deviceid3), "kit_deviceid_is_null returns true for zeroed device ID"); + return exit_status(); } diff --git a/lib-kit/test/test-kit-graphitelog.c b/lib-kit/test/test-kit-graphitelog.c index a0ab248..a8667e3 100644 --- 
a/lib-kit/test/test-kit-graphitelog.c +++ b/lib-kit/test/test-kit-graphitelog.c @@ -35,40 +35,56 @@ #define INTERVAL 2 -const char *graphite_log_file = "graphite_log_file"; +static const char *graphite_log_file = "graphite_log_file"; +static kit_counter_t COUNTER; +static bool thread_started; -kit_counter_t COUNTER; +static void +started(void) +{ + thread_started = true; +} int main(void) { struct kit_graphitelog_thread gthr; - pthread_t thread; - char buf[4096]; - ssize_t read_bytes; + unsigned long lastval, val; + const char *eor, *p; + ssize_t read_bytes; + pthread_t thread; + unsigned i, rec; + char buf[4096]; - plan_tests(7); + plan_tests(11); // Initializer counters - kit_counters_initialize(MAXCOUNTERS, 2, false); - COUNTER = kit_counter_new("counter"); + kit_counters_initialize(KIT_COUNTERS_MAX, 2, false); + COUNTER = kit_counter_reg("counter"); ok(kit_counter_isvalid(COUNTER), "Created counter"); is(kit_counter_get(COUNTER), 0, "Counter was initialized to zero"); - // Initialize the graphite log configuration, with a low json limit to ensure that the splitting code is exercised - kit_graphitelog_update_set_options(2, INTERVAL); + // Initialize the graphite log with a ridiculous interval so that nothing will ever get logged + kit_graphitelog_update_set_options(2, ~0U, 1000); - // Open the graphitelog output file - gthr.fd = open(graphite_log_file, O_CREAT | O_NONBLOCK | O_RDWR, 0644); + // Open the graphitelog output file, emptying it if it exists because it might be left from a previous run + gthr.fd = open(graphite_log_file, O_CREAT | O_NONBLOCK | O_RDWR | O_TRUNC, 0644); ok(gthr.fd >= 0, "Successfully created %s", graphite_log_file); // Create the graphitelog thread gthr.counter_slot = 1; + gthr.started = started; is(pthread_create(&thread, NULL, kit_graphitelog_start_routine, &gthr), 0, "Successfully created graphitelog thread"); + for (i = 0; i < 10 && !thread_started; i++) + usleep(1000); + is(thread_started, true, "The graphitelog thread has started");
+ + // Reinitialize the graphite log configuration, with a low json limit to ensure that the splitting code is exercised + kit_graphitelog_update_set_options(2, INTERVAL, 1000); // Update the counter and wait to give the graphite log time to output kit_counter_add(COUNTER, 5); - usleep(1000000 * INTERVAL + 100000); + usleep(1000000 * INTERVAL * 3 + 1000000); // Terminate the graphitelog thread kit_graphitelog_terminate(); @@ -80,9 +96,41 @@ main(void) buf[read_bytes] = '\0'; ok(strstr(buf, "\"counter\":\"5\""), "Found expected counters in graphitelog"); +#define TSDATA "\"log.timestamp\":\"" + diag("Checking timestamps, but skipping the first (immediate) and last (terminated) records"); + + for (lastval = 0, p = buf, rec = 0; p && *p; p = eor) { + if (*p == '{') { + if ((eor = strchr(p, '}')) != NULL) + eor += strspn(eor + 1, "\r\n") + 1; + + p = strstr(p, TSDATA); + val = p && eor && p < eor ? kit_strtoul(p + sizeof(TSDATA) - 1, NULL, 10) : 0; + + if (!val) { + fail("Record %u: Invalid Record", ++rec); + } else if (val != lastval) { + if (lastval) { + if (++rec > 1) { + if (rec < 4) + is(lastval % INTERVAL, INTERVAL / 2, "Record %u: timestamp %lu is at a half-interval", rec, lastval); + else if (lastval % INTERVAL != INTERVAL / 2) + fail("Record %u: timestamp %lu is not at a half-interval", rec, lastval); + } + else + pass("Record %u: timestamp %lu could be anything (first record)", rec, lastval); + } + + lastval = val; + } + + } else { + fail("Record %u: Invalid Record", rec); + break; + } + } + + pass("Record %u: timestamp %lu could be anything (last record)", ++rec, lastval); close(gthr.fd); - - ok(remove(graphite_log_file) == 0, "Successfully removed %s", graphite_log_file); - return exit_status(); } diff --git a/lib-kit/test/test-kit-guid.c b/lib-kit/test/test-kit-guid.c index cd44db2..eb3cef3 100644 --- a/lib-kit/test/test-kit-guid.c +++ b/lib-kit/test/test-kit-guid.c @@ -29,7 +29,7 @@ int main(void) { - plan_tests(16); + plan_tests(19); struct kit_guid guid 
= { .bytes = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F} @@ -68,5 +68,12 @@ main(void) ok(kit_guid_cmp(&guid, &guid) == 0, "Guid compare for same pointer"); ok(kit_guid_cmp(&guid, NULL) != 0, "Guid compare for NULL pointer"); + /* Test kit_guid_is_null */ + ok(kit_guid_is_null(&kit_guid_nil), "kit_guid_is_null returns true for nil GUID"); + ok(!kit_guid_is_null(&guid), "kit_guid_is_null returns false for non-nil GUID"); + + struct kit_guid guid3 = { .bytes = {0} }; + ok(kit_guid_is_null(&guid3), "kit_guid_is_null returns true for zeroed GUID"); + return exit_status(); } diff --git a/lib-kit/test/test-kit-hostname.c b/lib-kit/test/test-kit-hostname.c index dffd93a..de7a5e7 100644 --- a/lib-kit/test/test-kit-hostname.c +++ b/lib-kit/test/test-kit-hostname.c @@ -21,32 +21,29 @@ * SPDX-License-Identifier: MIT */ -#include #include #include #include "kit.h" +#include "kit-mockfail.h" +#include "sxe-util.h" int -main(int argc, char **argv) +main(void) { - const char *dot, *host; - unsigned dots; - - SXE_UNUSED_PARAMETER(argc); - SXE_UNUSED_PARAMETER(argv); + const char *host; plan_tests(3); + MOCKFAIL_START_TESTS(1, kit_hostname); host = kit_hostname(); - ok(host, "kit_hostname() returns some name ('%s')", host ?: "NULL"); - - host = kit_short_hostname(); - ok(host, "kit_short_hostname() returns some name ('%s')", host ?: "NULL"); + is_eq(host, "Amnesiac", "Hostname is 'Amnesiac' when gethostbyname() fails"); + MOCKFAIL_END_TESTS(); - for (dots = 0; (dot = strchr(host, '.')) != NULL; host = dot + 1, dots++) - ; - ok(dots < 2, "The short hostname contains less than two dots"); + kit_time_cached_update(); // Update the cached time + host = kit_hostname(); + ok(host, "kit_hostname() returns some name ('%s')", host ?: "NULL"); + isnt_eq(host, "Amnesiac", "Hostname is no longer 'Amnesiac'"); return exit_status(); } diff --git a/lib-kit/test/test-kit-init.c b/lib-kit/test/test-kit-init.c new file mode 100644 index 
0000000..fe3b4b5 --- /dev/null +++ b/lib-kit/test/test-kit-init.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" + +int +main(void) +{ + void *mem; + + tap_plan(6, TAP_FLAG_LINE_ON_OK, NULL); + + is(kit_memory_allocations(), 0, "Expected no allocations before initializing memory"); + kit_memory_initialize(KIT_MEMORY_CHECK_OVERFLOWS); + kit_memory_set_assert_on_enomem(false); /* This can be called after initalization */ + is(kit_memory_allocations(), 0, "Expected no allocations after initializing memory either"); + + ok(mem = kit_malloc(0), "Got a non-NULL value from kit_malloc(0)"); + + MOCKFAIL_START_TESTS(2, kit_malloc_diag); + ok(kit_malloc(100) == NULL, "When kit_malloc() fails, we get NULL (no abort())"); + ok(kit_calloc(1, 100) == NULL, "When kit_calloc() fails, we get NULL (no abort())"); + MOCKFAIL_END_TESTS(); + + MOCKFAIL_START_TESTS(1, kit_realloc_diag); + ok(kit_realloc(mem, 100) == NULL, "When kit_realloc() fails, we get NULL (no abort())"); + MOCKFAIL_END_TESTS(); + + kit_free(mem); + return exit_status(); +} diff --git a/lib-kit/test/test-kit-sortedarray.c b/lib-kit/test/test-kit-sortedarray.c index 64e3f31..40942d6 100644 --- a/lib-kit/test/test-kit-sortedarray.c +++ b/lib-kit/test/test-kit-sortedarray.c @@ -21,15 +21,27 @@ * SPDX-License-Identifier: MIT */ -#include #include #include -#include "kit.h" +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "kit-sortedarray.h" +#include "sxe-log.h" + +struct my_visitor +{ + unsigned *array; + unsigned count; + unsigned size; +}; static int unsigned_cmp(const void *lhs, const void *rhs) { + if (*(const unsigned *)lhs == ~0U) // If the magic value is passed, return an error + return INT_MAX; + return *(const unsigned *)lhs == *(const unsigned *)rhs ? 0 : *(const unsigned *)lhs < *(const unsigned *)rhs ? 
-1 : 1; } @@ -43,58 +55,94 @@ unsigned_fmt(const void *u) return string[next]; } -static const struct kit_sortedelement_class testclass = { sizeof(unsigned), 0, unsigned_cmp, unsigned_fmt}; +static struct kit_sortedelement_class testclass = { sizeof(unsigned), 0, unsigned_cmp, unsigned_fmt, NULL, 0, 0}; static const unsigned u[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; +static bool +my_visit(void *void_visitor, const void *element) +{ + struct my_visitor *visitor = void_visitor; + + SXEA1(kit_sortedarray_add_element(&testclass, (void **)&visitor->array, &visitor->count, &visitor->size, element), + "Failed to add %u to the intersection", *(const unsigned *)element); + SXEL6("Added %u to the intersection", *(const unsigned *)element); + return true; +} + +static bool +my_visit_error(void *visitor, const void *element) +{ + SXE_UNUSED_PARAMETER(visitor); + SXE_UNUSED_PARAMETER(element); + + if (*(const unsigned *)element == 13) { + SXEL2("visit is returning error"); + return false; + } + + return true; +} + int main(void) { unsigned *array = NULL; + unsigned *value_ptr; unsigned count = 0; unsigned alloc = 7; unsigned value = 2; bool match; + uint64_t start_allocations; + + plan_tests(94); + start_allocations = kit_memory_allocations(); +// KIT_ALLOC_SET_LOG(1); // Turn off when done - plan_tests(40); + testclass.flags = KIT_SORTEDARRAY_DEFAULT; + ok(!kit_sortedarray_delete(&testclass, array, &count, &value), "Did not delete 2 (empty array)"); MOCKFAIL_START_TESTS(1, kit_sortedarray_add); - ok(!kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, 0), "Failed to add 2 (realloc failed)"); + ok(!kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Failed to add 2 (realloc failed)"); MOCKFAIL_END_TESTS(); - ok(kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, 0), "Added 2 (first element)"); - ok(array, "Array was allocated"); - is(count, 1, "Array 
has one element"); - ok(!kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, 0), "Failed to add a second 2"); + ok(kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Added 2 (first element)"); + ok(array, "Array was allocated"); + is(count, 1, "Array has one element"); + /* force inaccessible delete */ + count = 0; + ok(!kit_sortedarray_delete(&testclass, array, &count, &value), "Did not delete 2 (count is zero)"); + /* reset */ + count = 1; + ok(!kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Failed to add a second 2"); value = 3; - ok(kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, 0), "Added 3 (second element)"); + ok(kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Added 3 (second element)"); value = 1; - ok(!kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, 0), "Failed to added 1 out of order"); - is(count, 2, "Array has two elements"); + ok(!kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Failed to added 1 out of order"); + is(count, 2, "Array has two elements"); value = 7; - ok(kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, 0), "Added 7 (third element)"); + ok(kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Added 7 (third element)"); value = 13; - ok(kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, 0), "Added 13 (fourth element)"); + ok(kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Added 13 (fourth element)"); value = 17; - ok(kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, 0), "Added 17 (fifth element)"); + ok(kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Added 17 (fifth element)"); value = 23; - ok(kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, 0), 
"Added 23 (sixth element)"); + ok(kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Added 23 (sixth element)"); + testclass.flags = KIT_SORTEDARRAY_ALLOW_INSERTS; value = 7; - ok(!kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, KIT_SORTEDARRAY_ALLOW_INSERTS), - "Failed to inserted 7 (duplicate element)"); + ok(!kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Can't add 7 (duplicate element)"); value = 5; - ok(kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, KIT_SORTEDARRAY_ALLOW_INSERTS), - "Inserted 5 (third element)"); - is(count, 7, "Array now has seven elements"); + ok(kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Added 5 (third element)"); + is(count, 7, "Array now has seven elements"); + testclass.flags = KIT_SORTEDARRAY_DEFAULT; // No longer allow insertions value = 29; - ok(!kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, KIT_SORTEDARRAY_DEFAULT), \ - "Failed to add 29 (full)"); + ok(!kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Failed to add 29 (full)"); is(array[0], 2, "Element 0 is 2"); - is(array[0], 2, "Element 1 is 3"); + is(array[1], 3, "Element 1 is 3"); is(array[2], 5, "Element 2 is 5"); - is(array[2], 5, "Element 3 is 7"); + is(array[3], 7, "Element 3 is 7"); is(array[6], 23, "Element 6 is 23"); is(kit_sortedarray_find(&testclass, array, count, &u[1], &match), 0, "Correct insertion point for 1"); @@ -118,10 +166,129 @@ main(void) ok((g = (const unsigned *)kit_sortedarray_get(&testclass, array, count, &u[7])) && *g == 7, "Got 7"); ok((g = (const unsigned *)kit_sortedarray_get(&testclass, array, count, &u[23])) && *g == 23, "Got 23"); + is(count, 7, "Array now has 7 elements"); + + ok(kit_sortedarray_delete(&testclass, array, &count, &u[5]), "Deleted 5"); + + is(count, 6, "Array now has 6 elements"); + + /* Verify ascending order */ + 
is(array[0], 2, "Element 0 is 2"); + is(array[1], 3, "Element 1 is 3"); + is(array[2], 7, "Element 2 is 7"); + is(array[3], 13, "Element 3 is 13"); + is(array[4], 17, "Element 4 is 17"); + is(array[5], 23, "Element 5 is 23"); + + ok(kit_sortedarray_delete(&testclass, array, &count, &u[2]), "Deleted 2"); + ok(kit_sortedarray_delete(&testclass, array, &count, &u[7]), "Deleted 7"); + ok(kit_sortedarray_delete(&testclass, array, &count, &u[23]), "Deleted 23"); + ok(!kit_sortedarray_delete(&testclass, array, &count, &u[23]), "Did not delete 23 (already deleted)"); + ok(kit_sortedarray_delete(&testclass, array, &count, &u[13]), "Deleted 13"); + + is(count, 2, "Array now has 2 elements"); + + /* Verify ascending order */ + is(array[0], 3, "Element 0 is 3"); + is(array[1], 17, "Element 1 is 17"); + + ok(kit_sortedarray_delete(&testclass, array, &count, &u[3]), "Deleted 3"); + ok(kit_sortedarray_delete(&testclass, array, &count, &u[17]), "Deleted 17"); + is(count, 0, "Array now has 0 elements"); + + testclass.flags = KIT_SORTEDARRAY_ALLOW_INSERTS; + + for (value = 0; value < 7; value++) + ok(kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), "Added %d", value); + + is(count, 7, "Array now has 7 elements"); + + testclass.flags = KIT_SORTEDARRAY_ALLOW_GROWTH | KIT_SORTEDARRAY_ZERO_COPY; value = 29; - ok(kit_sortedarray_add(&testclass, (void **)&array, &count, &alloc, &value, - KIT_SORTEDARRAY_ALLOW_GROWTH | KIT_SORTEDARRAY_ZERO_COPY), "Added 29 (full, but growth allowed)"); + ok(value_ptr = (unsigned *)kit_sortedarray_add_element(&testclass, (void **)&array, &count, &alloc, &value), + "Added 29 (full, but growth allowed)"); ok(array[7] != 29, "Zero copy specified, so added array element was not initialized"); + is(count, 8, "Array now has 8 elements"); + *value_ptr = 29; + ok(array[7] == 29, "Zero copy specified, set to 29"); + kit_free(array); + + diag("Test intersection"); + { + struct my_visitor intersection = {NULL, 0, 0}; // Intersections 
will be constructed here + const unsigned fibonaci[] = {2, 3, 5, 8, 13, 21, 34, 55, 89}; + unsigned mix[9]; + + testclass.visit = my_visit; + testclass.value = &intersection; + testclass.flags = KIT_SORTEDARRAY_ALLOW_GROWTH | KIT_SORTEDARRAY_CMP_CAN_FAIL; + + kit_sortedarray_intersect(&testclass, mix, 0, fibonaci, 9); + is(0, intersection.count, "Intersecting an empty array yeilds an empty array"); + + mix[0] = 13; + kit_sortedarray_intersect(&testclass, mix, 1, fibonaci, 9); + is(1, intersection.count, "Intersecting a single element array yeilds a single element intersection"); + is(13, intersection.array[0], "And it's the expected element"); + + for (count = 0; count < 5; count++) // Set mix to [1, 2, 3, 4, 5] + mix[count] = count + 1; + + intersection.count = 0; // Empty the intersection array + kit_sortedarray_intersect(&testclass, mix, 5, fibonaci, 9); + is(3, intersection.count, "Intersecting a 5 element array yeilds a 3 element intersection"); + is(2, intersection.array[0], "First element is the expected element"); + is(3, intersection.array[1], "Second element is the expected element"); + is(5, intersection.array[2], "Third element is the expected element"); + + intersection.count = 0; // Empty the intersection array + mix[0] = 1; + mix[1] = 2; + mix[2] = 3; + kit_sortedarray_intersect(&testclass, mix, 3, fibonaci, 9); + is(2, intersection.count, "Intersecting [1, 2, 3] yeilds a 2 element intersection"); + is(2, intersection.array[0], "First element is 2"); + is(3, intersection.array[1], "Second element is 3"); + + intersection.count = 0; // Empty the intersection array + mix[2] = 4; + kit_sortedarray_intersect(&testclass, mix, 3, fibonaci, 9); + is(1, intersection.count, "Intersecting [1, 2, 4] yeilds a 1 element intersection"); + is(2, intersection.array[0], "The element is 2"); + + intersection.count = 0; // Empty the intersection array + mix[1] = 4; + mix[2] = 5; + kit_sortedarray_intersect(&testclass, mix, 3, fibonaci, 9); + is(1, intersection.count, 
"Intersecting [1, 4, 5] yeilds a 1 element intersection"); + is(5, intersection.array[0], "The element is 5"); + + mix[0] = ~0U; + ok(!kit_sortedarray_intersect(&testclass, mix, 1, fibonaci, 9), "Intersecting [~0U] is detected as an error"); + ok(!kit_sortedarray_intersect(&testclass, mix, 3, fibonaci, 9), "Intersecting [~0U,4,5] is detected as an error"); + + mix[0] = 1; + mix[1] = ~0U; + ok(!kit_sortedarray_intersect(&testclass, mix, 3, fibonaci, 9), "Intersecting [1,~0U,5] is detected as an error"); + + intersection.count = 0; // Empty the intersection array + mix[1] = 2; + mix[2] = ~0U; + ok(!kit_sortedarray_intersect(&testclass, mix, 3, fibonaci, 9), "Intersecting [1,2,~0U] is detected as an error"); + + testclass.visit = my_visit_error; + mix[0] = 13; + ok(!kit_sortedarray_intersect(&testclass, mix, 1, fibonaci, 9), "Intersecting [13] with visit error"); + mix[0] = 1; + mix[1] = 13; + ok(!kit_sortedarray_intersect(&testclass, mix, 2, fibonaci, 9), "Intersecting [1,13] with visit error"); + mix[1] = 2; + mix[2] = 13; + ok(!kit_sortedarray_intersect(&testclass, mix, 3, fibonaci, 9), "Intersecting [1,2,13] with visit error"); + + kit_free(intersection.array); + } + is(kit_memory_allocations(), start_allocations, "No memory was leaked"); return exit_status(); } diff --git a/lib-kit/test/test-kit-strto.c b/lib-kit/test/test-kit-strto.c index 3bc50be..bce22de 100644 --- a/lib-kit/test/test-kit-strto.c +++ b/lib-kit/test/test-kit-strto.c @@ -37,7 +37,7 @@ main(int argc, char **argv) SXE_UNUSED_PARAMETER(argc); SXE_UNUSED_PARAMETER(argv); - plan_tests(50); + plan_tests(85); diag("Verify normal valid parsing"); { @@ -119,6 +119,66 @@ main(int argc, char **argv) is(kit_strtoull("0x1ffffffffffffffff", NULL, 16), ULLONG_MAX, "Test for overflow"); is(errno, ERANGE, "There was an overflow and errno was set to ERANGE"); + + is(kit_strtoull("18446744073709551615", NULL, 10), ULLONG_MAX, "Test for upper bound"); + is(errno, 0, "errno is not set"); + + 
is(kit_strtoull("18446744073709551616", NULL, 10), -1, "Test for overflow"); + is(errno, ERANGE, "There was an overflow and errno was set to ERANGE"); + } + + diag("Verify kit_strtou32 parsing"); + { + is(kit_strtou32("12345678", NULL, 10), 12345678, "kit_strtou32 parses correctly"); + is(errno, 0, "errno is not set"); + + is(kit_strtou32("0", NULL, 0), 0, "kit_strtou32 correctly parses 0"); + is(errno, 0, "errno is not set"); + + is(kit_strtou32("0x0", NULL, 0), 0, "kit_strtou32 correctly parses 0x0"); + is(errno, 0, "errno is not set"); + + is(kit_strtou32(" \t 0", NULL, 10), 0, "kit_strtou32 correctly parses 0 with leading whitespace"); + is(errno, 0, "errno is not set"); + + is(kit_strtou32("4294967295", NULL, 10), UINT32_MAX, "kit_strtou32 correctly parses UINT32_MAX"); + is(errno, 0, "errno is not set"); + + is(kit_strtou32("4294967296", NULL, 10), UINT32_MAX, "kit_strtou32 correctly handles overflow"); + is(errno, ERANGE, "errno is set to ERANGE on overflow"); + + is(kit_strtou32("-1", NULL, 10), UINT32_MAX, "kit_strtou32 correctly handles negative value"); + is(errno, ERANGE, "errno is set to ERANGE on negative value"); + + str = "1234"; + is(kit_strtou32(str, &endptr, 10), 1234, "kit_strtou32 valid parsing to clear errno"); + is(errno, 0, "errno was cleared after valid parsing"); + is(str + 4, endptr, "kit_strtou32 valid parsing advanced endptr"); + + str = "4294967296"; + is(kit_strtou32(str, &endptr, 10), UINT32_MAX, "kit_strtou32 overflow parsing"); + is(errno, ERANGE, "errno is set to ERANGE on overflow"); + is(str + 10, endptr, "kit_strtou32 overflow parsing advanced endptr"); + } + + diag("Verify kit_strtou32 parsing with hexadecimal strings"); + { + is(kit_strtou32("0x1A2B3C4D", NULL, 16), 0x1A2B3C4D, "kit_strtou32 correctly parses hexadecimal 0x1A2B3C4D"); + is(errno, 0, "errno is not set"); + + is(kit_strtou32("0xFFFFFFFF", NULL, 16), UINT32_MAX, "kit_strtou32 correctly parses hexadecimal UINT32_MAX"); + is(errno, 0, "errno is not set"); + + 
is(kit_strtou32("0x100000000", NULL, 16), UINT32_MAX, "kit_strtou32 correctly handles hexadecimal overflow"); + is(errno, ERANGE, "errno is set to ERANGE on hexadecimal overflow"); + + is(kit_strtou32("0xGHIJKL", NULL, 16), 0, "kit_strtou32 correctly handles invalid hexadecimal"); + is(errno, EINVAL, "errno is set to EINVAL on invalid hexadecimal"); + + str = "0x1A2B3C4D"; + is(kit_strtou32(str, &endptr, 16), 0x1A2B3C4D, "kit_strtou32 valid hexadecimal parsing to clear errno"); + is(errno, 0, "errno was cleared after valid hexadecimal parsing"); + is(str + 10, endptr, "kit_strtou32 valid hexadecimal parsing advanced endptr"); } return exit_status(); diff --git a/lib-kit/test/test-kit-tensor.c b/lib-kit/test/test-kit-tensor.c new file mode 100644 index 0000000..d01a237 --- /dev/null +++ b/lib-kit/test/test-kit-tensor.c @@ -0,0 +1,800 @@ +/* + * Copyright (c) 2024 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#include +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "kit-tensor.h" +#include "kit-tensor-private.h" +#include "sxe-log.h" + +static bool +tensor_make_kxmxn(float *a, const unsigned k, const unsigned m, const unsigned n, struct kit_tensor *out) +{ + out->num_dims = k > 1 ? 3 : (m > 1 ? 2 : 1); + out->dimension[0] = k; + out->dimension[1] = m; + out->dimension[2] = n; + + // Set tensor size and offsets + out->sz = out->dimension[0] * out->dimension[1] * out->dimension[2]; + out->b0 = out->dimension[1] * out->dimension[2]; + out->b1 = out->dimension[2]; + + if (!(out->value = kit_malloc(sizeof(*out->value) * k * m * n))) { + SXEL2(": Failed to allocate %zu bytes for tensor values", sizeof(*out->value) * k * m * n); + return false; + } + memcpy(out->value, a, sizeof(float) * k * m * n); + return true; +} + +int +main(void) +{ + struct kit_tensor tensor; + size_t num_values; + + plan_tests(279); + kit_memory_initialize(KIT_MEMORY_ABORT_ON_ENOMEM | KIT_MEMORY_CHECK_OVERFLOWS); + uint64_t start_allocations = kit_memory_allocations(); // Clocked the initial # of memory allocations + + diag("Happy path parsing"); + { + num_values = -1; + ok(kit_tensor_make_begin(&tensor, "DIMS:3:10", &num_values), "Began constructing a tensor"); + is(tensor.num_dims, 2, "2 dimensional tensor"); + is(tensor.dimension[0], 3, "3 rows"); + is(tensor.dimension[1], 10, "10 columns"); + is(num_values, 0, "In the beginning, there are no values"); + + ok(kit_tensor_make_add_values(&tensor, + "-0.5928198099136353,-0.7330870032310486,-0.3953676223754883,-0.3157353401184082,0.39351019263267517," + 
"0.015202959068119526,-0.8514717817306519,0.06482086330652237,-0.3223743736743927,0.24368461966514587," + "2.9728217124938965,-0.1696321666240692,-0.9596973657608032,-2.1359124183654785,0.5595744848251343," + "2.1181836128234863,0.30913060903549194,2.052929639816284,2.037630796432495,-0.5081333518028259," + "0.5283480882644653,-0.11088214814662933,-0.17134040594100952,0.4158063530921936,-1.7265350818634033", + &num_values), + "Added 25 values"); + ok(kit_tensor_make_add_values(&tensor, + "0.09420936554670334,0.5135141611099243,-0.4182659685611725,1.1057028770446777,0.8090568780899048", &num_values), + "Added 5 more values"); + ok(kit_tensor_make_end(&tensor, num_values), "Ended constructing a tensor"); + kit_tensor_fini(&tensor); + + } + + diag("Not so happy path parsing"); + { + num_values = -1; + ok(!kit_tensor_make_begin(&tensor, "XYZW:3:10", &num_values), "Wrong prefix"); + ok(!kit_tensor_make_begin(&tensor, "DIMS:A:10", &num_values), "Wrong dimension"); + ok(!kit_tensor_make_begin(&tensor, "DIMS:3c10", &num_values), "Wrong delimiter"); + ok(kit_tensor_make_begin(&tensor, "DIMS:2:2", &num_values), "Begin constructing a tensor"); + is(num_values, 0, "In the beginning, there are no values"); + is(tensor.num_dims, 2, "2 dimensional tensor"); + is(tensor.dimension[0], 2, "2 rows"); + is(tensor.dimension[1], 2, "2 columns"); + is(num_values, 0, "In the beginning, there are no values"); + + ok(!kit_tensor_make_add_values(&tensor, + "-0.5928198099136353,-0.7330870032310486,-0.3953676223754883,-0.3157353401184082,0.39351019263267517", + &num_values), + "Added one more value than there is room"); + kit_tensor_fini(&tensor); + ok(kit_tensor_make_begin(&tensor, "DIMS:2:2", &num_values), "Began constructing a tensor"); + ok(!kit_tensor_make_add_values(&tensor, + "abracadabra,0.5135141611099243,-0.4182659685611725,1.1057028770446777", + &num_values), + "Added wrong values"); + kit_tensor_fini(&tensor); + ok(kit_tensor_make_begin(&tensor, "DIMS:2:2", &num_values), "Began 
constructing a tensor"); + ok(!kit_tensor_make_add_values(&tensor, + "-0.5928198099136353+0.5135141611099243,-0.4182659685611725,1.1057028770446777", + &num_values), + "Wrong delimiter in number array"); + ok(!kit_tensor_make_end(&tensor, num_values), "Ended constructing a wrong tensor should fail"); + + kit_tensor_fini(&tensor); + MOCKFAIL_START_TESTS(1, KIT_TENSOR_MAKE_BEGIN); + ok(!kit_tensor_make_begin(&tensor, "DIMS:3:10", &num_values), + "Correctly defined tensor, but mocking allocation failure and returning false"); + kit_tensor_fini(&tensor); + MOCKFAIL_END_TESTS(); + } + + diag("Let's do some math"); + { + struct kit_tensor a, b, c, d, e; + float w, + x[4] = {1.0, 2.0, 3.0, 4.0}, + m1[4] = {0.0}, + m2[1] = {0.0}, + m3[16] = {0.0}, + m4[8] = {0.0}; + float y[27]; + + kit_array_inner_product(x, 1, x, 1, 4, &w); + is(w, 30, "Kit 4-dimensional array inner product calculation correct"); + + tensor_make_kxmxn(x, 1, 2, 2, &a); + tensor_make_kxmxn(x, 1, 2, 2, &b); + tensor_make_kxmxn(m1, 1, 2, 2, &c); + tensor_make_kxmxn(y, 3, 3, 3, &d); + tensor_make_kxmxn(y, 2, 2, 2, &e); + + is(kit_tensor_sz(&a), 4, "1x2x2 tensor has size 4"); + is(kit_tensor_matmul_sz(&a, &b), 4, "1x2x2 matmul 1x2x2 tensor matmul results in tensor size 4"); + ok(!kit_tensor_matmul(&a, &d, &b), "1x2x2 matmul 3x3x3 tensor matmul cannot be performed"); + ok(!kit_tensor_matmul(&a, &e, &b), "1x2x2 matmul 2x2x2 tensor matmul cannot be performed"); + ok(!kit_tensor_matmul(&a, &b, &d), "1x2x2 matmul 1x2x2 tensor matmul cannot be performed with output 3x3x3"); + + // We expect the following + // [ [ [ + // [1, 2], [1, 2], [7, 10], + // [3, 4], [3, 4] [15, 22] + // ] ] ] + // a @ b = c + + kit_tensor_matmul(&a, &b, &c); + is(c.num_dims, 2, "2x2 matmul outputs correct dims"); + is(c.dimension[0], 1, "2x2 matmul outputs correct dimension[0]"); + is(c.dimension[1], 2, "2x2 matmul outputs correct dimension[1]"); + is(c.dimension[2], 2, "2x2 matmul outputs correct dimension[2]"); + is(c.value[0], 7.0, "2x2 
matmul x11 correct"); + is(c.value[1], 10.0, "2x2 matmul x12 correct"); + is(c.value[2], 15.0, "2x2 matmul x21 correct"); + is(c.value[3], 22.0, "2x2 matmul x22 correct"); + kit_tensor_fini(&c); // kit_tensor_matmul allocates a new resulting tensor c and needs to be freed + + // Turn a into 1x4 and b into 4x1 then multiply + a.dimension[0] = 1; + a.dimension[1] = 1; + a.dimension[2] = 4; + b.dimension[0] = 1; + b.dimension[1] = 4; + b.dimension[2] = 1; + + tensor_make_kxmxn(m2, 1, 1, 1, &c); + // We expect the following + // [1, 2, 3, 4] [[1], [2], [3], [4]] [[30]] + // a @ b = c + + kit_tensor_matmul(&a, &b, &c); + is(c.num_dims, 2, "1x4 by 4x1 matmul outputs correct dims"); + is(c.dimension[0], 1, "1x4 by 4x1 matmul outputs correct dimension[0]"); + is(c.dimension[1], 1, "1x4 by 4x1 matmul outputs correct dimension[1]"); + is(c.dimension[2], 1, "1x4 by 4x1 matmul outputs correct dimension[2]"); + is(c.value[0], 30.0, "1x4 by 4x1 matmul x11 correct"); + kit_tensor_fini(&c); // kit_tensor_matmul allocates a new resulting tensor c and needs to be freed + + // Turn a into 4x1 and b into 1x4 then multiply + a.dimension[0] = 1; + a.dimension[1] = 4; + a.dimension[2] = 1; + b.dimension[0] = 1; + b.dimension[1] = 1; + b.dimension[2] = 4; + + tensor_make_kxmxn(m3, 1, 4, 4, &c); + // We expect the following + // [ + // [1, 2, 3, 4], + // [2, 4, 6, 8], + // [3, 6, 9, 12], + // [4, 8, 12, 16] + // [[1], [2], [3], [4]] [1, 2, 3, 4] ] + // a @ b = c + + kit_tensor_matmul(&a, &b, &c); + is(c.num_dims, 2, "4x1 by 1x4 matmul outputs correct dims"); + is(c.dimension[0], 1, "4x1 by 1x4 matmul outputs correct dimension[0]"); + is(c.dimension[1], 4, "4x1 by 1x4 matmul outputs correct dimension[1]"); + is(c.dimension[2], 4, "4x1 by 1x4 matmul outputs correct dimension[2]"); + is(c.value[0], 1.0, "4x1 by 1x4 matmul x11 correct"); + is(c.value[1], 2.0, "4x1 by 1x4 matmul x12 correct"); + is(c.value[2], 3.0, "4x1 by 1x4 matmul x13 correct"); + is(c.value[3], 4.0, "4x1 by 1x4 matmul x14 
correct"); + is(c.value[4], 2.0, "4x1 by 1x4 matmul x21 correct"); + is(c.value[5], 4.0, "4x1 by 1x4 matmul x22 correct"); + is(c.value[6], 6.0, "4x1 by 1x4 matmul x23 correct"); + is(c.value[7], 8.0, "4x1 by 1x4 matmul x24 correct"); + is(c.value[8], 3.0, "4x1 by 1x4 matmul x31 correct"); + is(c.value[9], 6.0, "4x1 by 1x4 matmul x32 correct"); + is(c.value[10], 9.0, "4x1 by 1x4 matmul x33 correct"); + is(c.value[11], 12.0, "4x1 by 1x4 matmul x34 correct"); + is(c.value[12], 4.0, "4x1 by 1x4 matmul x41 correct"); + is(c.value[13], 8.0, "4x1 by 1x4 matmul x42 correct"); + is(c.value[14], 12.0, "4x1 by 1x4 matmul x43 correct"); + is(c.value[15], 16.0, "4x1 by 1x4 matmul x44 correct"); + kit_tensor_fini(&c); // kit_tensor_matmul allocates a new resulting tensor c and needs to be freed + + // Turn a into 2x2x1 and b into 2x1x2 then multiply + a.dimension[0] = 2; + a.dimension[1] = 2; + a.dimension[2] = 1; + b.dimension[0] = 2; + b.dimension[1] = 1; + b.dimension[2] = 2; + + tensor_make_kxmxn(m4, 2, 2, 2, &c); + // We expect the following + // [ [ [ + // [[1], [2]], [[1, 2]], [[1, 2], [2, 4]], + // [[3], [4]] [[3, 4]] [[9, 12], [12, 16]] + // ] ] ] + // a @ b = c + + kit_tensor_matmul(&a, &b, &c); + is(c.num_dims, 2, "2x2x1 by 2x1x2 matmul outputs correct dims"); + is(c.dimension[0], 2, "2x2x1 by 2x1x2 matmul outputs correct dimension[0]"); + is(c.dimension[1], 2, "2x2x1 by 2x1x2 matmul outputs correct dimension[1]"); + is(c.dimension[2], 2, "2x2x1 by 2x1x2 matmul outputs correct dimension[2]"); + is(c.value[0], 1.0, "2x2x1 by 2x1x2 matmul x111 correct"); + is(c.value[1], 2.0, "2x2x1 by 2x1x2 matmul x112 correct"); + is(c.value[2], 2.0, "2x2x1 by 2x1x2 matmul x121 correct"); + is(c.value[3], 4.0, "2x2x1 by 2x1x2 matmul x122 correct"); + is(c.value[4], 9.0, "2x2x1 by 2x1x2 matmul x211 correct"); + is(c.value[5], 12.0, "2x2x1 by 2x1x2 matmul x212 correct"); + is(c.value[6], 12.0, "2x2x1 by 2x1x2 matmul x221 correct"); + is(c.value[7], 16.0, "2x2x1 by 2x1x2 matmul x222 
correct"); + + ok(!kit_tensor_matmul(&a, &d, &c), "Dimensions do not match"); + ok(!kit_tensor_matmul(&a, &e, &c), "Dimensions do not match"); + + // Turn a into 1x2x2 and b into 2x2x1 then multiply + a.dimension[0] = 1; + a.dimension[1] = 2; + a.dimension[2] = 2; + b.dimension[0] = 2; + b.dimension[1] = 2; + b.dimension[2] = 1; + ok(!kit_tensor_matmul(&a, &b, &c), "Dimensions do not match"); + kit_tensor_fini(&a); + kit_tensor_fini(&b); + kit_tensor_fini(&c); + kit_tensor_fini(&d); + kit_tensor_fini(&e); + } + + diag("Let's test a tensor lookup"); + { + struct kit_tensor a, b, c, d; + float x[4]; + float w[16] = {0}; + unsigned y[4] = {1, 3, 2, 0}, z[2] = {1, 0}; + + // Embedding shape: [[1, 2], [3, 4]] + x[0] = 1.0, x[1] = 2.0, x[2] = 3.0, x[3] = 4.0; + tensor_make_kxmxn(x, 1, 2, 2, &a); + + for (int i = 0; i < 4; i++) + x[i] = 0; + tensor_make_kxmxn(x, 1, 2, 2, &d); + + ok(kit_tensor_embedding(&a, z, 2, &d), "Embedding lookup performed on chars: '1', '0'"); + is(d.value[0], 3.0, "Embedding lookup for char '1' (1st dim) correct"); + is(d.value[1], 4.0, "Embedding lookup for char '1' (2nd dim) correct"); + is(d.value[2], 1.0, "Embedding lookup for char '0' (1st dim) correct"); + is(d.value[3], 2.0, "Embedding lookup for char '0' (2nd dim) correct"); + is(d.num_dims, 2, "Embedding lookup results in tensor of 2-dimensions"); + is(d.dimension[0], 1, "Embedding lookup result k-dimension correct"); + is(d.dimension[1], 2, "Embedding lookup result n-dimension correct"); + is(d.dimension[2], 2, "Embedding lookup result m-dimension correct"); + kit_tensor_fini(&d); + + // Embedding shape: [[1], [2], [3], [4]] + a.dimension[1] = 4; + a.dimension[2] = 1; + + for (int i = 0; i < 4; i++) + x[i] = 0; + tensor_make_kxmxn(x, 1, 2, 2, &b); + + ok(kit_tensor_embedding(&a, y, 4, &b), "Embedding lookup performed on chars: '1', '3', '2', '0'"); + is(b.value[0], 2.0, "Embedding lookup for char '1' correct"); + is(b.value[1], 4.0, "Embedding lookup for char '3' correct"); + 
is(b.value[2], 3.0, "Embedding lookup for char '2' correct"); + is(b.value[3], 1.0, "Embedding lookup for char '0' correct"); + is(b.num_dims, 2, "Embedding lookup results in tensor of 2-dimensions"); + is(b.dimension[0], 1, "Embedding lookup result k-dimension correct"); + is(b.dimension[1], 4, "Embedding lookup result n-dimension correct"); + is(b.dimension[2], 1, "Embedding lookup result m-dimension correct"); + kit_tensor_fini(&a); + kit_tensor_fini(&b); + + tensor_make_kxmxn(w, 2, 2, 4, &b); + + // Test some corner cases + tensor_make_kxmxn(w, 1, 1, 8, &c); + is(c.num_dims,1, "Check that number of dimensions is correctly allocated"); + ok(!kit_tensor_embedding(&c, y, 4, &b), "Embed 1 dimensional tensor should fail"); + kit_tensor_fini(&c); + + tensor_make_kxmxn(w, 1, 2, 4, &c); + is(c.num_dims,2, "Check that number of dimensions is correctly allocated"); + ok(kit_tensor_embedding(&c, y, 4, &b), "Embed 2 dimensional tensor should succeed"); + is(b.num_dims,2, "Returning 2 dimensional tensor too"); + kit_tensor_fini(&c); + + + tensor_make_kxmxn(w, 2, 2, 2, &c); + is(c.num_dims,3, "Check that number of dimensions is correctly allocated"); + ok(kit_tensor_embedding(&c, y, 4, &b), "Embed 3 dimensional tensor should succeed"); + is(b.num_dims,3, "Returning 3 dimensional tensor too"); + kit_tensor_fini(&c); + + kit_tensor_fini(&b); + } + + diag("Let's permute a tensor"); + { + struct kit_tensor a,b,c; + float x[12] = {1,2,3,4,5, 6, 7, 8, 9, 10, 11, 12}; + float y[8] = {0}; + float exp[12] = {0}; + unsigned dims[KIT_TENSOR_MAX_DIMS] = {0, 1, 2}; + + tensor_make_kxmxn(x, 2, 3, 2, &a); + tensor_make_kxmxn(x, 2, 3, 2, &b); + tensor_make_kxmxn(y, 2, 2, 2, &c); + + // In [2]: torch.tensor([[[1,2], [3,4], [5, 6]], [[7,8],[9,10],[11,12]]]).permute(0, 1, 2) + // Out[2]: + // tensor([[[ 1, 2], + // [ 3, 4], + // [ 5, 6]], + + // [[ 7, 8], + // [ 9, 10], + // [11, 12]]]) + ok(kit_tensor_permute(&a, dims, &b), "Permute tensor dimensions"); + + exp[0] = 1; exp[1] = 2; exp[2] = 3; 
exp[3] = 4; + exp[4] = 5; exp[5] = 6; exp[6] = 7; exp[7] = 8; + exp[8] = 9; exp[9] = 10; exp[10] = 11; exp[11] = 12; + for (int i = 0; i < 12; i++) + is(b.value[i], exp[i], "Permutation index is correct"); + + // In [3]: torch.tensor([[[1,2], [3,4], [5, 6]], [[7,8],[9,10],[11,12]]]).permute(2, 0, 1) + // Out[3]: + // tensor([[[ 1, 3, 5], + // [ 7, 9, 11]], + + // [[ 2, 4, 6], + // [ 8, 10, 12]]]) + dims[0] = 2; + dims[1] = 0; + dims[2] = 1; + ok(kit_tensor_permute(&a, dims, &b), "Permute tensor dimensions"); + + exp[0] = 1; exp[1] = 3; exp[2] = 5; exp[3] = 7; + exp[4] = 9; exp[5] = 11; exp[6] = 2; exp[7] = 4; + exp[8] = 6; exp[9] = 8; exp[10] = 10; exp[11] = 12; + for (int i = 0; i < 12; i++) + is(b.value[i], exp[i], "Permutation index is correct"); + + // In [4]: torch.tensor([[[1,2], [3,4], [5, 6]], [[7,8],[9,10],[11,12]]]).permute(1, 2, 0) + // Out[4]: + // tensor([[[ 1, 7], + // [ 2, 8]], + + // [[ 3, 9], + // [ 4, 10]], + + // [[ 5, 11], + // [ 6, 12]]]) + dims[0] = 1; + dims[1] = 2; + dims[2] = 0; + ok(kit_tensor_permute(&a, dims, &b), "Permute tensor dimensions"); + + exp[0] = 1; exp[1] = 7; exp[2] = 2; exp[3] = 8; + exp[4] = 3; exp[5] = 9; exp[6] = 4; exp[7] = 10; + exp[8] = 5; exp[9] = 11; exp[10] = 6; exp[11] = 12; + for (int i = 0; i < 12; i++) + is(b.value[i], exp[i], "Permutation index is correct"); + + // In [5]: torch.tensor([[[1,2], [3,4], [5, 6]], [[7,8],[9,10],[11,12]]]).permute(1, 0, 2) + // Out[5]: + // tensor([[[ 1, 2], + // [ 7, 8]], + + // [[ 3, 4], + // [ 9, 10]], + + // [[ 5, 6], + // [11, 12]]]) + dims[0] = 1; + dims[1] = 0; + dims[2] = 2; + ok(kit_tensor_permute(&a, dims, &b), "Permute tensor dimensions"); + + exp[0] = 1; exp[1] = 2; exp[2] = 7; exp[3] = 8; + exp[4] = 3; exp[5] = 4; exp[6] = 9; exp[7] = 10; + exp[8] = 5; exp[9] = 6; exp[10] = 11; exp[11] = 12; + for (int i = 0; i < 12; i++) + is(b.value[i], exp[i], "Permutation index is correct"); + + // In [6]: torch.tensor([[[1,2], [3,4], [5, 6]], [[7,8],[9,10],[11,12]]]).permute(0, 2, 
1) + // Out[6]: + // tensor([[[ 1, 3, 5], + // [ 2, 4, 6]], + + // [[ 7, 9, 11], + // [ 8, 10, 12]]]) + dims[0] = 0; + dims[1] = 2; + dims[2] = 1; + ok(kit_tensor_permute(&a, dims, &b), "Permute tensor dimensions"); + + exp[0] = 1; exp[1] = 3; exp[2] = 5; exp[3] = 2; + exp[4] = 4; exp[5] = 6; exp[6] = 7; exp[7] = 9; + exp[8] = 11; exp[9] = 8; exp[10] = 10; exp[11] = 12; + for (int i = 0; i < 12; i++) + is(b.value[i], exp[i], "Permutation index is correct"); + + // In [7]: torch.tensor([[[1,2], [3,4], [5, 6]], [[7,8],[9,10],[11,12]]]).permute(2, 0, 1) + // Out[7]: + // tensor([[[ 1, 3, 5], + // [ 7, 9, 11]], + + // [[ 2, 4, 6], + // [ 8, 10, 12]]]) + dims[0] = 2; + dims[1] = 0; + dims[2] = 1; + ok(kit_tensor_permute(&a, dims, &b), "Permute tensor dimensions"); + + exp[0] = 1; exp[1] = 3; exp[2] = 5; exp[3] = 7; + exp[4] = 9; exp[5] = 11; exp[6] = 2; exp[7] = 4; + exp[8] = 6; exp[9] = 8; exp[10] = 10; exp[11] = 12; + for (int i = 0; i < 12; i++) + is(b.value[i], exp[i], "Permutation index is correct"); + + // Corner case failure test + ok(!kit_tensor_permute(&a, dims, &c), "Test failure if total number of items does not match in two tensors"); + kit_tensor_fini(&a); + kit_tensor_fini(&b); + kit_tensor_fini(&c); + } + + diag("Let's apply conv1d on a tensor"); + { + struct kit_tensor a, b, c, d, e; + float x[18] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float y[15] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + float z[12] = {0}; + float w[24] = {}; + float exp[12] = {178, 199, 220, 241, 412, 469, 526, 583, 646, 739, 832, 925}; + + // In [115]: x + // Out[115]: + // tensor([[[ 1, 2, 3, 4, 5], + // [ 6, 7, 8, 9, 10], + // [11, 12, 13, 14, 15]]]) + + // In [114]: conv1d.weight + // Out[114]: + // Parameter containing: + // tensor([[[ 1., 2.], + // [ 3., 4.], + // [ 5., 6.]], + + // [[ 7., 8.], + // [ 9., 10.], + // [11., 12.]], + + // [[13., 14.], + // [15., 16.], + // [17., 18.]]], requires_grad=True) + + // In [113]: 
conv1d(x.float()) + // Out[113]: + // tensor([[[177.8149, 198.8149, 219.8149, 240.8149], + // [411.7858, 468.7858, 525.7858, 582.7858], + // [645.6705, 738.6705, 831.6705, 924.6705]]], + // grad_fn=) + + tensor_make_kxmxn(x, 3, 3, 2, &a); + tensor_make_kxmxn(y, 1, 3, 5, &b); + tensor_make_kxmxn(z, 1, 3, 4, &c); + tensor_make_kxmxn(y, 3, 5, 1, &d); + tensor_make_kxmxn(w, 2, 3, 4, &e); + + ok(kit_tensor_conv1d(&a, &b, 1, &c), "Conv1D applied"); + for (int i = 0; i < 12; i++) + is(c.value[i], exp[i], "Conv1D output correct"); + ok(!kit_tensor_conv1d(&b, &b, 1, &c), "Checking corner cases for tensor dimensions"); + ok(!kit_tensor_conv1d(&d, &b, 1, &c), "Checking corner cases for tensor dimensions"); + ok(!kit_tensor_conv1d(&a, &b, 1, &d), "Checking corner cases for tensor dimensions"); + ok(!kit_tensor_conv1d(&a, &b, 1, &e), "Checking corner cases for tensor dimensions"); + + kit_tensor_fini(&a); + kit_tensor_fini(&b); + kit_tensor_fini(&c); + kit_tensor_fini(&d); + kit_tensor_fini(&e); + } + + diag("Let's apply batchnorm1d_affine!"); + { + struct kit_tensor bn, a, b; + float x[24] = { + 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, + }; + float y[8] = { + 1.8860, 8.2284, .3, .4, + 3.3500, 8.5524, .7, .8 + }; + float z[24] = {0}; + float exp[24] = { + 0.3073, 0.4119, 0.5165, 0.6211, 0.7257, 0.8303, + 1.6737, 1.9130, 2.1524, 2.3918, 2.6311, 2.8705, + 1.5623, 1.6669, 1.7715, 1.8761, 1.9807, 2.0853, + 4.5460, 4.7854, 5.0247, 5.2641, 5.5035, 5.7428, + }; + + // In [174]: inp + // Out[174]: + // tensor([[[ 1., 2., 3., 4., 5., 6.], + // [ 7., 8., 9., 10., 11., 12.]], + + // [[13., 14., 15., 16., 17., 18.], + // [19., 20., 21., 22., 23., 24.]]]) + + // In [145]: m = nn.BatchNorm1d(2) + + // In [170]: m.running_mean + // Out[170]: + // Parameter containing: + // tensor([1.8860, 3.3500]) + + // In [171]: m.running_var + // Out[171]: + // Parameter containing: + // tensor([8.2284, 8.5524]) + + // In [172]: m.weight + // Out[172]: + 
// Parameter containing: + // tensor([0.3000, 0.7000], requires_grad=True) + + // In [173]: m.bias + // Out[173]: + // Parameter containing: + // tensor([0.4000, 0.8000], requires_grad=True) + + // In [176]: m(inp) + // Out[176]: + // tensor([[[0.3073, 0.4119, 0.5165, 0.6211, 0.7257, 0.8303], + // [1.6737, 1.9130, 2.1524, 2.3918, 2.6311, 2.8705]], + + // [[1.5623, 1.6669, 1.7715, 1.8761, 1.9807, 2.0853], + // [4.5460, 4.7854, 5.0247, 5.2641, 5.5035, 5.7428]]], + // grad_fn=) + + tensor_make_kxmxn(x, 2, 2, 6, &a); + tensor_make_kxmxn(y, 1, 2, 4, &bn); + tensor_make_kxmxn(z, 2, 2, 6, &b); + ok(kit_tensor_batchnorm1d_affine(&bn, &a, &b), "Ran batchnorm1d affine version"); + for (int i = 0; i < 12; i++) + ok(b.value[i] - exp[i] < .001, "Computed batchnorm1d affine value correctly"); + ok(!kit_tensor_batchnorm1d_affine(&a, &a, &b), "Check corner cases for tensor dimensions"); + ok(!kit_tensor_batchnorm1d_affine(&bn, &a, &bn), "Check corner cases for tensor dimensions"); + + kit_tensor_fini(&a); + kit_tensor_fini(&bn); + kit_tensor_fini(&b); + } + + diag("Let's apply ReLU!"); + { + struct kit_tensor a; + float x[10] = {-4, -3, -2, -1, 0, 1, 2, 3, 4, 5}; + float exp[10] = {0, 0, 0, 0, 0, 1, 2, 3, 4, 5}; + + is(kit_relu(-1), 0., "ReLU works on negative numbers"); + is(kit_relu(0), 0., "ReLU works on negative numbers"); + is(kit_relu(1), 1, "ReLU works on negative numbers"); + + ok(tensor_make_kxmxn(x, 2, 1, 5, &a), "Make (2,1,5) tensor"); + ok(kit_tensor_relu(&a), "Applied ReLU on tensor (2, 1, 5)"); + for (int i = 0; i < 10; i++) + is(a.value[i], exp[i], "ReLU applied over tensor (2,1,5) is correct"); + + kit_tensor_fini(&a); + } + + diag("Let's flatten a tensor!"); + { + struct kit_tensor a; + float x[10] = {-4,-3,-2,-1,0,1,2,3,4,5}; + + ok(tensor_make_kxmxn(x, 2, 1, 5, &a), "Make (2,1,5) tensor"); + is(a.dimension[0], 2, "1st dimension is 2"); + is(a.dimension[1], 1, "2nd dimension is 1"); + is(a.dimension[2], 5, "3rd dimension is 5"); + ok(kit_tensor_flatten(&a), "Ran 
tensor flatten"); + is(a.dimension[0], 1, "1st dimension is 1"); + is(a.dimension[1], 1, "2nd dimension is 1"); + is(a.dimension[2], 10, "3rd dimension is 10"); + + kit_tensor_fini(&a); + } + + diag("Putting it all together"); + { + struct kit_tensor + runstate_embed, // (1, 3, 4) + runstate_permute, // (1, 4, 6) + runstate_conv1, // (1, 4, 5) + runstate_bn1, // (1, 4, 5) + runstate_fc, // (1, 1, 2) + net_embedding, // (1, 6, 4) + net_conv1d, // (4, 4, 2) + net_bn1d_affine, // (1, 4, 4) + net_fc_linear; // (1, 20, 2) + unsigned rs_input[3] = {1, 3, 5}; + float + rs_embed[12] = {0}, + rs_permute[24] = {0}, + rs_conv1[20] = {0}, + rs_bn1[20] = {0}, + rs_fc[2] = {0}, + n_embedding[24] = { + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 6, + 4, 5, 6, 7, + 5, 6, 7, 8, + }, + n_conv1d[32] = { + 1, 2, + 2, 3, + 3, 4, + 4, 5, + + 5, 6, + 6, 7, + 7, 8, + 8, 9, + + 9, 10, + 10, 11, + 11, 12, + 12, 13, + + 13, 14, + 14, 15, + 15, 16, + 16, 17 + }, + n_bn1d_affine[16] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + }, + n_fc_linear[40] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + }; + + tensor_make_kxmxn(rs_embed, 1, 3, 4, &runstate_embed); + tensor_make_kxmxn(rs_permute, 1, 4, 3, &runstate_permute); + tensor_make_kxmxn(rs_conv1, 1, 4, 2, &runstate_conv1); + tensor_make_kxmxn(rs_bn1, 1, 4, 2, &runstate_bn1); + tensor_make_kxmxn(rs_fc, 1, 1, 2, &runstate_fc); + tensor_make_kxmxn(n_embedding, 1, 6, 4, &net_embedding); + tensor_make_kxmxn(n_conv1d, 4, 4, 2, &net_conv1d); + tensor_make_kxmxn(n_bn1d_affine, 1, 4, 4, &net_bn1d_affine); + tensor_make_kxmxn(n_fc_linear, 1, 8, 2, &net_fc_linear); + + ok(kit_tensor_embedding(&net_embedding, rs_input, 3, &runstate_embed), "Take runstate input and extract embedding tensor"); + ok(kit_tensor_transpose(&runstate_embed, 1, 2, &runstate_permute), "Permute runstate embedding tensor"); + ok(kit_tensor_conv1d(&net_conv1d, &runstate_permute, 1, 
&runstate_conv1), "Apply conv1d on permuted embedding tensor"); + ok(kit_tensor_batchnorm1d_affine(&net_bn1d_affine, &runstate_conv1, &runstate_bn1), "Apply batchnorm1d on conv1d output tensor"); + ok(kit_tensor_relu(&runstate_bn1), "Apply ReLU on batchnorm1d output"); + ok(kit_tensor_flatten(&runstate_bn1), "Flatten ReLU output for input to final linear layer output"); + ok(kit_tensor_matmul(&runstate_bn1, &net_fc_linear, &runstate_fc), "Apply neural network linear layer to two state output"); + + kit_tensor_fini(&runstate_embed); + kit_tensor_fini(&runstate_permute); + kit_tensor_fini(&runstate_conv1); + kit_tensor_fini(&runstate_bn1); + kit_tensor_fini(&runstate_fc); + kit_tensor_fini(&net_embedding); + kit_tensor_fini(&net_conv1d); + kit_tensor_fini(&net_bn1d_affine); + kit_tensor_fini(&net_fc_linear); + } + + diag("Let's sum some tensors"); + { + struct kit_tensor a, b, c; + float x[12] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float y[12] = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + float z[12] = {0}; + float exp[12] = {5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27}; + + tensor_make_kxmxn(x, 3, 2, 2, &a); + tensor_make_kxmxn(y, 3, 2, 2, &b); + tensor_make_kxmxn(z, 3, 2, 2, &c); + + ok(kit_tensor_sum(&a, &b, &c), "Summing of tensors of same dimensions"); + for (int i = 0; i < 12; i++) + is(c.value[i], exp[i], "Output sum is correct"); + + kit_tensor_fini(&a); + kit_tensor_fini(&b); + kit_tensor_fini(&c); + + tensor_make_kxmxn(z, 2, 3, 2, &b); + ok(!kit_tensor_sum(&a, &b, &c), "Summing of tensors of mismatched dimensions fails"); + kit_tensor_fini(&b); + } + + diag("Tensor Allocation and Initialization"); + { + struct kit_tensor a,c; + float x[10] = {-4, -3, -2, -1, 0, 1, 2, 3, 4, 5}; + + ok(tensor_make_kxmxn(x, 2, 1, 5, &a), "Make (2,1,5) tensor"); + c.value = kit_malloc(10); + kit_tensor_init(&c); + is(c.dimension[0],0, "Dimension check"); + is(c.dimension[1],0, "Dimension check"); + is(c.dimension[2],0, "Dimension check"); + kit_tensor_fini(&c); + 
is(c.num_dims,0, "Dimension check"); + kit_tensor_fini(&a); + } + + diag("Tensor utils"); + { + struct kit_tensor a; + unsigned dims[3] = {1, 5, 1}; + float x[5] = {1., 2., 3., 4., 5.}; + + ok(tensor_make_kxmxn(x, 1, 1, 5, &a), "Make (1,1,5) tensor"); + for (int i = 0; i < 5; i++) + ok(x[i] == a.value[i], "Tensor values set correctly"); + + ok(a.sz == 5, "Tensor size is set correctly"); + kit_tensor_zeros(&a); + for (int i = 0; i < 5; i++) + ok(a.value[i] == 0., "Tensor values set to zero correctly"); + ok(kit_tensor_dimset(&a, dims), "Change dims"); + ok(a.dimension[0] == 1 && a.dimension[1] == 5 && a.dimension[2] == 1, "Tensor dims set correctly"); + kit_tensor_fini(&a); + } + + is(kit_memory_allocations(), start_allocations, "Tensor lookup memory allocations were freed"); + + return exit_status(); +} diff --git a/lib-kit/test/test-kit-timezone.c b/lib-kit/test/test-kit-timezone.c new file mode 100644 index 0000000..451b46b --- /dev/null +++ b/lib-kit/test/test-kit-timezone.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2024 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "kit-timezone.h" + +static bool +tm_eq(const struct tm *left, const struct tm *right) +{ + return left->tm_sec == right->tm_sec && left->tm_min == right->tm_min && left->tm_hour == right->tm_hour + && left->tm_mday == right->tm_mday && left->tm_mon == right->tm_mon && left->tm_year == right->tm_year + && left->tm_wday == right->tm_wday && left->tm_yday == right->tm_yday && left->tm_isdst == right->tm_isdst + && left->tm_gmtoff == right->tm_gmtoff; +} + +static time_t gm_time; +static struct tm gm_tm; + +static void * +thread_start(void *path) +{ + const struct kit_timezone *timezone; + struct tm test_tm; + + ok(timezone = kit_timezone_load(path, 0), "Got test_zoneinfo"); + ok(kit_timezone_lock(timezone), "Locked the timezone"); + ok(kit_timezone_time_to_localtime(timezone, gm_time, &test_tm), "Got the local time in the test zone"); + ok(tm_eq(&gm_tm, &test_tm), "It's in UTC"); + kit_timezone_unlock(timezone); + return NULL; +} + +int +main(void) +{ + struct tm local_tm, test_tm; + const struct kit_timezone *local_tz, *timezone; + size_t len; + pthread_t thread; + char path[PATH_MAX]; + + plan_tests(31); + kit_memory_initialize(KIT_MEMORY_ABORT_ON_ENOMEM | KIT_MEMORY_CHECK_OVERFLOWS); + uint64_t start_allocations = kit_memory_allocations(); // Clocked the initial # of memory allocations +// KIT_ALLOC_SET_LOG(1); + + unlink("test_zoneinfo"); + kit_timezone_initialize(1); // Check for new updates if zoneinfo is older than 1 second + + diag("Verify a invalid timezone can be loaded but not used"); + { + ok(!(local_tz = kit_timezone_load("America/nowhere", 
sizeof("America/nowhere") - 1)), "Failed to get timezone for nowhere"); + ok(!kit_timezone_time_to_localtime(local_tz, gm_time, &local_tm), "Unable to get the local time in nowhere"); + } + + diag("Verify that a timezone can be loaded and used"); + { + ok(local_tz = kit_timezone_load("America/Vancouverxxx", sizeof("America/Vancouver") - 1), "Got zoneinfo for Vancouver"); + gm_time = time(NULL); + ok(kit_timezone_time_to_localtime(local_tz, gm_time, &local_tm), "Got the local time in Vancouver"); + SXEA1(gmtime_r(&gm_time, &gm_tm), "Failed to get the UTC time"); + + if (gm_tm.tm_hour > local_tm.tm_hour) { + is(gm_tm.tm_yday, local_tm.tm_yday, "Days are the same"); + ok(gm_tm.tm_hour - local_tm.tm_hour == 7 || gm_tm.tm_hour - local_tm.tm_hour == 8, "Times differ by 7 or 8"); + } + else { + /* UTC is one calendar day ahead of Vancouver here; across New Year tm_yday wraps from 364/365 back to 0, so accept that too */ + ok(gm_tm.tm_yday == local_tm.tm_yday + 1 || (gm_tm.tm_yday == 0 && local_tm.tm_yday >= 364), "UTC is on to the next day"); + ok(gm_tm.tm_hour + 24 - local_tm.tm_hour == 7 || gm_tm.tm_hour + 24 - local_tm.tm_hour == 8, "Times differ by 7 or 8"); + } + + is_eq(kit_timezone_get_name(local_tz, &len), "America/Vancouver", "Got the correct name"); + is(len, sizeof("America/Vancouver") - 1, "Got the correct name length"); + } + + diag("Verify files not present and added"); + { + SXEA1(getcwd(path, sizeof(path)), "Failed to get the current working directory path"); + SXEA1(strlcat(path, "/test_zoneinfo", sizeof(path)) < sizeof(path), "Failed to append /test_zoneinfo"); + is(kit_timezone_load(path, 0), NULL, "Correctly couldn't get test_zoneinfo"); + + SXEA1(system("cp /usr/share/zoneinfo/America/Vancouver ./test_zoneinfo") == 0, "Failed to copy Vancouver zoneinfo"); + is(kit_timezone_load(path, 0), NULL, "Still couldn't get test_zoneinfo"); + + ok(!kit_timezone_load("America/nowhere", sizeof("America/nowhere") - 1), "Got no zoneinfo for nowhere"); + ok(!kit_timezone_load("America/nowhere", sizeof("America/nowhere") - 1), "Got no zoneinfo for nowhere again"); + sleep(2); + ok(!kit_timezone_load("America/nowhere", sizeof("America/nowhere") - 
1), "Got no zoneinfo for nowhere yet again (after 2 seconds)"); + + ok(timezone = kit_timezone_load(path, 0), "Got test_zoneinfo"); + ok(kit_timezone_time_to_localtime(timezone, gm_time, &test_tm), "Got the local time in the test zone"); + ok(tm_eq(&local_tm, &test_tm), "Same as the local time in Vancouver"); + } + + diag("Verify files changing and thread safety"); + { + SXEA1(system("cp /usr/share/zoneinfo/UTC ./test_zoneinfo") == 0, "Failed to copy UTC zoneinfo"); + ok(timezone = kit_timezone_load(path, 0), "Got test_zoneinfo"); + ok(kit_timezone_lock(timezone), "Locked the timezone"); + ok(kit_timezone_time_to_localtime(timezone, gm_time, &test_tm), "Got the local time in the test zone"); + ok(tm_eq(&local_tm, &test_tm), "Still same as the local time in Vancouver"); + + SXEA1(pthread_create(&thread, NULL, thread_start, path) == 0, "Failed to create a thread"); + sleep(2); // Delay before unlocking to make sure thread waits to lock + kit_timezone_unlock(timezone); + SXEA1(pthread_join(thread, NULL) == 0, "Failed to wait for thread to terminate"); + } + + diag("Verify coverage"); + { + unlink("test_zoneinfo"); + ok(kit_timezone_lock(timezone), "Able to lock the test zone when the zoneinfo file was deleted"); + kit_timezone_unlock(timezone); + sleep(2); + ok(!kit_timezone_lock(timezone), "Failed to lock the test zone after the zoneinfo file was deleted"); + ok(!kit_timezone_lock(timezone), "Still failed to lock the test zone after the zoneinfo file was deleted"); + + ok(kit_timezone_lock(local_tz), "Able to lock the America/Vancouver timezone"); + + SXEA1(system("touch ./test_zoneinfo") == 0, "Failed to create an empty zoneinfo"); + sleep(2); + ok(!kit_timezone_lock(timezone), "Failed to lock the test zone after when the zoneinfo file was invalid"); + + MOCKFAIL_START_TESTS(1, kit_timezone_load); + ok(!kit_timezone_load("UTC", 0), "Failed to load UTC on simulated failure to add to cache"); + MOCKFAIL_END_TESTS(); + } + + kit_timezone_finalize(); + + 
is(kit_memory_allocations(), start_allocations, "All memory allocations were freed"); + return exit_status(); +} diff --git a/lib-kit/test/test-kit-udp.c b/lib-kit/test/test-kit-udp.c index d4c0221..4fd5d81 100644 --- a/lib-kit/test/test-kit-udp.c +++ b/lib-kit/test/test-kit-udp.c @@ -28,10 +28,10 @@ #include #include #include -#include #include #include "kit.h" +#include "kit-mock.h" #define TEST_MESSAGE "0123456789ABCDEF" #define TEST_MESSAGE_SIZE (sizeof(TEST_MESSAGE) - 1) diff --git a/lib-sxe-cdb/GNUmakefile b/lib-sxe-cdb/GNUmakefile new file mode 100644 index 0000000..213ba12 --- /dev/null +++ b/lib-sxe-cdb/GNUmakefile @@ -0,0 +1,2 @@ +LIBRARIES = sxe-cdb +include ../dependencies.mak diff --git a/lib-sxe-cdb/sxe-cdb-private.h b/lib-sxe-cdb/sxe-cdb-private.h new file mode 100644 index 0000000..4aab2b4 --- /dev/null +++ b/lib-sxe-cdb/sxe-cdb-private.h @@ -0,0 +1,193 @@ +/* Copyright (c) 2013 OpenDNS. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include /* defines uint32_t etc */ + +#include "sxe-spinlock.h" + +#define SXE_CDB_KERNEL_PAGE_BYTES (4096) +#define SXE_CDB_CACHE_LINE_BYTES (64) /* todo: calculate the cache line size at runtime! */ +#define SXE_CDB_CACHE_LINE_FACTOR (2) /* figure out why this is faster than 1 or 4 :-) */ +#define SXE_CDB_KEYS_PER_ROW ((SXE_CDB_CACHE_LINE_BYTES / SXE_CDB_CACHE_LINE_FACTOR) / sizeof(uint16_t)) +#define SXE_CDB_KEYS_PER_ROW_BITS (4) + +typedef struct SXE_CDB_ROW_KEY__HASH_LO { + uint16_t u16[SXE_CDB_KEYS_PER_ROW]; +} __attribute__((packed)) SXE_CDB_ROW_KEY__HASH_LO; + +typedef struct SXE_CDB_ROW_KEY__HASH_HI { + uint16_t u16[SXE_CDB_KEYS_PER_ROW]; +} __attribute__((packed)) SXE_CDB_ROW_KEY__HASH_HI; + +typedef struct SXE_CDB_ROW_KEY_POSITION { + uint32_t u32[SXE_CDB_KEYS_PER_ROW]; +} SXE_CDB_ROW_KEY_POSITION; + +typedef struct SXE_CDB_ROW { + SXE_CDB_ROW_KEY__HASH_LO hash_lo; /* hash.u16[1]; 1 in 2048(=65536/32) chance of correct key */ + SXE_CDB_ROW_KEY__HASH_HI hash_hi; /* hash.u16[0]; used to calculate sheet */ + SXE_CDB_ROW_KEY_POSITION hkv_pos; /* 0 = cell not used */ +} __attribute__((packed)) SXE_CDB_ROW; + +#define SXE_CDB_ROW_BYTES (sizeof(SXE_CDB_ROW)) /* 128 bytes per row: 16 keys * (2 + 2 + 4) bytes */ +#define SXE_CDB_SHEET_BYTES (1<<19) /* 512 KB per sheet */ +#define SXE_CDB_ROWS_PER_SHEET (SXE_CDB_SHEET_BYTES / SXE_CDB_ROW_BYTES) /* 4096 rows per sheet */ +#define SXE_CDB_ROWS_PER_SHEET_BITS (12) +#define SXE_CDB_KEYS_PER_SHEET (SXE_CDB_ROWS_PER_SHEET * SXE_CDB_KEYS_PER_ROW) /* 65536 keys per sheet */ +#define SXE_CDB_SHEETS_MAX ((1<<30) / 8 / SXE_CDB_KEYS_PER_SHEET * 4) /* 8192 sheet indexes */ +#define SXE_CDB_COUNTS_LISTS_MAX (256) /* 256 count lists */ + +/** + * - SXE_CDB_SHEETS_MAX 
explanation: + * - Assuming smallest size for header + key + value is 8 + * bytes, and each sheet holds a finite (e.g. 65536) number + * of keys, and the largest kvdata is 4GB (due to the 32bit + * offset), then the most sheets we'll need is 2^32 / 8 / + * SXE_CDB_KEYS_PER_SHEET or 8192 sheets. + * - This means that the most keys we can store in a 4GB + * kvdata address space is 2^32 / 8 or 536,870,912 keys. + */ + +typedef struct SXE_CDB_SHEET { + SXE_CDB_ROW row[SXE_CDB_ROWS_PER_SHEET]; +} __attribute__((packed)) SXE_CDB_SHEET; + +#define KEY_HEADER_LEN_1_KEY_BITS 3 +#define KEY_HEADER_LEN_3_KEY_BITS 7 +#define KEY_HEADER_LEN_5_KEY_BITS 16 +#define KEY_HEADER_LEN_8_KEY_BITS 24 + +#define KEY_HEADER_LEN_1_VAL_BITS 4 +#define KEY_HEADER_LEN_3_VAL_BITS 16 +#define KEY_HEADER_LEN_5_VAL_BITS 16 +#define KEY_HEADER_LEN_8_VAL_BITS 32 + +//why does gcc suck so badly? #define KEY_HEADER_LEN_8_VAL_LEN_MAX ((1<counts[] */ + uint32_t next_hkv_pos; /* next hkv pos at same count */ + uint32_t last_hkv_pos; /* last hkv pos at same count */ +} __attribute__((packed)) SXE_CDB_HKV_LIST; + +#define SXE_CDB_COUNT_BYTES (sizeof(SXE_CDB_COUNT)) +#define SXE_CDB_HKV_LIST_BYTES (sizeof(SXE_CDB_HKV_LIST)) + +// SXE_CDB_CACHE_LINE_BYTES : 64 +// SXE_CDB_ROW_BYTES : 128 +// SXE_CDB_KEYS_PER_ROW : 16 +// SXE_CDB_SHEET_BYTES : 524288 +// SXE_CDB_ROWS_PER_SHEET : 4096 +// SXE_CDB_KEYS_PER_SHEET : 65536 +// SXE_CDB_SHEETS_MAX : 8192 +// SXE_CDB_KERNEL_PAGE_BYTES: 4096 +// SXE_CDB_COUNT_BYTES : 24 + +struct SXE_CDB_INSTANCE { + SXE_CDB_COUNT * counts ; /* pointer to mremap()able key count memory */ + SXE_CDB_SHEET * sheets ; /* pointer to mremap()able key index memory */ + uint8_t * kvdata ; /* pointer to mremap()able key,value store memory */ + uint32_t kvdata_size ; /* bytes allocated & used or not to store key,value pairs */ + uint32_t kvdata_used ; /* bytes allocated & used to store key,value pairs */ + uint32_t kvdata_maximum ; /* bytes allocated max threshold to store key,value pairs */ + 
uint32_t sheets_size ; /* bytes allocated : sheets_size * SXE_CDB_SHEET_BYTES */ + uint32_t sheets_cells_size ; /* cells allocated & used or not to index a key */ + uint32_t sheets_cells_used ; /* cells allocated & used to index a key */ + uint32_t sheets_split ; /* times 1 sheet split into 2 sheets */ + uint64_t sheets_split_keys ; /* accumulated total of all keys examined during splits */ + uint64_t keylen_misses ; /* times hash matched but keylen didn't match */ + uint64_t memcmp_misses ; /* times keylen matched but key didn't match */ + uint32_t keys_at_start ; /* sxe_cdb_instance_new() copy for sxe_cdb_instance_reboot() */ + uint32_t counts_pages ; /* kernel pages @ counts */ //todo: add _pages to sheets & kvdata + uint32_t counts_size ; /* bytes allocated : counts_size * SXE_CDB_COUNT_BYTES */ + uint32_t counts_next_free ; /* unused next in generic double linked *counts* list */ + uint32_t counts_free ; /* unused total in generic double linked *counts* list */ + uint32_t counts_used ; /* used total in generic double linked *counts* list */ + uint32_t counts_hi[SXE_CDB_COUNTS_LISTS_MAX]; /* highest count in particular double linked *counts* list */ + uint32_t counts_lo[SXE_CDB_COUNTS_LISTS_MAX]; /* lowest count in particular double linked *counts* list */ + uint16_t sheets_index[SXE_CDB_SHEETS_MAX] ; +} __attribute__((packed)); + +struct SXE_CDB_ENSEMBLE { + uint32_t cdb_count ; /* instances of SXE_CDB_INSTANCE; max 4GB kvdata per instance */ + struct SXE_CDB_INSTANCE ** cdb_instances ; /* pointers to SXE_CDB_INSTANCE */ + SXE_SPINLOCK * cdb_instance_locks; /* locks for each SXE_CDB_INSTANCE */ + uint32_t cdb_is_locked : 1 ; /* use locks for SXE_CDB_ENSEMBLE? */ +} __attribute__((packed)); + +#include "sxe-cdb.h" + diff --git a/lib-sxe-cdb/sxe-cdb.c b/lib-sxe-cdb/sxe-cdb.c new file mode 100644 index 0000000..96687ef --- /dev/null +++ b/lib-sxe-cdb/sxe-cdb.c @@ -0,0 +1,1326 @@ +/* Copyright (c) 2013 OpenDNS. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include /* for mremap() */ +#include /* for memset() */ + +#include "kit-alloc.h" +#include "sxe-hash.h" +#include "sxe-log.h" +#include "sxe-util.h" +#include "sxe-spinlock.h" +#include "sxe-cdb-private.h" + + __thread uint32_t sxe_cdb_tls_hkv_len_max = 0 ; + __thread SXE_CDB_HKV * sxe_cdb_tls_hkv = NULL; + __thread SXE_CDB_HKV_PART sxe_cdb_tls_hkv_part ; + __thread uint32_t sxe_cdb_tls_walk_cnt_pos ; + __thread uint32_t sxe_cdb_tls_walk_hkv_pos ; + __thread uint64_t sxe_cdb_tls_walk_count ; + __thread SXE_CDB_HASH sxe_cdb_hash ; + +static SXE_SPINLOCK sxe_cdb_ensemble_lock = { 0 }; /* used by sxe_cdb_ensemble_(new|destroy)() */ +static __thread const uint8_t * sxe_cdb_key ; /* used by sxe_cdb_prepare() */ +static __thread uint32_t sxe_cdb_key_len ; /* used by sxe_cdb_prepare() */ + +/*- + * Finalize the sxe-cdb package per thread + */ +void +sxe_cdb_finalize_thread(void) +{ + if (sxe_cdb_tls_hkv) { + kit_free(sxe_cdb_tls_hkv); + sxe_cdb_tls_hkv = NULL; + } +} + +/*- + * Copy the hkv record stored at kvdata[hkv_pos] into the per-thread buffer sxe_cdb_tls_hkv, + * growing that buffer with kit_realloc() when the record is longer than sxe_cdb_tls_hkv_len_max. + * On return sxe_cdb_tls_hkv_part describes the record, with its key and val pointers + * aimed at the key and value bytes inside the per-thread copy. + * + * @return The per-thread buffer holding the copy + */ +SXE_CDB_HKV * +sxe_cdb_copy_hkv_to_tls(SXE_CDB_INSTANCE * cdb_instance, uint32_t hkv_pos) +{ + SXE_CDB_HKV * hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[hkv_pos]; + sxe_cdb_hkv_unpack(hkv, &sxe_cdb_tls_hkv_part); + if (sxe_cdb_tls_hkv_part.hkv_len > sxe_cdb_tls_hkv_len_max) { + SXEL7("%s(){} // realloc() from %u bytes to %u bytes", __FUNCTION__, sxe_cdb_tls_hkv_len_max, sxe_cdb_tls_hkv_part.hkv_len); + sxe_cdb_tls_hkv = kit_realloc(sxe_cdb_tls_hkv , sxe_cdb_tls_hkv_part.hkv_len); + sxe_cdb_tls_hkv_len_max = sxe_cdb_tls_hkv ? 
sxe_cdb_tls_hkv_part.hkv_len : 0; // sxe_realloc() failed :-( + /* report the number of bytes that was requested, not sizeof() the length field */ + SXEA1(sxe_cdb_tls_hkv, "ERROR: INTERNAL: realloc() failed for %u bytes // %s(){}", sxe_cdb_tls_hkv_part.hkv_len, __FUNCTION__); + } + if ( sxe_cdb_tls_hkv_len_max >= sxe_cdb_tls_hkv_part.hkv_len) { + memcpy(sxe_cdb_tls_hkv , hkv, sxe_cdb_tls_hkv_part.hkv_len); /* copy hkv into a tls buffer (for caller) */ + sxe_cdb_tls_hkv_part.key = &((uint8_t *) sxe_cdb_tls_hkv)[sxe_cdb_tls_hkv_part.hkv_len - sxe_cdb_tls_hkv_part.key_len - sxe_cdb_tls_hkv_part.val_len]; /* as a courtesy to the caller, */ + sxe_cdb_tls_hkv_part.val = &((uint8_t *) sxe_cdb_tls_hkv)[sxe_cdb_tls_hkv_part.hkv_len - sxe_cdb_tls_hkv_part.val_len]; /* pretend we've just unpacked the tls */ + } + return sxe_cdb_tls_hkv; +} /* sxe_cdb_copy_hkv_to_tls() */ + +void +sxe_cdb_copy_tls_to_hkv(SXE_CDB_INSTANCE * cdb_instance, uint32_t hkv_pos) +{ + SXE_CDB_HKV * hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[hkv_pos]; + memcpy(hkv, sxe_cdb_tls_hkv, sxe_cdb_tls_hkv_part.hkv_len); /* copy tls buffer (from caller) into hkv */ + return; +} /* sxe_cdb_copy_tls_to_hkv() */ + +void +sxe_cdb_prepare(const uint8_t * key, uint32_t key_len) +{ + if (key) { + sxe_hash_128(key, key_len, &sxe_cdb_hash.u08[0]); + + if (sxe_cdb_hash.u16[1] == sxe_cdb_hash.u16[2]) { /* guarantee that sxe_cdb_hash.u16[1] and sxe_cdb_hash.u16[2] are unique ... (in the worst possible way?) */ + sxe_cdb_hash.u16[1] = sxe_cdb_hash.u16[3]; + + if (sxe_cdb_hash.u16[1] == sxe_cdb_hash.u16[2]) { + sxe_cdb_hash.u16[1] = sxe_cdb_hash.u16[4]; /* COVERAGE EXCLUSION: todo: calculate some murmurhash3 collisions to coverage this line! */ + } + + if (sxe_cdb_hash.u16[1] == sxe_cdb_hash.u16[2]) { + sxe_cdb_hash.u16[1] = sxe_cdb_hash.u16[2] + 1; /* COVERAGE EXCLUSION: todo: calculate some murmurhash3 collisions to coverage this line! 
*/ + } + } + } + + sxe_cdb_key = key; + sxe_cdb_key_len = key_len; + SXEL6("%s(key=%.*s, key_len=%u){} // 0xhash=%04x-%04x-%04x", __FUNCTION__, key_len, key, key_len, sxe_cdb_hash.u16[0], sxe_cdb_hash.u16[1], sxe_cdb_hash.u16[2]); +} + +static void +sxe_cdb_instance_new_init( + SXE_CDB_INSTANCE * cdb_instance , + uint32_t keys_at_start , + uint32_t kvdata_maximum) /* maximum bytes for kvdata memory or zero means no limit (i.e. up to 4GB) */ +{ + unsigned cl; + unsigned si; + unsigned sheet_index = 0; + unsigned sheet_index_max = keys_at_start / SXE_CDB_KEYS_PER_SHEET * 2; + SXEL6("initializing sheet indexes // sheet_index_max=%u", sheet_index_max); + for (si = 0; si < SXE_CDB_SHEETS_MAX; si++) { /* loop over all sheet indexes */ + cdb_instance->sheets_index[si] = sheet_index; + sheet_index ++; + sheet_index = sheet_index >= sheet_index_max ? 0 : sheet_index; + } + + cdb_instance->sheets_size = sheet_index_max ? sheet_index_max : 1; /* always create at least one sheet */ + cdb_instance->kvdata_size = SXE_CDB_KERNEL_PAGE_BYTES; + cdb_instance->kvdata_used = 1; /* 0 means cell is unused in table; yeah, we're 'wasting' 1 byte here :-) */ + cdb_instance->sheets_cells_size = SXE_CDB_KEYS_PER_SHEET; + cdb_instance->sheets_cells_used = 0; + cdb_instance->sheets_split = 0; + cdb_instance->sheets_split_keys = 0; + cdb_instance->counts = NULL; /* mmap()ed on demand */ + cdb_instance->counts_pages = 0; + cdb_instance->counts_size = 0; + cdb_instance->counts_next_free = 1; + cdb_instance->counts_free = 0; + cdb_instance->counts_used = 0; + cdb_instance->keylen_misses = 0; + cdb_instance->memcmp_misses = 0; + cdb_instance->kvdata_maximum = kvdata_maximum; + cdb_instance->keys_at_start = keys_at_start; + + for (cl = 0; cl < SXE_CDB_COUNTS_LISTS_MAX; cl ++) { + cdb_instance->counts_hi[cl] = SXE_CDB_COUNT_NONE; + cdb_instance->counts_lo[cl] = SXE_CDB_COUNT_NONE; + } + + cdb_instance->sheets = mmap(NULL /* kernel chooses addr */, SXE_CDB_SHEET_BYTES * cdb_instance->sheets_size , 
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + cdb_instance->kvdata = mmap(NULL /* kernel chooses addr */, cdb_instance->kvdata_size , PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + SXEL7("cdb_instance->sheets : %p // 4k kernel pages: %u", cdb_instance->sheets, SXE_CDB_SHEET_BYTES * cdb_instance->sheets_size / 4096); + SXEL7("cdb_instance->kvdata : %p // 4k kernel pages: %u", cdb_instance->kvdata, cdb_instance->kvdata_size / 4096); + SXEA1(MAP_FAILED != cdb_instance->sheets, "ERROR: FATAL: expected mmap() not to fail // %s(){}", __FUNCTION__); + SXEA1(MAP_FAILED != cdb_instance->kvdata, "ERROR: FATAL: expected mmap() not to fail // %s(){}", __FUNCTION__); +} /* sxe_cdb_instance_new_init() */ + +SXE_CDB_INSTANCE * +sxe_cdb_instance_new( + uint32_t keys_at_start , + uint32_t kvdata_maximum) /* maximum bytes for kvdata memory or zero means no limit (i.e. up to 4GB) */ +{ + SXE_CDB_INSTANCE * cdb_instance = kit_malloc(sizeof(*cdb_instance)); + SXEA1(cdb_instance, "ERROR: INTERNAL: sxe_malloc() failed for %zu bytes // %s(){}", sizeof(*cdb_instance), __FUNCTION__); + + SXEE6("(keys_at_start=%u, kvdata_maximum=%u)", keys_at_start, kvdata_maximum); + + SXEA6(getpagesize() == SXE_CDB_KERNEL_PAGE_BYTES, "ERROR: INTERNAL: expected %u=getpagesize() but got %u; todo: change code to work with other page sizes", SXE_CDB_KERNEL_PAGE_BYTES, getpagesize()); + + SXEA6(SXE_CDB_KEYS_PER_ROW == (1<MEMBER) + SXEA6( 1 == SIZEOF_MEMBER(SXE_CDB_HKV ,header_len_1), "ERROR: INTERNAL: unexpected sizeof header_len_1"); + SXEA6( 3 == SIZEOF_MEMBER(SXE_CDB_HKV ,header_len_3), "ERROR: INTERNAL: unexpected sizeof header_len_3"); + SXEA6( 5 == SIZEOF_MEMBER(SXE_CDB_HKV ,header_len_5), "ERROR: INTERNAL: unexpected sizeof header_len_5"); + SXEA6( 8 == SIZEOF_MEMBER(SXE_CDB_HKV ,header_len_8), "ERROR: INTERNAL: unexpected sizeof header_len_8"); + SXEA6(18 == sizeof(SXE_CDB_COUNT ), "ERROR: INTERNAL: unexpected sizeof SXE_CDB_COUNT"); + SXEA6(12 == sizeof(SXE_CDB_HKV_LIST 
), "ERROR: INTERNAL: unexpected sizeof SXE_CDB_HKV_LIST"); + SXEA6( 5 == SIZEOF_MEMBER(SXE_CDB_UID ,as_part ), "ERROR: INTERNAL: unexpected sizeof SXE_CDB_UID"); + SXEA6( 5 == SIZEOF_MEMBER(SXE_CDB_UID ,as_u40 ), "ERROR: INTERNAL: unexpected sizeof SXE_CDB_UID"); + SXEA6( 8 == SIZEOF_MEMBER(SXE_CDB_UID ,as_u64 ), "ERROR: INTERNAL: unexpected sizeof SXE_CDB_UID"); + + SXEL7("SXE_CDB_CACHE_LINE_BYTES : %u" , SXE_CDB_CACHE_LINE_BYTES ); + SXEL7("SXE_CDB_ROW_BYTES : %zu", SXE_CDB_ROW_BYTES ); + SXEL7("SXE_CDB_KEYS_PER_ROW : %zu", SXE_CDB_KEYS_PER_ROW ); + SXEL7("SXE_CDB_SHEET_BYTES : %u" , SXE_CDB_SHEET_BYTES ); + SXEL7("SXE_CDB_ROWS_PER_SHEET : %zu", SXE_CDB_ROWS_PER_SHEET ); + SXEL7("SXE_CDB_KEYS_PER_SHEET : %zu", SXE_CDB_KEYS_PER_SHEET ); + SXEL7("SXE_CDB_SHEETS_MAX : %zu", SXE_CDB_SHEETS_MAX ); + SXEL7("SXE_CDB_KERNEL_PAGE_BYTES: %u" , SXE_CDB_KERNEL_PAGE_BYTES); + SXEL7("SXE_CDB_COUNT_BYTES : %zu", SXE_CDB_COUNT_BYTES ); + + sxe_cdb_instance_new_init(cdb_instance, keys_at_start, kvdata_maximum); + + SXER6("return %p=cdb_instance", cdb_instance); + return cdb_instance; +} /* sxe_cdb_instance_new() */ + +static void +sxe_cdb_instance_destroy_mmaps(SXE_CDB_INSTANCE * cdb_instance) +{ + SXEL6("%s(cdb_instance=?){}", __FUNCTION__); + SXEA1(0 == munmap(cdb_instance->sheets, SXE_CDB_SHEET_BYTES * cdb_instance->sheets_size ), "ERROR: INTERNAL: munmap() failed for sheets"); + SXEA1(0 == munmap(cdb_instance->kvdata, cdb_instance->kvdata_size ), "ERROR: INTERNAL: munmap() failed for kvdata"); + if (cdb_instance->counts) { SXEA1(0 == munmap(cdb_instance->counts, SXE_CDB_KERNEL_PAGE_BYTES * cdb_instance->counts_pages), "ERROR: INTERNAL: munmap() failed for counts"); } +} /* sxe_cdb_instance_destroy_mmaps() */ + +void +sxe_cdb_instance_destroy(SXE_CDB_INSTANCE * cdb_instance) +{ + SXEL6("%s(cdb_instance=?){}", __FUNCTION__); + sxe_cdb_instance_destroy_mmaps(cdb_instance); + kit_free(cdb_instance); +} /* sxe_cdb_instance_destroy() */ + +void 
+sxe_cdb_instance_reboot(SXE_CDB_INSTANCE * cdb_instance) +{ + SXEL6("%s(cdb_instance=?){}", __FUNCTION__); + + sxe_cdb_instance_destroy_mmaps(cdb_instance ); /* goodbye mmaps */ + sxe_cdb_instance_new_init (cdb_instance, cdb_instance->keys_at_start, cdb_instance->kvdata_maximum); /* hello mmaps */ +} /* sxe_cdb_instance_reboot() */ + +#if SXE_DEBUG +void +sxe_cdb_instance_debug_validate(SXE_CDB_INSTANCE * cdb_instance, const char * debug) +{ + uint16_t sheet; + unsigned row; + unsigned cell; + + for (sheet = 0; sheet < cdb_instance->sheets_size; sheet ++) { + unsigned key_total = 0; + unsigned key_used = 0; + unsigned row_count_hi = 0; + unsigned row_count_avg = 0; + for (row = 0; row < SXE_CDB_ROWS_PER_SHEET; row ++) { + unsigned row_count = 0; + for (cell = 0; cell < SXE_CDB_KEYS_PER_ROW; cell ++) { + key_total ++; + if (cdb_instance->sheets[sheet].row[row].hkv_pos.u32[cell]) { /* if cell used */ + key_used ++; + row_count ++; + row_count_avg ++; + uint16_t hash_hi = cdb_instance->sheets[sheet].row[row].hash_hi.u16[cell]; + uint16_t mysheet = cdb_instance->sheets_index[hash_hi % SXE_CDB_SHEETS_MAX]; + SXEA1(mysheet == sheet, "ERROR: INTERNAL: unexpected sheet index %u for sheet %u, row %u, cell %u; // %s", mysheet, sheet, row, cell, debug); + } + } // for (cell + row_count_hi = row_count > row_count_hi ? 
row_count : row_count_hi; + } // for (row + SXEL5("sheet %5u has %5u used from %5u keys or %2u%%; row_count_hi %u, row_count_avg %zu", sheet, key_used, key_total, key_used * 100 / key_total, row_count_hi, row_count_avg / SXE_CDB_ROWS_PER_SHEET); + } // for (sheet +} /* sxe_cdb_instance_debug_validate() */ +#endif + /* Split a full sheet in two: grow the sheets mapping by one sheet with mremap(), repoint alternate sheets_index[] entries that referenced 'sheet' at the new sheet, then move every used cell whose hash_hi now maps to the new sheet into the same row/cell of that sheet and zero its hkv_pos in the old one. */ +void +sxe_cdb_instance_split_sheet(SXE_CDB_INSTANCE * cdb_instance, uint16_t sheet) +{ + uint16_t this_sheet = sheet ; + uint16_t that_sheet = cdb_instance->sheets_size; + unsigned row; + unsigned cell; + + SXEE6("(cdb_instance=?, sheet=%u)", sheet); + + //debug sxe_cdb_debug_validate(cdb, "a"); + + SXEL7("cdb_instance->sheets : %p // old base", cdb_instance->sheets); + cdb_instance->sheets = mremap(cdb_instance->sheets, SXE_CDB_SHEET_BYTES * cdb_instance->sheets_size, SXE_CDB_SHEET_BYTES * (1 + cdb_instance->sheets_size), MREMAP_MAYMOVE); + SXEL7("cdb_instance->sheets : %p // new base after mremap()", cdb_instance->sheets); + cdb_instance->sheets_size ++; /* count extra sheet */ + cdb_instance->sheets_cells_size += SXE_CDB_KEYS_PER_SHEET; + + SXEA1(MAP_FAILED != cdb_instance->sheets, "ERROR: FATAL: expected mremap() not to fail // %s(){}", __FUNCTION__); + + SXEL7("split sheet indexes into roughly two"); + unsigned si; + unsigned sheet_toggle = 0; + unsigned sheet_toggles = 0; + for (si = 0; si < SXE_CDB_SHEETS_MAX; si++) { /* loop over all sheet indexes */ + SXEA6(cdb_instance->sheets_index[si] <= that_sheet, "ERROR: INTERNAL: expected ->sheets_index[%u] <= %u=that_sheet but found %u", si, that_sheet, cdb_instance->sheets_index[si]); + if (this_sheet == cdb_instance->sheets_index[si]) { /* if this is the full sheet? */ + cdb_instance->sheets_index[si] = sheet_toggle ? that_sheet : this_sheet; /* split */ + sheet_toggle = sheet_toggle ? 0 : 1; + sheet_toggles ++; + } + } + SXEA6(sheet_toggles > 1, "ERROR: INTERNAL: too many keys? 
need at least two toggles to grow cdb!"); + +#if SXE_DEBUG + unsigned keys_total = 0; + unsigned keys_moved = 0; +#endif + SXEL7("split sheet into roughly two; visit %zu keys * %zu rows", SXE_CDB_KEYS_PER_ROW, SXE_CDB_ROWS_PER_SHEET); + for (row = 0; row < SXE_CDB_ROWS_PER_SHEET; row ++) { + for (cell = 0; cell < SXE_CDB_KEYS_PER_ROW; cell ++) { + if (cdb_instance->sheets[this_sheet].row[row].hkv_pos.u32[cell]) { /* if cell used */ + cdb_instance->sheets_split_keys ++; +#if SXE_DEBUG + keys_total ++; +#endif + uint16_t hash_hi = cdb_instance->sheets[this_sheet].row[row].hash_hi.u16[cell]; + uint16_t mysheet = cdb_instance->sheets_index[hash_hi % SXE_CDB_SHEETS_MAX]; + SXEA6((mysheet == this_sheet) || (mysheet == that_sheet), "ERROR: INTERNAL: expecting sheet %u or %u but got %u (while splitting cell %u in row %u with hash_hi %u)", this_sheet, that_sheet, mysheet, cell, row, hash_hi); + if (mysheet == that_sheet) { /* cell should split to that other sheet? */ + cdb_instance->sheets[that_sheet].row[row].hash_lo.u16[cell] = cdb_instance->sheets[this_sheet].row[row].hash_lo.u16[cell]; /* copy cell */ + cdb_instance->sheets[that_sheet].row[row].hash_hi.u16[cell] = cdb_instance->sheets[this_sheet].row[row].hash_hi.u16[cell]; /* from old */ + cdb_instance->sheets[that_sheet].row[row].hkv_pos.u32[cell] = cdb_instance->sheets[this_sheet].row[row].hkv_pos.u32[cell]; /* to new sheet */ + + cdb_instance->sheets[this_sheet].row[row].hkv_pos.u32[cell] = 0; /* mark cell as unused */ +#if SXE_DEBUG + keys_moved ++; +#endif + } + } + } // for (cell + } // for (row + SXEL6("split %u from %u sheet %5u keys to sheet %5u; now %u from %u keys total", keys_moved, keys_total, this_sheet, that_sheet, cdb_instance->sheets_cells_used, cdb_instance->sheets_cells_size); /* NOTE(review): keys_moved/keys_total are only declared under SXE_DEBUG; presumably SXEL6 compiles away otherwise - confirm */ + + //debug sxe_cdb_debug_validate(cdb, "b"); + + cdb_instance->sheets_split ++; + + SXER6("return"); +} /* sxe_cdb_instance_split_sheet() */ + /* Append the current key (sxe_cdb_key / sxe_cdb_key_len / sxe_cdb_hash, all declared outside this function) together with val/val_len to kvdata and index it in a free cell of one of its two candidate rows, splitting the sheet and retrying when both rows are full. */ +uint64_t /* SXE_CDB_UID; SXE_CDB_UID_NONE means something went wrong and key not 
appended */ +sxe_cdb_instance_put_val(SXE_CDB_INSTANCE * cdb_instance, const uint8_t * val, uint32_t val_len) +{ + SXE_CDB_UID uid; + + uid.as_u64.u = SXE_CDB_UID_NONE; + + if ((cdb_instance->kvdata_maximum > 0) && (cdb_instance->kvdata_size > cdb_instance->kvdata_maximum)) { /* test here so sheet splits can no longer happen */ + SXEL6("%s(cdb_instance=?, val=?, val_len=%u){} // return %010lx=ii[%04x]%03x-%01x=%s; <-- Want %u but reached caller set maximum ->kvdata_maximum=%u; early out with no append", + __FUNCTION__, val_len, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell, SXE_CDB_UID_NONE == uid.as_u64.u ? "failure" : "success", cdb_instance->kvdata_size, cdb_instance->kvdata_maximum); + goto SXE_EARLY_OUT; + } + /* pick the first header encoding (1, 3, 5 or 8) whose key and value length limits both fit; a zero key length or lengths beyond the 8 variant are rejected */ + unsigned header_len = 0; + if ( sxe_cdb_key_len == 0 ) { SXEL3("WARNING: %s(): unexpected %u=key_len and/or %u=val_len; early out with no append for key #%u", __FUNCTION__, sxe_cdb_key_len, val_len, cdb_instance->sheets_cells_used); goto SXE_EARLY_OUT; } + else if ((sxe_cdb_key_len <= KEY_HEADER_LEN_1_KEY_LEN_MAX) && (val_len <= KEY_HEADER_LEN_1_VAL_LEN_MAX)) { header_len = 1; } + else if ((sxe_cdb_key_len <= KEY_HEADER_LEN_3_KEY_LEN_MAX) && (val_len <= KEY_HEADER_LEN_3_VAL_LEN_MAX)) { header_len = 3; } + else if ((sxe_cdb_key_len <= KEY_HEADER_LEN_5_KEY_LEN_MAX) && (val_len <= KEY_HEADER_LEN_5_VAL_LEN_MAX)) { header_len = 5; } + else if ((sxe_cdb_key_len <= KEY_HEADER_LEN_8_KEY_LEN_MAX) && (val_len <= KEY_HEADER_LEN_8_VAL_LEN_MAX)) { header_len = 8; } + else { SXEL3("WARNING: %s(): unexpected %u=key_len and/or %u=val_len; early out with no append for key #%u", __FUNCTION__, sxe_cdb_key_len, val_len, cdb_instance->sheets_cells_used); goto SXE_EARLY_OUT; } + +SXE_CDB_ADD_RETRY_ROW_SCAN:; /* jumped back to after a sheet split, so the sheet for this hash is looked up again */ + uint16_t sheet_index = sxe_cdb_hash.u16[0] % SXE_CDB_SHEETS_MAX; + uint16_t sheet = cdb_instance->sheets_index[sheet_index]; + SXEA6(sheet < cdb_instance->sheets_size, "ERROR: INTERNAL: %u=sheet < %u=cdb->sheets_size", sheet, 
cdb_instance->sheets_size); + + unsigned row; + unsigned cell; + unsigned row_1 = sxe_cdb_hash.u16[1] & (SXE_CDB_ROWS_PER_SHEET - 1); + unsigned row_2 = sxe_cdb_hash.u16[2] & (SXE_CDB_ROWS_PER_SHEET - 1); + unsigned row_1_cell = SXE_CDB_KEYS_PER_ROW, row_1_used = 0; + unsigned row_2_cell = SXE_CDB_KEYS_PER_ROW, row_2_used = 0; /* *_cell == SXE_CDB_KEYS_PER_ROW means no free cell seen yet on that row */ + for (cell = 0; cell < SXE_CDB_KEYS_PER_ROW; cell ++) { + if (cdb_instance->sheets[sheet].row[row_1].hkv_pos.u32[cell]) { row_1_used ++; } else { row_1_cell = SXE_CDB_KEYS_PER_ROW == row_1_cell ? cell : row_1_cell; } + if (cdb_instance->sheets[sheet].row[row_2].hkv_pos.u32[cell]) { row_2_used ++; } else { row_2_cell = SXE_CDB_KEYS_PER_ROW == row_2_cell ? cell : row_2_cell; } + } + + if ((SXE_CDB_KEYS_PER_ROW == row_1_used) + && (SXE_CDB_KEYS_PER_ROW == row_2_used)) { /* if both sheet rows full */ + if (cdb_instance->sheets_size >= SXE_CDB_SHEETS_MAX) { SXEL3("WARNING: %s(): cannot split at maximum sheet; early out with no append for key #%u", __FUNCTION__, cdb_instance->sheets_cells_used); goto SXE_EARLY_OUT; } + sxe_cdb_instance_split_sheet(cdb_instance, sheet); + goto SXE_CDB_ADD_RETRY_ROW_SCAN; + } + + cell = row_1_used < row_2_used ? row_1_cell : row_2_cell; /* first free cell on the emptier row (row_2 on a tie) */ + row = row_1_used < row_2_used ? row_1 : row_2 ; /* row to use */ + + uint32_t k = cdb_instance->kvdata_used; + uint64_t key_bytes_free = cdb_instance->kvdata_size - cdb_instance->kvdata_used; + uint64_t key_bytes_want = header_len + sxe_cdb_key_len + val_len; + + if (key_bytes_want > key_bytes_free) { /* come here to mremap() more key space! 
*/ + uint32_t want_size_rounded_to_kernel_pages = (((key_bytes_want + (SXE_CDB_KERNEL_PAGE_BYTES - 1)) / SXE_CDB_KERNEL_PAGE_BYTES) * SXE_CDB_KERNEL_PAGE_BYTES) + SXE_CDB_KERNEL_PAGE_BYTES; /* NOTE(review): the 64-bit key_bytes_want is narrowed to uint32_t here - confirm the KEY_HEADER_LEN_*_MAX limits keep this from truncating */ + if (cdb_instance->kvdata_size + want_size_rounded_to_kernel_pages < cdb_instance->kvdata_size) { SXEL3("WARNING: %s(): avoiding 4GB kvdata wrap; early out with no append for key #%u", __FUNCTION__, cdb_instance->sheets_cells_used); goto SXE_EARLY_OUT; } + cdb_instance->kvdata = mremap(cdb_instance->kvdata, cdb_instance->kvdata_size, cdb_instance->kvdata_size + want_size_rounded_to_kernel_pages, MREMAP_MAYMOVE); + cdb_instance->kvdata_size += want_size_rounded_to_kernel_pages; + SXEA1(MAP_FAILED != cdb_instance->kvdata, "ERROR: FATAL: expected mremap() not to fail // %s(){}", __FUNCTION__); + } + /* index the new record: a zero hkv_pos marks a cell as unused, so k is assumed to be non-zero - TODO confirm kvdata_used starts above 0 */ + cdb_instance->sheets[sheet].row[row].hash_lo.u16[cell] = sxe_cdb_hash.u16[1]; + cdb_instance->sheets[sheet].row[row].hash_hi.u16[cell] = sxe_cdb_hash.u16[0]; + cdb_instance->sheets[sheet].row[row].hkv_pos.u32[cell] = k; + + SXE_CDB_HKV * hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[k]; + if (1 == header_len) { hkv->header_len_1.flag = 0; hkv->header_len_1.key_len = sxe_cdb_key_len; hkv->header_len_1.val_len = val_len; memcpy(&hkv->header_len_1.content[0], sxe_cdb_key, sxe_cdb_key_len); memcpy(&hkv->header_len_1.content[sxe_cdb_key_len], val, val_len); } + else if (3 == header_len) { hkv->header_len_3.flag = 1; hkv->header_len_3.key_len = sxe_cdb_key_len; hkv->header_len_3.val_len = val_len; memcpy(&hkv->header_len_3.content[0], sxe_cdb_key, sxe_cdb_key_len); memcpy(&hkv->header_len_3.content[sxe_cdb_key_len], val, val_len); } + else if (5 == header_len) { hkv->header_len_5.flag = 0; hkv->header_len_5.xxx_len = 0; hkv->header_len_5.yyy_len = 0; hkv->header_len_5.key_len = sxe_cdb_key_len; hkv->header_len_5.val_len = val_len; memcpy(&hkv->header_len_5.content[0], sxe_cdb_key, sxe_cdb_key_len); memcpy(&hkv->header_len_5.content[sxe_cdb_key_len], val, val_len); } + else if (8 == 
header_len) { hkv->header_len_8.flag = 0; hkv->header_len_8.xxx_len = 0; hkv->header_len_8.yyy_len = 1; hkv->header_len_8.key_len = sxe_cdb_key_len; hkv->header_len_8.val_len = val_len; memcpy(&hkv->header_len_8.content[0], sxe_cdb_key, sxe_cdb_key_len); memcpy(&hkv->header_len_8.content[sxe_cdb_key_len], val, val_len); } + + cdb_instance->kvdata_used += key_bytes_want; + cdb_instance->sheets_cells_used ++; + /* the uid records the sheets_index[] slot (not the sheet number) plus row and cell */ + uid.as_u64.u = 0 ; + uid.as_part.sheets_index_index = sheet_index; + uid.as_part.row = row ; + uid.as_part.cell = cell ; /* uid to new key */ + + SXEL6("%s(cdb_instance=?, val=?, val_len=%u){} // return %010lx=ii[%04x]%03x-%01x=%s; sheet=%05u row_1/2=%04u/%04u row_1/2_used=%02u/%02u row=%04u cell=%02u key_bytes_want=%lu", + __FUNCTION__, val_len, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell, SXE_CDB_UID_NONE == uid.as_u64.u ? "failure" : "success", sheet, row_1, row_2, row_1_used, row_2_used, row, cell, key_bytes_want); + + //todo: fantasize about mremap() for each sheet? 
look through 64k keys to remove deletions + +SXE_EARLY_OUT:; + return uid.as_u64.u; +} /* sxe_cdb_instance_put_val() */ + /* Decode the header of *hkv into *hkv_part: total record length, key and value lengths, and pointers to the key and value bytes inside hkv. The header variant is recognised from its flag / xxx_len / yyy_len / key_len fields, tested in the order 8, 5, 1, with 3 as the fallback. */ +void +sxe_cdb_hkv_unpack( + SXE_CDB_HKV * hkv , + SXE_CDB_HKV_PART * hkv_part) +{ + if (0 == hkv->header_len_8.flag && hkv->header_len_8.xxx_len == 0 && hkv->header_len_8.yyy_len == 1) { hkv_part->hkv_len = sizeof(hkv->header_len_8) + hkv->header_len_8.key_len + hkv->header_len_8.val_len; hkv_part->key_len = hkv->header_len_8.key_len; hkv_part->val_len = hkv->header_len_8.val_len; hkv_part->key = &hkv->header_len_8.content[0]; hkv_part->val = &hkv->header_len_8.content[hkv_part->key_len]; } + else if (0 == hkv->header_len_5.flag && hkv->header_len_5.xxx_len == 0 && hkv->header_len_5.yyy_len == 0) { hkv_part->hkv_len = sizeof(hkv->header_len_5) + hkv->header_len_5.key_len + hkv->header_len_5.val_len; hkv_part->key_len = hkv->header_len_5.key_len; hkv_part->val_len = hkv->header_len_5.val_len; hkv_part->key = &hkv->header_len_5.content[0]; hkv_part->val = &hkv->header_len_5.content[hkv_part->key_len]; } + else if (0 == hkv->header_len_1.flag && hkv->header_len_1.key_len != 0 ) { hkv_part->hkv_len = sizeof(hkv->header_len_1) + hkv->header_len_1.key_len + hkv->header_len_1.val_len; hkv_part->key_len = hkv->header_len_1.key_len; hkv_part->val_len = hkv->header_len_1.val_len; hkv_part->key = &hkv->header_len_1.content[0]; hkv_part->val = &hkv->header_len_1.content[hkv_part->key_len]; } + else /* header_len_3 */ { hkv_part->hkv_len = sizeof(hkv->header_len_3) + hkv->header_len_3.key_len + hkv->header_len_3.val_len; hkv_part->key_len = hkv->header_len_3.key_len; hkv_part->val_len = hkv->header_len_3.val_len; hkv_part->key = &hkv->header_len_3.content[0]; hkv_part->val = &hkv->header_len_3.content[hkv_part->key_len]; } +} /* sxe_cdb_hkv_unpack() */ + /* Return the size in bytes of the header variant used by *hkv, applying the same variant tests, in the same order, as sxe_cdb_hkv_unpack(). */ +uint32_t +sxe_cdb_h_len(const SXE_CDB_HKV * hkv) +{ + uint32_t h_len; + if (0 == hkv->header_len_8.flag && hkv->header_len_8.xxx_len == 0 && hkv->header_len_8.yyy_len == 1) { h_len = 
sizeof(hkv->header_len_8); } + else if (0 == hkv->header_len_5.flag && hkv->header_len_5.xxx_len == 0 && hkv->header_len_5.yyy_len == 0) { h_len = sizeof(hkv->header_len_5); } + else if (0 == hkv->header_len_1.flag && hkv->header_len_1.key_len != 0 ) { h_len = sizeof(hkv->header_len_1); } + else /* header_len_3 */ { h_len = sizeof(hkv->header_len_3); } + SXEL6("%s(hkv=?){} // %u=h_len", __FUNCTION__, h_len); + return h_len; +} /* sxe_cdb_h_len() */ + /* Lookup step for one cell of ROW. Expects cdb_instance, sheet, cell, hkv_pos, tmp_hkv and tls_hkv in the expanding scope plus a SXE_EARLY_OUT label. When both stored hash halves match and the cell is used, the record is unpacked into sxe_cdb_tls_hkv_part; on a full key match tls_hkv is set and control jumps to SXE_EARLY_OUT, otherwise the keylen/memcmp miss counters are bumped. */ +#define SXE_CDB_GET_HKV_IN_CELL_IN(ROW) \ + do { \ + if ((sxe_cdb_hash.u16[1] == cdb_instance->sheets[sheet].row[ROW].hash_lo.u16[cell]) \ + && (sxe_cdb_hash.u16[0] == cdb_instance->sheets[sheet].row[ROW].hash_hi.u16[cell]) \ + && ( cdb_instance->sheets[sheet].row[ROW].hkv_pos.u32[cell])) { \ + hkv_pos = cdb_instance->sheets[sheet].row[ROW].hkv_pos.u32[cell]; \ + tmp_hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[hkv_pos]; \ + sxe_cdb_hkv_unpack(tmp_hkv, &sxe_cdb_tls_hkv_part); \ + if (sxe_cdb_key_len == sxe_cdb_tls_hkv_part.key_len) { \ + if (0 == memcmp(sxe_cdb_tls_hkv_part.key, sxe_cdb_key, sxe_cdb_key_len)) { \ + tls_hkv = tmp_hkv; /* key exists! 
*/ \ + goto SXE_EARLY_OUT; \ + } \ + else { \ + cdb_instance->memcmp_misses ++; \ + } \ + } \ + else { \ + cdb_instance->keylen_misses ++; \ + } \ + } \ + } while (0) + /* Same lookup step as SXE_CDB_GET_HKV_IN_CELL_IN, but on a key match it records ROW and cell in the enclosing scope's 'uid' instead of returning the record pointer. */ +#define SXE_CDB_GET_UID_IN_CELL_IN(ROW) \ + do { \ + if ((sxe_cdb_hash.u16[1] == cdb_instance->sheets[sheet].row[ROW].hash_lo.u16[cell]) \ + && (sxe_cdb_hash.u16[0] == cdb_instance->sheets[sheet].row[ROW].hash_hi.u16[cell]) \ + && ( cdb_instance->sheets[sheet].row[ROW].hkv_pos.u32[cell])) { \ + hkv_pos = cdb_instance->sheets[sheet].row[ROW].hkv_pos.u32[cell]; \ + tmp_hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[hkv_pos]; \ + sxe_cdb_hkv_unpack(tmp_hkv, &sxe_cdb_tls_hkv_part); \ + if (sxe_cdb_key_len == sxe_cdb_tls_hkv_part.key_len) { \ + if (0 == memcmp(sxe_cdb_tls_hkv_part.key, sxe_cdb_key, sxe_cdb_key_len)) { \ + uid.as_part.row = ROW ; \ + uid.as_part.cell = cell; /* uid to existing key */ \ + goto SXE_EARLY_OUT; \ + } \ + else { \ + cdb_instance->memcmp_misses ++; \ + } \ + } \ + else { \ + cdb_instance->keylen_misses ++; \ + } \ + } \ + } while (0) + /* Find the current key (sxe_cdb_key / sxe_cdb_key_len / sxe_cdb_hash) by scanning every cell of its two candidate rows in the sheet selected by sxe_cdb_hash.u16[0]; on a hit sxe_cdb_tls_hkv_part describes the returned record. */ +SXE_CDB_HKV * /* NULL or *dangerous* direct pointer to original SXE_CDB_HKV; header + key + value bytes */ +sxe_cdb_instance_get_hkv_raw( /* NOTE: hkv only usable in-between sxe_cdb_*() calls in single-threaded environment due to mremap() and/or sxe_cdb_ensemble_swap_instances() possibility; pulling the memory rug from under us */ + SXE_CDB_INSTANCE * cdb_instance) +{ + SXE_CDB_HKV * tls_hkv = NULL; /* result */ + + uint16_t sheet = cdb_instance->sheets_index[sxe_cdb_hash.u16[0] % SXE_CDB_SHEETS_MAX]; + SXEA6(sheet < cdb_instance->sheets_size, "ERROR: INTERNAL: %u=sheet < %u=cdb_instance->sheets_size", sheet, cdb_instance->sheets_size); + + uint32_t hkv_pos ; + SXE_CDB_HKV * tmp_hkv ; + uint32_t cell ; + uint32_t row_1 = sxe_cdb_hash.u16[1] & (SXE_CDB_ROWS_PER_SHEET - 1); + uint32_t row_2 = sxe_cdb_hash.u16[2] & (SXE_CDB_ROWS_PER_SHEET - 1); +#if SXE_DEBUG + sxe_cdb_tls_hkv_part.key_len = 0; + sxe_cdb_tls_hkv_part.val_len = 0; +#endif + for (cell 
= 0; cell < SXE_CDB_KEYS_PER_ROW; cell ++) { + SXE_CDB_GET_HKV_IN_CELL_IN(row_1); + SXE_CDB_GET_HKV_IN_CELL_IN(row_2); + } + +SXE_EARLY_OUT:; + SXEL6("%s(cdb_instance=?){} // return %p (%s); sxe_cdb_tls_hkv_part.val_len=%u", __FUNCTION__, tls_hkv, tls_hkv ? "key exists" : "key doesn't exist", sxe_cdb_tls_hkv_part.val_len); + return tls_hkv; +} /* sxe_cdb_instance_get_hkv_raw() */ + /* Find the current key (sxe_cdb_key / sxe_cdb_key_len / sxe_cdb_hash) in its two candidate rows and return its uid, built from the sheets_index[] slot, row and cell. On a hit sxe_cdb_tls_hkv_part describes the matching record. */ +uint64_t /* SXE_CDB_UID; SXE_CDB_UID_NONE means key not found */ +sxe_cdb_instance_get_uid(SXE_CDB_INSTANCE * cdb_instance) +{ + SXE_CDB_UID uid; + + uid.as_u64.u = 0; + + uid.as_part.sheets_index_index = sxe_cdb_hash.u16[0] % SXE_CDB_SHEETS_MAX; + uint16_t sheet = cdb_instance->sheets_index[uid.as_part.sheets_index_index]; + SXEA6(sheet < cdb_instance->sheets_size, "ERROR: INTERNAL: %u=sheet < %u=cdb_instance->sheets_size", sheet, cdb_instance->sheets_size); + + uint32_t hkv_pos ; + SXE_CDB_HKV * tmp_hkv ; + uint32_t cell ; + uint32_t row_1 = sxe_cdb_hash.u16[1] & (SXE_CDB_ROWS_PER_SHEET - 1); + uint32_t row_2 = sxe_cdb_hash.u16[2] & (SXE_CDB_ROWS_PER_SHEET - 1); +#if SXE_DEBUG + sxe_cdb_tls_hkv_part.key_len = 0; + sxe_cdb_tls_hkv_part.val_len = 0; +#endif + for (cell = 0; cell < SXE_CDB_KEYS_PER_ROW; cell ++) { + SXE_CDB_GET_UID_IN_CELL_IN(row_1); + SXE_CDB_GET_UID_IN_CELL_IN(row_2); + } + + uid.as_u64.u = SXE_CDB_UID_NONE; /* fell out of the scan without a match */ + +SXE_EARLY_OUT:; + SXEL6("%s(cdb_instance=?){} // return %010lx=ii[%04x]%03x-%01x=%s // sxe_cdb_tls_hkv_part.val_len=%u", __FUNCTION__, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell, SXE_CDB_UID_NONE == uid.as_u64.u ? 
"key doesn't exist" : "key exists", sxe_cdb_tls_hkv_part.val_len); + return uid.as_u64.u; +} /* sxe_cdb_instance_get_uid() */ + /* Resolve a uid to a direct pointer into kvdata. The uid's cell, row and sheets_index slot are range-checked with SXEA1; if the addressed cell is used, the record is also unpacked into sxe_cdb_tls_hkv_part. Returns NULL when the cell is unused. */ +SXE_CDB_HKV * /* NULL or tls SXE_CDB_HKV raw; not copy */ +sxe_cdb_instance_get_uid_hkv_raw(SXE_CDB_INSTANCE * cdb_instance, SXE_CDB_UID uid) +{ + SXE_CDB_HKV * tls_hkv = NULL; /* result */ + + uint32_t cell = uid.as_part.cell ; SXEA1(cell < SXE_CDB_KEYS_PER_ROW , "ERROR: INTERNAL: %u=cell < %lu=SXE_CDB_KEYS_PER_ROW" , cell , SXE_CDB_KEYS_PER_ROW ); + uint32_t row = uid.as_part.row ; SXEA1(row < SXE_CDB_ROWS_PER_SHEET , "ERROR: INTERNAL: %u=row < %lu=SXE_CDB_ROWS_PER_SHEET" , row , SXE_CDB_ROWS_PER_SHEET ); + uint16_t sheet_index = uid.as_part.sheets_index_index ; SXEA1(sheet_index < SXE_CDB_SHEETS_MAX , "ERROR: INTERNAL: %u=sheet_index < %lu=SXE_CDB_SHEETS_MAX" , sheet_index, SXE_CDB_SHEETS_MAX ); + uint16_t sheet = cdb_instance->sheets_index[sheet_index]; SXEA1(sheet < cdb_instance->sheets_size, "ERROR: INTERNAL: %u=sheet < %u=cdb_instance->sheets_size", sheet , cdb_instance->sheets_size); + if (cdb_instance->sheets[sheet].row[row].hkv_pos.u32[cell]) { + uint32_t hkv_pos = cdb_instance->sheets[sheet].row[row].hkv_pos.u32[cell]; + tls_hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[hkv_pos]; + sxe_cdb_hkv_unpack(tls_hkv, &sxe_cdb_tls_hkv_part); + } + + SXEL6("%s(cdb_instance=?, uid=%010lx=ii[%04x]%03x-%01x){} // return %p", __FUNCTION__, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell, tls_hkv); + return tls_hkv; +} /* sxe_cdb_instance_get_uid_hkv_raw() */ + /* Resolve a uid like sxe_cdb_instance_get_uid_hkv_raw(), but return whatever sxe_cdb_copy_hkv_to_tls() (defined outside this view) yields for the record position instead of a pointer into kvdata. Returns NULL when the cell is unused. */ +SXE_CDB_HKV * /* NULL or tls SXE_CDB_HKV copy */ +sxe_cdb_instance_get_uid_hkv(SXE_CDB_INSTANCE * cdb_instance, SXE_CDB_UID uid) +{ + SXE_CDB_HKV * tls_hkv = NULL; /* result */ + + uint32_t cell = uid.as_part.cell ; SXEA1(cell < SXE_CDB_KEYS_PER_ROW , "ERROR: INTERNAL: %u=cell < %lu=SXE_CDB_KEYS_PER_ROW" , cell , SXE_CDB_KEYS_PER_ROW ); + uint32_t row = uid.as_part.row ; SXEA1(row < SXE_CDB_ROWS_PER_SHEET , "ERROR: INTERNAL: %u=row < 
%lu=SXE_CDB_ROWS_PER_SHEET" , row , SXE_CDB_ROWS_PER_SHEET ); + uint16_t sheet_index = uid.as_part.sheets_index_index ; SXEA1(sheet_index < SXE_CDB_SHEETS_MAX , "ERROR: INTERNAL: %u=sheet_index < %lu=SXE_CDB_SHEETS_MAX" , sheet_index, SXE_CDB_SHEETS_MAX ); + uint16_t sheet = cdb_instance->sheets_index[sheet_index]; SXEA1(sheet < cdb_instance->sheets_size, "ERROR: INTERNAL: %u=sheet < %u=cdb_instance->sheets_size", sheet , cdb_instance->sheets_size); + if (cdb_instance->sheets[sheet].row[row].hkv_pos.u32[cell]) { + uint32_t hkv_pos = cdb_instance->sheets[sheet].row[row].hkv_pos.u32[cell]; + tls_hkv = sxe_cdb_copy_hkv_to_tls(cdb_instance, hkv_pos); + } + + SXEL6("%s(cdb_instance=?, uid=%010lx=ii[%04x]%03x-%01x){} // return %p", __FUNCTION__, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell, tls_hkv); + return tls_hkv; +} /* sxe_cdb_instance_get_uid_hkv() */ + /* Counterpart of sxe_cdb_instance_get_uid_hkv(): range-check the uid the same way and, if the addressed cell is used, hand its record position to sxe_cdb_copy_tls_to_hkv() (defined outside this view). Does nothing when the cell is unused. */ +void /* only call this function directly *after* sxe_cdb_instance_get_uid_hkv() */ +sxe_cdb_instance_set_uid_hkv(SXE_CDB_INSTANCE * cdb_instance, SXE_CDB_UID uid) +{ + uint32_t cell = uid.as_part.cell ; SXEA1(cell < SXE_CDB_KEYS_PER_ROW , "ERROR: INTERNAL: %u=cell < %lu=SXE_CDB_KEYS_PER_ROW" , cell , SXE_CDB_KEYS_PER_ROW ); + uint32_t row = uid.as_part.row ; SXEA1(row < SXE_CDB_ROWS_PER_SHEET , "ERROR: INTERNAL: %u=row < %lu=SXE_CDB_ROWS_PER_SHEET" , row , SXE_CDB_ROWS_PER_SHEET ); + uint16_t sheet_index = uid.as_part.sheets_index_index ; SXEA1(sheet_index < SXE_CDB_SHEETS_MAX , "ERROR: INTERNAL: %u=sheet_index < %lu=SXE_CDB_SHEETS_MAX" , sheet_index, SXE_CDB_SHEETS_MAX ); + uint16_t sheet = cdb_instance->sheets_index[sheet_index]; SXEA1(sheet < cdb_instance->sheets_size, "ERROR: INTERNAL: %u=sheet < %u=cdb_instance->sheets_size", sheet , cdb_instance->sheets_size); + if (cdb_instance->sheets[sheet].row[row].hkv_pos.u32[cell]) { + uint32_t hkv_pos = cdb_instance->sheets[sheet].row[row].hkv_pos.u32[cell]; + sxe_cdb_copy_tls_to_hkv(cdb_instance, hkv_pos); + } + + 
SXEL6("%s(cdb_instance=?, uid=%010lx=ii[%04x]%03x-%01x){}", __FUNCTION__, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + return; +} /* sxe_cdb_instance_set_uid_hkv() */ + +void +sxe_cdb_instance_free_count_push(SXE_CDB_INSTANCE * cdb_instance, uint32_t from_c, uint32_t to___c) +{ + uint32_t this_c = to___c; + while (this_c >= from_c) { + uint32_t next_c = cdb_instance->counts_next_free; + cdb_instance->counts_next_free = this_c; + cdb_instance->counts[this_c].next = next_c; + cdb_instance->counts_free ++; + this_c --; + } + + SXEL6("%s(cdb_instance=?, from_c=%u, to___c=%u){}", __FUNCTION__, from_c, to___c); + return; +} /* sxe_cdb_instance_free_count_push() */ + +uint32_t +sxe_cdb_instance_free_count_pop(SXE_CDB_INSTANCE * cdb_instance) +{ + uint32_t next_c; + + if (0 == cdb_instance->counts_free) { + cdb_instance->counts_pages ++; /* count extra page */ + if (cdb_instance->counts) { cdb_instance->counts = mremap(cdb_instance->counts , SXE_CDB_KERNEL_PAGE_BYTES * (cdb_instance->counts_pages - 1), SXE_CDB_KERNEL_PAGE_BYTES * cdb_instance->counts_pages, MREMAP_MAYMOVE ); } + else { cdb_instance->counts = mmap(NULL /* kernel chooses addr */, SXE_CDB_KERNEL_PAGE_BYTES * cdb_instance->counts_pages , PROT_READ | PROT_WRITE , MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); } + SXEA1(MAP_FAILED != cdb_instance->counts, "ERROR: FATAL: expected m(re)map() not to fail // %s(){}", __FUNCTION__); + + cdb_instance->counts_size += 1 == cdb_instance->counts_pages ? 1 : 0; /* skip over item zero because it's used for SXE_CDB_COUNT_NONE */ + + uint32_t counts_size_new = ((SXE_CDB_KERNEL_PAGE_BYTES * cdb_instance->counts_pages) / SXE_CDB_COUNT_BYTES) - cdb_instance->counts_size; + SXEL7("%s(cdb_instance=?){} // ... 
growing ->counts by %u from %u to %u, ->counts_used=%u, ->counts_pages=%u, %lu page bytes unused ...", __FUNCTION__, counts_size_new, cdb_instance->counts_size, cdb_instance->counts_size + counts_size_new - 1, cdb_instance->counts_used, cdb_instance->counts_pages, (SXE_CDB_KERNEL_PAGE_BYTES * cdb_instance->counts_pages) - (SXE_CDB_COUNT_BYTES * (cdb_instance->counts_size + counts_size_new))); + sxe_cdb_instance_free_count_push(cdb_instance, cdb_instance->counts_size, cdb_instance->counts_size + counts_size_new - 1); + cdb_instance->counts_size += counts_size_new; + } + + next_c = cdb_instance->counts_next_free; + SXEA6(SXE_CDB_COUNT_NONE != next_c, "ERROR: INTERNAL: unexpected SXE_CDB_COUNT_NONE == next_c"); + cdb_instance->counts_next_free = cdb_instance->counts[next_c].next; + cdb_instance->counts_free --; + cdb_instance->counts_used ++; + + SXEL6("%s(cdb_instance=?){} // return %u=next_c", __FUNCTION__, next_c); + return next_c; +} /* sxe_cdb_instance_free_count_pop() */ + +/** + * For a given key, do something similar to incrementing a 64bit + * counter, but in reality additionally keep a list of all keys + * incremented, sorted by the counter value. + */ + +uint64_t /* count or zero means e.g. 
key couldn't be put */ +sxe_cdb_instance_inc(SXE_CDB_INSTANCE * cdb_instance, uint32_t counts_list) +{ + uint64_t count_new = 0; + SXE_CDB_HKV_PART this; + SXE_CDB_HKV_PART last; + SXE_CDB_HKV_PART next; + + SXEE6("(cdb_instance=?)"); + + SXEA6(counts_list < SXE_CDB_COUNTS_LISTS_MAX, "ERROR: INTERNAL: expected counts_list < %u but got %u", SXE_CDB_COUNTS_LISTS_MAX, counts_list); + + SXE_CDB_HKV * this_hkv; uint32_t this_hkv_pos; SXE_CDB_HKV_LIST * this_val_ptr; uint32_t this_c; + SXE_CDB_HKV * last_hkv; uint32_t next_hkv_pos; SXE_CDB_HKV_LIST * next_val_ptr; uint32_t last_c; + SXE_CDB_HKV * next_hkv; uint32_t last_hkv_pos; SXE_CDB_HKV_LIST * last_val_ptr; uint32_t next_c; + this_hkv = sxe_cdb_instance_get_hkv_raw(cdb_instance); + if (NULL == this_hkv) { + SXEL7("key doesn't exist; so create it!"); + uint8_t val_dummy[SXE_CDB_HKV_LIST_BYTES]; + if (SXE_CDB_UID_NONE != sxe_cdb_instance_put_val(cdb_instance, &val_dummy[0], sizeof(val_dummy))) { /* if key appended */ + this_hkv = sxe_cdb_instance_get_hkv_raw(cdb_instance); + SXEA6(this_hkv, "ERROR: INTERNAL: did sxe_cdb_instance_put_val() but can't sxe_cdb_instance_get_hkv()"); + sxe_cdb_hkv_unpack(this_hkv, &this); + + this_hkv_pos = (uint8_t *) this_hkv - cdb_instance->kvdata; + this_val_ptr = (SXE_CDB_HKV_LIST *) this.val; + this_c = cdb_instance->counts_lo[counts_list]; + + if (SXE_CDB_COUNT_NONE == this_c) { + SXEL7("no counts exist; so create the very first one & add our key to it"); + count_new = 1; + this_c = sxe_cdb_instance_free_count_pop(cdb_instance); + this_val_ptr->count_to_use = this_c; + this_val_ptr->next_hkv_pos = SXE_CDB_HKV_POS_NONE; + this_val_ptr->last_hkv_pos = SXE_CDB_HKV_POS_NONE; + cdb_instance->counts[this_c].count = count_new; + cdb_instance->counts[this_c].next = SXE_CDB_COUNT_NONE; + cdb_instance->counts[this_c].last = SXE_CDB_COUNT_NONE; + cdb_instance->counts[this_c].hkv1 = this_hkv_pos; + cdb_instance->counts_lo[counts_list] = this_c; + cdb_instance->counts_hi[counts_list] = this_c; + } 
+ else if (1 < cdb_instance->counts[this_c].count) { + SXEL7("lowest count %lu is still higher than desired count of 1; insert new count with count of 1", (uint64_t)cdb_instance->counts[this_c].count); + count_new = 1; + next_c = this_c; + this_c = sxe_cdb_instance_free_count_pop(cdb_instance); + this_val_ptr->count_to_use = this_c; + this_val_ptr->next_hkv_pos = SXE_CDB_HKV_POS_NONE; + this_val_ptr->last_hkv_pos = SXE_CDB_HKV_POS_NONE; + cdb_instance->counts[this_c].count = count_new; + cdb_instance->counts[this_c].next = cdb_instance->counts_lo[counts_list]; + cdb_instance->counts[this_c].last = SXE_CDB_COUNT_NONE; + cdb_instance->counts[this_c].hkv1 = this_hkv_pos; + cdb_instance->counts_lo[counts_list] = this_c; + cdb_instance->counts[next_c].last = this_c; + } + else { + SXEA6(1 == cdb_instance->counts[this_c].count, "ERROR: INTERNAL: unexpected count %lu", (uint64_t)cdb_instance->counts[this_c].count); + SXEL7("add key to the count '1' list of key(s)"); + count_new = 1; + last_hkv_pos = cdb_instance->counts[this_c].hkv1; /* current head of the count-1 key chain; the new key is linked in front of it */ + SXEA6(SXE_CDB_HKV_POS_NONE != last_hkv_pos , "ERROR: INTERNAL: SXE_CDB_HKV_POS_NONE == last_hkv_pos"); + last_hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[last_hkv_pos]; + sxe_cdb_hkv_unpack(last_hkv, &last); + last_val_ptr = (SXE_CDB_HKV_LIST *) last.val; + SXEA6(SXE_CDB_HKV_POS_NONE == last_val_ptr->last_hkv_pos, "ERROR: INTERNAL: SXE_CDB_HKV_POS_NONE != last_val_ptr->last_hkv_pos"); + last_val_ptr->last_hkv_pos = this_hkv_pos; + this_val_ptr->count_to_use = this_c; + this_val_ptr->next_hkv_pos = last_hkv_pos; + this_val_ptr->last_hkv_pos = SXE_CDB_HKV_POS_NONE; + cdb_instance->counts[this_c].hkv1 = this_hkv_pos; + } + } /* if sxe_cdb_instance_put() */ + } /* if sxe_cdb_instance_get() */ + else { /* key already exists: move it from its current count's key chain to the chain for count + 1 */ + sxe_cdb_hkv_unpack(this_hkv, &this); + + if (sizeof(SXE_CDB_HKV_LIST) != sxe_cdb_tls_hkv_part.val_len) { + SXEL3("WARN: %s(){} tried to increment key which is not a counter; early out without incrementing // value length is %u but expected %lu", 
__FUNCTION__, sxe_cdb_tls_hkv_part.val_len, sizeof(SXE_CDB_HKV_LIST)); + goto SXE_EARLY_OUT; + } + /* hkv positions are byte offsets into kvdata */ + this_hkv_pos = (uint8_t *) this_hkv - cdb_instance->kvdata; + this_val_ptr = (SXE_CDB_HKV_LIST *) this.val ; + this_c = this_val_ptr->count_to_use ; SXEA6(SXE_CDB_COUNT_NONE != this_c, "ERROR: INTERNAL: unexpected SXE_CDB_COUNT_NONE"); + + if (this_c >= cdb_instance->counts_size) { + SXEL3("WARN: %s(){} tried to increment key which is not a counter; early out without incrementing // referenced counter %u but only have %u counters", __FUNCTION__, this_c, cdb_instance->counts_size); + goto SXE_EARLY_OUT; + } + + next_c = cdb_instance->counts[this_c].next ; + count_new = cdb_instance->counts[this_c].count + 1 ; + + SXEL7("key exists; so 'increment' it to %lu", count_new); + + if ((cdb_instance->counts[this_c].hkv1 == this_hkv_pos ) /* if this count points to our key */ + && (this_val_ptr->next_hkv_pos == SXE_CDB_HKV_POS_NONE)) { /* and this is the only key in the list */ + if (( next_c == SXE_CDB_COUNT_NONE ) /* if there is no next highest count */ + || (cdb_instance->counts[next_c].count > count_new )) { /* or next highest count is not the desired count */ + cdb_instance->counts[this_c].count ++; /* then opportunistically increment this count! 
*/ + SXEL7("opportunistically incremented associated count"); + goto SXE_EARLY_OUT; /* and early out for this lone climbing count :-) */ + } + } + /* snapshot the key's chain neighbours and the previous count before unlinking it */ + next_hkv_pos = this_val_ptr->next_hkv_pos ; + last_hkv_pos = this_val_ptr->last_hkv_pos ; + last_c = cdb_instance->counts[this_c].last ; + + SXEL7("remove hkv from this count hkv chain"); + + if (next_hkv_pos != SXE_CDB_HKV_POS_NONE) { /* remove this_hkv from next_hkv in chain */ + next_hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[next_hkv_pos]; sxe_cdb_hkv_unpack(next_hkv, &next); + next_val_ptr = (SXE_CDB_HKV_LIST *) next.val ; SXEA6(next_val_ptr->last_hkv_pos == this_hkv_pos, "ERROR: INTERNAL: expected %u==next_val_ptr->last but got %u // ->counts_used=%u", this_hkv_pos, next_val_ptr->last_hkv_pos, cdb_instance->counts_used); + next_val_ptr->last_hkv_pos = last_hkv_pos; + } + + if (last_hkv_pos != SXE_CDB_HKV_POS_NONE) { /* remove this_hkv from last_hkv in chain */ + last_hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[last_hkv_pos]; sxe_cdb_hkv_unpack(last_hkv, &last); + last_val_ptr = (SXE_CDB_HKV_LIST *) last.val ; SXEA6(last_val_ptr->next_hkv_pos == this_hkv_pos, "ERROR: INTERNAL: expected %u==last_val_ptr->next but got %u // ->counts_used=%u", this_hkv_pos, last_val_ptr->next_hkv_pos, cdb_instance->counts_used); + last_val_ptr->next_hkv_pos = next_hkv_pos; + } + + if (cdb_instance->counts[this_c].hkv1 == this_hkv_pos) { + cdb_instance->counts[this_c].hkv1 = next_hkv_pos; SXEL7("hkv was 1st in count list; update 1st hkv to 2nd in list"); + } + + if (SXE_CDB_HKV_POS_NONE == cdb_instance->counts[this_c].hkv1) { + SXEL7("removing count %lu because no keys with this count", (uint64_t)cdb_instance->counts[this_c].count); + + sxe_cdb_instance_free_count_push(cdb_instance, this_c, this_c); /* overwrites counts[this_c].next, which is why next_c was read beforehand */ + cdb_instance->counts_used --; + + if (next_c != SXE_CDB_COUNT_NONE) { cdb_instance->counts[next_c].last = last_c ; } + else { cdb_instance->counts_hi[counts_list] = cdb_instance->counts_hi[counts_list] == this_c ? 
last_c : cdb_instance->counts_hi[counts_list]; SXEA1(0, "/* COVERAGE EXCLUSION: impossible code path due to opportunistic increment (see above) */"); } + + if (last_c != SXE_CDB_COUNT_NONE) { cdb_instance->counts[last_c].next = next_c ; } + else { cdb_instance->counts_lo[counts_list] = cdb_instance->counts_lo[counts_list] == this_c ? next_c : cdb_instance->counts_lo[counts_list]; } /* the removed count was the list's lowest: the next count becomes the lowest */ + } + + /* add hkv to next count hkv chain */ + + if (SXE_CDB_COUNT_NONE == next_c) { + SXEL7("higher count: doesn't exist; so create it & add our key to it"); + next_c = sxe_cdb_instance_free_count_pop(cdb_instance); + this_val_ptr->count_to_use = next_c; + this_val_ptr->next_hkv_pos = SXE_CDB_HKV_POS_NONE; + this_val_ptr->last_hkv_pos = SXE_CDB_HKV_POS_NONE; + cdb_instance->counts_hi[counts_list] = next_c; + cdb_instance->counts[next_c].count = count_new; /* 'increment' count */ + cdb_instance->counts[next_c].next = SXE_CDB_COUNT_NONE; + cdb_instance->counts[next_c].last = this_c; + cdb_instance->counts[next_c].hkv1 = this_hkv_pos; + cdb_instance->counts[this_c].next = next_c; + } + else { + if (cdb_instance->counts[next_c].count == count_new) { + SXEL7("higher count: exists; it's the right count, so add our key to it"); + this_val_ptr->count_to_use = next_c; + this_val_ptr->next_hkv_pos = cdb_instance->counts[next_c].hkv1; + this_val_ptr->last_hkv_pos = SXE_CDB_HKV_POS_NONE; + + next_hkv_pos = cdb_instance->counts[next_c].hkv1; + next_hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[next_hkv_pos]; sxe_cdb_hkv_unpack(next_hkv, &next); + next_val_ptr = (SXE_CDB_HKV_LIST *) next.val ; SXEA6(next_val_ptr->last_hkv_pos == SXE_CDB_HKV_POS_NONE, "ERROR: INTERNAL: expected SXE_CDB_HKV_POS_NONE==next_val_ptr->last but got %u // ->counts_used=%u", next_val_ptr->last_hkv_pos, cdb_instance->counts_used); + next_val_ptr->last_hkv_pos = this_hkv_pos; + + cdb_instance->counts[next_c].hkv1 = this_hkv_pos; + } + else { + SXEL7("higher count: exists; count %lu is higher than wanted, so insert a new count 
%lu & add our key to it", (uint64_t)cdb_instance->counts[next_c].count, count_new); + uint32_t nex2_c = next_c; + next_c = sxe_cdb_instance_free_count_pop(cdb_instance); + this_val_ptr->count_to_use = next_c; + this_val_ptr->next_hkv_pos = SXE_CDB_HKV_POS_NONE; + this_val_ptr->last_hkv_pos = SXE_CDB_HKV_POS_NONE; + cdb_instance->counts[next_c].count = count_new; /* 'increment' count */ + cdb_instance->counts[next_c].next = cdb_instance->counts[this_c].next; + cdb_instance->counts[next_c].last = this_c; + cdb_instance->counts[next_c].hkv1 = this_hkv_pos; + cdb_instance->counts[this_c].next = next_c; + cdb_instance->counts[nex2_c].last = next_c; + } + } + + SXEL7("existing count is %lu", (uint64_t)cdb_instance->counts[this_c].count); + } + +SXE_EARLY_OUT:; + SXER6("return %lu // key count", count_new); + return count_new; /* still 0 if the key could not be created or is not a counter; otherwise the key's new count */ +} /* sxe_cdb_instance_inc() */ + +int +sxe_cdb_instance_walk_pos_is_bad( + SXE_CDB_INSTANCE * cdb_instance, + uint32_t cnt_pos , /* SXE_CDB_COUNT_NONE means start of list, or current index for count */ + uint32_t hkv_pos , /* SXE_CDB_HKV_POS_NONE means start of list, or current hkv for count */ + const char * warning_hint) +{ + int result = 1; + + if (SXE_CDB_COUNT_NONE != cnt_pos) { /* if valid looking cnt */ + if (SXE_CDB_HKV_POS_NONE != hkv_pos) { /* if valid looking hkv */ + /* scrutinize *_pos as they may have come from outside lib-sxe-cdb ! 
*/ + if (cnt_pos > cdb_instance->counts_size) { + SXEL3("%s(cdb_instance=?, cnt_pos=%u, hkv_pos=%u){} // WARNING: given cnt_pos is out of range; early out // %s; ->counts_size=%u", __FUNCTION__, cnt_pos, hkv_pos, warning_hint, cdb_instance->counts_size); + goto SXE_EARLY_OUT; + } + + if ((hkv_pos + 1) > cdb_instance->kvdata_used) { + SXEL3("%s(cdb_instance=?, cnt_pos=%u, hkv_pos=%u){} // WARNING: given hkv_pos is out of range; early out // %s", __FUNCTION__, cnt_pos, hkv_pos, warning_hint); + goto SXE_EARLY_OUT; + } + + SXE_CDB_HKV * hkv = (SXE_CDB_HKV *) &cdb_instance->kvdata[hkv_pos]; + uint32_t h_len = sxe_cdb_h_len(hkv); + if ((hkv_pos + h_len) > cdb_instance->kvdata_used) { + SXEL3("%s(cdb_instance=?, cnt_pos=%u, hkv_pos=%u){} // WARNING: given hkv_pos + header len is out of range; early out // %s", __FUNCTION__, cnt_pos, hkv_pos, warning_hint); + goto SXE_EARLY_OUT; + } + + sxe_cdb_hkv_unpack(hkv, &sxe_cdb_tls_hkv_part); + if ((hkv_pos + sxe_cdb_tls_hkv_part.hkv_len) > cdb_instance->kvdata_used) { + SXEL3("%s(cdb_instance=?, cnt_pos=%u, hkv_pos=%u){} // WARNING: given hkv_pos + %u=hkv_len is out of range; early out // %s", __FUNCTION__, cnt_pos, hkv_pos, sxe_cdb_tls_hkv_part.hkv_len, warning_hint); + goto SXE_EARLY_OUT; + } + + const SXE_CDB_HKV_LIST * val_ptr = (SXE_CDB_HKV_LIST *) sxe_cdb_tls_hkv_part.val; + /* the position is bad if the value is not exactly one SXE_CDB_HKV_LIST, or if it is one but belongs to a different count; */ + /* || short-circuits, so val_ptr->count_to_use is only read once val_len has shown the value to be a full SXE_CDB_HKV_LIST */ + if (sxe_cdb_tls_hkv_part.val_len != sizeof(SXE_CDB_HKV_LIST) || val_ptr->count_to_use != cnt_pos) { + SXEL3("%s(cdb_instance=?, cnt_pos=%u, hkv_pos=%u){} // WARNING: val_len incorrect, or, value as SXE_CDB_HKV_LIST does not reference cnt_pos; early out // %s", __FUNCTION__, cnt_pos, hkv_pos, warning_hint); + goto SXE_EARLY_OUT; + } + } + } + + result = 0; /* *_pos doesn't look bad :-) */ + SXEL6("%s(cdb_instance=?, cnt_pos=%u, hkv_pos=%u){} // %u=%s", __FUNCTION__, cnt_pos, hkv_pos, result, result ?
"is true" : "is false"); + +SXE_EARLY_OUT:; + return result; +} /* sxe_cdb_instance_walk_pos_is_bad() */ + +SXE_CDB_HKV * /* NULL or tls SXE_CDB_HKV copy */ +sxe_cdb_instance_walk( + SXE_CDB_INSTANCE * cdb_instance, + uint32_t direction , /* n=hi2lo, 0=lo2hi */ + uint32_t cnt_pos , /* SXE_CDB_COUNT_NONE means start of list, or current index for count */ + uint32_t hkv_pos , /* SXE_CDB_HKV_POS_NONE means start of list, or current hkv for count */ + uint32_t counts_list ) +{ + SXE_CDB_HKV * tls_hkv = NULL; /* result */ + + if (SXE_CDB_COUNT_NONE == cnt_pos) { /* if start of list */ + if (counts_list >= SXE_CDB_COUNTS_LISTS_MAX) { + SXEL3("%s(cdb_instance=?, direction=%u, cnt_pos=%u, hkv_pos=%u, counts_list=%u){} // WARNING: given counts_list is out of range; early out", __FUNCTION__, direction, cnt_pos, hkv_pos, counts_list); + cnt_pos = SXE_CDB_COUNT_NONE; /* fake end of list */ + hkv_pos = SXE_CDB_HKV_POS_NONE; + goto SXE_EARLY_OUT; + } + cnt_pos = direction ? cdb_instance->counts_hi[counts_list] : cdb_instance->counts_lo[counts_list]; /* where to start? */ + hkv_pos = SXE_CDB_COUNT_NONE == cnt_pos ? 
SXE_CDB_HKV_POS_NONE : cdb_instance->counts[cnt_pos].hkv1; + } + + if (SXE_CDB_COUNT_NONE != cnt_pos) { + if (SXE_CDB_HKV_POS_NONE != hkv_pos) { /* if valid looking hkv */ + if (sxe_cdb_instance_walk_pos_is_bad(cdb_instance, cnt_pos, hkv_pos, "validating hkv_pos given")) { /* todo: consider having a faster, 'unsafe' sxe_cdb_instance_walk() without this check */ + cnt_pos = SXE_CDB_COUNT_NONE ; /* fake end of list */ + hkv_pos = SXE_CDB_HKV_POS_NONE; + goto SXE_EARLY_OUT; + } + + /* come here if given hkv_pos appears to reference a valid SXE_CDB_HKV_LIST which references a valid cnt_pos */ + const SXE_CDB_HKV_LIST * val_ptr = (SXE_CDB_HKV_LIST *) sxe_cdb_tls_hkv_part.val; + uint32_t hkv_pos_new = val_ptr->next_hkv_pos; + + if (sxe_cdb_instance_walk_pos_is_bad(cdb_instance, cnt_pos, hkv_pos_new, "validating hkv_pos discovered")) {/* todo: consider having a faster, 'unsafe' sxe_cdb_instance_walk() without this check */ + cnt_pos = SXE_CDB_COUNT_NONE ; /* fake end of list */ + hkv_pos = SXE_CDB_HKV_POS_NONE; /* COVERAGE EXCLUSION: todo: create unit test for this paranoid check */ + goto SXE_EARLY_OUT; + } + + /* come here if discovered hkv_pos appears to reference a valid SXE_CDB_HKV_LIST which references a valid cnt_pos */ + tls_hkv = sxe_cdb_copy_hkv_to_tls(cdb_instance, hkv_pos); + sxe_cdb_tls_walk_count = cdb_instance->counts[cnt_pos].count; + hkv_pos = hkv_pos_new; + SXEL6("dumping key with count %lu: [%u]=%.*s // hkv_pos=%u", sxe_cdb_tls_walk_count, sxe_cdb_tls_hkv_part.key_len, sxe_cdb_tls_hkv_part.key_len, sxe_cdb_tls_hkv_part.key, hkv_pos); + } + if (SXE_CDB_HKV_POS_NONE == hkv_pos) { /* if no new hkv then advance to next count */ + cnt_pos = direction ? cdb_instance->counts[cnt_pos].last : cdb_instance->counts[cnt_pos].next; /* move in count direction */ + hkv_pos = SXE_CDB_COUNT_NONE == cnt_pos ? 
SXE_CDB_HKV_POS_NONE : cdb_instance->counts[cnt_pos].hkv1; + } + } + +SXE_EARLY_OUT:; + sxe_cdb_tls_walk_cnt_pos = cnt_pos; /* SXE_CDB_COUNT_NONE means reached end of list */ + sxe_cdb_tls_walk_hkv_pos = hkv_pos; + + return tls_hkv; +} /* sxe_cdb_instance_walk() */ + +SXE_CDB_ENSEMBLE * +sxe_cdb_ensemble_new( + uint32_t keys_at_start , + uint64_t kvdata_maximum, + uint32_t cdb_count , + uint32_t cdb_is_locked ) +{ + SXE_CDB_ENSEMBLE * cdb_ensemble = NULL; + + SXEE6("(keys_at_start=%u, kvdata_maximum=%lu, cdb_count=%u, cdb_is_locked=%u)", keys_at_start, kvdata_maximum, cdb_count, cdb_is_locked); + + while (SXE_SPINLOCK_STATUS_TAKEN != sxe_spinlock_take(&sxe_cdb_ensemble_lock)) { + SXEL5("%s() failed to acquire sxe_cdb_ensemble_lock; trying again", __FUNCTION__); /* COVERAGE EXCLUSION: todo: create multi-threaded test to show this informational lock message */ + } + + if (cdb_count <= 256) { /* limit uid size to 5 bytes; max. kvdata space is 256 * 4GB or 1,024GB */ + cdb_ensemble = kit_malloc(sizeof(* cdb_ensemble)); + SXEA1(cdb_ensemble, "ERROR: INTERNAL: sxe_malloc() failed for %zu bytes // %s(){}", sizeof(* cdb_ensemble), __FUNCTION__); + cdb_ensemble->cdb_instances = kit_malloc(cdb_count * sizeof(cdb_ensemble->cdb_instances)); + cdb_ensemble->cdb_instance_locks = kit_malloc(cdb_count * sizeof(cdb_ensemble->cdb_instance_locks)); + SXEA1(cdb_ensemble->cdb_instances, "ERROR: INTERNAL: sxe_malloc() failed for %zu bytes // %s(){}", + cdb_count * sizeof(cdb_ensemble->cdb_instances), __FUNCTION__); + SXEA1(cdb_ensemble->cdb_instance_locks , "ERROR: INTERNAL: sxe_malloc() failed for %zu bytes // %s(){}", + cdb_count * sizeof(cdb_ensemble->cdb_instance_locks), __FUNCTION__); + SXEL6("cdb_ensemble = sxe_malloc(%zu)", sizeof(* cdb_ensemble)); + SXEL6("cdb_ensemble->cdb_instances = sxe_malloc(%u * %zu)", cdb_count, sizeof(cdb_ensemble->cdb_instances)); + SXEL6("cdb_ensemble->cdb_instance_locks = sxe_malloc(%u * %zu)", cdb_count, sizeof(cdb_ensemble->cdb_instance_locks)); + + 
SXEL6("creating array of cdb pointers, each with its own lock:"); + uint32_t i; + for (i = 0; i < cdb_count; i++) { + cdb_ensemble->cdb_instances[i] = sxe_cdb_instance_new(keys_at_start / cdb_count, kvdata_maximum / cdb_count); /* each instance gets an equal share of the totals */ + sxe_spinlock_construct(&cdb_ensemble->cdb_instance_locks[i]); /* in case we need locks */ + } + cdb_ensemble->cdb_count = cdb_count; + cdb_ensemble->cdb_is_locked = cdb_is_locked ? 1 : 0; + } + + sxe_spinlock_give(&sxe_cdb_ensemble_lock); + + SXER6("return %p=cdb_ensemble", cdb_ensemble); + return cdb_ensemble; +} /* sxe_cdb_ensemble_new() */ + +/** + * Destroy every instance of the ensemble, then free the lock array, the instance array and the ensemble itself. + * All of this happens while holding sxe_cdb_ensemble_lock; the per-instance locks are not taken here. + */ +void +sxe_cdb_ensemble_destroy(SXE_CDB_ENSEMBLE * cdb_ensemble) +{ + uint32_t i; + SXEE6("(cdb_ensemble=?)"); + + while (SXE_SPINLOCK_STATUS_TAKEN != sxe_spinlock_take(&sxe_cdb_ensemble_lock)) { + SXEL5("%s() failed to acquire sxe_cdb_ensemble_lock; trying again", __FUNCTION__); /* COVERAGE EXCLUSION: todo: create multi-threaded test to show this informational lock message */ + } + + SXEL6("destroying array of cdb pointers:"); + + for (i = 0; i < cdb_ensemble->cdb_count; i++) { + sxe_cdb_instance_destroy(cdb_ensemble->cdb_instances[i]); + } + + kit_free(cdb_ensemble->cdb_instance_locks); + kit_free(cdb_ensemble->cdb_instances); + kit_free(cdb_ensemble); + sxe_spinlock_give(&sxe_cdb_ensemble_lock); + SXER6("return"); +} /* sxe_cdb_ensemble_destroy() */ + +#define SXE_CDB_FUNCTION(NAME) #NAME + +/* + * Per-instance lock helpers. + * Both macros index ->cdb_instance_locks[] with a variable named 'instance' that must be in scope where the macro is expanded, + * and both do nothing unless the ensemble has ->cdb_is_locked set. FUNCTION is only stringized for the log messages. + */ +#define SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(CDB_ENSEMBLE,FUNCTION) \ + do { \ + if (CDB_ENSEMBLE->cdb_is_locked) { \ + SXEL7("about to lock instance %u of %u for %s()", instance, CDB_ENSEMBLE->cdb_count, SXE_CDB_FUNCTION(FUNCTION)); \ + while (SXE_SPINLOCK_STATUS_TAKEN != sxe_spinlock_take(&CDB_ENSEMBLE->cdb_instance_locks[instance])) { \ + SXEL5("%s() failed to acquire lock instance %u of %u for %s; trying again", __FUNCTION__, instance, CDB_ENSEMBLE->cdb_count, SXE_CDB_FUNCTION(FUNCTION)); \ + } \ + } \ + } while (0) + +#define SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK(CDB_ENSEMBLE) \ + do { \ + if (CDB_ENSEMBLE->cdb_is_locked) { \ +
sxe_spinlock_give(&CDB_ENSEMBLE->cdb_instance_locks[instance]); \ + } \ + } while (0) + +void +sxe_cdb_ensemble_reboot(SXE_CDB_ENSEMBLE * cdb_ensemble) +{ + uint32_t instance; + SXEE6("(cdb_ensemble=?)"); + + while (SXE_SPINLOCK_STATUS_TAKEN != sxe_spinlock_take(&sxe_cdb_ensemble_lock)) { + SXEL5("%s() failed to acquire sxe_cdb_ensemble_lock; trying again", __FUNCTION__); /* COVERAGE EXCLUSION: todo: create multi-threaded test to show this informational lock message */ + } + + SXEL6("rebooting array of cdb pointers:"); + for (instance = 0; instance < cdb_ensemble->cdb_count; instance++) { + SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(cdb_ensemble, sxe_cdb_ensemble_reboot); + } + + for (instance = 0; instance < cdb_ensemble->cdb_count; instance++) { + sxe_cdb_instance_reboot(cdb_ensemble->cdb_instances[instance]); + } + + for (instance = 0; instance < cdb_ensemble->cdb_count; instance++) { + SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK(cdb_ensemble); + } + + sxe_spinlock_give(&sxe_cdb_ensemble_lock); + + SXER6("return"); +} /* sxe_cdb_ensemble_reboot() */ + +/** + * USE WITH CAUTION: Caller responsible for unlock! + * + * Why would caller want to live dangerously like this? + * + * Whereas the other get/set functions copy to/from a tls buffer + * so that the caller never touches the bytes which can possibly + * get mremap()ed, sxe_cdb_ensemble_get_hkv_locked() returns a + * pointer to a SXE_CDB_HKV structure which contains pointers to + * those mremap()able key value bytes but never unlocks itself + * therefore allowing the caller to operate on the bytes + * directly and then unlock after. + * + * Example use case: The key and/or value is large enough that + * we do not want to incur the cost of copying it into a tls + * buffer for access. 
+ */ + +SXE_CDB_HKV * /* NULL or *dangerous* direct pointer to original SXE_CDB_HKV; header + key + value bytes; caller responsible for later calling sxe_cdb_ensemble_get_hkv_raw_unlock() */ +sxe_cdb_ensemble_get_hkv_raw_locked(SXE_CDB_ENSEMBLE * cdb_ensemble) +{ + SXE_CDB_HKV * tls_hkv = NULL; /* result */ + + SXEE6("(cdb_ensemble=?)"); + + uint32_t instance = sxe_cdb_hash.u16[3] % cdb_ensemble->cdb_count; + SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + + SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(cdb_ensemble, sxe_cdb_instance_get_hkv_raw function); + tls_hkv = sxe_cdb_instance_get_hkv_raw(cdb_instance); +// SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( cdb_ensemble); + + /* todo: consider asserting in other functions if they are called before sxe_cdb_ensemble_get_hkv_raw_unlock() */ + + SXER6("return tls_hkv=%p (%s); sxe_cdb_tls_hkv_part: .key_len=%u .val_len=%u", tls_hkv, NULL == tls_hkv ? "key doesn't exist" : "key exists", sxe_cdb_tls_hkv_part.key_len, sxe_cdb_tls_hkv_part.val_len); + return tls_hkv; +} /* sxe_cdb_ensemble_get_hkv_raw_locked() */ + +/** + * Counterpart of sxe_cdb_ensemble_get_hkv_raw_locked(): gives back the instance lock (does nothing unless the ensemble has ->cdb_is_locked set). + * The instance is re-derived from the thread-local sxe_cdb_hash, so that hash must still be the one that was current when the lock was taken. + */ +void +sxe_cdb_ensemble_get_hkv_raw_unlock(SXE_CDB_ENSEMBLE * cdb_ensemble) +{ + SXEE6("(cdb_ensemble=?)"); + + uint32_t instance = sxe_cdb_hash.u16[3] % cdb_ensemble->cdb_count; +// SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + +// SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(cdb_ensemble, sxe_cdb_instance_get_hkv_raw function); +// tls_hkv = sxe_cdb_instance_get_hkv_raw(cdb_instance); + SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( cdb_ensemble); + + SXER6("return"); +} /* sxe_cdb_ensemble_get_hkv_raw_unlock() */ + +uint64_t /* SXE_CDB_UID */ +sxe_cdb_ensemble_get_uid(SXE_CDB_ENSEMBLE * cdb_ensemble) +{ + SXE_CDB_UID uid; + + SXEE6("(cdb_ensemble=?)"); + + uint32_t instance = sxe_cdb_hash.u16[3] % cdb_ensemble->cdb_count; + SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + + SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(cdb_ensemble, sxe_cdb_instance_get_uid function); +
uid.as_u64.u = sxe_cdb_instance_get_uid(cdb_instance); + SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( cdb_ensemble); + + uid.as_part.instance = SXE_CDB_UID_NONE == uid.as_u64.u ? uid.as_part.instance : instance; /* set instance if uid exists */ + + SXER6("return uid=%010lx=%02x[%04x]%03x-%01x=%s // sxe_cdb_tls_hkv_part.val_len=%u", uid.as_u64.u, uid.as_part.instance, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell, SXE_CDB_UID_NONE == uid.as_u64.u ? "key doesn't exist" : "key exists", sxe_cdb_tls_hkv_part.val_len); + return uid.as_u64.u; +} /* sxe_cdb_ensemble_get_uid() */ + +SXE_CDB_HKV * /* NULL or tls SXE_CDB_HKV raw; not copy; returns with the instance lock still held, so caller must later call sxe_cdb_ensemble_get_uid_hkv_raw_unlock() with the same uid */ +sxe_cdb_ensemble_get_uid_hkv_raw_locked(SXE_CDB_ENSEMBLE * cdb_ensemble, SXE_CDB_UID uid) +{ + SXE_CDB_HKV * tls_hkv = NULL; /* result */ + + SXEE6("(cdb_ensemble=?, uid=%010lx=%02x[%04x]%03x-%01x", uid.as_u64.u, uid.as_part.instance, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + + uint32_t instance = uid.as_part.instance; + SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + + SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(cdb_ensemble, sxe_cdb_instance_get_uid_hkv_raw function); + tls_hkv = sxe_cdb_instance_get_uid_hkv_raw(cdb_instance, uid); +// SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( cdb_ensemble); + + /* todo: consider asserting in other functions if they are called before sxe_cdb_ensemble_get_uid_hkv_raw_unlock() */ + + SXER6("return tls_hkv=%p (%s); sxe_cdb_tls_hkv_part: .key_len=%u .val_len=%u", tls_hkv, NULL == tls_hkv ?
"key doesn't exist" : "key exists", sxe_cdb_tls_hkv_part.key_len, sxe_cdb_tls_hkv_part.val_len); + return tls_hkv; +} /* sxe_cdb_ensemble_get_uid_hkv_raw_locked() */ + +/** + * Counterpart of sxe_cdb_ensemble_get_uid_hkv_raw_locked(): gives back the lock of the instance named by uid.as_part.instance + * (does nothing unless the ensemble has ->cdb_is_locked set). Pass the same uid that was used to take the lock. + */ +void +sxe_cdb_ensemble_get_uid_hkv_raw_unlock(SXE_CDB_ENSEMBLE * cdb_ensemble, SXE_CDB_UID uid) +{ + SXEE6("(cdb_ensemble=?, uid=%010lx=%02x[%04x]%03x-%01x", uid.as_u64.u, uid.as_part.instance, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + + uint32_t instance = uid.as_part.instance; +// SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + +// SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(cdb_ensemble, sxe_cdb_instance_get_uid_hkv_raw function); +// tls_hkv = sxe_cdb_instance_get_uid_hkv_raw(cdb_instance, uid); + SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( cdb_ensemble); + + SXER6("return"); +} /* sxe_cdb_ensemble_get_uid_hkv_raw_unlock() */ + +SXE_CDB_HKV * /* NULL or tls SXE_CDB_HKV copy */ +sxe_cdb_ensemble_get_uid_hkv(SXE_CDB_ENSEMBLE * cdb_ensemble, SXE_CDB_UID uid) +{ + SXE_CDB_HKV * tls_hkv = NULL; /* result */ + + SXEE6("(cdb_ensemble=?, uid=%010lx=%02x[%04x]%03x-%01x", uid.as_u64.u, uid.as_part.instance, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + + uint32_t instance = uid.as_part.instance; + SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + + SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(cdb_ensemble, sxe_cdb_instance_get_uid_hkv function); + tls_hkv = sxe_cdb_instance_get_uid_hkv(cdb_instance, uid); + SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( cdb_ensemble); + + SXER6("return tls_hkv=%p // sxe_cdb_tls_hkv_part.val_len=%u", tls_hkv, sxe_cdb_tls_hkv_part.val_len); + return tls_hkv; +} /* sxe_cdb_ensemble_get_uid_hkv() */ + +void /* only call this function directly *after* sxe_cdb_ensemble_set_uid_hkv(); NOTE(review): that names this function itself -- presumably sxe_cdb_ensemble_get_uid_hkv() was meant; confirm */ +sxe_cdb_ensemble_set_uid_hkv(SXE_CDB_ENSEMBLE * cdb_ensemble, SXE_CDB_UID uid) +{ + SXEE6("(cdb_ensemble=?, uid=%010lx=%02x[%04x]%03x-%01x", uid.as_u64.u, uid.as_part.instance, uid.as_part.sheets_index_index, uid.as_part.row,
uid.as_part.cell); + + uint32_t instance = uid.as_part.instance; + SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + + SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(cdb_ensemble, sxe_cdb_instance_set_uid_hkv function); + sxe_cdb_instance_set_uid_hkv(cdb_instance, uid); + SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( cdb_ensemble); + + SXER6("return // sxe_cdb_tls_hkv_part.val_len=%u", sxe_cdb_tls_hkv_part.val_len); + return; +} /* sxe_cdb_ensemble_set_uid_hkv() */ + +uint64_t /* SXE_CDB_UID; SXE_CDB_UID_NONE means something went wrong and key not appended */ +sxe_cdb_ensemble_put_val(SXE_CDB_ENSEMBLE * cdb_ensemble, const uint8_t * val, uint32_t val_len) +{ + SXE_CDB_UID uid; + + uint32_t instance = sxe_cdb_hash.u16[3] % cdb_ensemble->cdb_count; + SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + + SXEE6("(cdb_ensemble=?, val=?, val_len=%u) // instance=%u", val_len, instance); + + SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(cdb_ensemble, sxe_cdb_instance_put_val function); + uid.as_u64.u = sxe_cdb_instance_put_val(cdb_instance, val, val_len); + SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( cdb_ensemble); + + uid.as_part.instance = SXE_CDB_UID_NONE == uid.as_u64.u ? uid.as_part.instance : instance; /* set instance if uid exists */ + + SXER6("return uid=%010lx=%02x[%04x]%03x-%01x=%s", uid.as_u64.u, uid.as_part.instance, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell, SXE_CDB_UID_NONE == uid.as_u64.u ? "failure" : "success"); + return uid.as_u64.u; +} /* sxe_cdb_ensemble_put_val() */ + +uint64_t /* count or zero means e.g. key couldn't be put */ +sxe_cdb_ensemble_inc(SXE_CDB_ENSEMBLE * cdb_ensemble, uint32_t counts_list) +{ + uint64_t count_new; + + uint32_t instance = sxe_cdb_hash.u16[3] % cdb_ensemble->cdb_count; + SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + + SXEE6("(cdb_ensemble=?) 
// instance=%u", instance); + + SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(cdb_ensemble, sxe_cdb_instance_inc function); + count_new = sxe_cdb_instance_inc(cdb_instance, counts_list); + SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( cdb_ensemble); + + SXER6("return %lu // key count", count_new); + return count_new; +} /* sxe_cdb_ensemble_inc() */ + +/* + * Note: sxe_cdb_ensemble_walk() contains no locks. + * Why? Because walking while updating will not guarantee walking iterates over all counter keys. + * Therefore, the caller should ensure that no updating happens during walking. + */ + +SXE_CDB_HKV * /* NULL or tls SXE_CDB_HKV copy */ +sxe_cdb_ensemble_walk( + const SXE_CDB_ENSEMBLE * cdb_ensemble, + uint32_t direction , /* n=hi2lo, 0=lo2hi */ + uint32_t cnt_pos , /* SXE_CDB_COUNT_NONE means start of list, or current index for count */ + uint32_t hkv_pos , /* SXE_CDB_HKV_POS_NONE means start of list, or current hkv for count */ + uint32_t count_list , + uint32_t instance ) +{ + SXE_CDB_HKV * tls_hkv = NULL; /* result */ + + if (instance < cdb_ensemble->cdb_count) { + SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + tls_hkv = sxe_cdb_instance_walk(cdb_instance, direction, cnt_pos, hkv_pos, count_list); + } + + return tls_hkv; +} /* sxe_cdb_ensemble_walk() */ + +uint32_t /* 0 if invalid instance or ->kvdata_used */ +sxe_cdb_ensemble_kvdata_used( + const SXE_CDB_ENSEMBLE * cdb_ensemble, + uint32_t instance ) +{ + uint32_t kvdata_used = 0; + + if (instance < cdb_ensemble->cdb_count) { + const SXE_CDB_INSTANCE * cdb_instance = cdb_ensemble->cdb_instances[instance]; + kvdata_used = cdb_instance->kvdata_used; + } + + return kvdata_used; +} /* sxe_cdb_ensemble_kvdata_used() */ + +/** + * Given two sxe cdb ensembles then swap the underlying + * instances (using locking if the ensembles were initially + * created using locks). + * + * Example use case: A bunch of threads use a locked sxe cdb + * ensemble for reading (and maybe a bit of writing). 
While + * another thread creates a brand new unlocked (read: faster + * creation) sxe cdb ensemble that it wants the threads to start + * using. This function can be used to swap in the underlying + * instances in a locked way. + */ + +void +sxe_cdb_ensemble_swap_instances( + SXE_CDB_ENSEMBLE * this_cdb_ensemble, /* swap SXE_CDB_INSTANCEs in this SXE_CDB_ENSEMBLE */ + SXE_CDB_ENSEMBLE * that_cdb_ensemble) /* with SXE_CDB_INSTANCEs in that SXE_CDB_ENSEMBLE */ +{ + SXEA1(this_cdb_ensemble->cdb_count == that_cdb_ensemble->cdb_count, "ERROR: cannot swap SXE_CDB_INSTANCEs of two SXE_CDB_ENSEMBLEs with differing numbers of instances!"); + + uint32_t instance; + for (instance = 0; instance < this_cdb_ensemble->cdb_count; instance++) { + SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(this_cdb_ensemble, this_cdb_ensemble); + SXE_CDB_ENSEMBLE_INSTANCE_LOCK_BEFORE(that_cdb_ensemble, that_cdb_ensemble); + struct SXE_CDB_INSTANCE * temp = this_cdb_ensemble->cdb_instances[instance]; + this_cdb_ensemble->cdb_instances[instance] = that_cdb_ensemble->cdb_instances[instance]; + that_cdb_ensemble->cdb_instances[instance] = temp; + SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( that_cdb_ensemble); + SXE_CDB_ENSEMBLE_INSTANCE_UNLOCK( this_cdb_ensemble); + + } +} /* sxe_cdb_ensemble_swap_instances() */ diff --git a/lib-sxe-cdb/sxe-cdb.h b/lib-sxe-cdb/sxe-cdb.h new file mode 100644 index 0000000..2a1fe03 --- /dev/null +++ b/lib-sxe-cdb/sxe-cdb.h @@ -0,0 +1,214 @@ +/* Copyright (c) 2013 OpenDNS. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __SXE_CDB_H__ +#define __SXE_CDB_H__ + +/** + * - What is sxe-cdb? sxe-cdb tries to have similar advantages + * to cdb "constant database" but with fewer disadvantages. + * Differences: + * - cdb has two distinct phases; creation & reading. sxe-cdb + * merges these phases; keys can be appended during the + * reading phase. + * - Both the hash table index & the key,value memory areas + * are mremap()able meaning that e.g. n 100 million keys + * can be serialized quickly to disk by saving 2 blocks of + * memory rather than iterating over n 100 million keys. + * - cdb is very fast but its algorithm is stone-age. sxe-cdb is + * designed to take advantage of the cache-line-based + * architecture of modern CPUs.
+ * - The hash table index starts off small -- about 512KB + * per cdb instance -- and expands in size gracefully + * without ever having to rehash all keys; there is never + * any rehashing, although hash table indexes may be moved + * around but this is limited to 64k indexes in contiguous + * cache-line memory and is therefore very fast. + * - cdb has a 4GB size limitation. sxe-cdb has the same + * limitation -- in order to keep the data structures + * compact -- but multiple sxe-cdb instances can be used in + * parallel to achieve any multiple of 4GB, e.g. 128GB. + * sxe-cdb decides which instance to put a unique key in. + * - sxe-cdb allows individual instances to be locked, + * meaning that in a multi-threaded environment there is + * less lock contention than if using one big lock. + * - cdb is a key,value database. sxe-cdb has an instant sort + * feature where sxe_cdb_instance_inc() counters get + * automatically sorted on the fly and the only over-head is + * memory, not CPU. + */ + +/* + * How it works: + * + * +--+ <-- cdb_instance->sheet_index[<65536] + * | | Initially, all sheet indexes are zero. + * | | Min/max size is 128KB/128KB. + * +--+ + * | | + * | | + * +--+ + * : : + * +--+ + * | | + * | | + * +--+ + * + * +--+--+..+--+ <-- cdb_instance->sheets[<65536].row[<4096].hkv_pos.u32[<16] + * | | | | | Grows by 512 KB via mremap() after sxe_cdb_split_sheet(). + * | | | | | sxe_cdb_split_sheet() called upon all row cells used. + * +--+--+..+--+ sxe_cdb_instance_put() inserts in row a or b with fewest cells. + * | | | | | Key overhead is 8 bytes. + * | | | | | Min/max size is 512KB/32GB. + * +--+--+..+--+ + * : : : : : + * +--+--+..+--+ + * | | | | | + * | | | | | + * +------..---+ + * + * +--+--+..+--+ <-- cdb_instance->kvdata[<2^32] + * | | | | | Grows by n*4 KB via mremap() after sxe_cdb_instance_put(). + * | | | | | Key/value length header is 1, 3, 5, or 8 bytes. + * +--+--+..+--+ Min/max size is 4KB/4GB. 
+ * + * +--+--+..+--+ <-- cdb_instance->counts[<2^32] + * | | | | | Grows by 4 KB via mremap() after sxe_cdb_instance_inc(). + * | | | | | Min/max size is 0KB/4GB. + * +--+--+..+--+ + * + * Note: Delete all key/values by deleting the 3 contiguous memory blocks. + * Note: Save all key/values by saving the 3 contiguous memory blocks. + * Note: Load all key/values by loading the 3 contiguous memory blocks. + * Note: 28M 4 byte key/values = 511 splits, 256MB sheet & 240MB kvdata. + * + * - Important characteristics: + * - row/cell never changes; not even for sheet split. + * - sheet index index never changes; although sheet index will change. + * - <1 byte instance><2 byte sheet index index><2 byte row/cell>: + * - Is a short UID to any of 1 trillion hkv (header, key, value). + * - Will always reference the hkv even if mremap() moves instance base. + * - The cost of using a uid as key counter is: + * - kvdata: 18 bytes: <1 byte header><5 byte uid><12 byte SXE_CDB_HKV_LIST> + * - counts: 18 bytes: <12 byte SXE_CDB_COUNT> + */ + +/** + * - FAQ: + * - Can I reference an hkv by its 64bit memory location? No, + * because the ->kvdata can get mremap()ed at any time. + * - If I store a key then is there an alternative (shorter?) + * way to reference it -- e.g. in other keys and values, or + * within the data structures of the caller -- rather than + * use the key bytes again? Yes, by using the 5 byte UID of + * the key. + * - Example of common value: + * - common-value-1 = very-very-long-value + * - key-1 = common-value-1 + * - key-2 = common-value-1 + * - Example of hash in hash (is key in sub-hash?): + * - domain.one = val + * - domain.two = val + * - domain.three = val + * - unique-sub-hash-1domain.one = val + * - unique-sub-hash-1domain.two = val + * - unique-sub-hash-2domain.two = val + * - unique-sub-hash-2domain.three = val + * - Can I call sxe_cdb_*_walk() iteratively while calling + * sxe_cdb_*_inc() in between? No, otherwise the walk + * results will be inaccurate. 
+ * - What happens if I sxe_cdb_*_walk() with the wrong cnt_pos + * and/or hkv_pos? sxe_cdb_instance_walk_pos_is_bad() will + * hopefully detect this :-) + * - Can I store a key without a value? Yes. + */ + +/** + * - TO DO: + * - Todo: sxe_cdb_instance_update_val(). + * - Todo: sxe_cdb_instance_del(). + * - Mark keys as deleted somehow. + * - Consider e.g. sxe_instance_compact(). + * - Consider auto compact on granularized ->kvdata. + * - Todo: sxe_cdb_instance_(push|unshift)(). + * - E.g. similar to Perl's push(@{$hash->{key}}, "value"); + * - Consider how to sxe_cdb_instance_walk() array. + * - E.g. useful for replacing text log with sxe cdb? + */ + +typedef struct SXE_CDB_INSTANCE SXE_CDB_INSTANCE; +typedef struct SXE_CDB_ENSEMBLE SXE_CDB_ENSEMBLE; +typedef union SXE_CDB_HKV SXE_CDB_HKV ; + +#ifndef SXE_CDB_UID_INTERNAL +#define SXE_CDB_UID_INTERNAL +#endif + +typedef union SXE_CDB_UID { + SXE_CDB_UID_INTERNAL; + struct { uint64_t u : 32; /* 4 bytes; instance level */ } __attribute__((packed)) as_u32; + struct { uint64_t u : 40; /* 5 bytes; ensemble level */ } __attribute__((packed)) as_u40; + struct { uint64_t u ; /* 8 bytes; c/api level */ } __attribute__((packed)) as_u64; +} SXE_CDB_UID; + +typedef struct SXE_CDB_HKV_PART { + uint32_t hkv_len; + uint8_t * key ; + uint32_t key_len; + uint8_t * val ; + uint32_t val_len; +} __attribute__((packed)) SXE_CDB_HKV_PART; + +typedef union SXE_CDB_HASH { + uint64_t u64[2]; + uint32_t u32[4]; + uint16_t u16[8]; + uint8_t u08[16]; +} __attribute__((packed)) SXE_CDB_HASH; + +#define SXE_CDB_COUNT_NONE 0 +#define SXE_CDB_HKV_POS_NONE 0 +#define SXE_CDB_UID_NONE UINT64_MAX + +/** + * - With the exception of sxe_cdb_instance_get_hkv() then all + * functions returning an hkv pointer actually return a + * pointer to a tls copy of the hkv. + * - Why? Because if we returned an hkv pointer to the real + * hkv then we cannot guarantee that it won't change in the + * future due to an mremap(). 
+ * - Why not return the hkv copy via malloc()? Because + * performance tests showed that malloc() is about 50% + * slower than using tls. + */ + +extern __thread uint32_t sxe_cdb_tls_hkv_len_max ; /* tls: realloc()ed buffer current size */ +extern __thread SXE_CDB_HKV * sxe_cdb_tls_hkv ; /* tls: realloc()ed buffer used for copying hkv into for caller */ +extern __thread SXE_CDB_HKV_PART sxe_cdb_tls_hkv_part ; /* tls: realloc()ed buffer parts */ +extern __thread uint32_t sxe_cdb_tls_walk_cnt_pos ; /* tls: walk: SXE_CDB_COUNT_NONE means start of list, or current index for count */ +extern __thread uint32_t sxe_cdb_tls_walk_hkv_pos ; /* tls: walk: SXE_CDB_HKV_POS_NONE means start of list, or current hkv for count */ +extern __thread uint64_t sxe_cdb_tls_walk_count ; /* tls: walk: 64bit count of key just walked to */ +extern __thread SXE_CDB_HASH sxe_cdb_hash ; /* tls: hash after last sxe_cdb_prepare() */ + +#include "sxe-cdb-proto.h" + +#endif /* __SXE_CDB_H__ */ + diff --git a/lib-sxe-cdb/test/test-sxe-cdb.c b/lib-sxe-cdb/test/test-sxe-cdb.c new file mode 100644 index 0000000..a5216ee --- /dev/null +++ b/lib-sxe-cdb/test/test-sxe-cdb.c @@ -0,0 +1,612 @@ +/* Copyright (c) 2013 OpenDNS. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#include "kit-alloc.h" +#include "kit-time.h" +#include "sxe-cdb-private.h" +#include "sxe-hash.h" +#include "sxe-log.h" +#include "sxe-util.h" +#include "tap.h" +/* Test helpers: TEST_SXE_CDB_PREPARE() passes KEY[NUM] and KEY##_len[NUM] to sxe_cdb_prepare(); TEST_IS() stores HKV in the caller's local 'hkv' and runs three tap checks (hkv non-NULL, key length, key bytes) against sxe_cdb_tls_hkv_part. */ +#define TEST_SXE_CDB_PREPARE(KEY,NUM) sxe_cdb_prepare(KEY[NUM], KEY##_len[NUM]) +#define TEST_IS(HKV,KEY,KEY_LEN,VARIANT,SUB_VARIANT) \ + hkv = HKV; \ + ok(NULL != hkv , "%s: %s: hkv not NULL as expected", VARIANT, SUB_VARIANT); \ + is( sxe_cdb_tls_hkv_part.key_len, KEY_LEN , "%s: %s: hkv_part.key_len is as expected", VARIANT, SUB_VARIANT); \ + ok(0 == memcmp(KEY, sxe_cdb_tls_hkv_part.key , KEY_LEN), "%s: %s: hkv_part.key bytes are as expected", VARIANT, SUB_VARIANT); +/* Increment three keys so that one (chosen by 'variant') reaches a count of 4 while the other two reach 2, add a fourth key with count 1, then check uid lookups and walk the counts list lo2hi and hi2lo; the lo2hi_ and hi2lo_ arguments give the expected key number (1..4) at each walk step. */ +static void +test_runaway_variant( + int variant, + const char * variant_text, + int lo2hi_key_1st, + int lo2hi_key_2nd, + int lo2hi_key_3rd, + int lo2hi_key_4th, + int hi2lo_key_1st, + int hi2lo_key_2nd, + int hi2lo_key_3rd, + int hi2lo_key_4th) +{ + SXE_CDB_INSTANCE * ci = sxe_cdb_instance_new(0 /* grow from minimum size */, 0 /* grow to maximum allowed size */); + uint32_t cl = 0; /* counts_list */ + SXE_CDB_HKV * hkv; + SXE_CDB_UID uid[4+1]; /* ignore [0] */ + uint8_t * key[4+1]; /* ignore [0] */ + uint8_t key_1[] = "key_1" ; + uint8_t key_2[] = "_key_2" ; + uint8_t key_3[] = "__key_3" ; + uint8_t key_4[] = "___key_4"; + uint32_t key_len[4+1]; /* ignore [0] */ + int top[2][4] = { { lo2hi_key_1st, lo2hi_key_2nd, lo2hi_key_3rd, lo2hi_key_4th }, { hi2lo_key_1st, hi2lo_key_2nd, hi2lo_key_3rd, hi2lo_key_4th } }; /* expected key number at each walk step: top[0] is lo2hi, top[1] is hi2lo */ + int cnt[2][4] = { { 1, 2, 
2, 4 }, { 4, 2, 2, 1 } }; /* expected count at each walk step, indexed like top[][] */ + /* key lengths use sizeof() and so include each literal's terminating NUL */ + key[1] = key_1; key_len[1] = sizeof(key_1); + key[2] = key_2; key_len[2] = sizeof(key_2); + key[3] = key_3; key_len[3] = sizeof(key_3); + key[4] = key_4; key_len[4] = sizeof(key_4); + + TEST_SXE_CDB_PREPARE(key, 1); is(sxe_cdb_instance_inc(ci, cl), 1, "%s: key[1] has expected count", variant_text); /* start off by incrementing 3 keys */ + TEST_SXE_CDB_PREPARE(key, 2); is(sxe_cdb_instance_inc(ci, cl), 1, "%s: key[2] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 3); is(sxe_cdb_instance_inc(ci, cl), 1, "%s: key[3] has expected count", variant_text); + + switch (variant) { /* increment all the keys again, but in each variant a different key gets incremented more than the others */ + case 0: /* key[1] runs away to 4; key[2] and key[3] reach 2 */ + TEST_SXE_CDB_PREPARE(key, 1); is(sxe_cdb_instance_inc(ci, cl), 2, "%s: key[1] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 1); is(sxe_cdb_instance_inc(ci, cl), 3, "%s: key[1] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 1); is(sxe_cdb_instance_inc(ci, cl), 4, "%s: key[1] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 2); is(sxe_cdb_instance_inc(ci, cl), 2, "%s: key[2] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 3); is(sxe_cdb_instance_inc(ci, cl), 2, "%s: key[3] has expected count", variant_text); + break; + case 1: /* key[2] runs away to 4; key[1] and key[3] reach 2 */ + TEST_SXE_CDB_PREPARE(key, 2); is(sxe_cdb_instance_inc(ci, cl), 2, "%s: key[2] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 2); is(sxe_cdb_instance_inc(ci, cl), 3, "%s: key[2] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 2); is(sxe_cdb_instance_inc(ci, cl), 4, "%s: key[2] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 1); is(sxe_cdb_instance_inc(ci, cl), 2, "%s: key[1] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 3); is(sxe_cdb_instance_inc(ci, cl), 2, "%s: key[3] has expected count", variant_text); + break; + case 2: /* key[3] runs away to 4; key[1] and key[2] reach 2 */ + TEST_SXE_CDB_PREPARE(key, 3); is(sxe_cdb_instance_inc(ci, cl), 2, 
"%s: key[3] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 3); is(sxe_cdb_instance_inc(ci, cl), 3, "%s: key[3] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 3); is(sxe_cdb_instance_inc(ci, cl), 4, "%s: key[3] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 1); is(sxe_cdb_instance_inc(ci, cl), 2, "%s: key[1] has expected count", variant_text); + TEST_SXE_CDB_PREPARE(key, 2); is(sxe_cdb_instance_inc(ci, cl), 2, "%s: key[2] has expected count", variant_text); + break; + default: + SXEA1(0,"ERROR: INTERNAL: unexpected variant %d", variant); + } + + TEST_SXE_CDB_PREPARE(key, 4); is(sxe_cdb_instance_inc(ci, cl), 1 , "%s: key[4] has expected count", variant_text); /* finally a new 4th and last key gets incremented to 1 which is the lowest count */ + + /* test finding the 4 keys via the uid api */ + + TEST_SXE_CDB_PREPARE(key, 1); uid[1].as_u64.u = sxe_cdb_instance_get_uid(ci); ok(uid[1].as_u64.u != SXE_CDB_UID_NONE, "%s: found uid[1]", variant_text); + TEST_SXE_CDB_PREPARE(key, 2); uid[2].as_u64.u = sxe_cdb_instance_get_uid(ci); ok(uid[2].as_u64.u != SXE_CDB_UID_NONE, "%s: found uid[2]", variant_text); + TEST_SXE_CDB_PREPARE(key, 3); uid[3].as_u64.u = sxe_cdb_instance_get_uid(ci); ok(uid[3].as_u64.u != SXE_CDB_UID_NONE, "%s: found uid[3]", variant_text); + TEST_SXE_CDB_PREPARE(key, 4); uid[4].as_u64.u = sxe_cdb_instance_get_uid(ci); ok(uid[4].as_u64.u != SXE_CDB_UID_NONE, "%s: found uid[4]", variant_text); + + TEST_IS(sxe_cdb_instance_get_uid_hkv(ci, uid[1]), key[1], key_len[1], variant_text, "uid2hkv"); + TEST_IS(sxe_cdb_instance_get_uid_hkv(ci, uid[2]), key[2], key_len[2], variant_text, "uid2hkv"); + TEST_IS(sxe_cdb_instance_get_uid_hkv(ci, uid[3]), key[3], key_len[3], variant_text, "uid2hkv"); + TEST_IS(sxe_cdb_instance_get_uid_hkv(ci, uid[4]), key[4], key_len[4], variant_text, "uid2hkv"); + + /* test enumerating the 4 keys via the walk api; from lo2hi & hi2lo */ + + int way; /* 0 walks lo2hi, 1 walks hi2lo */ + for (way = 0; way < 2; way++) { + const 
char * way_text; + way_text = way ? "hi2lo " : "lo2hi "; + sxe_cdb_tls_walk_cnt_pos = SXE_CDB_COUNT_NONE; /* SXE_CDB_COUNT_NONE means begin at the start of the list */ + TEST_IS(sxe_cdb_instance_walk(ci, way, sxe_cdb_tls_walk_cnt_pos, sxe_cdb_tls_walk_hkv_pos, cl), key[top[way][0]], key_len[top[way][0]], variant_text, way_text); is(sxe_cdb_tls_walk_count, cnt[way][0], "%s: %s: sxe_cdb_instance_walk() %d is expected count", variant_text, way_text, cnt[way][0]); + TEST_IS(sxe_cdb_instance_walk(ci, way, sxe_cdb_tls_walk_cnt_pos, sxe_cdb_tls_walk_hkv_pos, cl), key[top[way][1]], key_len[top[way][1]], variant_text, way_text); is(sxe_cdb_tls_walk_count, cnt[way][1], "%s: %s: sxe_cdb_instance_walk() %d is expected count", variant_text, way_text, cnt[way][1]); + TEST_IS(sxe_cdb_instance_walk(ci, way, sxe_cdb_tls_walk_cnt_pos, sxe_cdb_tls_walk_hkv_pos, cl), key[top[way][2]], key_len[top[way][2]], variant_text, way_text); is(sxe_cdb_tls_walk_count, cnt[way][2], "%s: %s: sxe_cdb_instance_walk() %d is expected count", variant_text, way_text, cnt[way][2]); + TEST_IS(sxe_cdb_instance_walk(ci, way, sxe_cdb_tls_walk_cnt_pos, sxe_cdb_tls_walk_hkv_pos, cl), key[top[way][3]], key_len[top[way][3]], variant_text, way_text); is(sxe_cdb_tls_walk_count, cnt[way][3], "%s: %s: sxe_cdb_instance_walk() %d is expected count", variant_text, way_text, cnt[way][3]); + is(sxe_cdb_tls_walk_cnt_pos, SXE_CDB_COUNT_NONE , "%s: %s: sxe_cdb_instance_walk() expected end" , variant_text, way_text ); + } + + sxe_cdb_instance_destroy(ci); +} /* test_runaway_variant() */ +/* Convert a kit_timestamp_t duration to seconds as a double by dividing by KIT_TIMESTAMP_1_SEC. */ +static double +kit_timestamp_to_double_seconds(kit_timestamp_t duration) +{ + return (double)(duration) / KIT_TIMESTAMP_1_SEC; +} + +#if SXE_DEBUG +# define DIV 32 // Divide the number of keys by this amount to speed up debug builds +#else +# define DIV 1 +#endif + +int +main(void) +{ + kit_timestamp_t end_time, start_time; + double elapsed_time; + uint32_t i; + unsigned keys = 109375 / DIV; + uint8_t header_len_3_key[KEY_HEADER_LEN_3_KEY_LEN_MAX]; /* 127 bytes */ + uint8_t 
header_len_5_key[KEY_HEADER_LEN_5_KEY_LEN_MAX]; /* 65535 bytes */ + uint8_t header_len_8_key[KEY_HEADER_LEN_5_KEY_LEN_MAX + 1 /* 2^24 too big :-) */]; + + plan_tests(224); + uint64_t start_allocations = kit_memory_allocations(); +// KIT_ALLOC_SET_LOG(1); // Turn off when done + + /* tests for key count double double linked lists; different runaway variants test different linked list fine details :-) */ + + test_runaway_variant(0, "runaway key1", 4,3,2,1, 1,3,2,4); // expected walk key order: lo2hi 4 3 2 1, hi2lo 1 3 2 4 + test_runaway_variant(1, "runaway key2", 4,3,1,2, 2,3,1,4); // expected walk key order: lo2hi 4 3 1 2, hi2lo 2 3 1 4 + test_runaway_variant(2, "runaway key3", 4,2,1,3, 3,2,1,4); // expected walk key order: lo2hi 4 2 1 3, hi2lo 3 2 1 4 + + diag("tests for coverage"); + { + SXE_CDB_INSTANCE * cdb_instance = sxe_cdb_instance_new(0 /* grow from minimum size */, 0 /* grow to maximum allowed size */); + uint32_t counts_list = 0; + + sxe_cdb_prepare (&header_len_3_key[0], sizeof(header_len_3_key)); + is(sxe_cdb_instance_get_uid(cdb_instance ), SXE_CDB_UID_NONE, "coverage: missed header_len_3_key as expected"); + + sxe_cdb_prepare (&header_len_5_key[0], sizeof(header_len_5_key)); + is(sxe_cdb_instance_get_uid(cdb_instance ), SXE_CDB_UID_NONE, "coverage: missed header_len_5_key as expected"); + + sxe_cdb_prepare (&header_len_8_key[0], sizeof(header_len_8_key)); + is(sxe_cdb_instance_get_uid(cdb_instance ), SXE_CDB_UID_NONE, "coverage: missed header_len_8_key as expected"); + + ok(sxe_cdb_instance_put_val(cdb_instance, NULL, 0) != SXE_CDB_UID_NONE, "coverage: append header_len_3_key as expected"); + ok(sxe_cdb_instance_get_uid(cdb_instance ) != SXE_CDB_UID_NONE, "coverage: found header_len_3_key as expected"); + + sxe_cdb_prepare (&header_len_5_key[0], sizeof(header_len_5_key)); + ok(sxe_cdb_instance_put_val(cdb_instance, NULL, 0) != SXE_CDB_UID_NONE, "coverage: append header_len_5_key as expected"); + ok(sxe_cdb_instance_get_uid(cdb_instance ) != 
SXE_CDB_UID_NONE, "coverage: found header_len_5_key as expected"); + + sxe_cdb_prepare (&header_len_8_key[0], sizeof(header_len_8_key)); + ok(sxe_cdb_instance_put_val(cdb_instance, NULL, 0) != SXE_CDB_UID_NONE, "coverage: found header_len_8_key as expected"); + ok(sxe_cdb_instance_get_uid(cdb_instance ) != SXE_CDB_UID_NONE, "coverage: found header_len_8_key as expected"); + + sxe_cdb_prepare(NULL, 0); is(sxe_cdb_instance_put_val(cdb_instance, NULL, 0), SXE_CDB_UID_NONE, "coverage: invalid key length causes sxe_cdb_instance_put() to fail"); + sxe_cdb_prepare(NULL, KEY_HEADER_LEN_8_KEY_LEN_MAX + 1); is(sxe_cdb_instance_put_val(cdb_instance, NULL, 0), SXE_CDB_UID_NONE, "coverage: invalid key length causes sxe_cdb_instance_put() to fail"); + + /* test for sxe_cdb_instance_walk_pos_is_bad() coverage */ + + sxe_cdb_prepare ((const uint8_t *) "foo", 3); + SXEA1(sxe_cdb_instance_inc(cdb_instance, counts_list), "ERROR: INTERNAL: sxe_cdb_instance_inc() unexpectedly failing"); /* create a single counter instance for tests below */ + + is(sxe_cdb_instance_walk_pos_is_bad(cdb_instance, 1 + cdb_instance->counts_size, 1 + cdb_instance->kvdata_used, "test: coverage: sxe_cdb_instance_walk_pos_is_bad(): cnt_pos is too large"), 1, "coverage: sxe_cdb_instance_walk_pos_is_bad: cnt_pos is too large"); + is(sxe_cdb_instance_walk_pos_is_bad(cdb_instance, 1 , 1 + cdb_instance->kvdata_used, "test: coverage: sxe_cdb_instance_walk_pos_is_bad(): hkv_pos is too large"), 1, "coverage: sxe_cdb_instance_walk_pos_is_bad: hkv_pos is too large"); + + SXE_CDB_HKV_LIST hkv_list = { UINT32_MAX, 0, UINT32_MAX }; /* set everything to the max to create overflow situation below */ + sxe_cdb_prepare ((const uint8_t *) "bar", 3); + ok(sxe_cdb_instance_put_val(cdb_instance, (const uint8_t *) &hkv_list, sizeof(hkv_list)) != SXE_CDB_UID_NONE, "coverage: hkv_pos + header len is out of range: added bogus SXE_CDB_HKV_LIST"); + SXE_CDB_HKV * hkv = sxe_cdb_instance_get_hkv_raw(cdb_instance); + uint32_t hkv_pos = 
(uint8_t *) hkv - cdb_instance->kvdata; + is(sxe_cdb_instance_walk_pos_is_bad(cdb_instance, 1, hkv_pos + 3 + sizeof(hkv_list) - 1, "test: coverage: sxe_cdb_instance_walk_pos_is_bad(): hkv_pos + header len is out of range" ), 1, "coverage: sxe_cdb_instance_walk_pos_is_bad: hkv_pos + header len is out of range" ); + is(sxe_cdb_instance_walk_pos_is_bad(cdb_instance, 1, hkv_pos + 0 + sizeof(hkv_list) - 1, "test: coverage: sxe_cdb_instance_walk_pos_is_bad(): hkv_pos + given hkv_pos + hkv_len is out of range" ), 1, "coverage: sxe_cdb_instance_walk_pos_is_bad: given hkv_pos + hkv_len is out of range" ); + is(sxe_cdb_instance_walk_pos_is_bad(cdb_instance, 1, hkv_pos , "test: coverage: sxe_cdb_instance_walk_pos_is_bad(): val_len incorrect, or, value as SXE_CDB_HKV_LIST does not reference cnt_pos"), 1, "coverage: sxe_cdb_instance_walk_pos_is_bad: val_len incorrect, or, value as SXE_CDB_HKV_LIST does not reference cnt_pos"); + + ok(sxe_cdb_instance_walk(cdb_instance, 1, 0 , 0 , SXE_CDB_COUNTS_LISTS_MAX) == NULL, "coverage: sxe_cdb_instance_walk()"); + ok(sxe_cdb_instance_walk(cdb_instance, 1, 1 + cdb_instance->counts_used, 1 + cdb_instance->kvdata_used, counts_list ) == NULL, "coverage: sxe_cdb_instance_walk_pos_is_bad(): sxe_cdb_instance_walk()"); + + /* test for sxe_cdb_instance_inc() coverage */ + + sxe_cdb_prepare ((const uint8_t *) "bad", 3); + ok(sxe_cdb_instance_put_val(cdb_instance, NULL, 0 ) != SXE_CDB_UID_NONE, "coverage: bad key counter: put regular key"); + ok(sxe_cdb_instance_inc (cdb_instance, counts_list ) == 0 , "coverage: bad key counter: try to increment regular key"); + + sxe_cdb_prepare ((const uint8_t *) "bar", 3); + ok(sxe_cdb_instance_inc (cdb_instance, counts_list ) == 0 , "coverage: bad key counter: try to increment key with counter size value with bad reference"); + + sxe_cdb_instance_destroy(cdb_instance); + } + + diag("tests for ensemble swap & reboot"); + { + SXE_CDB_UID uid ; + SXE_CDB_UID uids[keys] ; + uint32_t instances = 8; + uint32_t 
keys_to_swap = 5; + SXE_CDB_ENSEMBLE * this_cdb_ensemble = sxe_cdb_ensemble_new(0 /* grow from minimum size */, 0 /* grow to maximum allowed size */, instances /* number of cdb instances */, 1 /* locked */); + SXE_CDB_ENSEMBLE * that_cdb_ensemble = sxe_cdb_ensemble_new(0 /* grow from minimum size */, 0 /* grow to maximum allowed size */, instances /* number of cdb instances */, 0 /* unlocked */); + + for (i = 0; i < (2 * keys_to_swap); i++) { /* put half of the keys in this ensemble and the other half in that ensemble */ + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + uids[i].as_u64.u = sxe_cdb_ensemble_put_val(i < keys_to_swap ? this_cdb_ensemble : that_cdb_ensemble, (const uint8_t *) &i, sizeof(i)); + SXEA1(SXE_CDB_UID_NONE != uids[i].as_u64.u, "ERROR: INTERNAL: sxe_cdb_ensemble_put_val() unexpectedly failing"); + } + + sxe_cdb_ensemble_swap_instances(this_cdb_ensemble, that_cdb_ensemble); /* note: 'this_cdb_ensemble : that_cdb_ensemble' before and 'that_cdb_ensemble : this_cdb_ensemble' after :-) */ + + for (i = 0; i < (2 * keys_to_swap); i++) { + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + uid.as_u64.u = sxe_cdb_ensemble_get_uid(i < keys_to_swap ? that_cdb_ensemble : this_cdb_ensemble); SXEA1(uids[i].as_u64.u == uid.as_u64.u, "ERROR: INTERNAL: unexpected uid at i=%u", i); + } + + sxe_cdb_ensemble_reboot(this_cdb_ensemble); + + for (i = 0; i < (2 * keys_to_swap); i++) { + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + uid.as_u64.u = sxe_cdb_ensemble_get_uid(i < keys_to_swap ? that_cdb_ensemble : this_cdb_ensemble); SXEA1((i < keys_to_swap ? 
uids[i].as_u64.u : SXE_CDB_UID_NONE) == uid.as_u64.u, "ERROR: INTERNAL: unexpected uid at i=%u", i); + } + + sxe_cdb_ensemble_destroy(that_cdb_ensemble); + sxe_cdb_ensemble_destroy(this_cdb_ensemble); + } + + diag("test sxe_cdb_ensemble_set_uid_hkv() & sxe_cdb_ensemble_get_hkv_locked() & sxe_cdb_ensemble_get_hkv_unlock()"); + { + uint32_t instances = 8; + SXE_CDB_ENSEMBLE * cdb_ensemble = sxe_cdb_ensemble_new(0 /* grow from minimum size */, 0 /* grow to maximum allowed size */, instances /* number of cdb instances */, 1 /* locked */); + SXE_CDB_UID uid ; + SXE_CDB_HKV * tls_hkv; + + sxe_cdb_prepare ( (const uint8_t *) "foo", 3); + uid.as_u64.u = sxe_cdb_ensemble_put_val (cdb_ensemble, (const uint8_t *) "BAR", 3); + tls_hkv = sxe_cdb_ensemble_get_hkv_raw_locked (cdb_ensemble ); ok(tls_hkv != NULL , "Update same-sized value: non-NULL tls_hkv as expected"); + is(sxe_cdb_tls_hkv_part.val[0] , 'B', "Update same-sized value: before direct update: got 'B' as expected"); + is(sxe_cdb_tls_hkv_part.val[1] , 'A', "Update same-sized value: before direct update: got 'A' as expected"); + is(sxe_cdb_tls_hkv_part.val[2] , 'R', "Update same-sized value: before direct update: got 'R' as expected"); + sxe_cdb_tls_hkv_part.val[0] = 'b'; + sxe_cdb_tls_hkv_part.val[1] = 'r'; + sxe_cdb_tls_hkv_part.val[2] = 'a'; + sxe_cdb_tls_hkv_part.val = NULL; /* ensure sxe_cdb_ensemble_get_uid_hkv_raw_locked() sets .val */ + sxe_cdb_ensemble_get_hkv_raw_unlock (cdb_ensemble ); + sxe_cdb_ensemble_get_uid_hkv_raw_locked(cdb_ensemble, uid); ok(tls_hkv != NULL , "Update same-sized value: non-NULL tls_hkv as expected"); + is(sxe_cdb_tls_hkv_part.val[0] , 'b', "Update same-sized value: before direct update: got 'b' as expected"); + is(sxe_cdb_tls_hkv_part.val[1] , 'r', "Update same-sized value: before direct update: got 'r' as expected"); + is(sxe_cdb_tls_hkv_part.val[2] , 'a', "Update same-sized value: before direct update: got 'a' as expected"); + sxe_cdb_tls_hkv_part.val[0] = 'b'; + 
sxe_cdb_tls_hkv_part.val[1] = 'a'; + sxe_cdb_tls_hkv_part.val[2] = 'r'; + sxe_cdb_tls_hkv_part.val = NULL; /* ensure sxe_cdb_ensemble_get_uid_hkv() sets .val */ + sxe_cdb_ensemble_get_uid_hkv_raw_unlock(cdb_ensemble, uid); + sxe_cdb_ensemble_get_uid_hkv (cdb_ensemble, uid); is(sxe_cdb_tls_hkv_part.val[0] , 'b', "Update same-sized value: before indirect update: got 'b' as expected"); + is(sxe_cdb_tls_hkv_part.val[1] , 'a', "Update same-sized value: before indirect update: got 'a' as expected"); + is(sxe_cdb_tls_hkv_part.val[2] , 'r', "Update same-sized value: before indirect update: got 'r' as expected"); + sxe_cdb_tls_hkv_part.val[0] = 'f'; sxe_cdb_ensemble_set_uid_hkv(cdb_ensemble, uid); sxe_cdb_ensemble_get_uid_hkv (cdb_ensemble, uid); is(sxe_cdb_tls_hkv_part.val[0] , 'f', "Update same-sized value: update indirect char 1: got 'f' as expected"); + is(sxe_cdb_tls_hkv_part.val[1] , 'a', "Update same-sized value: update indirect char 1: got 'a' as expected"); + is(sxe_cdb_tls_hkv_part.val[2] , 'r', "Update same-sized value: update indirect char 1: got 'r' as expected"); + sxe_cdb_tls_hkv_part.val[1] = 'i'; sxe_cdb_ensemble_set_uid_hkv(cdb_ensemble, uid); sxe_cdb_ensemble_get_uid_hkv (cdb_ensemble, uid); is(sxe_cdb_tls_hkv_part.val[0] , 'f', "Update same-sized value: update indirect char 2: got 'f' as expected"); + is(sxe_cdb_tls_hkv_part.val[1] , 'i', "Update same-sized value: update indirect char 2: got 'i' as expected"); + is(sxe_cdb_tls_hkv_part.val[2] , 'r', "Update same-sized value: update indirect char 2: got 'r' as expected"); + sxe_cdb_tls_hkv_part.val[2] = 't'; sxe_cdb_ensemble_set_uid_hkv(cdb_ensemble, uid); sxe_cdb_ensemble_get_uid_hkv (cdb_ensemble, uid); is(sxe_cdb_tls_hkv_part.val[0] , 'f', "Update same-sized value: update indirect char 3: got 'f' as expected"); + is(sxe_cdb_tls_hkv_part.val[1] , 'i', "Update same-sized value: update indirect char 3: got 'i' as expected"); + is(sxe_cdb_tls_hkv_part.val[2] , 't', "Update same-sized value: update indirect 
char 3: got 't' as expected"); + + sxe_cdb_ensemble_destroy(cdb_ensemble); + } + + diag("stress test cdb instance put/get/miss"); + + putenv(SXE_CAST_NOCONST(char *, "SXE_LOG_LEVEL_LIBSXE_LIB_SXE_CDB=5")); /* Set to 5 to suppress sxe-cdb logging during test */ + sxe_log_control_forget_all_levels(); + + { + SXE_CDB_INSTANCE * cdb_instance = sxe_cdb_instance_new(0 /* grow from minimum size */, 0 /* grow to maximum allowed size */); + SXE_CDB_UID uid ; + SXE_CDB_UID uids[keys]; + SXE_CDB_HKV * hkv ; + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + sxe_cdb_prepare((const uint8_t *) &i, sizeof(i)); + } + + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: onlyhash: prepare %u keys in %6.2f seconds or %8u keys per second", keys, elapsed_time, (unsigned)(((uint64_t)keys << 32) / (kit_timestamp_get() - start_time))); + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + uids[i].as_u64.u = sxe_cdb_instance_put_val(cdb_instance, (const uint8_t *) &i, sizeof(i)); + SXEA1(SXE_CDB_UID_NONE != uids[i].as_u64.u, "ERROR: INTERNAL: sxe_cdb_instance_put_val() unexpectedly failing"); + } + elapsed_time = kit_timestamp_to_double_seconds((end_time = kit_timestamp_get()) - start_time); + SXEL5("test: instance: put-val %u keys in %6.2f seconds or %8u keys per second with %u sheets_split & avg sheet keys during split %lu", + keys, elapsed_time, (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (end_time - start_time)), + cdb_instance->sheets_split, + cdb_instance->sheets_split ? 
cdb_instance->sheets_split_keys / cdb_instance->sheets_split : 0); + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + hkv = sxe_cdb_instance_get_hkv_raw(cdb_instance ); SXEA1(hkv, "ERROR: INTERNAL: unexpected NULL hkv at i=%u", i); + SXEA1( sxe_cdb_tls_hkv_part.key_len == sizeof(i) , "ERROR: INTERNAL: unexpected key len at i=%u", i); + SXEA1( sxe_cdb_tls_hkv_part.val_len == sizeof(i) , "ERROR: INTERNAL: unexpected val len at i=%u", i); + SXEA1(*((uint32_t *) sxe_cdb_tls_hkv_part.key ) == i , "ERROR: INTERNAL: unexpected key at i=%u", i); + SXEA1(*((uint32_t *) sxe_cdb_tls_hkv_part.val ) == i , "ERROR: INTERNAL: unexpected val at i=%u", i); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: instance: get-hkv %u keys in %6.2f seconds or %8u keys per second // keylen_misses %lu; memcmp_misses %lu", keys, elapsed_time, (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time)), cdb_instance->keylen_misses, cdb_instance->memcmp_misses); + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + uid.as_u64.u = sxe_cdb_instance_get_uid(cdb_instance); SXEA1(uids[i].as_u64.u == uid.as_u64.u, "ERROR: INTERNAL: unexpected SXE_CDB_UID_NONE at i=%u", i); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: instance: get-uid %u keys in %6.2f seconds or %8u keys per second // keylen_misses %lu; memcmp_misses %lu", keys, elapsed_time, (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time)), cdb_instance->keylen_misses, cdb_instance->memcmp_misses); + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + uid.as_u64.u = uids[i].as_u64.u; + hkv = sxe_cdb_instance_get_uid_hkv(cdb_instance, uid); SXEA1(hkv, "ERROR: INTERNAL: unexpected NULL hkv at i=%u 
for uid %010lx=ii[%04x]%03x-%01x", i, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + SXEA1( sxe_cdb_tls_hkv_part.key_len == sizeof(i) , "ERROR: INTERNAL: unexpected key len at i=%u for uid %010lx=ii[%04x]%03x-%01x", i, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + SXEA1( sxe_cdb_tls_hkv_part.val_len == sizeof(i) , "ERROR: INTERNAL: unexpected val len at i=%u for uid %010lx=ii[%04x]%03x-%01x", i, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + SXEA1(*((uint32_t *) sxe_cdb_tls_hkv_part.key ) == i , "ERROR: INTERNAL: unexpected key at i=%u for uid %010lx=ii[%04x]%03x-%01x", i, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + SXEA1(*((uint32_t *) sxe_cdb_tls_hkv_part.val ) == i , "ERROR: INTERNAL: unexpected val at i=%u for uid %010lx=ii[%04x]%03x-%01x", i, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: instance: tls-hkv %u keys in %6.2f seconds or %8u keys per second // keylen_misses %lu; memcmp_misses %lu", keys, elapsed_time, (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time)), cdb_instance->keylen_misses, cdb_instance->memcmp_misses); + + start_time = kit_timestamp_get(); + for (i = keys; i < (keys * 2); i++) { + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + uid.as_u64.u = sxe_cdb_instance_get_uid(cdb_instance ); SXEA1(SXE_CDB_UID_NONE == uid.as_u64.u, "ERROR: INTERNAL: unexpected existing uid at i=%u", i); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: instance: no-uid %u keys in %6.2f seconds or %8u keys per second // keylen_misses %lu; memcmp_misses %lu", keys, elapsed_time, (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time)), 
cdb_instance->keylen_misses, cdb_instance->memcmp_misses); + + start_time = kit_timestamp_get(); + for (i = keys; i < (keys * 2); i++) { + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + hkv = sxe_cdb_instance_get_hkv_raw(cdb_instance ); SXEA1(NULL == hkv, "ERROR: INTERNAL: unexpected existing hkv at i=%u", i); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: instance: no-hkv %u keys in %6.2f seconds or %8u keys per second // keylen_misses %lu; memcmp_misses %lu", keys, elapsed_time, (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time)), cdb_instance->keylen_misses, cdb_instance->memcmp_misses); + + SXEL5("test: instance: %.1fMB total memory in sheets, %.1fMB total memory in kvdata; cells used %u, size %u or %u%% full; kv used %u, size %u", + cdb_instance->sheets_size * SXE_CDB_SHEET_BYTES / 1024 / 1024.0, + cdb_instance->kvdata_used / 1024 / 1024.0, + cdb_instance->sheets_cells_used, + cdb_instance->sheets_cells_size, + cdb_instance->sheets_cells_used * 100 / cdb_instance->sheets_cells_size, + cdb_instance->kvdata_used, + cdb_instance->kvdata_size); + + sxe_cdb_instance_destroy(cdb_instance); + } + + diag("stress test cdb instance inc"); + { + SXE_CDB_INSTANCE * cdb_instance = sxe_cdb_instance_new(0 /* grow from minimum size */, 0 /* grow to maximum allowed size */); + uint32_t counts_list = 0; + uint64_t manual_count[keys]; + uint64_t manual_count_hi = 0; + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + manual_count[i] = 1; + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + SXEA1(manual_count[i] == sxe_cdb_instance_inc(cdb_instance, counts_list ), "ERROR: INTERNAL: sxe_cdb_instance_inc() unexpectedly failing"); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: instance: inc-1st %u keys in %6.2f seconds or %8u keys per second with %u sheets_split & avg sheet keys during split %lu, 
->counts_used=%u", + keys, elapsed_time, + (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time)), + cdb_instance->sheets_split, + cdb_instance->sheets_split ? cdb_instance->sheets_split_keys / cdb_instance->sheets_split : 0, + cdb_instance->counts_used); + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + manual_count[i] ++; + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + SXEA1(manual_count[i] == sxe_cdb_instance_inc(cdb_instance, counts_list ), "ERROR: INTERNAL: sxe_cdb_instance_inc() unexpectedly failing"); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: instance: inc-2nd %u keys in %6.2f seconds or %8u keys per second with %u sheets_split & avg sheet keys during split %lu, ->counts_used=%u", + keys, elapsed_time, + (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time)), + cdb_instance->sheets_split, + cdb_instance->sheets_split ? cdb_instance->sheets_split_keys / cdb_instance->sheets_split : 0, + cdb_instance->counts_used); + + start_time = kit_timestamp_get(); + + for (i = 0; i < keys; i++) { + uint32_t myhash[4]; + sxe_hash_128((const uint8_t *)&i, sizeof(i), (uint8_t *)&myhash[0]); + uint32_t myrandom = myhash[0] % (keys * 2 * DIV / 4096); /* incrementing deterministically smaller number of random keys will cause ->counts mremap() (and will cause code to run faster due to cache line re-usage) */ + manual_count[myrandom] ++; + sxe_cdb_prepare((const uint8_t *) &myrandom, sizeof(myrandom)); + SXEA1(manual_count[myrandom] == sxe_cdb_instance_inc(cdb_instance, counts_list), "ERROR: INTERNAL: sxe_cdb_instance_inc() unexpectedly failing"); + manual_count_hi = manual_count[myrandom] > manual_count_hi ? 
manual_count[myrandom] : manual_count_hi; + } + + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: instance: inc-rnd %u keys in %6.2f seconds or %8u keys per second with %u sheets_split & avg sheet keys during split %lu, ->counts_used=%u", + keys, elapsed_time, + (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time)), + cdb_instance->sheets_split, + cdb_instance->sheets_split ? cdb_instance->sheets_split_keys / cdb_instance->sheets_split : 0, + cdb_instance->counts_used); + + sxe_cdb_tls_walk_cnt_pos = SXE_CDB_COUNT_NONE; + for (i = 0; i < 5; i++) { + sxe_cdb_instance_walk(cdb_instance, 1 /* hi2lo */, sxe_cdb_tls_walk_cnt_pos, sxe_cdb_tls_walk_hkv_pos, counts_list); + if (0 == i) { + is(manual_count_hi, sxe_cdb_tls_walk_count, "test: just for fun top key: has expected count"); + } + SXEL5("test: just for fun top key #%u: binary key=count; %08x=%lu", i, *((uint32_t *) sxe_cdb_tls_hkv_part.key), sxe_cdb_tls_walk_count); + } + + SXEL5("test: instance: %.1fMB total memory in sheets, %.1fMB total memory in kvdata; cells used %u, size %u or %u%% full; kv used %u, size %u", + cdb_instance->sheets_size * SXE_CDB_SHEET_BYTES / 1024 / 1024.0, + cdb_instance->kvdata_used / 1024 / 1024.0, + cdb_instance->sheets_cells_used, + cdb_instance->sheets_cells_size, + cdb_instance->sheets_cells_used * 100 / cdb_instance->sheets_cells_size, + cdb_instance->kvdata_used, + cdb_instance->kvdata_size); + + sxe_cdb_instance_destroy(cdb_instance); + } + + //sxe_cdb_debug_validate(cdb, "final validate"); + + diag("tests for maximum size"); + + SXE_CDB_INSTANCE * cdb_instance_with_limit = sxe_cdb_instance_new(0 /* grow from minimum size */, 99999 /* grow to max. 
10000 bytes */); + + sxe_cdb_prepare(&header_len_5_key[0], sizeof(header_len_5_key)); + ok(sxe_cdb_instance_put_val(cdb_instance_with_limit, NULL, 0) != SXE_CDB_UID_NONE, "append header_len_5_key as expected"); + ok(sxe_cdb_instance_put_val(cdb_instance_with_limit, NULL, 0) != SXE_CDB_UID_NONE, "append header_len_5_key as expected // straddles kvdata limit"); + is(sxe_cdb_instance_put_val(cdb_instance_with_limit, NULL, 0) , SXE_CDB_UID_NONE, "append header_len_5_key as expected // failed due to kvdata limit"); + + sxe_cdb_instance_destroy(cdb_instance_with_limit); + + diag("tests for ensemble functionality"); + + //debug putenv(SXE_CAST_NOCONST(char *, "SXE_LOG_LEVEL_LIBSXE_LIB_SXE_CDB=7")); /* Set to 5 to suppress sxe-cdb logging during test */ + //debug sxe_log_control_forget_all_levels(); + + { + uint32_t instances = 8; + SXE_CDB_ENSEMBLE * cdb_ensemble = sxe_cdb_ensemble_new(0 /* grow from minimum size */, 0 /* grow to maximum allowed size */, instances /* number of cdb instances */, 1 /* locked */); + SXE_CDB_UID uid ; + SXE_CDB_UID uids[keys]; + SXE_CDB_HKV * hkv ; + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + uids[i].as_u64.u = sxe_cdb_ensemble_put_val(cdb_ensemble, (const uint8_t *) &i, sizeof(i)); + SXEA1(SXE_CDB_UID_NONE != uids[i].as_u64.u, "ERROR: INTERNAL: sxe_cdb_ensemble_put_val() unexpectedly failing"); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: ensemble: put-val %u keys in %6.2f seconds or %8u keys per second", keys, elapsed_time, + (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time))); + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + uid.as_u64.u = sxe_cdb_ensemble_get_uid(cdb_ensemble); SXEA1(uids[i].as_u64.u == uid.as_u64.u, "ERROR: INTERNAL: unexpected SXE_CDB_UID_NONE at i=%u", i); + } + 
elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: ensemble: get-uid %u keys in %6.2f seconds or %8u keys per second", keys, elapsed_time, + (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time))); + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + uid.as_u64.u = uids[i].as_u64.u; + hkv = sxe_cdb_ensemble_get_uid_hkv(cdb_ensemble, uid); SXEA1(hkv, "ERROR: INTERNAL: unexpected NULL hkv at i=%u for uid %010lx=ii[%04x]%03x-%01x", i, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + SXEA1( sxe_cdb_tls_hkv_part.key_len == sizeof(i) , "ERROR: INTERNAL: unexpected key len at i=%u for uid %010lx=ii[%04x]%03x-%01x", i, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + SXEA1( sxe_cdb_tls_hkv_part.val_len == sizeof(i) , "ERROR: INTERNAL: unexpected val len at i=%u for uid %010lx=ii[%04x]%03x-%01x", i, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + SXEA1(*((uint32_t *) sxe_cdb_tls_hkv_part.key ) == i , "ERROR: INTERNAL: unexpected key at i=%u for uid %010lx=ii[%04x]%03x-%01x", i, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + SXEA1(*((uint32_t *) sxe_cdb_tls_hkv_part.val ) == i , "ERROR: INTERNAL: unexpected val at i=%u for uid %010lx=ii[%04x]%03x-%01x", i, uid.as_u64.u, uid.as_part.sheets_index_index, uid.as_part.row, uid.as_part.cell); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: ensemble: tls-hkv %u keys in %6.2f seconds or %8u keys per second", keys, elapsed_time, + (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time))); + + for (i = 0; i < instances; i++) { + SXEL5("test: ensemble: %.1fMB total memory in sheets, %.1fMB total memory in kvdata; cells used %u, size %u or %u%% full; kv used %u, size %u; instance %u, splits 
%u", + cdb_ensemble->cdb_instances[i]->sheets_size * SXE_CDB_SHEET_BYTES / 1024 / 1024.0, + cdb_ensemble->cdb_instances[i]->kvdata_used / 1024 / 1024.0, + cdb_ensemble->cdb_instances[i]->sheets_cells_used, + cdb_ensemble->cdb_instances[i]->sheets_cells_size, + cdb_ensemble->cdb_instances[i]->sheets_cells_used * 100 / cdb_ensemble->cdb_instances[i]->sheets_cells_size, + sxe_cdb_ensemble_kvdata_used(cdb_ensemble, i), + cdb_ensemble->cdb_instances[i]->kvdata_size, + i, + cdb_ensemble->cdb_instances[i]->sheets_split); + } + + sxe_cdb_ensemble_destroy(cdb_ensemble); + } + + diag("stress test cdb ensemble inc"); + { + uint32_t instances = 8; + SXE_CDB_ENSEMBLE * cdb_ensemble = sxe_cdb_ensemble_new(0 /* grow from minimum size */, 0 /* grow to maximum allowed size */, instances /* number of cdb instances */, 1 /* locked */); + uint32_t counts_list = 0; + uint64_t manual_count[keys]; + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + manual_count[i] = 1; + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + SXEA1(manual_count[i] == sxe_cdb_ensemble_inc(cdb_ensemble, counts_list ), "ERROR: INTERNAL: sxe_cdb_ensemble_inc() unexpectedly failing"); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: ensemble: inc-1st %u keys in %6.2f seconds or %8u keys per second", keys, elapsed_time, + (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time))); + + start_time = kit_timestamp_get(); + for (i = 0; i < keys; i++) { + manual_count[i] ++; + sxe_cdb_prepare ((const uint8_t *) &i, sizeof(i)); + SXEA1(manual_count[i] == sxe_cdb_ensemble_inc(cdb_ensemble, counts_list ), "ERROR: INTERNAL: sxe_cdb_ensemble_inc() unexpectedly failing"); + } + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: ensemble: inc-2nd %u keys in %6.2f seconds or %8u keys per second", keys, elapsed_time, + (unsigned)(((uint64_t)keys << 
KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time))); + + start_time = kit_timestamp_get(); + + for (i = 0; i < keys; i++) { + uint32_t myhash[4]; + sxe_hash_128((const uint8_t *)&i, sizeof(i), (uint8_t *)&myhash[0]); + uint32_t myrandom = myhash[0] % (keys * 2 * DIV / 4096); /* incrementing deterministically smaller number of random keys will cause ->counts mremap() (and will cause code to run faster due to cache line re-usage) */ + manual_count[myrandom]++; + sxe_cdb_prepare((const uint8_t *) &myrandom, sizeof(myrandom)); + SXEA1(manual_count[myrandom] == sxe_cdb_ensemble_inc(cdb_ensemble, counts_list), "ERROR: INTERNAL: sxe_cdb_ensemble_inc() unexpectedly failing"); + } + + elapsed_time = kit_timestamp_to_double_seconds(kit_timestamp_get() - start_time); + SXEL5("test: ensemble: inc-rnd %u keys in %6.2f seconds or %8u keys per second", keys, elapsed_time, + (unsigned)(((uint64_t)keys << KIT_TIMESTAMP_BITS_IN_FRACTION) / (kit_timestamp_get() - start_time))); + + uint32_t instance; + for (instance = 0; instance < 8; instance++) { + sxe_cdb_tls_walk_cnt_pos = SXE_CDB_COUNT_NONE; + for (i = 0; i < 2; i++) { + sxe_cdb_ensemble_walk(cdb_ensemble, 1 /* hi2lo */, sxe_cdb_tls_walk_cnt_pos, sxe_cdb_tls_walk_hkv_pos, instance, counts_list); + SXEL5("test: just for fun: instance #%u: top key #%u: binary key=count; %08x=%lu", instance, i, *((uint32_t *) sxe_cdb_tls_hkv_part.key), sxe_cdb_tls_walk_count); + } + } + + for (i = 0; i < instances; i++) { + SXEL5("test: ensemble: %.1fMB total memory in sheets, %.1fMB total memory in kvdata; cells used %u, size %u or %u%% full; kv used %u, size %u; instance %u, splits %u", + cdb_ensemble->cdb_instances[i]->sheets_size * SXE_CDB_SHEET_BYTES / 1024 / 1024.0, + cdb_ensemble->cdb_instances[i]->kvdata_used / 1024 / 1024.0, + cdb_ensemble->cdb_instances[i]->sheets_cells_used, + cdb_ensemble->cdb_instances[i]->sheets_cells_size, + cdb_ensemble->cdb_instances[i]->sheets_cells_used * 100 / 
cdb_ensemble->cdb_instances[i]->sheets_cells_size, + cdb_ensemble->cdb_instances[i]->kvdata_used, + cdb_ensemble->cdb_instances[i]->kvdata_size, + i, + cdb_ensemble->cdb_instances[i]->sheets_split); + } + + sxe_cdb_ensemble_destroy(cdb_ensemble); + } + + sxe_cdb_finalize_thread(); + is(kit_memory_allocations(), start_allocations, "No memory was leaked"); + return exit_status(); +} + diff --git a/lib-sxe-dict/GNUmakefile b/lib-sxe-dict/GNUmakefile new file mode 100644 index 0000000..2e0f5f1 --- /dev/null +++ b/lib-sxe-dict/GNUmakefile @@ -0,0 +1,5 @@ +LIBRARIES = sxe-dict + +include ../dependencies.mak + +#LINK_FLAGS += -lxxhash diff --git a/lib-sxe-dict/README.md b/lib-sxe-dict/README.md new file mode 100644 index 0000000..76b8cdd --- /dev/null +++ b/lib-sxe-dict/README.md @@ -0,0 +1,128 @@ +# sxe-dict + +This module is based on hashdict.c, an MIT licensed dictionary downloaded from https://github.com/exebook/hashdict.c.git + +# Original README + +This is my REALLY FAST implementation of a hash table in C, in under 200 lines of code. + +This is in fact a port of my [hashdic][cppversion] previously written in C++ for [jslike][jslike] project (which is a `var` class making programming in C++ as easy as in JavaScript). + +MIT licensed. + +[cppversion]: https://github.com/exebook/hashdic +[jslike]: https://github.com/exebook/jslike + +For some reason it is more than twice as fast on my benchmarks as the [hash table][redisdictc] used in Redis. But unlike Redis version of a hash table there is no incremental resize. + +The hash function used is my adaptation of [Meiyan][cmp2]/7zCRC, it is [better than MurMur3][cmp1]. + +[cmp1]: https://www.strchr.com/hash_functions +[cmp2]: http://www.sanmayce.com/Fastest_Hash/ +[redisdictc]: https://github.com/antirez/redis/blob/unstable/src/dict.c + +Hash slot duplicates are stored as linked list `node = node->next`. 
+ +## example + +```c +#include <inttypes.h> +#include <stdio.h> + +#include "sxe-dict.h" + +int +main(void) { + struct sxe_dict *dic; + const void **value_ptr; + const void *value; + + dic = sxe_dict_new(0); + + value_ptr = sxe_dict_add(dic, "ABC", 3); + is(*value_ptr, NULL, "New entry should not have a value"); + *value_ptr = (const void *)100; + + value_ptr = sxe_dict_add(dic, "DE", 2); + *value_ptr = (const void *)200; + + value_ptr = sxe_dict_add(dic, "HJKL", 4); + *value_ptr = (const void *)300; + + if ((value = sxe_dict_find(dic, "ABC", 3))) + printf("ABC found: %"PRIiPTR"\n", (intptr_t)value); + else + printf("error\n"); + + if ((value = sxe_dict_find(dic, "DE", 2))) + printf("DE found: %"PRIiPTR"\n", (intptr_t)value); + else + printf("error\n"); + + if ((value = sxe_dict_find(dic, "HJKL", 4))) + printf("HJKL found: %"PRIiPTR"\n", (intptr_t)value); + else + printf("error\n"); + + sxe_dict_delete(dic); +} + +``` +## sxe_dict_add() + +Add a new key to the hash table. + +Unlike most implementations, you do NOT supply the value as an argument to the `add()` function. Instead, after +`sxe_dict_add()` returns `value_ptr`, set the value like this: `*value_ptr = my_value`. + +`const void **sxe_dict_add(struct sxe_dict *dic, const void *key, size_t len);` + +Returns a pointer to the value, which is itself a void pointer, or `NULL` if memory could not be allocated. If the key was already in the table, the value will likely have +been set non-NULL; otherwise, it will be NULL. In both cases you can change the associated value. + +## sxe_dict_find() + +Look up the key in the hash table. Returns the value stored for the key, or `NULL` if the key is not found. + +`const void *sxe_dict_find(const struct sxe_dict *dic, const void *key, size_t len);` + +## sxe_dict_new() + +Create the hash table. + +`struct sxe_dict *sxe_dict_new(unsigned initial_size);` + +Set `initial_size` to the initial size of the table. Useful when you know how many keys you will store and want to preallocate, +in which case use N * 100 / `load` as the initial_size. `load` is 100 (percent) for dictionaries created with `sxe_dict_new()`.
+ +## sxe_dict_delete() + +Deletes the hash table and frees all occupied memory. `sxe_dict_delete()` is a deprecated name for `sxe_dict_free()`. + +`void sxe_dict_delete(struct sxe_dict *dic);` + +## sxe_dict_forEach() + +Iterates over all keys in the table and calls the specified callback for each of them. Iteration stops early if the callback returns `false`. + +`void sxe_dict_forEach(const struct sxe_dict *dic, sxe_dict_iter f, void *user);` + +`typedef bool (*sxe_dict_iter)(const void *key, size_t key_size, const void **value, void *user);` + + +## tuning + + +#### growth (resize) +`struct sxe_dict` has tuning fields: + +`load`: when to resize, as a percentage, for example `50` means "if number of inserted keys is half of the table length then resize". `sxe_dict_new()` uses `100`; + +The original author's experiments on an English dictionary show balanced performance/memory savings with a threshold of 1.0 to 2.0 (a `load` of 100 to 200). + +`growth`: grow the size of hash table by a factor of N. Suggested number is between 2 (conserve memory) and 10 (faster insertions). `sxe_dict_new()` uses `2`. + +The key is a combination of a pointer to bytes and a count of bytes. + + + diff --git a/lib-sxe-dict/sxe-dict.c b/lib-sxe-dict/sxe-dict.c new file mode 100644 index 0000000..221b775 --- /dev/null +++ b/lib-sxe-dict/sxe-dict.c @@ -0,0 +1,310 @@ +#include +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "sxe-dict.h" +#include "sxe-util.h" + +#define hash_func meiyan + +struct sxe_dict_node { + struct sxe_dict_node *next; + union { + char *key; + const char *key_ref; + }; + size_t len; + const void *value; +}; + +static inline uint32_t +meiyan(const char *key, size_t count) +{ + typedef const uint32_t * P; + uint32_t h = 0x811c9dc5; + + while (count >= 8) { + h = (h ^ ((((*(P)key) << 5) | ((*(P)key) >> 27)) ^ *(P)(key + 4))) * 0xad3e7; + count -= 8; + key += 8; + } + +#define tmp h = (h ^ *(const uint16_t *)key) * 0xad3e7; key += 2; + if (count & 4) { tmp tmp } + if (count & 2) { tmp } + if (count & 1) { h = (h ^ *key) * 0xad3e7; } +#undef tmp + + return h ^ (h >> 16); +} + +struct sxe_dict_node * +sxe_dict_node_new(const struct sxe_dict *dic, const char *key, size_t len) +{ + struct
sxe_dict_node *node = kit_malloc(sizeof(struct sxe_dict_node)); + + if (!node) { + SXEL2(": Failed to allocate dictionary node"); /* COVERAGE EXCLUSION: Out of memory */ + return NULL; /* COVERAGE EXCLUSION: Out of memory */ + } + + if (dic->flags & SXE_DICT_FLAG_KEYS_NOCOPY) // Save a reference to the caller's key rather than a copy + node->key_ref = key; + else { + node->key = kit_malloc(len + (dic->flags & SXE_DICT_FLAG_KEYS_STRING ? 1 : 0)); + + if (!node->key) { // Don't copy into a failed allocation or leak the node + SXEL2(": Failed to allocate dictionary key"); /* COVERAGE EXCLUSION: Out of memory */ + kit_free(node); /* COVERAGE EXCLUSION: Out of memory */ + return NULL; /* COVERAGE EXCLUSION: Out of memory */ + } + + memcpy(node->key, key, len); + + if (dic->flags & SXE_DICT_FLAG_KEYS_STRING) + node->key[len] = '\0'; + } + + node->len = len; + node->next = 0; + node->value = NULL; + + return node; +} + +/* Free a node and every node chained after it, along with their keys unless the keys are stored by reference + */ +static void +sxe_dict_node_free(struct sxe_dict *dic, struct sxe_dict_node *node) +{ + while (node) { // Iterate rather than recurse so that a long chain can't exhaust the stack + struct sxe_dict_node *next = node->next; + + if (!(dic->flags & SXE_DICT_FLAG_KEYS_NOCOPY)) + kit_free(node->key); + + kit_free(node); + node = next; + } +} + +/** + * Initialize a dictionary with full control of its properties + * + * @param dic Dictionary to initialize + * @param initial_size The initial size of the dictionary's hash table + * @param load The maximum ratio of entries to hash table buckets, as a percentage, before the table is grown + * @param growth The factor to grow by when the load is exceeded + * @param flags Either SXE_DICT_FLAG_KEYS_BINARY (exact copies), SXE_DICT_FLAG_KEYS_NOCOPY (reference) or + * SXE_DICT_FLAG_KEYS_STRING (copy with NUL termination) + * + * @return true on success, false if out of memory + */ +bool +sxe_dict_init(struct sxe_dict *dic, unsigned initial_size, unsigned load, unsigned growth, unsigned flags) +{ + dic->size = initial_size; + dic->count = 0; + dic->table = initial_size ? 
MOCKERROR(sxe_dict_init, NULL, ENOMEM, kit_calloc(sizeof(struct sxe_dict_node *), initial_size)) : NULL; + dic->load = load; + dic->growth = growth; + dic->flags = flags; + return initial_size == 0 || dic->table; +} + +/** + * Create a dictionary that grows at 100% (as many entries as buckets) by a factor of 2, and copies keys without NUL terminating + * + * @param initial_size The initial size of the dictionary's hash table + * + * @return The dictionary or NULL on out of memory + */ +struct sxe_dict * +sxe_dict_new(unsigned initial_size) +{ + struct sxe_dict *dic = kit_malloc(sizeof(struct sxe_dict)); + + if (!dic || MOCKERROR(sxe_dict_new, true, ENOMEM, !sxe_dict_init(dic, initial_size, 100, 2, SXE_DICT_FLAG_KEYS_BINARY))) { + SXEL2(": Failed to allocate dictionary structure or buckets"); + kit_free(dic); + return NULL; + } + + return dic; +} + +/** + * Finalize a dictionary by freeing all memory allocated to it by sxe-dict + * + * @param dic Dictionary to finalize + * + * @note The dictionary is left empty, so it is safe to finalize it again or to keep using it + */ +void +sxe_dict_fini(struct sxe_dict *dic) +{ + if (dic->table) // There's no table if the initial size was 0, initialization failed or the dictionary was already finalized + for (unsigned i = 0; i < dic->size; i++) + if (dic->table[i]) + sxe_dict_node_free(dic, dic->table[i]); + + kit_free(dic->table); + dic->table = NULL; + dic->size = 0; // A stale size or count would be inconsistent with the missing table + dic->count = 0; +} + +/** + * Free a dictionary after freeing all memory allocated to it by sxe-dict + * + * @param dic Dictionary to finalize + */ +void +sxe_dict_free(struct sxe_dict *dic) +{ + if (!dic) + return; + + sxe_dict_fini(dic); + kit_free(dic); +} + +static void +sxe_dict_reinsert_when_resizing(struct sxe_dict *dic, struct sxe_dict_node *k2) +{ + unsigned n = hash_func(k2->key, k2->len) % dic->size; // Unsigned, like the hash, so the bucket index can never be negative + + if (dic->table[n] == 0) { + dic->table[n] = k2; + return; + } + + struct sxe_dict_node *k = dic->table[n]; + k2->next = k; + dic->table[n] = k2; +} + +bool +sxe_dict_resize(struct sxe_dict *dic, int newsize) +{ + unsigned oldsize = dic->size; + struct sxe_dict_node **old = dic->table; + + if (!(dic->table = MOCKERROR(sxe_dict_resize, NULL, ENOMEM, kit_calloc(sizeof(struct sxe_dict_node*), newsize)))) { + 
SXEL2(": Failed to allocate bigger table"); + dic->table = old; + return false; + } + + dic->size = newsize; + + for (unsigned i = 0; i < oldsize; i++) { + struct sxe_dict_node *node = old[i]; + + while (node) { + struct sxe_dict_node *next = node->next; + node->next = 0; + sxe_dict_reinsert_when_resizing(dic, node); + node = next; + } + } + + kit_free(old); + return true; +} + +/** + * Add a key to a dictionary + * + * @param dic The dictionary + * @param key The key + * @param len The size of the key, or 0 if it's a string to determine its length with strlen + * + * @return A pointer to a value or NULL on out of memory + * + * @note If the caller always saves a non-NULL value in the value pointed at by the return, then if there is a collision, the + * value pointed to by the return should be something other than NULL. + */ +const void ** +sxe_dict_add(struct sxe_dict *dic, const void *key, size_t len) +{ + struct sxe_dict_node **link; + + len = len ?: strlen(key); + + if (dic->table == NULL) { // If this is a completely empty dictionary + if (!(dic->table = MOCKERROR(sxe_dict_add, NULL, ENOMEM, kit_calloc(sizeof(struct sxe_dict_node *), 1)))) { + SXEL2(": Failed to allocate initial table"); + return NULL; + } + + dic->size = 1; + } + + unsigned hash = hash_func(key, len); + unsigned bucket = hash % dic->size; + + if (dic->table[bucket] != NULL) { // Only consider growing the table when the key lands in a used bucket + uint64_t load = (uint64_t)dic->count * 100 / dic->size; // 64 bit so count * 100 can't wrap with more than ~42M entries + unsigned newsize = dic->size * dic->growth; + + if (load >= dic->load && newsize > dic->size) { // Skip the resize if a growth factor < 2 (or overflow) wouldn't grow the table + if (!sxe_dict_resize(dic, newsize)) + return NULL; + + bucket = hash % dic->size; + } + } + + for (link = &dic->table[bucket]; *link != NULL; link = &((*link)->next)) // For each node in the bucket + if ((*link)->len == len && memcmp((*link)->key, key, len) == 0) // If the key matches + return &((*link)->value); + + if ((*link = sxe_dict_node_new(dic, key, len))) + dic->count++; + + return *link ? 
&((*link)->value) : NULL; +} + +/** + * Find a key in a dictionary + * + * @param dic The dictionary + * @param key The key + * @param len The size of the key, or 0 if it's a string to determine its length with strlen + * + * @return The value, or NULL if the key is not found. + */ +const void * +sxe_dict_find(const struct sxe_dict *dic, const void *key, size_t len) +{ + if (dic->table == NULL) // If the dictionary is empty and its initial_size was 0, the key is not found. + return NULL; + + len = len ?: strlen(key); + unsigned n = hash_func((const char *)key, len) % dic->size; + #if defined(__MINGW32__) || defined(__MINGW64__) + __builtin_prefetch(dic->table[n]); + #endif + + #if defined(_WIN32) || defined(_WIN64) + _mm_prefetch((char*)dic->table[n], _MM_HINT_T0); + #endif + struct sxe_dict_node *k = dic->table[n]; + + if (!k) + return NULL; + + while (k) { + if (k->len == len && memcmp(k->key, key, len) == 0) + return k->value; + + k = k->next; + } + + return NULL; +} + +/* Call f for each entry in the dictionary until all entries have been visited or f returns false + */ +void +sxe_dict_forEach(const struct sxe_dict *dic, sxe_dict_iter f, void *user) +{ + if (dic->table == NULL) // As in sxe_dict_find, a dictionary without a table has nothing to visit + return; + + for (unsigned i = 0; i < dic->size; i++) { + if (dic->table[i] != 0) { + struct sxe_dict_node *k = dic->table[i]; + + while (k) { + if (!f(k->key, k->len, &k->value, user)) + return; + + k = k->next; + } + } + } +} + +#undef hash_func diff --git a/lib-sxe-dict/sxe-dict.h b/lib-sxe-dict/sxe-dict.h new file mode 100644 index 0000000..b3b31b8 --- /dev/null +++ b/lib-sxe-dict/sxe-dict.h @@ -0,0 +1,34 @@ +#ifndef SXE_DICT_H +#define SXE_DICT_H + +#include +#include + +#define SXE_DICT_FLAG_KEYS_BINARY 0x00000000 // Keys are exact copies (no NUL termination) +#define SXE_DICT_FLAG_KEYS_NOCOPY 0x00000001 // Don't copy key values, just save references +#define SXE_DICT_FLAG_KEYS_STRING 0x00000002 // NUL terminate copies of keys + +#define sxe_dict_delete(dic) sxe_dict_free(dic) // Deprecated name + +typedef bool (*sxe_dict_iter)(const void *key, size_t key_size, const void **value, void *user); + +struct sxe_dict_node; + 
+struct sxe_dict { + struct sxe_dict_node **table; // Pointer to the bucket list or NULL if the dictionary is empty + unsigned flags; // SXE_DICT_FLAG_* + unsigned size; // Number of buckets + unsigned count; // Number of entries + unsigned load; // Maximum load factor (count/size) as a percentage. 100 -> count == size + unsigned growth; // Growth factor when load exceeded. 2 is for doubling +}; + +static inline unsigned +sxe_dict_count(const struct sxe_dict *dict) +{ + return dict->count; +} + +#include "sxe-dict-proto.h" + +#endif diff --git a/lib-sxe-dict/test/test-sxe-dict.c b/lib-sxe-dict/test/test-sxe-dict.c new file mode 100644 index 0000000..dc80fb7 --- /dev/null +++ b/lib-sxe-dict/test/test-sxe-dict.c @@ -0,0 +1,100 @@ +#include +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "kit-test.h" +#include "sxe-dict.h" + +static bool visit_all = true; + +static bool +my_visit(const void *key, size_t key_size, const void **value, void *user) +{ + is_eq(key, "longname", "Correct key"); + is(key_size, 8, "Correct size"); + is(*value, 1026, "Correct value"); + *(unsigned *)user += 1; + return visit_all; +} + +int +main(void) { + struct sxe_dict dictator[1]; + struct sxe_dict *dic; + const void **value_ptr; + const void *value; + unsigned visits = 0; + + kit_test_plan(28); + // KIT_ALLOC_SET_LOG(1); // Turn off when done + + MOCKFAIL_START_TESTS(1, sxe_dict_new); + ok(!sxe_dict_new(0), "sxe_dict_new failed to allocate"); + MOCKFAIL_END_TESTS(); + + dic = sxe_dict_new(0); + is(dic->table, NULL, "Empty dictionary has no table"); + ok((value = sxe_dict_find(dic, "ABC", 3)) == NULL, "Before adding ABC, expected NULL, found: %"PRIiPTR"\n", (intptr_t)value); + + MOCKFAIL_START_TESTS(1, sxe_dict_add); + ok(!sxe_dict_add(dic, "ABC", 3), "sxe_dict_add failed to allocate initial table"); + MOCKFAIL_END_TESTS(); + + value_ptr = sxe_dict_add(dic, "ABC", 3); + is(*value_ptr, NULL, "New entry should not have a value"); + *value_ptr = (const void *)100; + 
is(dic->size, 1, "Size after 1 insert is 1"); + + MOCKFAIL_START_TESTS(1, sxe_dict_resize); + ok(!sxe_dict_add(dic, "DE", 2), "sxe_dict_add failed to expand table"); + MOCKFAIL_END_TESTS(); + + value_ptr = sxe_dict_add(dic, "DE", 2); + *value_ptr = (const void *)200; + is(dic->size, 2, "Size after 2 inserts is 2"); + + value_ptr = sxe_dict_add(dic, "HJKL", 4); + *value_ptr = (const void *)300; + + /* The following is because after doubling to 2, 1 and 2 ended up in bucket 0, but 3 ends up in bucket 1. + */ + is(dic->size, 2, "Size after 3 inserts is 2"); + + ok((value = sxe_dict_find(dic, "ABC", 3)), "ABC found"); + is(value, 100, "It's value is 100"); + ok((value = sxe_dict_find(dic, "DE", 0)), "DE found (and test passing 0 length to use strlen)"); + is(value, 200, "It's value is 200"); + ok((value = sxe_dict_find(dic, "HJKL", 4)), "HJKL found"); + is(value, 300, "It's value is 300"); + + /* Make sure adding a key that's already in the table, a new entry is not added. + */ + value_ptr = sxe_dict_add(dic, "ABC", 0); + is(*value_ptr, (const void *)100, "Got expected value for 'ABC'"); + + sxe_dict_free(dic); + + ok(sxe_dict_init(dictator, 2, 100, 2, SXE_DICT_FLAG_KEYS_NOCOPY), "Constructed a dictionary that stores keys by reference"); + value_ptr = sxe_dict_add(dictator, "longname", 0); + *value_ptr = (const void *)1026; + is(sxe_dict_find(dictator, "different bucket", 0), NULL, "Looked up a non-existent key that lands in a different bucket"); + is(sxe_dict_find(dictator, "same bucket", 0), NULL, "Looked up a non-existent key that lands in the used bucket"); + + sxe_dict_forEach(dictator, my_visit, &visits); + is(visits, 1, "Visited all (1) nodes"); + + visit_all = false; + visits = 0; + sxe_dict_forEach(dictator, my_visit, &visits); + is(visits, 1, "Visited 1 node and short circuited"); + sxe_dict_fini(dictator); + + ok(sxe_dict_init(dictator, 0, 100, 2, SXE_DICT_FLAG_KEYS_STRING), "Constructed a dictionary that stores keys as strings"); + value_ptr = 
sxe_dict_add(dictator, "longname", 0); + *value_ptr = (const void *)1026; + sxe_dict_fini(dictator); + + sxe_dict_free(NULL); + kit_test_exit(0); +} diff --git a/lib-sxe-hash/GNUmakefile b/lib-sxe-hash/GNUmakefile new file mode 100644 index 0000000..0603b3c --- /dev/null +++ b/lib-sxe-hash/GNUmakefile @@ -0,0 +1,6 @@ +LIBRARIES = sxe-hash +include ../dependencies.mak + +ifndef SXE_DISABLE_XXHASH + LINK_FLAGS += -lxxhash +endif diff --git a/lib-sxe-hash/sxe-hash-classic.c b/lib-sxe-hash/sxe-hash-classic.c new file mode 100644 index 0000000..81147d5 --- /dev/null +++ b/lib-sxe-hash/sxe-hash-classic.c @@ -0,0 +1,121 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + + +#include "sxe-hash-private.h" + +/** + * Allocate and construct a hash with fixed size elements (SHA1 + unsigned) + * + * @param name = Name of the hash, used in diagnostics + * @param element_count = Maximum number of elements in the hash + * + * @return A pointer to an array of hash elements + * + * @note This hash table is not thread safe + */ +void * +sxe_hash_new(const char * name, unsigned element_count) +{ + void * array; + + SXEE6("sxe_hash_new(name=%s,element_count=%u)", name, element_count); + + array = sxe_hash_new_plus(name, element_count, sizeof(SXE_HASH_KEY_VALUE_PAIR), 0, sizeof(SXE_SHA1), SXE_HASH_OPTION_UNLOCKED); /* NOTE(review): SXE_SHA1 is not declared in the visible sxe-hash.h; confirm it has the size of struct SXE_HASH_SHA1 */ + SXER6("return array=%p", array); + return array; +} + +unsigned +sxe_hash_set(void * array, const char * sha1_as_char, unsigned sha1_key_len, unsigned value) +{ + SXE_HASH * hash = SXE_HASH_ARRAY_TO_IMPL(array); + unsigned id; + + SXE_UNUSED_PARAMETER(sha1_key_len); + + SXEE6("sxe_hash_set(hash=%p,sha1_as_char=%.*s,sha1_key_len=%u,value=%u)", hash, sha1_key_len, sha1_as_char, sha1_key_len, value); + SXEA6(sha1_key_len == SXE_HASH_SHA1_AS_HEX_LENGTH, "sha1 length is incorrect"); + + if ((id = sxe_hash_take(array)) == SXE_HASH_FULL) { + goto SXE_EARLY_OUT; + } + + SXEL7("setting key and value at index=%u", id); + sxe_hash_sha1_from_hex(&((SXE_HASH_KEY_VALUE_PAIR *)hash->pool)[id].sha1, sha1_as_char); /* NOTE(review): the conversion result is ignored, so the element is added even when the hex string is malformed */ + ((SXE_HASH_KEY_VALUE_PAIR *)hash->pool)[id].value = value; + sxe_hash_add(array, id); + +SXE_EARLY_OUT: + SXER6(id == SXE_HASH_FULL ? "%sSXE_HASH_FULL" : "%s%u", "return id=", id); + return id; +} + +enum sxe_hash_action { + SXE_HASH_ACTION_NONE, + SXE_HASH_ACTION_REMOVE, +}; + +/** + * Get the value of an element in a hash with fixed size elements (SHA1 + unsigned) by SHA1 key in hex + * Perform the given 'action' on the element.
+ */ +static unsigned +sxe_hash_action(void * array, const char * sha1_as_char, unsigned sha1_key_len, enum sxe_hash_action action) +{ + SXE_HASH *hash = SXE_HASH_ARRAY_TO_IMPL(array); + unsigned value = SXE_HASH_KEY_NOT_FOUND; + unsigned id; + struct SXE_HASH_SHA1 sha1; + + SXE_UNUSED_PARAMETER(sha1_key_len); + SXEE6("sxe_hash_%s(hash=%p,sha1_as_char=%.*s,sha1_key_len=%u)", action == SXE_HASH_ACTION_REMOVE ? "remove" : "get", + hash, sha1_key_len, sha1_as_char, sha1_key_len); + SXEA6(sha1_key_len == SXE_HASH_SHA1_AS_HEX_LENGTH, "sha1 length is incorrect"); + + if (sxe_hash_sha1_from_hex(&sha1, sha1_as_char) != SXE_RETURN_OK) { + goto SXE_EARLY_OUT; + } + + id = sxe_hash_look(array, &sha1); + + if (id != SXE_HASH_KEY_NOT_FOUND) { + value = ((SXE_HASH_KEY_VALUE_PAIR *)hash->pool)[id].value; + if (action == SXE_HASH_ACTION_REMOVE) + sxe_hash_give(array, id); + } + +SXE_EARLY_OUT: + SXER6("return value=%u", value); + return value; +} + +unsigned +sxe_hash_get(void *array, const char *sha1_as_char, unsigned sha1_key_len) +{ + return sxe_hash_action(array, sha1_as_char, sha1_key_len, SXE_HASH_ACTION_NONE); +} + +unsigned +sxe_hash_remove(void * array, const char * sha1_as_char, unsigned sha1_key_len) +{ + return sxe_hash_action(array, sha1_as_char, sha1_key_len, SXE_HASH_ACTION_REMOVE); +} diff --git a/lib-sxe-hash/sxe-hash-private.h b/lib-sxe-hash/sxe-hash-private.h new file mode 100644 index 0000000..59ea25d --- /dev/null +++ b/lib-sxe-hash/sxe-hash-private.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "sxe-hash.h" + +#define SXE_HASH_ARRAY_TO_IMPL(array) ((SXE_HASH *)sxe_pool_to_base(array) - 1) diff --git a/lib-sxe-hash/sxe-hash-sha1.c b/lib-sxe-hash/sxe-hash-sha1.c new file mode 100644 index 0000000..13752ee --- /dev/null +++ b/lib-sxe-hash/sxe-hash-sha1.c @@ -0,0 +1,55 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +#include "sxe-hash.h" + +SXE_RETURN +sxe_hash_sha1_from_hex(struct SXE_HASH_SHA1 *sha1, const char *sha1_in_hex) +{ + SXE_RETURN result; + + SXEE6("(sha1=%p,sha1_in_hex='%s'", sha1, sha1_in_hex); + result = sxe_hex_to_bytes((unsigned char *)sha1, sha1_in_hex, SXE_HASH_SHA1_AS_HEX_LENGTH); + SXER6("return %s", sxe_return_to_string(result)); + return result; +} + +SXE_RETURN +sxe_hash_sha1_to_hex(const struct SXE_HASH_SHA1 *sha1, char *sha1_in_hex, unsigned sha1_in_hex_length) +{ + SXE_RETURN result = SXE_RETURN_OK; + + SXEE6("(sha1=%08x%08x%08x%08x%08x,sha1_in_hex='%p',sha1_in_hex_length='%u'", + sha1->word[4], sha1->word[3], sha1->word[2], sha1->word[1], sha1->word[0], + sha1_in_hex, sha1_in_hex_length); + SXEA1(sha1_in_hex_length >= (SXE_HASH_SHA1_AS_HEX_LENGTH + 1), "Incorrect length of char * for sxe_hash_sha1_to_hex(): '%u'", + sha1_in_hex_length); + + snprintf(sha1_in_hex, sha1_in_hex_length, "%08x%08x%08x%08x%08x", htonl(sha1->word[0]), htonl(sha1->word[1]), + htonl(sha1->word[2]), htonl(sha1->word[3]), htonl(sha1->word[4])); + SXEL6(": '%s'", sha1_in_hex); + + SXER6("return %s", sxe_return_to_string(result)); + return result; +} + diff --git a/lib-sxe-hash/sxe-hash-sum.c b/lib-sxe-hash/sxe-hash-sum.c new file mode 100644 index 0000000..1a09663 --- /dev/null +++ b/lib-sxe-hash/sxe-hash-sum.c @@ -0,0 +1,62 @@ +#include + +#include "sxe-hash.h" + +/** + * Compute a hash sum of a fixed length or NUL terminated key + * + * @param key Pointer to the key + * @param length Length of the key in bytes or 0 to use strlen + * + * @return 32 bit hash value + */ +#ifndef SXE_DISABLE_XXHASH +unsigned (*sxe_hash_sum)(const void *key, size_t length) = sxe_hash_xxh32; // XXH32 is the default hash algorithm +#else +unsigned (*sxe_hash_sum)(const void *key, size_t length) = NULL; // Build with SXE_DISABLE_XXHASH for no default +#endif + +/** + * Override the default hash sum function (xxh32) + * + * @param new_hash_sum Pointer to a function that takes a key 
and a length and returns an unsigned sum + * + * @note If the function is passed 0 as the length, it should use strlen to compute the length of the key + */ +SXE_HASH_FUNC +sxe_hash_override_sum(unsigned (*new_hash_sum)(const void *key, size_t length)) +{ + SXE_HASH_FUNC old_hash_sum = sxe_hash_sum; + + sxe_hash_sum = new_hash_sum; + return old_hash_sum; +} + +/** + * Compute a 128 bit hash sum of a fixed length or NUL terminated key + * + * @param key Pointer to the key + * @param length Length of the key in bytes or 0 to use strlen + * @param hash_out Pointer to an array of 16 bytes (i.e. uint8_t values) + */ +#ifndef SXE_DISABLE_XXHASH +void (*sxe_hash_128)(const void *key, size_t length, uint8_t *hash_out) = sxe_hash_xxh128; // XXH128 is the default +#else +void (*sxe_hash_128)(const void *key, size_t length, uint8_t *hash_out) = NULL; // Build w/SXE_DISABLE_XXHASH for no default +#endif + +/** + * Override the default 128 bit hash function (xxh128), returning the previously installed function + * + * @param new_hash_128 Pointer to a function that takes a key, a length and a pointer to an array of 16 bytes (uint8_t) for the hash + * + * @note If the function is passed 0 as the length, it should use strlen to compute the length of the key + */ +SXE_HASH_128_FUNC +sxe_hash_override_128(void (*new_hash_128)(const void *key, size_t length, uint8_t *hash_out)) +{ + SXE_HASH_128_FUNC old_hash_128 = sxe_hash_128; + + sxe_hash_128 = new_hash_128; + return old_hash_128; +} diff --git a/lib-sxe-hash/sxe-hash-xxh128.c b/lib-sxe-hash/sxe-hash-xxh128.c new file mode 100644 index 0000000..0dc2e67 --- /dev/null +++ b/lib-sxe-hash/sxe-hash-xxh128.c @@ -0,0 +1,40 @@ +/* Module that implements the default XXH128 hash function. Calling these functions will require the libxxhash DLL.
+ */ + +#ifndef SXE_DISABLE_XXHASH + +#pragma GCC diagnostic ignored "-Waggregate-return" // To allow use of XXH3_128bits, which returns a structure + +#include +#include + +#include "sxe-hash.h" + +/** + * Compute a 128 bit hash sum of a fixed length or NUL terminated key using XXH128 hash + * + * @param key Pointer to the key + * @param length Length of the key in bytes or 0 to use strlen + * @param hash_out Pointer to an array of 16 bytes (i.e. uint8_t) + */ +void +sxe_hash_xxh128(const void *key, size_t length, uint8_t *hash_out) +{ + XXH128_hash_t hash = XXH3_128bits(key, length ?: strlen(key)); + + memcpy(hash_out, &hash.low64, 8); + memcpy(hash_out + 8, &hash.high64, 8); +} + +/** + * Override the 128 bit hash sum function with xxh128 + * + * @note This restores the default 128 bit hash sum function + */ +void +sxe_hash_use_xxh128(void) +{ + sxe_hash_override_128(sxe_hash_xxh128); +} + +#endif diff --git a/lib-sxe-hash/sxe-hash-xxh32.c b/lib-sxe-hash/sxe-hash-xxh32.c new file mode 100644 index 0000000..12d59d5 --- /dev/null +++ b/lib-sxe-hash/sxe-hash-xxh32.c @@ -0,0 +1,36 @@ +/* Module that implements the default XXH32 hash function. Calling these functions will require the libxxhash DLL. 
+ */ + +#ifndef SXE_DISABLE_XXHASH + +#include +#include + +#include "sxe-hash.h" + +/** + * Compute a hash sum of a fixed length or NUL terminated key using XXH32 hash + * + * @param key Pointer to the key + * @param length Length of the key in bytes or 0 to use strlen + * + * @return 32 bit hash value + */ +unsigned +sxe_hash_xxh32(const void *key, size_t length) +{ + return XXH32(key, length ?: strlen(key), 17); // SonarQube False Positive +} + +/** + * Override the hash sum function with xxh32 + * + * @note This restores the default hash sum function + */ +void +sxe_hash_use_xxh32(void) +{ + sxe_hash_override_sum(sxe_hash_xxh32); +} + +#endif diff --git a/lib-sxe-hash/sxe-hash.c b/lib-sxe-hash/sxe-hash.c new file mode 100644 index 0000000..21a596a --- /dev/null +++ b/lib-sxe-hash/sxe-hash.c @@ -0,0 +1,200 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +#include "kit-alloc.h" +#include "sxe-hash-private.h" + +#define SXE_HASH_UNUSED_BUCKET 0 /* Pool state holding elements that are free to be taken */ +#define SXE_HASH_NEW_BUCKET 1 /* Pool state holding elements that were taken but not yet added to a bucket */ +#define SXE_HASH_BUCKETS_RESERVED 2 /* Number of pool states reserved ahead of the hash buckets */ + +/** + * Default hash key function; returns the first word of the key; useful when key is a SHA1 + * + * @param key = Key to hash + * @param size = Size of key to hash + * + * @return Checksum (i.e. hash value) of key + */ +static unsigned +sxe_prehashed_key_hash(const void * key, size_t size) +{ + SXEE6("sxe_prehashed_key_hash(key=%.*s, size=%zu)", (int) size, (const char *) key, size); + SXE_UNUSED_PARAMETER(size); + SXER6("return sum=%u", *(const unsigned *)key); + return *(const unsigned *)key; +} + +/** + * Allocate and construct a hash + * + * @param name = Name of the hash, used in diagnostics + * @param element_count = Maximum number of elements in the hash + * @param element_size = Size of each element in the hash in bytes + * @param key_offset = Offset of start of key from start of element in bytes + * @param key_size = Size of the key in bytes + * @param options = SXE_HASH_OPTION_UNLOCKED | SXE_HASH_OPTION_LOCKED (single threaded or use locking) + * + SXE_HASH_OPTION_PREHASHED | SXE_HASH_OPTION_COMPUTED_HASH (key is prehashed or use sxe_hash_sum, XXH32 by default) + * + * @return A pointer to an array of hash elements + */ +void * +sxe_hash_new_plus(const char * name, unsigned element_count, unsigned element_size, unsigned key_offset, unsigned key_size, + unsigned options) +{ + SXE_HASH * hash; + size_t size; + + SXEE6("sxe_hash_new_plus(name=%s,element_count=%u,element_size=%u,key_offset=%u,key_size=%u,options=%u)", name, element_count, + element_size, key_offset, key_size, options); + size = sizeof(SXE_HASH) + sxe_pool_size(element_count, element_size, element_count + SXE_HASH_BUCKETS_RESERVED); + SXEA1((hash = kit_malloc(size)) != NULL, "Unable to allocate %zu bytes of memory for hash %s", size, name); + SXEL6("Base address of hash %s = %p", name, hash); + + /* Note: hash + 1 == pool base */ +
hash->pool = sxe_pool_construct(hash + 1, name, element_count, element_size, element_count + SXE_HASH_BUCKETS_RESERVED, + options & SXE_HASH_OPTION_LOCKED ? SXE_POOL_OPTION_LOCKED : 0); + hash->count = element_count; + hash->size = element_size; + hash->key_offset = key_offset; + hash->key_size = key_size; + hash->options = options; + hash->hash_key = options & SXE_HASH_OPTION_COMPUTED_HASH ? sxe_hash_sum : &sxe_prehashed_key_hash; + + SXER6("return array=%p", hash->pool); + return hash->pool; +} + +void +sxe_hash_reconstruct(void * array) +{ + SXE_HASH * hash = SXE_HASH_ARRAY_TO_IMPL(array); + SXEE6("sxe_hash_reconstruct(hash=%s)", sxe_pool_get_name(array)); + hash->pool = sxe_pool_construct(hash + 1, sxe_pool_get_name(array), + hash->count, hash->size, hash->count + SXE_HASH_BUCKETS_RESERVED, + hash->options & SXE_HASH_OPTION_LOCKED ? SXE_POOL_OPTION_LOCKED : 0); + SXER6("return"); +} + +/** + * Delete a hash created with sxe_hash_new() + * + * @param array = Pointer to the hash array + */ +void +sxe_hash_delete(void * array) +{ + SXE_HASH * hash = SXE_HASH_ARRAY_TO_IMPL(array); + SXEE6("sxe_hash_delete(hash=%s)", sxe_pool_get_name(array)); + kit_free(hash); + SXER6("return"); +} + +/** + * Take an element from the free queue of the hash + * + * @param array = Pointer to the hash array + * + * @return The index of the element or SXE_HASH_FULL if the hash is full + * + * @note The element is moved to the new queue until the caller adds it to the hash + */ +unsigned +sxe_hash_take(void * array) +{ + SXE_HASH * hash = SXE_HASH_ARRAY_TO_IMPL(array); + unsigned id; + + SXEE6("sxe_hash_take(hash=%s)", sxe_pool_get_name(array)); + id = sxe_pool_set_oldest_element_state(hash->pool, SXE_HASH_UNUSED_BUCKET, SXE_HASH_NEW_BUCKET); + + if (id == SXE_POOL_NO_INDEX) { + id = SXE_HASH_FULL; + } + + SXER6(id == SXE_HASH_FULL ? 
"%sSXE_HASH_FULL" : "%s%u", "return id=", id); + return id; +} + +/** + * Look for a key in the hash + * + * @param array = Pointer to the hash array + * @param key = Pointer to the key value + * + * @return Index of the element found or SXE_HASH_KEY_NOT_FOUND + */ +unsigned +sxe_hash_look(void * array, const void * key) +{ + const SXE_HASH *hash = SXE_HASH_ARRAY_TO_IMPL(array); + unsigned id = SXE_HASH_KEY_NOT_FOUND; + unsigned bucket; + SXE_POOL_WALKER walker; + + SXEE6("sxe_hash_look(hash=%s,key=%p)", sxe_pool_get_name(array), key); + bucket = hash->hash_key(key, hash->key_size) % hash->count + SXE_HASH_BUCKETS_RESERVED; + SXEL6("Looking in bucket %u", bucket); + sxe_pool_walker_construct(&walker, array, bucket); + + while ((id = sxe_pool_walker_step(&walker)) != SXE_HASH_KEY_NOT_FOUND) { + if (memcmp((char *)array + id * hash->size + hash->key_offset, key, hash->key_size) == 0) { + break; + } + } + + SXER6(id == SXE_HASH_KEY_NOT_FOUND ? "%sSXE_HASH_KEY_NOT_FOUND" : "%s%u", "return id=", id); + return id; +} + +/** + * Add an element to the hash + * + * @param array = Pointer to the hash array + * @param id = Index of the element to hash + */ +void +sxe_hash_add(void * array, unsigned id) +{ + const SXE_HASH *hash = SXE_HASH_ARRAY_TO_IMPL(array); + const void *key; + unsigned bucket; + + SXEE6("sxe_hash_add(hash=%s,id=%u)", sxe_pool_get_name(array), id); + + key = &((char *)array)[id * hash->size + hash->key_offset]; + bucket = hash->hash_key(key, hash->key_size) % hash->count + SXE_HASH_BUCKETS_RESERVED; + SXEL6("Adding element %u to bucket %u", id, bucket); + sxe_pool_set_indexed_element_state(array, id, SXE_HASH_NEW_BUCKET, bucket); + SXER6("return"); +} + +void +sxe_hash_give(void * array, unsigned id) +{ + SXE_HASH * hash = SXE_HASH_ARRAY_TO_IMPL(array); + + SXEE6("sxe_hash_give(hash=%s,id=%u)", sxe_pool_get_name(array), id); + sxe_pool_set_indexed_element_state(hash->pool, id, sxe_pool_index_to_state(array, id), SXE_HASH_UNUSED_BUCKET); + SXER6("return"); 
+} diff --git a/lib-sxe-hash/sxe-hash.h b/lib-sxe-hash/sxe-hash.h new file mode 100644 index 0000000..49e6041 --- /dev/null +++ b/lib-sxe-hash/sxe-hash.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef __SXE_HASH_H__ +#define __SXE_HASH_H__ + +#include + +#include "sxe-pool.h" +#include "sxe-util.h" + +#define SXE_HASH_KEY_NOT_FOUND (~0U) +#define SXE_HASH_FULL (~0U) +#define SXE_HASH_SHA1_AS_HEX_LENGTH (2 * sizeof(struct SXE_HASH_SHA1)) + +#define SXE_HASH_OPTION_UNLOCKED 0 +#define SXE_HASH_OPTION_LOCKED SXE_BIT_OPTION(0) +#define SXE_HASH_OPTION_PREHASHED 0 +#define SXE_HASH_OPTION_COMPUTED_HASH SXE_BIT_OPTION(1) + +struct SXE_HASH_SHA1 { + uint32_t word[5]; +}; + +/* Classic hash: prehashed SHA1 key to unsigned value + */ +typedef struct SXE_HASH_KEY_VALUE_PAIR { + struct SXE_HASH_SHA1 sha1; + unsigned value; +} SXE_HASH_KEY_VALUE_PAIR; + +typedef struct SXE_HASH { + void * pool; // Array of elements handed back to callers + unsigned count; // Maximum number of elements; also the number of hash buckets + unsigned size; // Size of each element in bytes + unsigned key_offset; // Offset of the key from the start of an element in bytes + unsigned key_size; // Size of the key in bytes + unsigned options; // SXE_HASH_OPTION_* bits + unsigned (* hash_key)(const void * key, size_t size); // Function used to hash keys +} SXE_HASH; + +typedef unsigned (*SXE_HASH_FUNC)( const void *, size_t); // Type signature for a 32 bit hash function +typedef void (*SXE_HASH_128_FUNC)(const void *, size_t, uint8_t *); // Type signature for a 128 bit hash function + +extern unsigned (*sxe_hash_sum)(const void *key, size_t length); // Declared as unsigned to match its definition and SXE_HASH_FUNC +extern void (*sxe_hash_128)(const void *key, size_t length, uint8_t *hash_out); + +#include "lib-sxe-hash-proto.h" +#endif + diff --git a/lib-sxe-hash/test/test-sxe-hash-bench.c b/lib-sxe-hash/test/test-sxe-hash-bench.c new file mode 100644 index 0000000..ce9e46d --- /dev/null +++ b/lib-sxe-hash/test/test-sxe-hash-bench.c @@ -0,0 +1,59 @@ +/* Copyright (c) 2010 Sophos Group.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include /* For snprintf on Windows */ +#include + +#include "kit-timestamp.h" +#include "sxe-hash.h" + +#define KEY_SIZE 8 + +int +main(int argc, char * argv[]) +{ + char (*hash)[2 * KEY_SIZE]; /* Element stride must equal the element size given to sxe_hash_new_plus; padded so the NUL from snprintf fits */ + kit_timestamp_t start_time; + unsigned i; + unsigned id; + char key[KEY_SIZE + 1]; + + (void)argv; + + if (argc == 1) { + fprintf(stderr, "To benchmark hash, run: build-linux-32-release/test-sxe-hash-bench with options:\n"); + fprintf(stderr, " -x = xxh32 hash of 8 byte keys\n"); + exit(0); + } + + hash = sxe_hash_new_plus("testhash", 1 << 16, sizeof(*hash), 0, KEY_SIZE, SXE_HASH_OPTION_UNLOCKED | SXE_HASH_OPTION_COMPUTED_HASH); + start_time = kit_timestamp_get(); + + for (i = 0; i < 10000; i++) { + id = sxe_hash_take(hash); + snprintf(hash[id], sizeof(key), "%08x", i); /* Writes the key where sxe_hash_add will read it: id * element size */ + sxe_hash_add(hash, id); + } + + printf("xx32h: hashed %lu 8 byte keys per second\n", KIT_TIMESTAMP_FROM_UNIX_TIME(i) / (kit_timestamp_get() - start_time)); + return 0; +} diff --git a/lib-sxe-hash/test/test-sxe-hash-distribution.c b/lib-sxe-hash/test/test-sxe-hash-distribution.c new file mode 100644 index 0000000..7223239 --- /dev/null +++ b/lib-sxe-hash/test/test-sxe-hash-distribution.c @@ -0,0 +1,71 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "sxe-hash.h" +#include "sxe-log.h" +#include "sxe-pool.h" + +#define HASH_SIZE 10000 +#define NBUCKETS (HASH_SIZE + 2) /* HASH_SIZE + SXE_HASH_BUCKETS_RESERVED */ +#define MAX_ALLOWED_PER_BUCKET_INDEX 7 + +int +main(void) +{ + SXE_HASH * hash; + unsigned i; + unsigned id; + unsigned bucket; + int counter[NBUCKETS]; + + plan_tests(1); + sxe_log_set_level(SXE_LOG_LEVEL_DEBUG); + + memset(counter, 0, NBUCKETS * sizeof(int)); + hash = sxe_hash_new_plus("xxh32", HASH_SIZE, sizeof(SXE_HASH), 0, 8, SXE_HASH_OPTION_COMPUTED_HASH); + + for (i = 0; i < HASH_SIZE; i++) + { + id = sxe_hash_take(hash); + snprintf((char *)&hash[id], 9, "%08x", i); + sxe_hash_add(hash, id); + + bucket = sxe_pool_index_to_state(hash, id); + SXEA1(bucket < NBUCKETS, "Bucket index %u is out of range", bucket); + counter[bucket]++; + + if (counter[bucket] > MAX_ALLOWED_PER_BUCKET_INDEX + 1) { + diag("Count at bucket index %u is greater than %u", bucket, MAX_ALLOWED_PER_BUCKET_INDEX + 1); /* Report the bucket index, as the message says */ + break; + } + } + + is(i, HASH_SIZE, "%u items xxh32 hashed and no bucket has more that %u entries", i, MAX_ALLOWED_PER_BUCKET_INDEX + 1); + sxe_hash_delete(hash); + return exit_status(); +} diff --git a/lib-sxe-hash/test/test-sxe-hash-sha1.c b/lib-sxe-hash/test/test-sxe-hash-sha1.c new file mode 100644 index 0000000..54a3c90 --- /dev/null +++ b/lib-sxe-hash/test/test-sxe-hash-sha1.c @@ -0,0 +1,64 @@ +/* Copyright (c) 2010 Sophos Group.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include
+
+#include "sxe-hash.h"
+#include "sxe-log.h"
+#include "tap.h"
+
+/* A SHA1 as 40 lower case hex digits; sxe_sha1_expected_bytes below holds the same 20 bytes in binary form
+ */
+#define SHA1_HEX "2ce679528627da7780f8a4fec07cb34f902468a0"
+
+static unsigned char sxe_sha1_expected_bytes[] = {0x2c, 0xe6, 0x79, 0x52, 0x86, 0x27, 0xda, 0x77, 0x80, 0xf8,
+                                                  0xa4, 0xfe, 0xc0, 0x7c, 0xb3, 0x4f, 0x90, 0x24, 0x68, 0xa0};
+
+/* Test conversion between hex strings and binary SHA1s in both directions (5 planned tests): a string that is not hex
+ * must be rejected, SHA1_HEX must convert to sxe_sha1_expected_bytes, and converting the result back must give SHA1_HEX.
+ *
+ * @return The TAP exit status
+ */
+int
+main(void)
+{
+    struct SXE_HASH_SHA1 sha1_expected;
+    struct SXE_HASH_SHA1 sha1_got;
+    char                 sha1_in_hex[SXE_HASH_SHA1_AS_HEX_LENGTH + 1];
+
+    plan_tests(5);
+
+    /* from hex to bytes */
+    ok(sxe_hash_sha1_from_hex(&sha1_got, "goofy goober") != SXE_RETURN_OK, "Conversion from hex 'goofy goober' to SHA1 failed");
+    is(sxe_hash_sha1_from_hex(&sha1_got, SHA1_HEX), SXE_RETURN_OK, "Conversion from hex '%s' to SHA1 succeeded", SHA1_HEX);
+
+    memcpy(&sha1_expected, sxe_sha1_expected_bytes, sizeof(sha1_expected));
+
+    if (memcmp(&sha1_got, &sha1_expected, sizeof(struct SXE_HASH_SHA1)) == 0) {
+        pass("SHA1 is as expected");
+    }
+    else {
+        /* Log both values so that a mismatch can be diagnosed from the test output */
+        SXEL1("Expected:");
+        SXED1(&sha1_expected, sizeof(sha1_expected));
+        SXEL1("Got:");
+        SXED1(&sha1_got, sizeof(sha1_got));
+        fail("SHA1 is not as expected");
+    }
+
+    /* from bytes to hex */
+    ok(sxe_hash_sha1_to_hex(&sha1_got, sha1_in_hex, sizeof(sha1_in_hex)) == SXE_RETURN_OK, "sha1_to_hex ran successfully");
+    ok(memcmp(sha1_in_hex, SHA1_HEX, sizeof(SHA1_HEX)) == 0, "sxe_hash_sha1_to_hex is accurate");    /* sizeof includes the '\0' */
+
+    return exit_status();
+}
diff --git a/lib-sxe-hash/test/test-sxe-hash.c b/lib-sxe-hash/test/test-sxe-hash.c
new file mode 100644
index 0000000..c0d2a22
--- /dev/null
+++ b/lib-sxe-hash/test/test-sxe-hash.c
@@ -0,0 +1,214 @@
+/* Copyright (c) 2010 Sophos Group.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include
+#include
+#include
+
+#include "kit-alloc.h"
+#include "sxe-hash.h"
+#include "sxe-hash-private.h"
+#include "sxe-log.h"
+
+
+#define HASH_SIZE 5
+
+// These two sha_keys will map to the same int_key
+#define SHA1_1ST "2ce679528627da7780f8a4fec07cb34f902468a0"
+#define SHA1_2ND "2ce679528627da7780f8a4fec07cb34f902464b8"
+
+#define SHA1_3RD "2ce679528627da7780f8a4fec07cb34f902464b7"
+#define SHA1_4TH "2ce679528627da7780f8a4fec07cb34f902464b6"
+#define SHA1_5TH "2CE679528627DA7780F8A4FEC07CB34F902464B5" /* Support capital hex digits too */
+#define SHA1_6TH "2CE679528627DA7780F8A4FEC07CB34F902464B4"
+
+#define SHA1_BAD "2CE679528627DA7780F8A4FEC07CB34F902464B" /* SHA1_6TH with its last hex digit missing */
+
+static const char *   strings[]      = {"one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"};
+static const unsigned strings_number = sizeof(strings) / sizeof(strings[0]);
+
+/* Exercise set, get and remove on a HASH_SIZE element hash keyed by SHA1s given in hex (23 tests): elements are expected
+ * to be allocated from index 4 down to 0, a sixth set must fail with SXE_HASH_FULL, and removed, never added and
+ * malformed keys must all give SXE_HASH_KEY_NOT_FOUND.
+ */
+static void
+test_hash_sha1(void)
+{
+    const unsigned length = SXE_HASH_SHA1_AS_HEX_LENGTH;
+    SXE_HASH *     hash;
+
+    hash = sxe_hash_new("test-hash", HASH_SIZE);
+
+    is(sxe_hash_set(hash, SHA1_1ST, length, 1), 4, "set keys 1: Inserted at index 4");
+    is(sxe_hash_get(hash, SHA1_1ST, length), 1, "set keys 1: Got correct value for first sha");
+    is(sxe_hash_get(hash, SHA1_2ND, length), SXE_HASH_KEY_NOT_FOUND, "set keys 1: Second sha is not in hash pool yet");
+    is(sxe_hash_get(hash, SHA1_3RD, length), SXE_HASH_KEY_NOT_FOUND, "set keys 1: Third sha is not in hash pool yet");
+
+    is(sxe_hash_set(hash, SHA1_2ND, length, 2), 3, "set keys 2: Inserted at index 3");
+    is(sxe_hash_get(hash, SHA1_1ST, length), 1, "set keys 2: Still got correct value for first sha");
+    is(sxe_hash_get(hash, SHA1_2ND, length), 2, "set keys 2: Got correct value for second sha");
+    is(sxe_hash_get(hash, SHA1_3RD, length), SXE_HASH_KEY_NOT_FOUND, "set keys 2: Third sha is not in hash pool yet");
+
+    is(sxe_hash_set(hash, SHA1_3RD, length, 3), 2, "set keys 3: Inserted at index 2");
+    is(sxe_hash_get(hash, SHA1_1ST, length), 1, "set keys 3: Still got correct value for first sha");
+    is(sxe_hash_get(hash, SHA1_2ND, length), 2, "set keys 3: Still got correct value for second sha");
+    is(sxe_hash_get(hash, SHA1_3RD, length), 3, "set keys 3: Got correct value for third sha");
+
+    is(sxe_hash_set(hash, SHA1_4TH, length, 4), 1, "insert too many keys: Inserted at index 1");
+    is(sxe_hash_set(hash, SHA1_5TH, length, 5), 0, "insert too many keys: Inserted at index 0");
+    is(sxe_hash_set(hash, SHA1_6TH, length, 6), SXE_HASH_FULL, "insert too many keys: Failed to insert key");
+
+    is(sxe_hash_remove(hash, SHA1_1ST, length), 1, "remove keys: Remove returns the correct value");
+    is(sxe_hash_remove(hash, SHA1_2ND, length), 2, "remove keys: Remove returns the correct value");
+    is(sxe_hash_get(hash, SHA1_1ST, length), SXE_HASH_KEY_NOT_FOUND, "remove keys: First sha has been deleted");
+    is(sxe_hash_get(hash, SHA1_2ND, length), SXE_HASH_KEY_NOT_FOUND, "remove keys: Second sha has been deleted");
+    is(sxe_hash_get(hash, SHA1_3RD, length), 3, "remove keys: Still got correct value for third sha");
+
+    is(sxe_hash_remove(hash, SHA1_1ST, length), SXE_HASH_KEY_NOT_FOUND, "remove non-existent key: returns expected value");
+
+    /* for coverage */
+    is(sxe_hash_get(hash, SHA1_BAD, length), SXE_HASH_KEY_NOT_FOUND, "get bad key: returns expected value");
+    is(sxe_hash_remove(hash, SHA1_BAD, length), SXE_HASH_KEY_NOT_FOUND, "remove bad key: returns expected value");
+
+    sxe_hash_delete(hash); /* for coverage */
+}
+
+/* Element type for test_hash_variable_data: the entire 8 byte element is used as the key
+ */
+typedef struct TEST_HASH_STRING_PAYLOAD
+{
+    char value[8];
+} TEST_HASH_STRING_PAYLOAD;
+
+/* Exercise the take/add/look interface on a 10 element hash created with sxe_hash_new_plus (14 tests): fill it with the
+ * strings above, verify that it then reports SXE_HASH_FULL, look up a key that is present and two that are not.
+ */
+static void
+test_hash_variable_data(void)
+{
+    TEST_HASH_STRING_PAYLOAD * hash;
+    unsigned                   i;
+    unsigned                   id;
+
+    hash = sxe_hash_new_plus("test_hash_variable_data", 10, sizeof(TEST_HASH_STRING_PAYLOAD), 0,
+                             sizeof(TEST_HASH_STRING_PAYLOAD),
+                             SXE_HASH_OPTION_UNLOCKED | SXE_HASH_OPTION_COMPUTED_HASH);
+
+    for (i = 0; i < strings_number; i++) {
+        ok((id = sxe_hash_take(hash)) != SXE_HASH_FULL, "Allocated index %u for element %u of %u", id, i + 1, strings_number);
+        strlcpy(hash[id].value, strings[i], sizeof(hash[id].value));
+        sxe_hash_add(hash, id);
+    }
+
+    is(sxe_hash_take(hash), SXE_HASH_FULL, "Hash table is full");
+
+    /* Keys passed to sxe_hash_look are string literals padded with '\0's so that, with the implicit terminator, they are
+     * 8 bytes long
+     */
+    ok(sxe_hash_look(hash, "one\0\0\0\0") != SXE_HASH_KEY_NOT_FOUND, "'one\\0\\0\\0\\0\\0' found in table");
+
+    /* One of these must search a non-empty bucket to ensure full coverage
+     */
+    is(sxe_hash_look(hash, "eleven\0"), SXE_HASH_KEY_NOT_FOUND, "'eleven\\0\\0' correctly not found in table");
+    is(sxe_hash_look(hash, "twelve\0"), SXE_HASH_KEY_NOT_FOUND, "'twelve\\0\\0' correctly not found in table");
+
+    sxe_hash_delete(hash); /* for coverage */
+}
+
+/* Verify sxe_hash_reconstruct (19 tests): fill a HASH_SIZE element hash, reconstruct it, then check that the old keys
+ * are gone, that elements are again allocated from index 4 down, and that keys set again return their new values.
+ */
+static void
+test_hash_sha1_reconstruct(void)
+{
+    const unsigned length = SXE_HASH_SHA1_AS_HEX_LENGTH;
+    SXE_HASH *     hash;
+
+    hash = sxe_hash_new("test-hash-reconstruct", HASH_SIZE);
+
+    is(sxe_hash_set(hash, SHA1_1ST, length, 1), 4, "set keys 1: Inserted at index 4");
+    is(sxe_hash_set(hash, SHA1_2ND, length, 2), 3, "set keys 2: Inserted at index 3");
+    is(sxe_hash_set(hash, SHA1_3RD, length, 3), 2, "set keys 3: Inserted at index 2");
+    is(sxe_hash_set(hash, SHA1_4TH, length, 4), 1, "set keys 4: Inserted at index 1");
+    is(sxe_hash_set(hash, SHA1_5TH, length, 5), 0, "set keys 5: Inserted at index 0");
+
+    /* Reconstruct the hash, reuse the memory block in fact */
+    sxe_hash_reconstruct(hash);
+
+    /* Insert only two entries first */
+    is(sxe_hash_set(hash, SHA1_2ND, length, 2), 4, "set keys 2: Inserted at index 4");
+    is(sxe_hash_set(hash, SHA1_4TH, length, 4), 3, "set keys 4: Inserted at index 3");
+
+    is(sxe_hash_get(hash, SHA1_1ST, length), SXE_HASH_KEY_NOT_FOUND, "reconstruct keys: First sha not available yet");
+    is(sxe_hash_get(hash, SHA1_3RD, length), SXE_HASH_KEY_NOT_FOUND, "reconstruct keys: Third sha not available yet");
+    is(sxe_hash_get(hash, SHA1_5TH, length), SXE_HASH_KEY_NOT_FOUND, "reconstruct keys: Fifth sha not available yet");
+    is(sxe_hash_get(hash, SHA1_2ND, length), 2, "reconstruct keys: Second sha is correct");
+    is(sxe_hash_get(hash, SHA1_4TH, length), 4, "reconstruct keys: Fourth sha is correct");
+
+    /* Insert the others */
+    is(sxe_hash_set(hash, SHA1_3RD, length, 7), 2, "set keys 3: Inserted at index 2");
+    is(sxe_hash_set(hash, SHA1_5TH, length, 8), 1, "set keys 5: Inserted at index 1");
+    is(sxe_hash_set(hash, SHA1_1ST, length, 9), 0, "set keys 1: Inserted at index 0");
+    is(sxe_hash_set(hash, SHA1_6TH, length, 6), SXE_HASH_FULL, "insert too many keys: Failed to insert key");
+
+    is(sxe_hash_get(hash, SHA1_3RD, length), 7, "reconstruct keys: Third sha is correct");
+    is(sxe_hash_get(hash, SHA1_1ST, length), 9, "reconstruct keys: First sha is correct");
+    is(sxe_hash_get(hash, SHA1_5TH, length), 8, "reconstruct keys: Fifth sha is correct");
+
+    sxe_hash_delete(hash); /* for coverage */
+}
+
+/* Replacement hash sum function: the sum is just the leading bytes of the key (at most sizeof(unsigned) of them), with
+ * any bytes not supplied by the key left as zero
+ */
+static unsigned
+my_hash_sum(const void *key, size_t length)
+{
+    unsigned result = 0;
+
+    memcpy(&result, key, length > sizeof(result) ? sizeof(result) : length);
+    return result;
+}
+
+/* Replacement 128 bit hash function: the 16 byte output is the first 16 bytes of the key, zero padded if the key is shorter
+ */
+static void
+my_hash_128(const void *key, size_t length, uint8_t *hash_out)
+{
+    memset(hash_out, 0, 16);
+    memcpy(hash_out, key, length > 16 ? 16 : length);
+}
+
+/* Verify that the sum and 128 bit hash functions can be overridden and then switched back to xxh32/xxh128 (4 tests).
+ * NOTE(review): every call passes a length of 0; presumably this asks the library to use the string's length - confirm
+ */
+static void
+test_hash_override_sum(void)
+{
+    unsigned default_sum = sxe_hash_sum("Hello, world.", 0);
+    uint8_t  default_128[16], my_128[16];
+
+    sxe_hash_override_sum(my_hash_sum);
+    ok(default_sum != sxe_hash_sum("Hello, world.", 0), "After overriding to my_hash_sum, sum is different");
+    sxe_hash_use_xxh32();
+    is(sxe_hash_sum("Hello, world.", 0), default_sum, "After setting back to xxh32, sum is the same");
+
+    sxe_hash_128("Hello, world.", 0, default_128);
+    sxe_hash_override_128(my_hash_128);
+    sxe_hash_128("Hello, world.", 0, my_128);
+    ok(memcmp(default_128, my_128, sizeof(default_128)), "After overriding to my_hash_128, sum is different");
+    sxe_hash_use_xxh128();
+    sxe_hash_128("Hello, world.", 0, my_128);
+    ok(memcmp(default_128, my_128, sizeof(default_128)) == 0, "After setting back to xxh128, sum is the same");
+}
+
+/* Run all four test functions (60 tests) plus a final check that the count reported by kit_memory_allocations is the
+ * same as before they ran
+ */
+int
+main(void)
+{
+    plan_tests(61);
+
+    uint64_t start_allocations = kit_memory_allocations();
+    // KIT_ALLOC_SET_LOG(1); // Turn off when done
+
+    test_hash_sha1();
+    test_hash_variable_data();
+    test_hash_sha1_reconstruct();
+    test_hash_override_sum();
+
+    is(kit_memory_allocations(), start_allocations, "No memory was leaked");
+    return exit_status();
+}
diff --git a/lib-sxe-jitson/GNUmakefile b/lib-sxe-jitson/GNUmakefile
new file mode 100644
index 0000000..504916c
--- /dev/null
+++ b/lib-sxe-jitson/GNUmakefile
@@ -0,0 +1,5 @@
+LIBRARIES = sxe-jitson
+
+include ../dependencies.mak
+
+LINK_FLAGS += -lxxhash
diff --git a/lib-sxe-jitson/sxe-factory.c b/lib-sxe-jitson/sxe-factory.c
new file mode 100644
index 0000000..b9d9c90
--- /dev/null
+++ b/lib-sxe-jitson/sxe-factory.c
@@ -0,0 +1,145 @@
+/* Copyright (c) 2021 Jim Belton
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "sxe-factory.h" + +/** + * Contruct a new factory that uses allocated memory + * + * @param minsize Minimum bytes of space allocated to the factory + * @param maxincr Once the data space reaches this size, it will be increased by this amount + */ +void +sxe_factory_alloc_make(struct sxe_factory *factory, size_t minsize, size_t maxincr) +{ + factory->len = 0; + factory->size = minsize ?: 8; + factory->maxincr = maxincr ?: 4096; + factory->data = NULL; +} + +/** + * Reserve space in a factory. Reserved space always immediately follows the space already used. + * + * @param factory The factory + * @param len The amount of space to reserve + * + * @return A pointer to the reserved space or NULL on allocation failure + */ +char * +sxe_factory_reserve(struct sxe_factory *factory, size_t len) +{ + char *newdata; + size_t needed = factory->len + len + 1; + + if (needed > factory->size || factory->data == NULL) { + size_t newsize = factory->size; + + while (newsize < needed) + newsize = newsize >= factory->maxincr ? 
newsize + factory->maxincr : newsize << 1; + + if ((newdata = MOCKERROR(sxe_factory_reserve, NULL, ENOMEM, kit_realloc(factory->data, newsize))) == NULL) + return NULL; + + factory->data = newdata; + factory->size = newsize; + } + + return &factory->data[factory->len]; +} + +/** + * Commit data to a factory. Data must be stored in reserved space prior to commiting. + * + * @param factory The factory + * @param len The amount of data to commit + */ +void +sxe_factory_commit(struct sxe_factory *factory, size_t len) +{ + factory->len += len; + factory->data[factory->len] = '\0'; +} + +/** + * Add data to a factory. + * + * @param factory The factory + * @param data Data to be added + * @param len The amount of data to add or 0 to use the strlen of the data + * + * @return Amount of data added or -1 on out of memory + */ +ssize_t +sxe_factory_add(struct sxe_factory *factory, const char *data, size_t len) +{ + char *reservation; + + if (!(reservation = sxe_factory_reserve(factory, len = len ?: strlen(data)))) // SonarQube False Positive + return -1; + + memcpy(reservation, data, len); + sxe_factory_commit(factory, len); + return len; +} + +/** + * Look at the current data in a factory + * + * @param factory The factory + * @param len_out Pointer to a variable in which to save the length or NULL. + * + * @return The current data or NULL if no data has been allocated + */ +void * +sxe_factory_look(struct sxe_factory *factory, size_t *len_out) +{ + if (len_out) + *len_out = factory->len; + + return factory->data; +} + +/** + * Remove the current data from a factory + * + * @param factory The factory + * @param len_out Pointer to a variable in which to save the length or NULL. 
+ * + * @return The data removed from the factory or NULL if no data has been allocated + */ +void * +sxe_factory_remove(struct sxe_factory *factory, size_t *len_out) +{ + char *data = sxe_factory_look(factory, len_out); + + if (factory->len + 1 < factory->size) + data = kit_realloc(factory->data, factory->len + 1) ?: factory->data; + + factory->data = NULL; + factory->len = 0; + return data; +} diff --git a/lib-sxe-jitson/sxe-factory.h b/lib-sxe-jitson/sxe-factory.h new file mode 100644 index 0000000..56b588d --- /dev/null +++ b/lib-sxe-jitson/sxe-factory.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2021 Jim Belton + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#ifndef SXE_FACTORY_H
+#define SXE_FACTORY_H
+
+#include
+
+/* Memory allocating factory
+ */
+struct sxe_factory {
+    size_t len;        /* Bytes of data committed so far, not counting the trailing '\0' */
+    size_t size;       /* Bytes allocated for data, or the size of the first allocation while data is NULL */
+    size_t maxincr;    /* Size up to which the allocation is doubled and beyond which it grows by this amount */
+    char  *data;       /* The allocated data or NULL if nothing is currently allocated */
+};
+
+#include "sxe-factory-proto.h"
+
+#endif
diff --git a/lib-sxe-jitson/sxe-jitson-const.c b/lib-sxe-jitson/sxe-jitson-const.c
new file mode 100644
index 0000000..82d5350
--- /dev/null
+++ b/lib-sxe-jitson/sxe-jitson-const.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2023 Cisco Systems, Inc. and its affiliates
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include
+
+#include "sxe-jitson-const.h"
+#include "sxe-dict.h"
+
+uint32_t sxe_jitson_const_type_cast = SXE_JITSON_TYPE_INVALID; // Used by the parser to detect a cast operator
+
+static const struct sxe_jitson *jitson_constants = NULL;         // Constants passed to sxe_jitson_const_initialize
+static struct sxe_dict *jitson_builtin_symbols = NULL;           // Symbol table holding only true, false and null
+static struct sxe_dict *jitson_all_symbols = NULL;               // Builtins plus any constants and registered casts
+static struct sxe_jitson json_builtins[3];                       // The values of true, false and null, in that order
+
+/* Internal function called by sxe_jitson_initialize to create the symbol table for JSON builtins
+ */
+void
+sxe_jitson_builtins_initialize_private(void)
+{
+    const void **value_ptr;
+
+    SXEA1(!jitson_builtin_symbols, "Can't initialize jitson builtins twice");
+    SXEA1(jitson_builtin_symbols = sxe_dict_new(3), "Failed to allocate symbol table for JSON builtins");
+
+    /* Construct JSON builtin values
+     */
+    sxe_jitson_make_bool(&json_builtins[0], true);
+    sxe_jitson_make_bool(&json_builtins[1], false);
+    sxe_jitson_make_null(&json_builtins[2]);
+
+    /* Map each builtin's name (sizeof - 1 excludes the '\0') to its value
+     */
+    value_ptr = sxe_dict_add(jitson_builtin_symbols, "true", sizeof("true") - 1);
+    *value_ptr = &json_builtins[0];
+    value_ptr = sxe_dict_add(jitson_builtin_symbols, "false", sizeof("false") - 1);
+    *value_ptr = &json_builtins[1];
+    value_ptr = sxe_dict_add(jitson_builtin_symbols, "null", sizeof("null") - 1);
+    *value_ptr = &json_builtins[2];
+
+    jitson_all_symbols = jitson_builtin_symbols; // Until sxe_jitson_const_initialize is called
+}
+
+/* Internal function called by sxe_jitson_finalize to free space used for JSON builtins
+ */
+void
+sxe_jitson_builtins_finalize_private(void)
+{
+    if (jitson_all_symbols == jitson_builtin_symbols)    // Don't leave a pointer to the table that is about to be freed
+        jitson_all_symbols = NULL;
+
+    sxe_dict_free(jitson_builtin_symbols);
+    jitson_builtin_symbols = NULL;
+}
+
+/**
+ * Initialize the constants module
+ *
+ * @param constants A set of identifiers to be replaced with constant jitson values when parsing or NULL for JSON builtins only.
+ *
+ * @note Values are duplicated in the parsed jitson. If you want to include a large object or array, consider making the value
+ *       a reference to it if it may appear more than once in the parsed JSON, but then beware the lifetime of the referenced
+ *       object or array.
+ *
+ * @note The pointer is saved and passed to sxe_jitson_free by sxe_jitson_const_finalize.
+ */
+void
+sxe_jitson_const_initialize(const struct sxe_jitson *constants)
+{
+    const void **value_ptr;
+    const char *name;
+    size_t len, num_const;
+    unsigned i;
+
+    SXEA1(jitson_builtin_symbols, "Can't initialize jitson constants before calling sxe_jitson_initialize");
+    SXEA1(jitson_all_symbols == jitson_builtin_symbols, "Can't initialize jitson constants twice");
+
+    jitson_constants = constants;
+    sxe_jitson_flags |= SXE_JITSON_FLAG_ALLOW_CONSTS;
+    num_const = constants ? sxe_jitson_len(constants) : 0;
+    SXEA1(jitson_all_symbols = sxe_dict_new(3 + num_const), "Failed to allocate symbol table for JSON builtins and constants");
+
+    /* The builtins are added to the new table too, so a single lookup finds both builtins and constants
+     */
+    value_ptr = sxe_dict_add(jitson_all_symbols, "true", sizeof("true") - 1);
+    *value_ptr = &json_builtins[0];
+    value_ptr = sxe_dict_add(jitson_all_symbols, "false", sizeof("false") - 1);
+    *value_ptr = &json_builtins[1];
+    value_ptr = sxe_dict_add(jitson_all_symbols, "null", sizeof("null") - 1);
+    *value_ptr = &json_builtins[2];
+
+    /* Skip the first jitson, then walk num_const pairs, each a name string immediately followed by its value.
+     * NOTE(review): this looks like the layout of an unindexed jitson object - confirm
+     */
+    for (constants++, i = 0; i < num_const; i++) {
+        name = sxe_jitson_get_string(constants, &len);
+        constants += sxe_jitson_size(constants);
+        value_ptr = sxe_dict_add(jitson_all_symbols, name, len);
+        *value_ptr = constants;
+        constants += sxe_jitson_size(constants);
+    }
+
+    if (sxe_jitson_const_type_cast == SXE_JITSON_TYPE_INVALID) // Create the cast pseudo type if not already done
+        sxe_jitson_const_type_cast = sxe_jitson_type_register("cast", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+}
+
+/* Finalize the constants module, freeing the constants and the combined symbol table. Must only be called after
+ * sxe_jitson_const_initialize.
+ */
+void
+sxe_jitson_const_finalize(void)
+{
+    SXEA1(jitson_all_symbols && jitson_all_symbols != jitson_builtin_symbols,
+          "Can't finalize constants if they weren't initialized");
+    sxe_jitson_free(jitson_constants);
+    sxe_dict_free(jitson_all_symbols);
+    jitson_all_symbols = jitson_builtin_symbols; // Revert back to the JSON builtins
+}
+
+/**
+ * Register a type cast operator
+ *
+ * @param cast A jitson used to hold the cast function for the parser
+ * @param type Name of the type
+ * @param func The cast function to be applied to the JSON that follows the operator
+ *
+ * @note The cast jitson itself is stored in the symbol table, so it must remain valid while the table is in use
+ */
+void
+sxe_jitson_const_register_cast(struct sxe_jitson *cast, const char *type, sxe_jitson_castfunc_t func)
+{
+    const void **value_ptr;
+
+    SXEA1(sxe_jitson_const_type_cast != SXE_JITSON_TYPE_INVALID,
+          "sxe_jitson_const_initialize must be called before sxe_jitson_const_register_cast");
+    cast->type = sxe_jitson_const_type_cast;
+    cast->castfunc = func;
+    value_ptr = sxe_dict_add(jitson_all_symbols, type, strlen(type)); // SonarQube False Positive
+    SXEA1(!*value_ptr, "Attempt to reregister cast for type '%s'", type);
+    *value_ptr = cast;
+}
+
+/**
+ * Look up a name and return the matching builtin or constant value
+ *
+ * @param flags If SXE_JITSON_FLAG_ALLOW_CONSTS is set, return constants from initialization, else return JSON builtins only
+ * @param name The name of the builtin or constant
+ * @param len The length of the name
+ *
+ * @return The constant value, or NULL if not found
+ */
+const struct sxe_jitson *
+sxe_jitson_const_get(unsigned flags, const char *name, size_t len)
+{
+    return sxe_dict_find(flags & SXE_JITSON_FLAG_ALLOW_CONSTS ? jitson_all_symbols : jitson_builtin_symbols, name, len);
+}
+
diff --git a/lib-sxe-jitson/sxe-jitson-const.h b/lib-sxe-jitson/sxe-jitson-const.h
new file mode 100644
index 0000000..3ecfe4d
--- /dev/null
+++ b/lib-sxe-jitson/sxe-jitson-const.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2023 Cisco Systems, Inc.
and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#ifndef SXE_JITSON_CONST_H +#define SXE_JITSON_CONST_H + +#include "sxe-jitson.h" + +extern uint32_t sxe_jitson_const_type_cast; + +#include "sxe-jitson-const-proto.h" + +#endif diff --git a/lib-sxe-jitson/sxe-jitson-ident.c b/lib-sxe-jitson/sxe-jitson-ident.c new file mode 100644 index 0000000..88b125f --- /dev/null +++ b/lib-sxe-jitson/sxe-jitson-ident.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. 
and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include
+#include
+
+#include "sxe-jitson-ident.h"
+
+uint32_t SXE_JITSON_TYPE_IDENT = SXE_JITSON_TYPE_INVALID; // Need to register the type to get a valid value
+
+static const struct sxe_jitson * (*sxe_jitson_ident_lookup)(const char *name, size_t len) = NULL; // name -> value function
+
+/* Hook the identifier lookup function, returning the previous function or NULL if no previous function was set
+ */
+sxe_jitson_ident_lookup_t
+sxe_jitson_ident_lookup_hook(const struct sxe_jitson *(*my_ident_lookup)(const char *, size_t))
+{
+    sxe_jitson_ident_lookup_t prev = sxe_jitson_ident_lookup;
+
+    sxe_jitson_ident_lookup = my_ident_lookup;
+    return prev;
+}
+
+/**
+ * Called back from sxe-jitson-stack when an unrecognized identifier has been found
+ *
+ * @param stack The stack to push the identifier on to
+ * @param ident The identifier, which need not be '\0' terminated
+ * @param len   The length of the identifier
+ *
+ * @return true on success or false if the stack could not be expanded
+ */
+bool
+sxe_jitson_ident_push_stack(struct sxe_jitson_stack *stack, const char *ident, size_t len)
+{
+    unsigned idx;
+
+    SXEA6((unsigned)((SXE_JITSON_STRING_SIZE + len) / SXE_JITSON_TOKEN_SIZE)
+          == (SXE_JITSON_STRING_SIZE + len) / SXE_JITSON_TOKEN_SIZE, "String length overflows the maximum jitson count");
+
+    /* Reserve space for the fixed header, the identifier and the trailing '\0', rounded up to the nearest jitson
+     */
+    if ((idx = sxe_jitson_stack_expand(stack, (unsigned)((SXE_JITSON_STRING_SIZE + len) / SXE_JITSON_TOKEN_SIZE) + 1))
+        == SXE_JITSON_STACK_ERROR)
+        return false;
+
+    stack->jitsons[idx].type = SXE_JITSON_TYPE_IDENT;
+    stack->jitsons[idx].len = (uint32_t)len;
+    memcpy(&stack->jitsons[idx].string, ident, len);
+    stack->jitsons[idx].string[len] = '\0';
+    return true;
+}
+
+/* Look up the value of an identifier using the hooked lookup function
+ *
+ * @return The value, or NULL if no lookup function is hooked or the lookup function returns NULL
+ */
+static const struct sxe_jitson *
+sxe_jitson_ident_get_value(const struct sxe_jitson *jitson)
+{
+    const char *ident;
+    size_t len;
+
+    if (!sxe_jitson_ident_lookup) {
+        SXEL2(": No lookup function registered");
+        return NULL;
+    }
+
+    ident = sxe_jitson_ident_get_name(jitson, &len);
+    return sxe_jitson_ident_lookup(ident, len);
+}
+
+/* Test an identifier by testing the value it refers to; SXE_JITSON_TEST_ERROR if the value can't be looked up
+ */
+static int
+sxe_jitson_ident_test(const struct sxe_jitson *jitson)
+{
+    return (jitson = sxe_jitson_ident_get_value(jitson)) ? sxe_jitson_test(jitson) : SXE_JITSON_TEST_ERROR;
+}
+
+/* Identifiers are stored like strings: Up to 8 chars in the first jitson, then up to 16 in each subsequent jitson.
+ * The calculation matches the number of jitsons reserved by sxe_jitson_ident_push_stack.
+ */
+static uint32_t
+sxe_jitson_ident_size(const struct sxe_jitson *jitson)
+{
+    return 1 + (jitson->len + SXE_JITSON_STRING_SIZE) / SXE_JITSON_TOKEN_SIZE;
+}
+
+//static size_t
+//sxe_jitson_ident_len(const struct sxe_jitson *jitson)
+//{
+// return types[sxe_jitson_get_type(jitson->jitref)].len(jitson->jitref);
+//}
+
+/* Append an identifier's name, unquoted, to the JSON being built in a factory
+ *
+ * @return The factory's data so far or NULL if space could not be reserved
+ */
+static char *
+sxe_jitson_ident_build_json(const struct sxe_jitson *jitson, struct sxe_factory *factory)
+{
+    char *buf;
+    size_t len = strlen(jitson->string); // SonarQube False Positive
+
+    if (!(buf = sxe_factory_reserve(factory, len)))
+        return NULL;
+
+    memcpy(buf, jitson->string, len);
+    sxe_factory_commit(factory, len);
+    return sxe_factory_look(factory, NULL);
+}
+
+/* Private hook into the jitson parser
+ */
+extern bool (*sxe_jitson_stack_push_ident)(struct sxe_jitson_stack *stack, const char *ident, size_t len);
+
+/* Call at initialization after sxe_jitson_type_init to register the identifier type
+ *
+ * @return The identifier type, which is also saved in SXE_JITSON_TYPE_IDENT. Calling again returns the same type.
+ */
+uint32_t
+sxe_jitson_ident_register(void)
+{
+    if (SXE_JITSON_TYPE_IDENT != SXE_JITSON_TYPE_INVALID) // Already registered
+        return SXE_JITSON_TYPE_IDENT;
+
+    SXE_JITSON_TYPE_IDENT = sxe_jitson_type_register("identifier", sxe_jitson_free_base, sxe_jitson_ident_test,
+                                                     sxe_jitson_ident_size, NULL, NULL, sxe_jitson_ident_build_json, NULL, NULL);
+    sxe_jitson_stack_push_ident = sxe_jitson_ident_push_stack; // Hook in to the parser
+    sxe_jitson_flags |= SXE_JITSON_FLAG_ALLOW_IDENTS;
+    return SXE_JITSON_TYPE_IDENT;
+}
+
+/* Get the name of an identifier
+ *
+ * @param ident   The identifier jitson
+ * @param len_out Pointer to a variable in which to save the length of the name or NULL
+ *
+ * @return The name, stored within the jitson itself
+ */
+const char *
+sxe_jitson_ident_get_name(const struct sxe_jitson *ident, size_t *len_out)
+{
+    if (len_out)
+        *len_out = ident->len;
+
+    return ident->string;
+}
diff --git a/lib-sxe-jitson/sxe-jitson-ident.h b/lib-sxe-jitson/sxe-jitson-ident.h
new file mode 100644
index 0000000..57dbb68
--- /dev/null +++ b/lib-sxe-jitson/sxe-jitson-ident.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#ifndef SXE_JITSON_IDENT_H +#define SXE_JITSON_IDENT_H + +#include "sxe-jitson.h" + +typedef const struct sxe_jitson *(*sxe_jitson_ident_lookup_t)(const char *, size_t); // Prototype of a lookup function + +extern uint32_t SXE_JITSON_TYPE_IDENT; + +#include "sxe-jitson-ident-proto.h" + +#endif diff --git a/lib-sxe-jitson/sxe-jitson-in.c b/lib-sxe-jitson/sxe-jitson-in.c new file mode 100644 index 0000000..5ba5ded --- /dev/null +++ b/lib-sxe-jitson/sxe-jitson-in.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. 
and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include
+
+#include "kit-sortedarray.h"
+#include "sxe-jitson-in.h"
+#include "sxe-jitson-oper.h"
+
+unsigned sxe_jitson_oper_in = 0;    // Identifier of the IN operator; 0 until sxe_jitson_in_init has been called
+
+/* Set by sxe_jitson_in_array immediately before searching an indexed array so that compare_value_to_element can turn the
+ * offsets held in the index into element pointers. Thread local, so concurrent searches on other threads don't interfere.
+ */
+static __thread const struct sxe_jitson *array;    // Pointer to indexed array
+
+/* Compare a value to an indexed array element
+ *
+ * @param void_value          The jitson value being searched for
+ * @param void_element_offset Pointer to a uint32_t offset (in jitsons, relative to 'array') of the element to compare to
+ *
+ * @return Whatever sxe_jitson_cmp returns for the value and the element
+ */
+static int
+compare_value_to_element(const void *void_value, const void *void_element_offset)
+{
+    const struct sxe_jitson *value   = void_value;
+    const struct sxe_jitson *element = &array[*(const uint32_t *)void_element_offset];
+
+    return sxe_jitson_cmp(value, element);
+}
+
+/* Implementation of IN for sxe-jitson arrays
+ *
+ * @param left  The value to look for
+ * @param right The array to look in
+ *
+ * @return sxe_jitson_true if an equal element is found, the containing element if the value is found inside an element of
+ *         another type, sxe_jitson_null if not found, or NULL if the sorted search fails
+ */
+const struct sxe_jitson *
+sxe_jitson_in_array(const struct sxe_jitson *left, const struct sxe_jitson *right)
+{
+    struct kit_sortedarray_class elem_type;
+    const struct sxe_jitson     *element, *result;
+    size_t                       len;
+    unsigned                     i;
+    uint32_t                     left_type;
+
+    SXEA6(sxe_jitson_get_type(right) == SXE_JITSON_TYPE_ARRAY,
+          "Right hand side of an array IN expression cannot be JSON type %s", sxe_jitson_get_type_as_str(right));
+
+    if (right->len == 0)    // Nothing can be in an empty array
+        return sxe_jitson_null;
+
+    left_type = sxe_jitson_get_type(left);
+
+    /* If the array is ordered and of one homogenous type and the type being looked up is the type of the array, use bsearch for
+     * O(log n).
+     */
+    if ((right->type & SXE_JITSON_TYPE_IS_ORD) && (right->type & SXE_JITSON_TYPE_IS_HOMO)
+     && left_type == sxe_jitson_get_type(&right[1])) {
+        unsigned idx;
+        bool     match;
+
+        elem_type.keyoffset = 0;
+        elem_type.fmt       = NULL;
+        elem_type.flags     = KIT_SORTEDARRAY_CMP_CAN_FAIL;
+
+        if (right->type & SXE_JITSON_TYPE_IS_UNIF) {    // If array is of uniform elements, it's simple
+            elem_type.size = right->uniform.size;
+            elem_type.cmp  = (int (*)(const void *, const void *))sxe_jitson_cmp;
+            idx            = kit_sortedarray_find(&elem_type, &right[1], right->len, left, &match);
+        }
+        else {
+            if (!(right->type & SXE_JITSON_TYPE_INDEXED))    // If the array needs indexing
+                sxe_jitson_array_get_element(right, 0);
+
+            array          = right;    // Must be set before the find: compare_value_to_element reads it
+            elem_type.size = sizeof(left->index[0]);
+            elem_type.cmp  = compare_value_to_element;
+            idx            = kit_sortedarray_find(&elem_type, array->index, array->len, left, &match);
+        }
+
+        if (idx == ~0U)    // Error occurred in find
+            return NULL;
+
+        return match ? sxe_jitson_true : sxe_jitson_null;
+    }
+
+    for (i = 0, len = sxe_jitson_len(right); i < len; i++) {    // For each element in the array
+        element = sxe_jitson_array_get_element(right, i);
+
+        if (left_type == sxe_jitson_get_type(element)) {
+            if (sxe_jitson_eq(left, element) == SXE_JITSON_TEST_TRUE)
+                return sxe_jitson_true;    // Return true so false values don't cause false negatives
+        }
+
+        /* If not the same type, look for the LHS value in the element (transitive IN operation)
+         * NOTE(review): a NULL (error) result from the nested IN is treated the same as not found - confirm intended
+         */
+        else if ((result = sxe_jitson_in(left, element)) && sxe_jitson_get_type(result) != SXE_JITSON_TYPE_NULL)
+            return element;    // Safe to return, because a containing value cannot test false
+    }
+
+    return sxe_jitson_null;
+}
+
+/* Default implementation of IN for sxe-jitson standard types
+ *
+ * @param left  The value to look for; must be a string when right is an object or a string
+ * @param right The value to look in: an object (key lookup), a string (substring search) or null (never contains anything)
+ *
+ * @return For an object, the member's value or sxe_jitson_null if there is no such key; for a string, sxe_jitson_true or
+ *         sxe_jitson_null; for null, sxe_jitson_null; NULL on a type error
+ */
+static const struct sxe_jitson *
+sxe_jitson_in_default(const struct sxe_jitson *left, const struct sxe_jitson *right)
+{
+    const char *string;
+    size_t      len;
+    uint32_t    json_type;
+
+    switch (json_type = sxe_jitson_get_type(right)) {
+    case SXE_JITSON_TYPE_OBJECT:
+    case SXE_JITSON_TYPE_STRING:
+        if (sxe_jitson_get_type(left) != SXE_JITSON_TYPE_STRING) {
+            SXEL2(": invalid check for a JSON value of type %s in a%s", sxe_jitson_type_to_str(sxe_jitson_get_type(left)),
+                  json_type == SXE_JITSON_TYPE_OBJECT ? "n object" : " string");
+            return NULL;
+        }
+
+        /* Only for objects, the value is returned and may be 0 or "", which are treated as false. This is done so that IN can
+         * be used as a safe [] operator. If you must test whether a key is in an object, use '(key IN object) != null'
+         */
+        if (json_type == SXE_JITSON_TYPE_OBJECT) {
+            string = sxe_jitson_get_string(left, &len);
+            return sxe_jitson_object_get_member(right, string, len) ?: sxe_jitson_null;
+        }
+
+        /* For strings, IN is a substring test
+         */
+        string = strstr(sxe_jitson_get_string(right, NULL), sxe_jitson_get_string(left, NULL));
+        return string ? sxe_jitson_true : sxe_jitson_null;
+
+    case SXE_JITSON_TYPE_NULL:    // Allow "element IN (member IN object)" when member IN object is NULL.
+        return sxe_jitson_null;
+
+    default:
+        SXEL2(": invalid check for inclusion in a JSON value of type %s", sxe_jitson_type_to_str(json_type));
+        return NULL;
+    }
+}
+
+/* Initialize the SXE in operator: register IN as a binary operator that is dispatched on the type of its right hand
+ * argument, with sxe_jitson_in_default as the default and sxe_jitson_in_array as the implementation for arrays
+ */
+void
+sxe_jitson_in_init(void)
+{
+    union sxe_jitson_oper_func func_default = {.binary = sxe_jitson_in_default},
+                               func_array   = {.binary = sxe_jitson_in_array};
+
+    sxe_jitson_oper_in = sxe_jitson_oper_register("IN", SXE_JITSON_OPER_BINARY | SXE_JITSON_OPER_TYPE_RIGHT, func_default);
+    sxe_jitson_oper_add_to_type(sxe_jitson_oper_in, SXE_JITSON_TYPE_ARRAY, func_array);
+}
+
+/**
+ * Determine whether one json value is in another
+ *
+ * @param left  The key or value to be checked for
+ * @param right The value to be looked in
+ *
+ * @return The value found or sxe_jitson_true if found, sxe_jitson_null if not, or NULL on error
+ *
+ * @note The value found may test as false (e.g. '"a" IN {"a":0}'); CRL will only treat a null return from IN as false,
+ *       but if you save the result of an IN operation to a variable, be aware that it might have a value like 0 or "".
+ */
+const struct sxe_jitson *
+sxe_jitson_in(const struct sxe_jitson *left, const struct sxe_jitson *right)
+{
+    SXEA6(sxe_jitson_oper_in, "sxe_jitson_in_init has not been called");
+    return sxe_jitson_oper_apply_binary(left, sxe_jitson_oper_in, right);
+}
+
+
diff --git a/lib-sxe-jitson/sxe-jitson-in.h b/lib-sxe-jitson/sxe-jitson-in.h
new file mode 100644
index 0000000..10704b4
--- /dev/null
+++ b/lib-sxe-jitson/sxe-jitson-in.h
@@ -0,0 +1,10 @@
+#ifndef SXE_JITSON_IN_H
+#define SXE_JITSON_IN_H
+
+#include
+
+extern unsigned sxe_jitson_oper_in;
+
+#include "sxe-jitson-in-proto.h"
+
+#endif
diff --git a/lib-sxe-jitson/sxe-jitson-intersect.c b/lib-sxe-jitson/sxe-jitson-intersect.c
new file mode 100644
index 0000000..acf1a87
--- /dev/null
+++ b/lib-sxe-jitson/sxe-jitson-intersect.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#include "kit-mockfail.h" +#include "kit-sortedarray.h" +#include "sxe-jitson-in.h" +#include "sxe-jitson-intersect.h" +#include "sxe-jitson-oper.h" + +unsigned sxe_jitson_oper_intersect = 0; +unsigned sxe_jitson_oper_intersect_test = 0; + +static __thread const struct sxe_jitson *array_left; // The indexed array in which the visited elements are found +static __thread const struct sxe_jitson *array_right; // The indexed array being intersected with it +static __thread struct sxe_jitson_stack iou; + +static int +indexed_element_cmp(const void *void_left, const void *void_right) +{ + const struct sxe_jitson *elem_left = &array_left[ *(const unsigned *)void_left]; + const struct sxe_jitson *elem_right = &array_right[*(const unsigned *)void_right]; + + return sxe_jitson_cmp(elem_left, elem_right); +} + +static inline bool +jitson_stack_add(struct sxe_jitson_stack *stack, const struct sxe_jitson *element) +{ + return sxe_jitson_size(element) == 1 ? 
sxe_jitson_stack_add_dup(stack, element) : sxe_jitson_stack_add_reference(stack, element); +} + +static bool +intersect_add_indexed_element(void *void_stack, const void *void_element) +{ + struct sxe_jitson_stack *stack = void_stack; + const struct sxe_jitson *element = &array_left[*(const unsigned *)void_element]; + + return MOCKERROR(SXE_JITSON_INTERSECT_ADD_INDEXED, false, ENOMEM, jitson_stack_add(stack, element)); +} + +static bool +intersect_add_element(void *void_stack, const void *element) +{ + struct sxe_jitson_stack *stack = void_stack; + + return MOCKERROR(SXE_JITSON_INTERSECT_ADD, false, ENOMEM, jitson_stack_add(stack, element)); +} + +static struct sxe_jitson_stack * +get_stack_open_array(void) +{ + struct sxe_jitson_stack *stack = sxe_jitson_stack_get_thread(); + + sxe_jitson_stack_borrow(stack, &iou); + + /* Open an empty array for the intersection. + */ + if (!MOCKERROR(SXE_JITSON_INTERSECT_OPEN, false, ENOMEM, sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY))) { + SXEL2(": Failed to open array for result of INTERSECT expression"); + sxe_jitson_stack_return(stack, &iou); + return NULL; + } + + return stack; +} + +static bool +add_to_array(struct sxe_jitson_stack *stack, const struct sxe_jitson *element) +{ + if (!MOCKERROR(SXE_JITSON_INTERSECT_ADD, false, ENOMEM, jitson_stack_add(stack, element))) { + SXEL2(": Failed to add a duplicate of or reference to an element in an INTERSECT expression"); + return false; + } + + return true; +} + +static const struct sxe_jitson * +close_array_and_get(struct sxe_jitson_stack *stack) +{ + const struct sxe_jitson *jitson; + + sxe_jitson_stack_close_collection(stack); + + if (!(jitson = MOCKERROR(SXE_JITSON_INTERSECT_GET, NULL, ENOMEM, sxe_jitson_stack_get_jitson(stack)))) + SXEL2(": Failed to get array for result of INTERSECT expression"); + + sxe_jitson_stack_return(stack, &iou); + return jitson; +} + +/* Implementation of INTERSECT for an ordered sxe-jitson array type + */ +static const struct 
sxe_jitson * +sxe_jitson_intersect_ordered_array(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + struct kit_sortedarray_class elem_type; + struct sxe_jitson_stack *stack; + const struct sxe_jitson *elem_lhs, *elem_rhs, *json = NULL; + size_t i, len_lhs; + + SXEA6(sxe_jitson_get_type(right) == SXE_JITSON_TYPE_ARRAY, + "Right hand side of an array INTERSECT expression cannot be JSON type %s", sxe_jitson_get_type_as_str(right)); + SXEA6(right->type & SXE_JITSON_TYPE_IS_ORD, "Right hand side of an ordered array INTERSECT expression must be ordered"); + + if (sxe_jitson_get_type(left) != SXE_JITSON_TYPE_ARRAY) { + SXEL2(": Left hand side of an INTERSECT expression cannot be JSON type %s", sxe_jitson_get_type_as_str(left)); + return NULL; + } + + if (!(stack = get_stack_open_array())) + return NULL; + + len_lhs = sxe_jitson_len(left); + + if (left->type & SXE_JITSON_TYPE_IS_ORD) { // Left hand size is ordered + elem_type.keyoffset = 0; + elem_type.fmt = NULL; + elem_type.value = stack; + elem_type.flags = KIT_SORTEDARRAY_CMP_CAN_FAIL; + + /* If both arrays contain uniformly sized elements + */ + if ((left->type & SXE_JITSON_TYPE_IS_UNIF) && (right->type & SXE_JITSON_TYPE_IS_UNIF)) { + if (left->uniform.size != right->uniform.size) + goto EARLY_OUT; + + elem_type.size = left->uniform.size; + elem_type.cmp = (int (*)(const void *, const void *))sxe_jitson_cmp; + elem_type.visit = intersect_add_element; + + if (!kit_sortedarray_intersect(&elem_type, &left[1], left->len, &right[1], right->len)) + goto ERROR_OUT; + + goto EARLY_OUT; + } + + /* If both arrays contain non-uniformly sized elements + */ + if (!(left->type & SXE_JITSON_TYPE_IS_UNIF) && !(right->type & SXE_JITSON_TYPE_IS_UNIF)) { + if (!(left->type & SXE_JITSON_TYPE_INDEXED)) // If the left array isn't indexed, index it + sxe_jitson_array_get_element(left, 0); + + if (!(right->type & SXE_JITSON_TYPE_INDEXED)) // If the right array isn't indexed, index it + sxe_jitson_array_get_element(right, 0); 
+ + array_left = left; + array_right = right; + elem_type.size = sizeof(left->index[0]); + elem_type.cmp = indexed_element_cmp; + elem_type.visit = intersect_add_indexed_element; + + if (!kit_sortedarray_intersect(&elem_type, left->index, left->len, right->index, right->len)) + goto ERROR_OUT; /* COVERAGE EXCLUSION: Ordered arrays of non-uniform size with incomparable elements */ + + goto EARLY_OUT; + } + } + + for (i = 0; i < len_lhs; i++) { + elem_lhs = sxe_jitson_array_get_element(left, i); + + if ((elem_rhs = sxe_jitson_in(elem_lhs, right)) == NULL // Error in IN operation + || (elem_rhs != sxe_jitson_null && !add_to_array(stack, elem_lhs))) // Found but failed to add to result array + goto ERROR_OUT; + } + +EARLY_OUT: + if ((json = close_array_and_get(stack))) + return json; + +ERROR_OUT: + sxe_jitson_stack_clear(stack); + return NULL; +} + +/* Implementation of INTERSECT for sxe-jitson array type + */ +static const struct sxe_jitson * +sxe_jitson_intersect_array(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + struct sxe_jitson_stack *stack; + const struct sxe_jitson *elem_lhs, *elem_rhs, *json = NULL; + size_t i, j, len_lhs, len_rhs; + int ret; + + SXEA6(sxe_jitson_get_type(right) == SXE_JITSON_TYPE_ARRAY, + "Right hand side of an array INTERSECT expression cannot be JSON type %s", sxe_jitson_get_type_as_str(right)); + + if (right->type & SXE_JITSON_TYPE_IS_ORD) + return sxe_jitson_intersect_ordered_array(left, right); + else if (left->type & SXE_JITSON_TYPE_IS_ORD) + return sxe_jitson_intersect_ordered_array(right, left); + + if (sxe_jitson_get_type(left) != SXE_JITSON_TYPE_ARRAY) { + SXEL2(": Left hand side of an INTERSECT expression cannot be JSON type %s", sxe_jitson_get_type_as_str(left)); + return NULL; + } + + if (!(stack = get_stack_open_array())) + return NULL; + + len_lhs = sxe_jitson_len(left); + + for (i = 0; i < len_lhs; i++) { + elem_lhs = sxe_jitson_array_get_element(left, i); + + for (j = 0, len_rhs = sxe_jitson_len(right); 
j < len_rhs; j++) { + elem_rhs = sxe_jitson_array_get_element(right, j); + + if ((ret = sxe_jitson_eq(elem_lhs, elem_rhs)) == SXE_JITSON_TEST_ERROR) { + SXEL2(": Failed to compare elements of types %s and %s when INTERSECTing arrays", + sxe_jitson_get_type_as_str(elem_lhs), sxe_jitson_get_type_as_str(elem_rhs)); + goto ERROR_OUT; + } + else if (ret == SXE_JITSON_TEST_TRUE && !add_to_array(stack, elem_lhs)) + goto ERROR_OUT; + } + } + + if ((json = close_array_and_get(stack))) + return json; + +ERROR_OUT: + sxe_jitson_stack_clear(stack); + return NULL; +} + +static bool +intersect_check_element(void *found_out, const void *element) +{ + SXE_UNUSED_PARAMETER(element); + *(bool *)found_out = true; + return false; // Found a match so end the intersect test +} + +/* Implementation of INTERSECT_TEST for an ordered sxe-jitson array type + */ +static const struct sxe_jitson * +sxe_jitson_intersect_test_ordered_array(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + struct kit_sortedarray_class elem_type; + const struct sxe_jitson *elem_lhs, *elem_rhs; + size_t i, len_lhs; + bool found; + + SXEA6(sxe_jitson_get_type(right) == SXE_JITSON_TYPE_ARRAY, + "Right hand side of an array INTERSECT expression cannot be JSON type %s", sxe_jitson_get_type_as_str(right)); + SXEA6(right->type & SXE_JITSON_TYPE_IS_ORD, "Right hand side of an ordered array INTERSECT expression must be ordered"); + + if (sxe_jitson_get_type(left) != SXE_JITSON_TYPE_ARRAY) { + SXEL2(": Left hand side of an INTERSECT expression cannot be JSON type %s", sxe_jitson_get_type_as_str(left)); + return NULL; + } + + len_lhs = sxe_jitson_len(left); + + if (left->type & SXE_JITSON_TYPE_IS_ORD) { // Left hand size is ordered + elem_type.keyoffset = 0; + elem_type.fmt = NULL; + found = false; + elem_type.value = &found; + elem_type.flags = KIT_SORTEDARRAY_CMP_CAN_FAIL; + + /* If both arrays contain uniformly sized elements + */ + if ((left->type & SXE_JITSON_TYPE_IS_UNIF) && (right->type & 
SXE_JITSON_TYPE_IS_UNIF)) { + if (left->uniform.size != right->uniform.size) // Different sized elements can't intersect + return sxe_jitson_false; + + elem_type.size = left->uniform.size; + elem_type.cmp = (int (*)(const void *, const void *))sxe_jitson_cmp; + elem_type.visit = intersect_check_element; + + if (!kit_sortedarray_intersect(&elem_type, &left[1], left->len, &right[1], right->len)) { // Incomplete intersect + if (found) + return sxe_jitson_true; + + return NULL; + } + + return sxe_jitson_false; + } + + /* If both arrays contain non-uniformly sized elements + */ + if (!(left->type & SXE_JITSON_TYPE_IS_UNIF) && !(right->type & SXE_JITSON_TYPE_IS_UNIF)) { + if (!(left->type & SXE_JITSON_TYPE_INDEXED)) // If the left array isn't indexed, index it + sxe_jitson_array_get_element(left, 0); + + if (!(right->type & SXE_JITSON_TYPE_INDEXED)) // If the right array isn't indexed, index it + sxe_jitson_array_get_element(right, 0); + + array_left = left; + array_right = right; + elem_type.size = sizeof(left->index[0]); + elem_type.cmp = indexed_element_cmp; + elem_type.visit = intersect_check_element; + + if (!kit_sortedarray_intersect(&elem_type, left->index, left->len, right->index, right->len)) { // Incomplete + if (found) + return sxe_jitson_true; + + return NULL; /* COVERAGE EXCLUSION: Ordered arrays of non-uniform size with incomparable elements */ + } + + return sxe_jitson_false; + } + } + + for (i = 0; i < len_lhs; i++) { + elem_lhs = sxe_jitson_array_get_element(left, i); + + if ((elem_rhs = sxe_jitson_in_array(elem_lhs, right)) == NULL) // Error in IN operation + return NULL; + else if (elem_rhs != sxe_jitson_null) + return sxe_jitson_true; + } + + return sxe_jitson_false; +} + +/* Implementation of INTERSECT_TEST for sxe-jitson array type + */ +static const struct sxe_jitson * +sxe_jitson_intersect_test_array(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + const struct sxe_jitson *elem_lhs, *elem_rhs; + size_t i, j, len_lhs, len_rhs; + 
int ret; + + SXEA6(sxe_jitson_get_type(right) == SXE_JITSON_TYPE_ARRAY, + "Right hand side of an array INTERSECT expression cannot be JSON type %s", sxe_jitson_get_type_as_str(right)); + + if (right->type & SXE_JITSON_TYPE_IS_ORD) + return sxe_jitson_intersect_test_ordered_array(left, right); + else if (left->type & SXE_JITSON_TYPE_IS_ORD) + return sxe_jitson_intersect_test_ordered_array(right, left); + + if (sxe_jitson_get_type(left) != SXE_JITSON_TYPE_ARRAY) { + SXEL2(": Left hand side of an INTERSECT_TEST expression cannot be JSON type %s", sxe_jitson_get_type_as_str(left)); + return NULL; + } + + len_lhs = sxe_jitson_len(left); + + for (i = 0; i < len_lhs; i++) { + elem_lhs = sxe_jitson_array_get_element(left, i); + + for (j = 0, len_rhs = sxe_jitson_len(right); j < len_rhs; j++) { + elem_rhs = sxe_jitson_array_get_element(right, j); + + if ((ret = sxe_jitson_eq(elem_lhs, elem_rhs)) == SXE_JITSON_TEST_ERROR) { + SXEL2(": Failed to compare elements of types %s and %s when testing INTERSECTion of arrays", + sxe_jitson_get_type_as_str(elem_lhs), sxe_jitson_get_type_as_str(elem_rhs)); + return NULL; + } else if (ret == SXE_JITSON_TEST_TRUE) + return sxe_jitson_true; + } + } + + return sxe_jitson_false; +} + +/* Default function to determine whether two json values intersect, which falls back to calling intersect + */ +static const struct sxe_jitson * +sxe_jitson_intersect_test_default(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + const struct sxe_jitson *result, *ret = NULL; + + result = sxe_jitson_oper_apply_binary(left, sxe_jitson_oper_intersect, right); + + if (result) { + if (sxe_jitson_len(result)) /* COVERAGE EXCLUSION: Requires a type that implement INTERSECT but not INTERSECT_TEST */ + ret = sxe_jitson_true; /* COVERAGE EXCLUSION: Requires a type that implement INTERSECT but not INTERSECT_TEST */ + else + ret = sxe_jitson_false; /* COVERAGE EXCLUSION: Requires a type that implement INTERSECT but not INTERSECT_TEST */ + + 
sxe_jitson_free(result); /* COVERAGE EXCLUSION: Requires a type that implement INTERSECT but not INTERSECT_TEST */ + } + + return ret; +} + +/* Initialize the SXE intersect operator + */ +void +sxe_jitson_intersect_init(void) +{ + union sxe_jitson_oper_func func = {.binary = sxe_jitson_intersect_array}, + func_test_default = {.binary = sxe_jitson_intersect_test_default}, + func_test_array = {.binary = sxe_jitson_intersect_test_array}; + + SXEA1(sxe_jitson_is_init(), "sxe_jitson_initialize needs to be called first"); + sxe_jitson_oper_intersect = sxe_jitson_oper_register("INTERSECT", SXE_JITSON_OPER_BINARY | SXE_JITSON_OPER_TYPE_RIGHT, + sxe_jitson_oper_func_null); + sxe_jitson_oper_add_to_type(sxe_jitson_oper_intersect, SXE_JITSON_TYPE_ARRAY, func); + + sxe_jitson_oper_intersect_test = sxe_jitson_oper_register("INTERSECT_TEST", + SXE_JITSON_OPER_BINARY | SXE_JITSON_OPER_TYPE_RIGHT, + func_test_default); + sxe_jitson_oper_add_to_type(sxe_jitson_oper_intersect_test, SXE_JITSON_TYPE_ARRAY, func_test_array); +} + +/** + * Determine the intersection of two json values + * + * @param left/right The values to be intersected + * + * @return The possibly empty intersection or NULL on error + */ +const struct sxe_jitson * +sxe_jitson_intersect(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + SXEA6(sxe_jitson_oper_intersect, "sxe_jitson_intersect_init has not been called"); + return sxe_jitson_oper_apply_binary(left, sxe_jitson_oper_intersect, right); +} + +/** + * Determine whether two json values intersect (optimized function) + * + * @param left The key or value to be checked for + * @param right The value to be looked in + * + * @return sxe_jitson_bool_true if found, sxe_jitson_false if not, or NULL on error + */ +const struct sxe_jitson * +sxe_jitson_intersect_test(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + SXEA6(sxe_jitson_oper_intersect, "sxe_jitson_intersect_init has not been called"); + return 
sxe_jitson_oper_apply_binary(left, sxe_jitson_oper_intersect_test, right); +} diff --git a/lib-sxe-jitson/sxe-jitson-intersect.h b/lib-sxe-jitson/sxe-jitson-intersect.h new file mode 100644 index 0000000..1730167 --- /dev/null +++ b/lib-sxe-jitson/sxe-jitson-intersect.h @@ -0,0 +1,18 @@ +#ifndef SXE_JITSON_INTERSECT_H +#define SXE_JITSON_INTERSECT_H + +#include + +extern unsigned sxe_jitson_oper_intersect; +extern unsigned sxe_jitson_oper_intersect_test; + +#include "sxe-jitson-intersect-proto.h" + +#if defined(SXE_DEBUG) || defined(SXE_COVERAGE) // Define unique tags for mockfails +# define SXE_JITSON_INTERSECT_OPEN ((const char *)sxe_jitson_intersect_init + 1) +# define SXE_JITSON_INTERSECT_ADD ((const char *)sxe_jitson_intersect_init + 2) +# define SXE_JITSON_INTERSECT_ADD_INDEXED ((const char *)sxe_jitson_intersect_init + 3) +# define SXE_JITSON_INTERSECT_GET ((const char *)sxe_jitson_intersect_init + 4) +#endif + +#endif diff --git a/lib-sxe-jitson/sxe-jitson-oper.c b/lib-sxe-jitson/sxe-jitson-oper.c new file mode 100644 index 0000000..0b0e644 --- /dev/null +++ b/lib-sxe-jitson/sxe-jitson-oper.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#include + +#include "kit-alloc.h" +#include "sxe-jitson-oper.h" +#include "sxe-log.h" + +/* + * This module implements operations on sxe-jitson values. All operations take and return constant values. + * + * Memory allocation philosophy: All values returned must either be allocated by the operation or be non-allocated values. This + * allows any value returned by an operation to be freed. If an allocated value that was not allocated by the operation (e.g. + * one of the arguments) needs to be returned, a reference to the value should be created and returned. This will prevent + * accidental deallocation of the value. + */ + +struct sxe_jitson_oper { + const char *name; + unsigned flags; + union sxe_jitson_oper_func def_func; +}; + +struct sxe_jitson_oper_per_type { + unsigned num_opers; // Number of additonal operations supported by this type + union sxe_jitson_oper_func *opers; // Pointers to operation implementations +}; + +union sxe_jitson_oper_func sxe_jitson_oper_func_null = {.unary = NULL}; + +static unsigned num_opers = 0; +static struct sxe_jitson_oper *opers = NULL; // Note that operation 0 is invalid +static unsigned num_types = 0; // Number of types with operations registered +static struct sxe_jitson_oper_per_type *type_opers = NULL; // Array of oper_per_type, 1:1 with sxe-jitson-types +static union sxe_jitson_oper_func null_func = {NULL}; + +/* If type is >= num_types, increase num_types if any operators are defined. 
Called by sxe_jitson_type_register
+ */
+void
+sxe_jitson_oper_increase_num_types(unsigned type)
+{
+    if (type >= num_types) {
+        if (num_opers) {    // Only allocate space for new per type operators if any operators have been registered
+            SXEA1(type_opers = kit_realloc(type_opers, (type + 1) * sizeof(*type_opers)),
+                  "Failed to reallocate operations per type");
+
+            for (; num_types <= type; num_types++) {    // All new types need to be initialized
+                type_opers[num_types].num_opers = 0;
+                type_opers[num_types].opers     = NULL;
+            }
+        } else
+            num_types = type + 1;    // Nothing to allocate yet; sxe_jitson_oper_register allocates on first use
+    }
+}
+
+/**
+ * Register an operation on sxe_jitson values
+ *
+ * @param name     Name of the operation (which may be a symbol, like '=='). The pointer is stored, not copied.
+ * @param flags    SXE_JITSON_OPER_UNARY if the operation takes 1 argument (i.e. is unary)
+ *                 SXE_JITSON_OPER_BINARY if the operation takes 2 arguments and uses the function from the type of the left arg
+ *                 SXE_JITSON_OPER_BINARY|SXE_JITSON_OPER_TYPE_RIGHT if binary and uses function from the type of the right arg
+ * @param def_func The default implementation of the operation, or a NULL function if there is no default
+ *
+ * @return An unsigned identifier for the operation. Identifiers start at 1; slot 0 of the table is never handed out.
+ *
+ * @note Aborts if an operation with the same name is already registered or if memory can't be allocated
+ */
+unsigned
+sxe_jitson_oper_register(const char *name, unsigned flags, union sxe_jitson_oper_func def_func)
+{
+    unsigned i;
+
+    SXEE6("(name='%s',flags=%x,def_func=%p) // num_opers=%u, num_types=%u", name, flags, def_func.unary, num_opers, num_types);
+
+    if (num_opers == 0) {
+        SXEA1(opers = kit_calloc(2, sizeof(*opers)), "Failed to allocate first operation");
+        num_opers = 1;
+
+        if (num_types) {
+            i         = num_types - 1;
+            num_types = 0;    // Make sure that per type operations get allocated above
+            sxe_jitson_oper_increase_num_types(i);
+        }
+    } else {
+        for (i = 1; i <= num_opers; i++)
+            SXEA1(strcmp(opers[i].name, name) != 0, "Operation '%s' is already registered", name);
+
+        SXEA1(opers = kit_realloc(opers, (++num_opers + 1) * sizeof(*opers)), "Failed to reallocate operations");
+    }
+
+    opers[num_opers].name     = name;
+    opers[num_opers].flags    = flags;
+    opers[num_opers].def_func = def_func;
+    SXER6("return num_opers=%u", num_opers);
+    return num_opers;
+}
+
+/**
+ * Set the function that implements an operation for values of one type
+ *
+ * @param op   Identifier of the operation, as returned by sxe_jitson_oper_register
+ * @param type The type the function applies to; must be less than the number of types
+ * @param func The function to call instead of the operation's default
+ *
+ * @note Aborts if op or type is out of range or if memory can't be allocated
+ */
+void
+sxe_jitson_oper_add_to_type(unsigned op, unsigned type, union sxe_jitson_oper_func func)
+{
+    unsigned i;
+
+    SXEA1(op <= num_opers,  "Operator %u is invalid with only %u operators registered", op, num_opers);
+    SXEA1(type < num_types, "Type %u is >= number of types %u", type, num_types);
+
+    if (op >= type_opers[type].num_opers) {
+        SXEA1(type_opers[type].opers = kit_realloc(type_opers[type].opers, (op + 1) * sizeof(*type_opers[type].opers)),
+              "Failed to reallocate operations for type %s", sxe_jitson_type_to_str(type));
+
+        for (i = type_opers[type].num_opers; i < op; i++)    // Functions for intervening operations need to be initialized
+            type_opers[type].opers[i] = null_func;
+
+        type_opers[type].num_opers = op + 1;
+    }
+
+    type_opers[type].opers[op] = func;
+}
+
+/**
+ * Get the name an operation was registered with
+ *
+ * @param op Identifier of the operation, as returned by sxe_jitson_oper_register
+ *
+ * @return The name pointer that was passed to sxe_jitson_oper_register
+ */
+const char *
+sxe_jitson_oper_get_name(unsigned op)
+{
+    SXEA1(op <= num_opers, "Operator %u is invalid with only %u operators registered", op, num_opers);
+    return opers[op].name;
+}
+
+/**
+ * Apply a unary operation to a value
+ *
+ * @param op  Identifier of the operation, as returned by sxe_jitson_oper_register
+ * @param arg The value to apply it to; it is dereferenced before its type is examined
+ *
+ * @return The result of the type specific function if one was added for the value's type, otherwise the result of the
+ *         operation's default function, or NULL with errno set to EOPNOTSUPP if there is neither
+ */
+const struct sxe_jitson *
+sxe_jitson_oper_apply_unary(unsigned op, const struct sxe_jitson *arg)
+{
+    unsigned type;
+
+    arg  = sxe_jitson_dereference(arg);
+    type = sxe_jitson_get_type_no_deref(arg);
+
+    SXEA6(op <= num_opers,                             "Operator %u is invalid with only %u operators registered", op, num_opers);
+    SXEA6(!(opers[op].flags & SXE_JITSON_OPER_BINARY), "Operator '%s' is binary", opers[op].name);
+    SXEA6(type < num_types,                            "Type %u is >= number of types %u", type, num_types);
+
+    if (op < type_opers[type].num_opers && type_opers[type].opers[op].unary)
+        return type_opers[type].opers[op].unary(arg);
+
+    if (!opers[op].def_func.unary) {
+        SXEL2(": No default function for operator '%s'", opers[op].name);
+        errno = EOPNOTSUPP;
+        return NULL;
+    }
+
+    return opers[op].def_func.unary(arg);
+}
+
+/**
+ * Apply a binary operation to a pair of values
+ *
+ * @param left  The left hand value
+ * @param op    Identifier of the operation, as returned by sxe_jitson_oper_register
+ * @param right The right hand value
+ *
+ * @return The result of the type specific function if one was added for the selecting type, otherwise the result of the
+ *         operation's default function, or NULL with errno set to EOPNOTSUPP if there is neither
+ *
+ * @note Both values are dereferenced. The selecting type is that of the right value if the operation was registered with
+ *       SXE_JITSON_OPER_TYPE_RIGHT, otherwise that of the left value.
+ */
+const struct sxe_jitson *
+sxe_jitson_oper_apply_binary(const struct sxe_jitson *left, unsigned op, const struct sxe_jitson *right)
+{
+    unsigned type;
+
+    SXEA6(op <= num_opers,                          "Operator %u is invalid with only %u operators registered", op, num_opers);
+    SXEA6(opers[op].flags & SXE_JITSON_OPER_BINARY, "Operator '%s' is unary", opers[op].name);
+
+    left  = sxe_jitson_dereference(left);
+    right = sxe_jitson_dereference(right);
+    type  = opers[op].flags & SXE_JITSON_OPER_TYPE_RIGHT ? sxe_jitson_get_type_no_deref(right)
+                                                         : sxe_jitson_get_type_no_deref(left);
+    SXEA6(type < num_types, "Type %u is >= number of types %u", type, num_types);
+
+    if (op < type_opers[type].num_opers && type_opers[type].opers[op].binary)
+        return type_opers[type].opers[op].binary(left, right);
+
+    if (!opers[op].def_func.binary) {
+        SXEL2(": No default function for operator '%s'", opers[op].name);
+        errno = EOPNOTSUPP;
+        return NULL;
+    }
+
+    return opers[op].def_func.binary(left, right);
+}
+
+/**
+ * Free the operation tables and reset the operation and type counts to zero
+ */
+void
+sxe_jitson_oper_fini(void)
+{
+    unsigned i;
+
+    /* The per type table is only allocated once an operation has been registered, so if types were registered but no
+     * operation ever was, num_types is non-zero while type_opers is still NULL and must not be indexed.
+     */
+    if (type_opers)
+        for (i = 0; i < num_types; i++)
+            kit_free(type_opers[i].opers);
+
+    kit_free(type_opers);
+    type_opers = NULL;
+    num_types  = 0;
+
+    kit_free(opers);
+    opers     = NULL;
+    num_opers = 0;
+}
diff --git a/lib-sxe-jitson/sxe-jitson-oper.h b/lib-sxe-jitson/sxe-jitson-oper.h
new file mode 100644
index 0000000..d23ee0a
--- /dev/null
+++ b/lib-sxe-jitson/sxe-jitson-oper.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#ifndef SXE_JITSON_OPER_H +#define SXE_JITSON_OPER_H + +#include "sxe-jitson.h" + +#define SXE_JITSON_OPER_UNARY 0 +#define SXE_JITSON_OPER_BINARY 1 // Set this bit in flags if the operator is binary +#define SXE_JITSON_OPER_TYPE_RIGHT 2 // Set this bit in flags if the operator should apply the type of the right argument + +union sxe_jitson_oper_func { + const struct sxe_jitson *(*unary)( const struct sxe_jitson *); + const struct sxe_jitson *(*binary)(const struct sxe_jitson *, const struct sxe_jitson *); +}; + +extern union sxe_jitson_oper_func sxe_jitson_oper_func_null; + +#include "sxe-jitson-oper-proto.h" + +#endif diff --git a/lib-sxe-jitson/sxe-jitson-range.c b/lib-sxe-jitson/sxe-jitson-range.c new file mode 100644 index 0000000..290299a --- /dev/null +++ b/lib-sxe-jitson/sxe-jitson-range.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. 
and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "sxe-jitson.h"
+#include "sxe-jitson-const.h"
+#include "sxe-jitson-in.h"
+#include "sxe-jitson-oper.h"
+#include "sxe-jitson-range.h"
+#include "sxe-log.h"
+
+uint32_t SXE_JITSON_TYPE_RANGE = SXE_JITSON_TYPE_INVALID;    // Need to register the type to get a valid value
+
+/* Cast an array of two ordered elements to a range, duplicating it onto the stack
+ *
+ * @return true on success, false if 'from' is not a two element array, its elements are not in order (or can't be
+ *         compared), or the duplicate can't be made
+ */
+static bool
+sxe_jitson_range_cast(struct sxe_jitson_stack *stack, const struct sxe_jitson *from)
+{
+    char    *string;
+    unsigned idx = stack->count;
+
+    SXEA1(SXE_JITSON_TYPE_RANGE != SXE_JITSON_TYPE_INVALID, "Can't cast to a range until sxe_jitson_range_register is called");
+
+    if (sxe_jitson_get_type(from) != SXE_JITSON_TYPE_ARRAY) {
+        SXEL2("Can't cast a %s to a range", sxe_jitson_get_type_as_str(from));
+        return false;
+    }
+
+    if (sxe_jitson_len(from) != 2) {
+        SXEL2("Expected a range array to have 2 elements, got %zu", sxe_jitson_len(from));
+        return false;
+    }
+
+    if (sxe_jitson_cmp(sxe_jitson_array_get_element(from, 0), sxe_jitson_array_get_element(from, 1)) > 0) {
+        string = sxe_jitson_to_json(from, NULL);
+        SXEL2("Expected the begining and end of range '%s' to be ordered", string);
+        kit_free(string);
+        return false;
+    }
+
+    if (!sxe_jitson_stack_dup_at_index(stack, idx, from, 0))
+        return false;
+
+    /* A range is exactly an array with the type changed, but all the same flags.
+     */
+    stack->jitsons[idx].type = SXE_JITSON_TYPE_RANGE | (stack->jitsons[idx].type & ~SXE_JITSON_TYPE_MASK);
+    return true;
+}
+
+/* Truth test registered for the range type
+ */
+static int
+sxe_jitson_range_test(const struct sxe_jitson *jitson)
+{
+    SXE_UNUSED_PARAMETER(jitson);
+    return SXE_JITSON_TEST_TRUE;    // Ranges always test true for evaluation purposes
+}
+
+/* Build the text form of a range: the text of the underlying array wrapped in "range(" and ")"
+ *
+ * @return The string being built by the factory or NULL if adding to the factory failed
+ */
+static char *
+sxe_jitson_range_build_json(const struct sxe_jitson *range, struct sxe_factory *factory)
+{
+    if (sxe_factory_add(factory, "range(", sizeof("range(") - 1) < 0 || sxe_jitson_array_build_json(range, factory) == NULL
+     || sxe_factory_add(factory, ")", 1) < 0) {
+        SXEL2("Failed to allocate memory for range as a string");
+        return NULL;
+    }
+
+    return sxe_factory_look(factory, NULL);
+}
+
+/* Implement value IN range -> true or null
+ */
+static const struct sxe_jitson *
+sxe_jitson_range_in(const struct sxe_jitson *json_value, const struct sxe_jitson *range)
+{
+    SXEL2A6((range->type & SXE_JITSON_TYPE_MASK) == SXE_JITSON_TYPE_RANGE,
+            ": Internal error: Expected right side of IN to be a range value");
+
+    if (sxe_jitson_cmp(sxe_jitson_array_get_element(range, 0), json_value) <= 0
+     && sxe_jitson_cmp(json_value, sxe_jitson_array_get_element(range, 1)) <= 0)
+        return sxe_jitson_true;
+
+    return sxe_jitson_null;    // On error or if out of range, return null
+}
+
+/**
+ * Register the range type, the "range" cast and the range implementation of the IN operator
+ *
+ * @note Calling this again after the type has been registered does nothing
+ */
+void
+sxe_jitson_range_register(void)
+{
+    union sxe_jitson_oper_func func;
+    static struct sxe_jitson   cast_constant;
+
+    if (SXE_JITSON_TYPE_RANGE != SXE_JITSON_TYPE_INVALID)    // Already registered
+        return;
+
+    SXE_JITSON_TYPE_RANGE = sxe_jitson_type_register("range", sxe_jitson_array_free, sxe_jitson_range_test,
+                                                     sxe_jitson_array_size, sxe_jitson_len_base, sxe_jitson_array_clone,
+                                                     sxe_jitson_range_build_json, NULL, sxe_jitson_array_eq);
+    sxe_jitson_const_register_cast(&cast_constant, "range", sxe_jitson_range_cast);
+    func.binary = sxe_jitson_range_in;
+    sxe_jitson_oper_add_to_type(sxe_jitson_oper_in, SXE_JITSON_TYPE_RANGE, func);
+}
+
+/* Log an error of the form <before>[<from>,<to>]<after>, always returning false so callers can return the result
+ */
+static bool
+range_error(const char *before, const struct sxe_jitson *from, const struct sxe_jitson *to, const char *after)
+{
+    char *json_from = sxe_jitson_to_json(from, NULL);
+    char *json_to   = sxe_jitson_to_json(to, NULL);
+
+    SXEL2("%s[%s,%s]%s", before, json_from, json_to, after);
+    kit_free(json_to);
+    kit_free(json_from);
+    return false;
+}
+
+/**
+ * Construct a range on a stack
+ *
+ * @param stack   The stack to add the range to
+ * @param from/to The beginning and end of the range, expected to be comparable and in order
+ *
+ * @return true on success, false on failure
+ *
+ * @note Aborts if sxe_jitson_range_register has not been called
+ */
+bool
+sxe_jitson_stack_add_range(struct sxe_jitson_stack *stack, const struct sxe_jitson *from, const struct sxe_jitson *to)
+{
+    unsigned idx = stack->count;
+
+    /* The ordering test below relies on a failed comparison also yielding a value > 0
+     */
+    _Static_assert(SXE_JITSON_CMP_ERROR > 0, "Expected compare error value to be > 0");
+    SXEA1(SXE_JITSON_TYPE_RANGE != SXE_JITSON_TYPE_INVALID, "Can't add a range until sxe_jitson_range_register is called");
+
+    if (sxe_jitson_cmp(from, to) > 0)
+        return range_error("Expected the begining and end of range ", from, to, " to be ordered");
+
+    if (!sxe_jitson_stack_open_array(stack, "for range") || !sxe_jitson_stack_add_dup(stack, from)
+     || !sxe_jitson_stack_add_dup(stack, to) || !sxe_jitson_stack_close_array(stack, "for range"))
+        return range_error("Failed to add range ", from, to, " to the stack");
+
+    /* A range is exactly an array with the type changed, but all the same flags.
+     */
+    SXEA6(sxe_jitson_cmp(&stack->jitsons[idx + 1], &stack->jitsons[idx + 2]) <= 0, "Expected constructed range to be ordered");
+    stack->jitsons[idx].type = SXE_JITSON_TYPE_RANGE | (stack->jitsons[idx].type & ~SXE_JITSON_TYPE_MASK);
+    return true;
+}
diff --git a/lib-sxe-jitson/sxe-jitson-range.h b/lib-sxe-jitson/sxe-jitson-range.h
new file mode 100644
index 0000000..d82b22c
--- /dev/null
+++ b/lib-sxe-jitson/sxe-jitson-range.h
@@ -0,0 +1,10 @@
+#ifndef SXE_RANGE_H
+#define SXE_RANGE_H
+
+#include <stdint.h>
+
+extern uint32_t SXE_JITSON_TYPE_RANGE;    // jitson range
+
+#include "sxe-jitson-range-proto.h"
+
+#endif
diff --git a/lib-sxe-jitson/sxe-jitson-source.c b/lib-sxe-jitson/sxe-jitson-source.c
new file mode 100644
index 0000000..55819b5
--- /dev/null
+++ b/lib-sxe-jitson/sxe-jitson-source.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+/* NOTE(review): the system header names were missing from this copy of the patch; these are the headers that the
+ *               code in this file uses (isspace/isdigit/isxdigit, errno, strncpy/strcpy/memcpy) - confirm upstream
+ */
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+
+#include "sxe-jitson.h"
+#include "sxe-jitson-ident.h"
+#include "sxe-log.h"
+
+/* This is a 256 bit little endian bitmask. For each unsigned char, the bit is set if the char is valid in an identifier.
+ * The bits set are those for '.', '0'-'9', 'A'-'Z', '_' and 'a'-'z'.
+ */
+static uint64_t identifier_chars[4] = {0x03FF400000000000, 0x07FFFFFE87FFFFFE, 0x0000000000000000, 0x00000000000000};
+
+/**
+ * Construct a source from possibly non-NUL terminated JSON
+ *
+ * @param source The source to construct
+ * @param string JSON to be parsed
+ * @param len    Length of the JSON to be parsed
+ * @param flags  Flags affecting the parsing. 0 for strict JSON, or one or more of SXE_JITSON_FLAG_ALLOW_HEX,
+ *               SXE_JITSON_FLAG_ALLOW_CONSTS or SXE_JITSON_FLAG_ALLOW_IDENTS
+ *
+ * @note SXE_JITSON_FLAG_ALLOW_IDENTS has no effect if sxe_jitson_ident_register has not been called
+ */
+void
+sxe_jitson_source_from_buffer(struct sxe_jitson_source *source, const char *string, size_t len, uint32_t flags)
+{
+    source->json  = string;
+    source->next  = string;
+    source->end   = string + len;
+    source->flags = flags;
+    source->file  = NULL;
+    source->line  = 0;
+}
+
+/**
+ * Construct a source from a JSON string
+ *
+ * @param source The source to construct
+ * @param string A JSON string to be parsed
+ * @param flags  Flags affecting the parsing. 0 for strict JSON, or one or more of SXE_JITSON_FLAG_ALLOW_HEX,
+ *               SXE_JITSON_FLAG_ALLOW_CONSTS or SXE_JITSON_FLAG_ALLOW_IDENTS
+ *
+ * @note SXE_JITSON_FLAG_ALLOW_IDENTS has no effect if sxe_jitson_ident_register has not been called
+ */
+void
+sxe_jitson_source_from_string(struct sxe_jitson_source *source, const char *string, uint32_t flags)
+{
+    source->json  = string;
+    source->next  = string;
+    source->end   = (const char *)~0ULL;    // Just use the NUL terminator. This allows us to not take the strlen.
+    source->flags = flags;
+    source->file  = NULL;
+    source->line  = 0;
+}
+
+/**
+ * Set file/line information for diagnostics
+ *
+ * @param source The source to set the information in
+ * @param file   File name or NULL
+ * @param line   Line number or 0 to not track
+ */
+void
+sxe_jitson_source_set_file_line(struct sxe_jitson_source *source, const char *file, unsigned line)
+{
+    source->file = file;
+    source->line = line;
+}
+
+/* Return the next character in the source without consuming it, or '\0' on end of data
+ */
+unsigned char
+sxe_jitson_source_peek_char(const struct sxe_jitson_source *source)
+{
+    return source->next >= source->end ? '\0' : *source->next;
+}
+
+/* Get the next character in the source, returning '\0' on end of data
+ *
+ * The NUL that ends a string source is never consumed. If a line number is being tracked, consuming a '\n' advances it.
+ */
+char
+sxe_jitson_source_get_char(struct sxe_jitson_source *source)
+{
+    if (source->next >= source->end || (source->end == (const char *)~0ULL && *source->next == '\0'))
+        return '\0';
+
+    if (source->line && *source->next == '\n')
+        source->line++;
+
+    return *source->next++;
+}
+
+/* Skip whitespace characters in the source, returning a peek at the first nonspace character or '\0' on end of data
+ */
+char
+sxe_jitson_source_peek_nonspace(struct sxe_jitson_source *source)
+{
+    /* The cast is required: passing a negative char (e.g. a byte of a UTF-8 sequence) to isspace is undefined
+     */
+    while (source->next < source->end && isspace((unsigned char)*source->next)) {
+        if (source->line && *source->next == '\n')
+            source->line++;
+
+        source->next++;
+    }
+
+    return sxe_jitson_source_peek_char(source);
+}
+
+/* Skip whitespace characters in the source, returning the first nonspace character or '\0' on end of data
+ */
+char
+sxe_jitson_source_get_nonspace(struct sxe_jitson_source *source)
+{
+    char c;
+
+    do    // The cast keeps bytes with the top bit set from reaching isspace as negative values
+        c = sxe_jitson_source_get_char(source);
+    while (isspace((unsigned char)c));
+
+    return c;
+}
+
+/**
+ * Peek at identifier characters in a source until a non-identifier character is reached
+ *
+ * @param source  Source to parse
+ * @param len_out Pointer to variable set to the length of the identifier, or to 0 if there is no identifier
+ *
+ * @return Identifier or NULL if first character is a non-identifier
+ *
+ * @note
If the first identifier character has stricter limitations than subsequent characters, you must check that outside
+ */
+const char *
+sxe_jitson_source_peek_identifier(const struct sxe_jitson_source *source, size_t *len_out)
+{
+    const char   *next = source->next;
+    unsigned char c    = next >= source->end ? '\0' : (unsigned char)*next;
+
+    /* While the next character's bit is set in the 256 bit identifier_chars bitmask. The 1 being shifted must be 64 bits
+     * wide: shifting an unsigned long by up to 63 is undefined where long is only 32 bits.
+     */
+    for (; identifier_chars[c >> 6] & ((uint64_t)1 << (c & 0x3f)); c = next >= source->end ? '\0' : (unsigned char)*next)
+        next++;
+
+    return (*len_out = next - source->next) ? source->next : NULL;
+}
+
+/**
+ * Peek at characters in a source until a whitespace character is reached
+ *
+ * @param source  Source to parse
+ * @param len_out Pointer to variable set to the length of the token; not set if a token is not found
+ *
+ * @return Token or NULL on end of source
+ *
+ * @note Leading whitespace is consumed from the source, but the token itself is not
+ */
+const char *
+sxe_jitson_source_peek_token(struct sxe_jitson_source *source, size_t *len_out)
+{
+    const char *next, *token;
+
+    if (!sxe_jitson_source_peek_nonspace(source))    // EOF
+        return NULL;
+
+    token    = source->next;
+    *len_out = 1;
+
+    /* While the next character is not EOF or whitespace. The cast keeps negative chars away from isspace.
+     */
+    for (next = token + 1; next < source->end && *next && !isspace((unsigned char)*next); next++)
+        (*len_out)++;
+
+    return token;
+}
+
+/**
+ * Get identifier characters until a non-identifier character is reached.
+ *
+ * @param source  Source to parse
+ * @param len_out Set to the length of the identifier or 0 if there is no valid identifier
+ *
+ * @return Pointer to the identifier or NULL if there is no valid identifier
+ *
+ * @note If the first identifier character has stricter limitations than subsequent characters, you must check that outside
+ */
+const char *
+sxe_jitson_source_get_identifier(struct sxe_jitson_source *source, size_t *len_out)
+{
+    const char *found = sxe_jitson_source_peek_identifier(source, len_out);
+
+    sxe_jitson_source_consume(source, *len_out);    // Consumes nothing when no identifier was found
+    return found;
+}
+
+/* Consume a run of decimal digits, returning true if at least one digit was consumed
+ */
+static bool
+sxe_jitson_source_skip_digits(struct sxe_jitson_source *source)
+{
+    const char *before = source->next;
+
+    while (isdigit(sxe_jitson_source_peek_char(source)))
+        source->next++;
+
+    return source->next != before;
+}
+
+/**
+ * Get characters until a non-number character is reached.
+ *
+ * @param source      Source to parse
+ * @param len_out     Set to the length of the number as a string; not set if there is no valid number
+ * @param is_uint_out Set to true if the value is a uint, false if not
+ *
+ * @return Pointer to the number as a string or NULL (with errno set to EINVAL) if there is no valid number
+ *
+ * @note Characters consumed before the number was found to be invalid are not returned to the source
+ */
+const char *
+sxe_jitson_source_get_number(struct sxe_jitson_source *source, size_t *len_out, bool *is_uint_out)
+{
+    const char *start = source->next;
+
+    *is_uint_out = true;
+
+    if (sxe_jitson_source_peek_char(source) == '-') {    // Anything with a sign is not a uint
+        *is_uint_out = false;
+        source->next++;
+    }
+
+    if (!isdigit(sxe_jitson_source_peek_char(source)))    // There must be at least one leading digit
+        goto INVALID;
+
+    source->next++;
+
+    /* If hex is allowed and the number starts with '0x', its a hexadecimal unsigned integer
+     */
+    if ((source->flags & SXE_JITSON_FLAG_ALLOW_HEX) && *start == '0' && sxe_jitson_source_peek_char(source) == 'x') {
+        source->next++;    // Skip the 'x'
+
+        while (isxdigit(sxe_jitson_source_peek_char(source)))
+            source->next++;
+
+        if (source->next - start <= 2)    // Can't be just '0x'
+            goto INVALID;
+
+        *len_out = source->next - start;
+        return start;
+    }
+
+    sxe_jitson_source_skip_digits(source);    // The rest of the integer part, if any
+
+    if (sxe_jitson_source_peek_char(source) == '.') {    // If there's a fraction
+        *is_uint_out = false;
+        source->next++;
+
+        if (!sxe_jitson_source_skip_digits(source))    // A '.' must be followed by at least one digit
+            goto INVALID;
+    }
+
+    switch (sxe_jitson_source_peek_char(source)) {
+    case 'E':
+    case 'e':    // If there's an exponent
+        *is_uint_out = false;
+        source->next++;
+
+        switch (sxe_jitson_source_peek_char(source)) {
+        case '-':
+        case '+':
+            source->next++;    // The sign of the exponent is optional
+            break;
+
+        default:
+            break;
+        }
+
+        if (!sxe_jitson_source_skip_digits(source))    // An exponent must have at least one digit
+            goto INVALID;
+
+        break;
+
+    default:
+        break;
+    }
+
+    *len_out = source->next - start;
+    return start;
+
+INVALID:
+    errno = EINVAL;
+    return NULL;
+}
+
+/**
+ * Get a literal, non-JSON character string
+ *
+ * @param source  Source to parse
+ * @param len_out If not NULL, set to the length of the literal as a string including the double quotes
+ *
+ * @return Pointer to the literal as a string or NULL (with errno set to EINVAL) if there is no valid literal
+ */
+const char *
+sxe_jitson_source_get_literal(struct sxe_jitson_source *source, size_t *len_out)
+{
+    const char *start = source->next;
+
+    if (sxe_jitson_source_peek_char(source) == '"') {
+        source->next++;    // Skip the opening quote
+
+        for (;;) {
+            char got = sxe_jitson_source_get_char(source);
+
+            if (got == '"') {    // Found the closing quote
+                if (len_out)
+                    *len_out = source->next - start;
+
+                return start;
+            }
+
+            if (got == '\0')    // Ran out of data before the closing quote
+                break;
+        }
+    }
+
+    errno = EINVAL;
+    return NULL;
+}
+
+/**
+ * Return up to 63 characters of the source or "" on EOF, for use in diagnostic messages.
+ *
+ * @param source The source whose unconsumed data is wanted
+ *
+ * @return "" if there is no data left, otherwise a pointer to a per thread buffer that is overwritten by the next call
+ *         made on the same thread. If more than 63 characters are left, the first 60 are followed by "...".
+ */
+const char *
+sxe_jitson_source_left(const struct sxe_jitson_source *source)
+{
+    static __thread char buf[65];    // An extra byte is needed just to shut up GCC
+
+    /* The bounds check must come first: a source made from a buffer need not be NUL terminated, so at the end of the
+     * data there may be no readable byte to test.
+     */
+    if (source->next >= source->end || *source->next == '\0')
+        return "";
+
+    if ((uintptr_t)source->end - (uintptr_t)source->next > 63) {
+        strncpy(buf, source->next, 64);    // SonarQube False Positive
+
+        if (buf[63])                       // There are more than 63 characters
+            strcpy(&buf[60], "...");       // SonarQube False Positive
+    }
+    else {
+        memcpy(buf, source->next, source->end - source->next);
+        buf[source->end - source->next] = '\0';
+    }
+
+    return buf;
+}
diff --git a/lib-sxe-jitson/sxe-jitson-stack.c b/lib-sxe-jitson/sxe-jitson-stack.c
new file mode 100644
index 0000000..1617422
--- /dev/null
+++ b/lib-sxe-jitson/sxe-jitson-stack.c
@@ -0,0 +1,1300 @@
+/* Copyright (c) 2021 Jim Belton
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */ + +/* SXE jitson stacks are factories for building sxe-jitson. + */ + +#include +#include +#include +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "sxe-hash.h" +#include "sxe-jitson.h" +#include "sxe-jitson-const.h" +#include "sxe-log.h" +#include "sxe-sortedarray.h" +#include "sxe-thread.h" +#include "sxe-unicode.h" + +#define JITSON_STACK_INIT_SIZE 1 // The initial numer of tokens in a per thread stack +#define JITSON_STACK_MAX_INCR 4096 // The maximum the stack will grow by + +/* A per thread stack is kept for parsing. It's per thread for lockless thread safety, and automatically grows as needed. + */ +static unsigned jitson_stack_init_size = JITSON_STACK_INIT_SIZE; +static __thread struct sxe_jitson_stack *jitson_stack = NULL; + +/* Hook to allow parser to push unmatched identifiers onto the stack. This is a non-standard extension + */ +bool (*sxe_jitson_stack_push_ident)(struct sxe_jitson_stack *stack, const char *ident, size_t len) = NULL; + +static bool +sxe_jitson_stack_make(struct sxe_jitson_stack *stack, unsigned init_size) +{ + SXEA1(SXE_JITSON_TOKEN_SIZE == 16, "Expected token size 16, got %zu", SXE_JITSON_TOKEN_SIZE); + + if (!stack) + return false; + + memset(stack, 0, sizeof(*stack)); + stack->maximum = init_size; + stack->jitsons = MOCKERROR(MOCK_FAIL_STACK_NEW_JITSONS, NULL, ENOMEM, kit_malloc((size_t)init_size * sizeof(*stack->jitsons))); + return stack->jitsons ? 
true : false; +} + +struct sxe_jitson_stack * +sxe_jitson_stack_new(unsigned init_size) +{ + struct sxe_jitson_stack *stack = MOCKERROR(MOCK_FAIL_STACK_NEW_OBJECT, NULL, ENOMEM, kit_malloc(sizeof(*stack))); + + if (!sxe_jitson_stack_make(stack, init_size)) { + kit_free(stack); + return NULL; + } + + return stack; +} + +/** + * Extract the jitson parsed or constructed on a stack + * + * @param The stack + * + * @return The parsed or constructed jitson + * + * @note Aborts if there is no jitson on the stack or if there is a partially constructed one + */ +struct sxe_jitson * +sxe_jitson_stack_get_jitson(struct sxe_jitson_stack *stack) +{ + struct sxe_jitson *ret = stack->jitsons; + size_t size; + + SXEA1(stack->jitsons, "Can't get a jitson from an empty stack"); + SXEA1(!stack->open, "Can't get a jitson there's an open collection"); + SXEE6("(stack=%p)", stack); + + if (stack->borrow) { + SXEA1(stack->borrow < stack->count, "Can't get a jitson from a borrowed stack that hasn't been grown"); + size = (stack->count - stack->borrow) * sizeof(*stack->jitsons); + + if ((ret = kit_malloc(size))) { + memcpy(ret, &stack->jitsons[stack->borrow], size); + ret->type |= SXE_JITSON_TYPE_ALLOCED; + stack->count = stack->borrow; + } + + goto OUT; + } + + if (stack->maximum > stack->count) + ret = kit_realloc(ret, stack->count * sizeof(*stack->jitsons)) ?: stack->jitsons; + + stack->jitsons = NULL; + stack->count = 0; + ret->type |= SXE_JITSON_TYPE_ALLOCED; // The token at the base of the stack is marked as allocated + +OUT: + SXER6("return %p; // type=%s", ret, ret ? 
sxe_jitson_type_to_str(sxe_jitson_get_type(ret)) : "NONE"); + return ret; +} + +/** + * Clear the content of a parse stack + */ +void +sxe_jitson_stack_clear(struct sxe_jitson_stack *stack) +{ + stack->count = stack->borrow; + stack->open = 0; +} + +/** + * Temporarily borrow a stack + * + * @param stack The stack to borrow + * @param iou A stack object used to store an IOU for the stack + */ +void +sxe_jitson_stack_borrow(struct sxe_jitson_stack *stack, struct sxe_jitson_stack *iou) +{ + memcpy(iou, stack, sizeof(*iou)); + stack->open = 0; + stack->last = 0; + stack->borrow = stack->count; +} + +/** + * Return a temporarily borrowed stack + * + * @param stack The stack that was borrowed + * @param iou A stack object used to store an IOU for the stack + */ +void +sxe_jitson_stack_return(struct sxe_jitson_stack *stack, const struct sxe_jitson_stack *iou) +{ + SXEA1(stack->borrow == iou->count, "The size of the stack differs from its size when borrowed"); + stack->open = iou->open; + stack->last = iou->last; + stack->borrow = iou->borrow; + stack->count = iou->count; +} + +/** + * Return a per thread stack, constructing it on first call. + * + * @note The stack can be freed after the thread exits by calling sxe_thread_memory_free + */ +struct sxe_jitson_stack * +sxe_jitson_stack_get_thread(void) +{ + /* Allocate the per thread stack; once allocated, we can't free it because its being tracked + */ + if (!jitson_stack) { + jitson_stack = sxe_thread_malloc(sizeof(*jitson_stack), (void (*)(void *))sxe_jitson_stack_free, NULL); + + if (!sxe_jitson_stack_make(jitson_stack, jitson_stack_init_size)) { + SXEL2(": failed to create a sxe-jitson per thread stack"); + return NULL; + } + } + + SXEL6(": return %p; // count=%u, open=%u", jitson_stack, jitson_stack ? jitson_stack->count : 0, + jitson_stack ? 
jitson_stack->open : 0); + return jitson_stack; +} + +void +sxe_jitson_stack_free(struct sxe_jitson_stack *stack) +{ + kit_free(stack->jitsons); + kit_free(stack); +} + +/* Reserve space on stack, expanding it if needed to make room for at least 'more' new values + * + * @return The index of the first new slot on the stack, or SXE_JITSON_STACK_ERROR on error (ENOMEM) + */ +unsigned +sxe_jitson_stack_expand(struct sxe_jitson_stack *stack, unsigned more) +{ + unsigned expanded = stack->count + more; + + if (expanded > stack->maximum) { + unsigned new_maximum; + + if (expanded < JITSON_STACK_MAX_INCR) + new_maximum = ((expanded - 1) / stack->maximum + 1) * stack->maximum; + else + new_maximum = ((expanded - 1) / JITSON_STACK_MAX_INCR + 1) * JITSON_STACK_MAX_INCR; + + struct sxe_jitson *new_jitsons = MOCKERROR(MOCK_FAIL_STACK_EXPAND, NULL, ENOMEM, + kit_realloc(stack->jitsons, (size_t)new_maximum * sizeof(*stack->jitsons))); + + if (!new_jitsons) { + SXEL2(": Failed to expand the stack to %u jitsons from %u", new_maximum, stack->jitsons ? stack->maximum : 0); + return SXE_JITSON_STACK_ERROR; + } + + stack->maximum = new_maximum; + stack->jitsons = new_jitsons; // If the array moved, point current into the new one. 
+ } + else if (!stack->jitsons && !(stack->jitsons = MOCKERROR(MOCK_FAIL_STACK_EXPAND_AFTER_GET, NULL, ENOMEM, + kit_malloc(((size_t)stack->maximum * sizeof(*stack->jitsons)))))) { + SXEL2(": Failed to allocate %u jitsons for the stack", stack->maximum); + return SXE_JITSON_STACK_ERROR; + } + + stack->count = expanded; + return expanded - more; +} + +/** + * Load a JSON string from a source, returning true on success, false on error + * See https://www.json.org/json-en.html + */ +bool +sxe_jitson_stack_load_string(struct sxe_jitson_stack *stack, struct sxe_jitson_source *source) +{ + struct sxe_jitson *jitson; + unsigned i, idx, space, unicode; + char c, utf8[4]; + + if (sxe_jitson_source_get_nonspace(source) != '"') { + errno = EINVAL; + return false; + } + + if ((idx = sxe_jitson_stack_expand(stack, 1)) == SXE_JITSON_STACK_ERROR) + return false; + + jitson = &stack->jitsons[idx]; + jitson->type = SXE_JITSON_TYPE_STRING; + jitson->len = 0; + space = sizeof(jitson->string); // Amount of space left in the current jitson + + while ((c = sxe_jitson_source_get_char(source)) != '"') { + if (c == '\0') { // No terminating " + errno = EINVAL; + goto ERROR; + } + + i = 1; // If its not UTF-8, it takes 1 byte + + if (c == '\\') + switch (c = sxe_jitson_source_get_char(source)) { + case '"': + case '\\': + case '/': + jitson->string[jitson->len++] = c; + break; + + case 'b': + jitson->string[jitson->len++] = '\b'; + break; + + case 'f': + jitson->string[jitson->len++] = '\f'; + break; + + case 'n': + jitson->string[jitson->len++] = '\n'; + break; + + case 'r': + jitson->string[jitson->len++] = '\r'; + break; + + case 't': + jitson->string[jitson->len++] = '\t'; + break; + + case 'u': + for (i = 0, unicode = 0; i < 4; i++) + switch (c = sxe_jitson_source_get_char(source)) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + unicode = (unicode << 4) + c - '0'; + break; + + case 'a': + case 'b': + case 'c': 
+ case 'd': + case 'e': + case 'f': + unicode = (unicode << 4) + c - 'a' + 10; + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + unicode = (unicode << 4) + c - 'A' + 10; + break; + + default: + errno = EILSEQ; + goto ERROR; + } + + i = sxe_unicode_to_utf8(unicode, utf8); // Return is number of bytes in the utf8 string + jitson->string[jitson->len++] = utf8[0]; + break; + + default: + errno = EILSEQ; + goto ERROR; + } + else + jitson->string[jitson->len++] = c; + + if (--space < i) { // Used up a byte. If there's not enough space left including an extra byte for the trailing '\0' + if (sxe_jitson_stack_expand(stack, 1) == SXE_JITSON_STACK_ERROR) + goto ERROR; + + jitson = &stack->jitsons[idx]; // In case the jitsons were moved by realloc + space += sizeof(*jitson); // Got 16 more bytes of string space + } + + if (--i > 0) { // For UTF-8 strings > 1 character, copy the rest of the string + memcpy(&jitson->string[jitson->len], &utf8[1], i); + jitson->len += i; + space -= i; + } + } + + jitson->string[jitson->len] = '\0'; + return true; + +ERROR: + stack->count = idx; // Discard any data that this function added to the stack + return false; +} + +/** + * Duplicate a jitson value onto the stack + * + * @param stack to duplicate to + * @param idx the stack index where the duplicate is to be made + * @param value the jitson value to duplicate + * @param size the size (in jitsons) of the value or 0 to compute it + * + * @return false on out of memory expanding stack or if deep copying the value fails (usually implying out of memory) + */ +bool +sxe_jitson_stack_dup_at_index(struct sxe_jitson_stack *stack, unsigned idx, const struct sxe_jitson *value, unsigned size) +{ + size = size ?: sxe_jitson_size(value); + + if (idx + size > stack->count && sxe_jitson_stack_expand(stack, idx + size - stack->count) == SXE_JITSON_STACK_ERROR) + return false; + + memcpy(&stack->jitsons[idx], value, size * sizeof(*value)); + bool ret = 
sxe_jitson_clone(value, &stack->jitsons[idx]); // If the type requires a deep clone, do it + stack->jitsons[idx].type &= ~SXE_JITSON_TYPE_ALLOCED; // Clear the allocation flag if any + return ret; +} + +/* Character classes used by the parser + */ +#define INV 0 // Invalid (control character, non-ASCII character, etc.) +#define SYM 1 // A standalone symbol not used by JSON +#define QOT 2 // A quote-like character +#define DIG 4 // A decimal digit +#define ALP 5 // An alphabetic character, including _ and upper and lower case letters + +/* Map from char to character class. Characters special to JSON are often a class of their own. + */ +static char jitson_class[256] = { INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, + INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, + INV, SYM, '"', SYM, SYM, SYM, SYM, QOT, '(', ')', SYM, SYM, SYM, '-', SYM, SYM, + DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, SYM, SYM, SYM, SYM, SYM, SYM, + SYM, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, + ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, '[', SYM, ']', SYM, ALP, + QOT, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, + ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, ALP, '{', SYM, '}', SYM, INV, + INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, + INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, + INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, + INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, + INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, + INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, + INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV }; + +/** + * Load a JSON onto a sxe-jitson stack. 
+ * See https://www.json.org/json-en.html + * + * @param stack The stack to load onto + * @param source A sxe_jitson_source object + * + * @return true if the JSON was successfully parsed or false on error + * + * @note On error, any jitson values partially parsed onto the stack will be cleared. + */ +bool +sxe_jitson_stack_load_json(struct sxe_jitson_stack *stack, struct sxe_jitson_source *source) +{ + const struct sxe_jitson *jitson, *cast_arg = NULL; + const char *token; + char *endptr; + size_t len; + unsigned current, idx, previous; + bool is_uint; + char c; + + if ((c = sxe_jitson_source_peek_nonspace(source)) == '\0') { // Nothing but whitespace + errno = ENODATA; + return false; + } + + if ((idx = sxe_jitson_stack_expand(stack, 1)) == SXE_JITSON_STACK_ERROR) // Get an empty jitson + return false; + + stack->last = idx; // Keep track of the last value loaded on the stack + + switch (jitson_class[(unsigned char)c]) { + case '"': // It's a string + stack->count--; // Return the jitson just allocated. The load_string function will get it back. 
+ return sxe_jitson_stack_load_string(stack, source); + + case '{': // It's an object + sxe_jitson_source_consume(source, 1); + stack->jitsons[idx].type = SXE_JITSON_TYPE_OBJECT; + stack->jitsons[idx].len = 0; + + if ((c = sxe_jitson_source_peek_nonspace(source)) == '}') { // If it's an empty object, return it + sxe_jitson_source_consume(source, 1); + stack->jitsons[idx].integer = 1; // Save the size in jitsons + return true; + } + + do { + if (c != '"') + goto INVALID; + + previous = stack->count; + + if (!sxe_jitson_stack_load_string(stack, source)) // Member name must be a string + goto ERROR; + + if (sxe_jitson_source_get_nonspace(source) != ':') + goto INVALID; + + if (!sxe_jitson_stack_load_json(stack, source)) // Value can be any JSON value + goto ERROR; + + stack->jitsons[idx].len++; + stack->jitsons[previous].type |= SXE_JITSON_TYPE_IS_KEY; + c = sxe_jitson_source_get_nonspace(source); + } while (c == ',' && (c = sxe_jitson_source_peek_nonspace(source))); + + if (c == '}') { + stack->jitsons[idx].integer = stack->count - idx; // Store the size = offset past the object + return true; + } + + goto INVALID; + + case '[': // It's an array + sxe_jitson_source_consume(source, 1); + stack->jitsons[idx].type = SXE_JITSON_TYPE_ARRAY; + stack->jitsons[idx].len = 0; + + if (source->flags & SXE_JITSON_FLAG_OPTIMIZE) // If optimization is turned on, detect the following properties + stack->jitsons[idx].type |= SXE_JITSON_TYPE_IS_ORD | SXE_JITSON_TYPE_IS_UNIF | SXE_JITSON_TYPE_IS_HOMO; + + if (sxe_jitson_source_peek_nonspace(source) == ']') { // If it's an empty array, return it + sxe_jitson_source_consume(source, 1); + stack->jitsons[idx].type &= ~SXE_JITSON_TYPE_IS_ORD; // Empty arrays are not considered ordered + stack->jitsons[idx].integer = 1; // Offset past the empty array (not used if optimized) + return true; + } + + do { + previous = stack->last; + current = stack->count; // Index of JSON value about to be loaded + + if (!sxe_jitson_stack_load_json(stack, 
source)) // Value can be any JSON value + goto ERROR; + + /* If optimization is enabled and there's at least one element already in the array. + */ + if ((source->flags & SXE_JITSON_FLAG_OPTIMIZE) && stack->jitsons[idx].len > 0) { + /* If the array is currently ordered and the previous element is greater than the current one or they can't be + * compared, clear the ordered flag. + */ + if ((stack->jitsons[idx].type & SXE_JITSON_TYPE_IS_ORD) + && sxe_jitson_cmp(&stack->jitsons[previous], &stack->jitsons[current]) > 0) + stack->jitsons[idx].type &= ~SXE_JITSON_TYPE_IS_ORD; + + /* If the array is currently uniform and the previous element's size differs from the current one, clear the + * uniform flag. + */ + if ((stack->jitsons[idx].type & SXE_JITSON_TYPE_IS_UNIF) && (current - previous) != (stack->count - current)) + stack->jitsons[idx].type &= ~SXE_JITSON_TYPE_IS_UNIF; + + /* If the array is currently homogenous and the previous element's type differs from the current one, clear the + * homogenous flag. + */ + if ((stack->jitsons[idx].type & SXE_JITSON_TYPE_IS_HOMO) + && stack->jitsons[previous].type != stack->jitsons[current].type) + stack->jitsons[idx].type &= ~SXE_JITSON_TYPE_IS_HOMO; + } + + stack->jitsons[idx].len++; + } while ((c = sxe_jitson_source_get_nonspace(source)) == ','); + + if (c != ']') + goto INVALID; + + if (stack->jitsons[idx].len <= 1) // Arrays with a single element are not considered ordered + stack->jitsons[idx].type &= ~SXE_JITSON_TYPE_IS_ORD; + + if (stack->jitsons[idx].len && (stack->jitsons[idx].type & SXE_JITSON_TYPE_IS_UNIF)) { + stack->jitsons[idx].uniform.size = sizeof(struct sxe_jitson) * (stack->count - (idx + 1)) / stack->jitsons[idx].len; + stack->jitsons[idx].uniform.type = stack->jitsons[idx].type & SXE_JITSON_TYPE_IS_HOMO + ? 
stack->jitsons[stack->open].type : SXE_JITSON_TYPE_INVALID; // Mixed list + } + else + stack->jitsons[idx].integer = stack->count - idx; // Store the offset past the object + + return true; + + case '-': + case DIG: + if ((token = sxe_jitson_source_get_number(source, &len, &is_uint)) == NULL) + goto ERROR; + + if (is_uint) { + stack->jitsons[idx].type = SXE_JITSON_TYPE_NUMBER | SXE_JITSON_TYPE_IS_UINT; + stack->jitsons[idx].integer = strtoul(token, &endptr, + sxe_jitson_source_get_flags(source) & SXE_JITSON_FLAG_ALLOW_HEX ? 0 : 10); + SXEA6(endptr - token == (ptrdiff_t)len, "strtoul failed to parse '%.*s'", (int)len, token); + } else { + stack->jitsons[idx].type = SXE_JITSON_TYPE_NUMBER; + stack->jitsons[idx].number = strtod(token, &endptr); + SXEA6(endptr - token == (ptrdiff_t)len, "strtod failed to parse '%.*s'", (int)len, token); + } + + return true; + + case ALP: + token = sxe_jitson_source_get_identifier(source, &len); + + if ((jitson = sxe_jitson_const_get(sxe_jitson_source_get_flags(source), token, len))) { + if (jitson->type == sxe_jitson_const_type_cast) { // If it's a cast + struct sxe_jitson_stack iou; + + if (sxe_jitson_source_get_nonspace(source) != '(') { + SXEL2("Expected '(' after type name %.*s", (int)len, token); + goto ERROR; + } + + stack->count--; // Return the allocated jitson to the stack + sxe_jitson_stack_borrow(stack, &iou); + + if (sxe_jitson_stack_load_json(stack, source)) + cast_arg = sxe_jitson_stack_get_jitson(stack); + + sxe_jitson_stack_return(stack, &iou); + + if (!cast_arg) { + SXEL2("Expected JSON value after %.*s", (int)len, token); + goto ERROR; + } + + if (sxe_jitson_source_get_nonspace(source) != ')') { + SXEL2("Expected ')' after JSON value that follows type name %.*s", (int)len, token); + goto ERROR; + } + + if (!(*jitson->castfunc)(stack, cast_arg)) + goto ERROR; + + sxe_jitson_free(cast_arg); + return true; + } + + return sxe_jitson_stack_dup_at_index(stack, idx, jitson, 0); // Duplicate the constant's value + } + + if 
(sxe_jitson_stack_push_ident && (sxe_jitson_source_get_flags(source) & SXE_JITSON_FLAG_ALLOW_IDENTS)) { + stack->count--; // Return the allocated jitson to the stack + + if (!sxe_jitson_stack_push_ident(stack, token, len)) + goto ERROR; + + return true; + } + +#if SXE_DEBUG + if (sxe_jitson_source_get_flags(source) & SXE_JITSON_FLAG_ALLOW_CONSTS) + SXEL6(": Identifier '%.*s' is neither a JSON keyword nor a registered constant", (int)len, token); + else + SXEL6(": Identifier '%.*s' is not a JSON keyword", (int)len, token); +#endif + + __FALLTHROUGH; + default: + break; + } + +INVALID: + errno = EINVAL; + +ERROR: + sxe_jitson_free(cast_arg); + stack->count = idx; // Discard any data that this function added to the stack + return false; +} + +/** + * Parse a JSON from a string onto a sxe-jitson stack. + * + * @return Pointer into the json string to the character after the JSON parsed or NULL on error + * + * @note On error, any jitson values partially parsed onto the stack will be cleared. + */ +const char * +sxe_jitson_stack_parse_json(struct sxe_jitson_stack *stack, const char *json) +{ + struct sxe_jitson_source source; + + sxe_jitson_source_from_string(&source, json, sxe_jitson_flags); + + if (!sxe_jitson_stack_load_json(stack, &source)) + return NULL; + + return json + sxe_jitson_source_get_consumed(&source); +} + +/** + * Add or prepare to add a value to a collection + * + * @param stack The stack on which the collection is being constructed + * @param size Size in jitsons of the element, or 0 if the value will be added by the caller and its size is unknown + * @param elem_type The type of the element being added + * @param element Pointer to the element to add (required for _TYPE_MK_SORT) or NULL if the caller will add the value + * + * @return The index of the new element or SXE_JITSON_STACK_ERROR on error + * + * @note This function can be called by types that are constructing internal objects, but use with care. 
To facilitate this, + * if a size is passed but element is NULL, this function is assumed to be called after adding the element. + */ +unsigned +sxe_jitson_stack_add_value(struct sxe_jitson_stack *stack, unsigned size, uint32_t elem_type, const struct sxe_jitson *element) +{ + SXEA1(stack->open, "Can't add a value when there is no array or object under construction"); + SXEA6(size || !element, "Can't copy an element if the size is not known"); + + unsigned idx, len; + unsigned collection = stack->open - 1; // Can't use a pointer in case the stack is moved + unsigned type = stack->jitsons[collection].type; + bool is_array = (type & SXE_JITSON_TYPE_MASK) == SXE_JITSON_TYPE_ARRAY; + + SXEA1(is_array || (type & SXE_JITSON_TYPE_MASK) == SXE_JITSON_TYPE_OBJECT, "Values can only be added to arrays or objects"); + SXEA1(is_array || stack->jitsons[collection].partial.no_value, "Must add member name to an object before adding a value"); + + if ((len = stack->jitsons[collection].len)) { // If there's at least one element + if ((type & SXE_JITSON_TYPE_IS_UNIF) // If all elements so far are of uniform size + && size != (stack->count - (element ? 
0 : size) - (collection + 1)) / stack->jitsons[collection].len) { // No longer + SXEA1(!(type & SXE_JITSON_TYPE_MK_SORT), "Insertion sorted lists must have uniformly sized elements"); + stack->jitsons[collection].type &= ~SXE_JITSON_TYPE_IS_UNIF; + } + + if ((type & SXE_JITSON_TYPE_IS_HOMO) && (stack->jitsons[collection + 1].type & SXE_JITSON_TYPE_MASK) != elem_type) + stack->jitsons[collection].type &= ~SXE_JITSON_TYPE_IS_HOMO; // Type changed + } + + /* Keep track of the last value just added or about to be added + */ + if (element) { + if ((stack->last = sxe_jitson_stack_expand(stack, size)) == SXE_JITSON_STACK_ERROR) + return SXE_JITSON_STACK_ERROR; + } + else + stack->last = stack->count - size; + + idx = stack->last; + + /* If there's at least one element already in the array + */ + if (len) { + if (type & SXE_JITSON_TYPE_MK_SORT) { // If insertion sorting is desired + SXEA6(element, "Elements must be provided for insertion sorted lists"); + + if (sxe_jitson_cmp(&stack->jitsons[idx - size], element) > 0) { // Insertion is required + struct sxe_sortedarray_class elem_class; + unsigned i; + bool match; + + elem_class.size = size * sizeof(*element); + elem_class.keyoffset = 0; + elem_class.cmp = (int (*)(const void *, const void *))sxe_jitson_cmp; + elem_class.fmt = NULL; + elem_class.flags = SXE_SORTEDARRAY_CMP_CAN_FAIL; + + i = sxe_sortedarray_find(&elem_class, &stack->jitsons[collection + 1], len, element, &match); + SXEA6(i < len, "If insertion is required, array index must be found and < len"); + idx = collection + 1 + size * i; // Turn the array index into a stack index + memmove(&stack->jitsons[idx + 1], &stack->jitsons[idx], (len - i) * size * sizeof(*element)); + } + } + else if (type & SXE_JITSON_TYPE_IS_ORD) { + if (element) { + if (sxe_jitson_cmp(&stack->jitsons[stack->jitsons[collection].partial.last], element) > 0) + stack->jitsons[collection].type &= ~SXE_JITSON_TYPE_IS_ORD; + } + else if (size) { + if 
(sxe_jitson_cmp(&stack->jitsons[stack->jitsons[collection].partial.last], &stack->jitsons[stack->last]) > 0) + stack->jitsons[collection].type &= ~SXE_JITSON_TYPE_IS_ORD; + } + } + } + + if (element) + memcpy(&stack->jitsons[idx], element, size * sizeof(*element)); + + if (is_array) { + if (size) + stack->jitsons[collection].partial.last = stack->last; + } + else + stack->jitsons[collection].partial.no_value = false; + + stack->jitsons[collection].len++; + return idx; +} + +/** + * Check optimization after adding a variably sized value (copied string or array) to an array + * + * @param stack The stack the array is being constructed on + * @param flags The flags in effect, needed in case optimization is disabled + * + * @return true; always succeeds, but the return allows chained construction + * + * @note This function can be called by types that are constructing internal objects, but use with care. + */ +bool +sxe_jitson_stack_value_added(struct sxe_jitson_stack *stack, unsigned flags) +{ + struct sxe_jitson *collection = &stack->jitsons[stack->open - 1]; // Safe to use a pointer because no stack expansion + + /* If optimization is enabled and the collection is an array + */ + if (flags & SXE_JITSON_FLAG_OPTIMIZE && (collection->type & SXE_JITSON_TYPE_MASK) == SXE_JITSON_TYPE_ARRAY) { + if (collection->len > 1) { // If there are at least 2 elements + /* If the array is currently ordered and the previous element is greater than the current one or they can't be + * compared, clear the ordered flag. 
+             */
+            if ((collection->type & SXE_JITSON_TYPE_IS_ORD)
+                && sxe_jitson_cmp(&stack->jitsons[collection->partial.last], &stack->jitsons[stack->last]) > 0)
+                collection->type &= ~SXE_JITSON_TYPE_IS_ORD;
+        }
+
+        collection->partial.last = stack->last;
+    }
+
+    return true;
+}
+
+/**
+ * Begin construction of an object on a stack
+ *
+ * If a collection is already open, the new collection is first registered as a value of it. The new collection's jitson
+ * records the previously open collection in partial.collection and then becomes the open one (stack->open holds its
+ * index + 1, so that 0 means nothing is open).
+ *
+ * @param stack The jitson stack
+ * @param type  SXE_JITSON_TYPE_OBJECT, SXE_JITSON_TYPE_ARRAY, or SXE_JITSON_TYPE_ARRAY | SXE_JITSON_TYPE_MK_SORT
+ *
+ * @return true on success, false on allocation failure
+ *
+ * @note SXE_JITSON_TYPE_ARRAY | SXE_JITSON_TYPE_MK_SORT allows you to construct a sorted array out of order. This makes
+ *       construction inefficient, but can speed evaluation of membership and intersection operations. Elements must be of a
+ *       single orderable type and must be of uniform fixed size, or additions will fail.
+ *
+ * @note NOTE(review): if the stack expansion fails after the enclosing collection was told about the new value, the
+ *       enclosing collection's length is not rolled back here - confirm whether callers rely on that.
+ */
+bool
+sxe_jitson_stack_open_collection(struct sxe_jitson_stack *stack, uint32_t type)
+{
+    unsigned idx;
+    uint32_t type_check = type & ~SXE_JITSON_TYPE_IS_LOCAL;    // The local flag is ignored when validating the type
+
+    SXE_USED_IN_DEBUG(type_check);
+
+    SXEA6(type_check == SXE_JITSON_TYPE_ARRAY || type_check == SXE_JITSON_TYPE_OBJECT
+          || type_check == (SXE_JITSON_TYPE_ARRAY | SXE_JITSON_TYPE_MK_SORT), "Only arrays and objects can be constructed");
+
+    if (stack->open)    // If there's already an open collection, prepare to add the new collection as a value
+        sxe_jitson_stack_add_value(stack, 0, type, NULL);
+
+    if ((idx = sxe_jitson_stack_expand(stack, 1)) == SXE_JITSON_STACK_ERROR)
+        return false;    /* COVERAGE EXCLUSION: Out of memory condition */
+
+    if ((type & SXE_JITSON_TYPE_MASK) == SXE_JITSON_TYPE_ARRAY    // If it's an array and either optimized or sorted
+        && ((sxe_jitson_flags & SXE_JITSON_FLAG_OPTIMIZE) || (type & SXE_JITSON_TYPE_MK_SORT)))
+        stack->jitsons[idx].type = type | SXE_JITSON_TYPE_IS_ORD | SXE_JITSON_TYPE_IS_UNIF | SXE_JITSON_TYPE_IS_HOMO;
+    else
+        stack->jitsons[idx].type = type;
+
+    stack->jitsons[idx].len                = 0;
+    stack->jitsons[idx].partial.no_value   = false;    // If it's an array, sets last to 0, which is harmless
+    stack->jitsons[idx].partial.collection = stack->open;    // Remember the enclosing open collection (0 if none)
+    stack->open                            = idx + 1;
+    return true;
+}
+
+/**
+ * Add a string to the stack.
+ *
+ * References (and owned strings, which are treated as references) occupy a single jitson holding the pointer. Copied
+ * strings store their first SXE_JITSON_STRING_SIZE bytes in the first jitson and continue through as many following
+ * jitsons as are needed, SXE_JITSON_TOKEN_SIZE bytes at a time, including the trailing NUL.
+ *
+ * @param stack  The jitson stack
+ * @param string The string
+ * @param type   SXE_JITSON_TYPE_IS_COPY, SXE_JITSON_TYPE_IS_REF, or SXE_JITSON_TYPE_IS_OWN
+ *
+ * @return true on success, false on out of memory (ENOMEM) or copied string too long (ENAMETOOLONG)
+ *
+ * @note When called internally, type may include SXE_JITSON_TYPE_IS_KEY
+ */
+bool
+sxe_jitson_stack_push_string(struct sxe_jitson_stack *stack, const char *string, uint32_t type)
+{
+    unsigned idx, unwind;    // unwind is the index of the string's first jitson, used to roll the stack back on error
+
+    if ((unwind = sxe_jitson_stack_expand(stack, 1)) == SXE_JITSON_STACK_ERROR)
+        return false;    /* COVERAGE EXCLUSION: Out of memory condition */
+
+    type = type & SXE_JITSON_TYPE_IS_OWN ? SXE_JITSON_TYPE_IS_REF | type : type;    // An owned string is also a reference
+    stack->jitsons[unwind].type = SXE_JITSON_TYPE_STRING | type;
+
+    if (type & SXE_JITSON_TYPE_IS_REF) {    // Not a copy (a reference, possibly giving ownership to the object)
+        stack->jitsons[unwind].reference = string;
+        stack->jitsons[unwind].len       = 0;    // The length of a referenced string is not computed here
+        return true;
+    }
+
+    size_t len = strlen(string);    // SonarQube False Positive
+
+    if ((uint32_t)len != len) {    // The length must fit in the jitson's 32 bit len field
+        errno        = ENAMETOOLONG;    /* COVERAGE EXCLUSION: Copied string > 4294967295 characters */
+        stack->count = unwind;          /* COVERAGE EXCLUSION: Copied string > 4294967295 characters */
+        return false;                   /* COVERAGE EXCLUSION: Copied string > 4294967295 characters */
+    }
+
+    stack->jitsons[unwind].len = (uint32_t)len;
+
+    if (len < SXE_JITSON_STRING_SIZE) {    // The string and its NUL fit in the first jitson
+        memcpy(stack->jitsons[unwind].string, string, len + 1);
+        return true;
+    }
+
+    memcpy(stack->jitsons[unwind].string, string, SXE_JITSON_STRING_SIZE);
+
+    /* Copy the remainder a whole jitson at a time; the loop is only left by one of the returns inside it.
+     */
+    for (string += SXE_JITSON_STRING_SIZE, len -= SXE_JITSON_STRING_SIZE; ; len -= SXE_JITSON_TOKEN_SIZE) {
+        if ((idx = sxe_jitson_stack_expand(stack, 1)) == SXE_JITSON_STACK_ERROR) {
+            stack->count = unwind;
+            return false;
+        }
+
+        if (len < SXE_JITSON_TOKEN_SIZE) {    // Final piece, copied together with the trailing NUL
+            memcpy(&stack->jitsons[idx], string, len + 1);
+            return true;
+        }
+
+        memcpy(&stack->jitsons[idx], string, SXE_JITSON_TOKEN_SIZE);
+        string += SXE_JITSON_TOKEN_SIZE;
+    }
+
+    return true;    // Not reached: the loop above has no exit condition
+}
+
+/**
+ * Add a string to the stack in reverse order
+ *
+ * Unlike sxe_jitson_stack_push_string, all of the jitsons needed are allocated in a single expansion and the characters
+ * are written back to front through the first jitson's string member, running on into the jitsons that follow it.
+ *
+ * @param stack  The jitson stack
+ * @param string The string
+ * @param len    The length of the string or 0 to compute it on the fly
+ *
+ * @return true on success, false on out of memory (ENOMEM) or string too long (ENAMETOOLONG)
+ */
+bool
+sxe_jitson_stack_push_string_reversed(struct sxe_jitson_stack *stack, const char *string, size_t len)
+{
+    unsigned idx, i;
+
+    len = len ?: strlen(string);    // GNU omitted-operand conditional: keep a non-zero len, else measure the string
+
+    if ((uint32_t)len != len) {
+        errno = ENAMETOOLONG;    /* COVERAGE EXCLUSION: Copied string > 4294967295 characters */
+        return false;            /* COVERAGE EXCLUSION: Copied string > 4294967295 characters */
+    }
+
+    /* Length to number of jitson tokens needed: 0-7 -> 1, 8-23 -> 2, 24-39 -> 3, ...
+     */
+    idx = sxe_jitson_stack_expand(stack, (len + 2 * SXE_JITSON_TOKEN_SIZE - SXE_JITSON_STRING_SIZE) / SXE_JITSON_TOKEN_SIZE);    // SonarQube False Positive
+
+    if (idx == SXE_JITSON_STACK_ERROR)
+        return false;    /* COVERAGE EXCLUSION: Out of memory condition */
+
+    stack->jitsons[idx].type = SXE_JITSON_TYPE_STRING | SXE_JITSON_TYPE_IS_COPY | SXE_JITSON_TYPE_REVERSED;
+    stack->jitsons[idx].len  = len;    // SonarQube False Positive
+
+    for (i = 0; i < len; i++)    // Source character i lands at mirrored position len - i - 1
+        stack->jitsons[idx].string[len - i - 1] = string[i];
+
+    stack->jitsons[idx].string[len] = '\0';
+    return true;
+}
+
+/**
+ * Add a member name to the object being constructed on the stack
+ *
+ * @param stack The jitson stack
+ * @param name  The member name
+ * @param type  SXE_JITSON_TYPE_IS_COPY, SXE_JITSON_TYPE_IS_REF, or SXE_JITSON_TYPE_IS_OWN
+ *
+ * @return true on success, false on out of memory (ENOMEM) or copied string too long (ENAMETOOLONG)
+ */
+bool
+sxe_jitson_stack_add_member_name(struct sxe_jitson_stack *stack, const char *name, uint32_t type)
+{
+
unsigned object; + + SXEA1(stack->open, "Can't add a member name when there is no object under construction"); + SXEA1((stack->jitsons[object = stack->open - 1].type & ~SXE_JITSON_TYPE_IS_LOCAL) == SXE_JITSON_TYPE_OBJECT, "Member names can only be added to objects"); + SXEA1(!stack->jitsons[object].partial.no_value, "Member name already added without a value"); + SXEA1(!(type & ~(SXE_JITSON_TYPE_IS_REF | SXE_JITSON_TYPE_IS_OWN | SXE_JITSON_TYPE_IS_LOCAL)), "Unexected type flags 0x%x", (unsigned)type); + + stack->jitsons[object].partial.no_value = 1; // (uint8_t)true + return sxe_jitson_stack_push_string(stack, name, type | SXE_JITSON_TYPE_IS_KEY); +} + +/** + * Add a string to the object or array being constructed on the stack + * + * @param stack The jitson stack + * @param string The NUL terminated string + * @param type SXE_JITSON_TYPE_IS_COPY, SXE_JITSON_TYPE_IS_REF, or SXE_JITSON_TYPE_IS_OWN + * + * @return true on success, false on out of memory (ENOMEM) or copied string too long (ENAMETOOLONG) + */ +bool +sxe_jitson_stack_add_string(struct sxe_jitson_stack *stack, const char *string, uint32_t type) +{ + SXEA1(stack->open, "Can't add a value when there is no array or object under construction"); + SXEA1(!(type & ~(SXE_JITSON_TYPE_IS_REF | SXE_JITSON_TYPE_IS_OWN)), "Unexected type flags 0x%x", (unsigned)type); + type = type == SXE_JITSON_TYPE_IS_OWN ? 
SXE_JITSON_TYPE_IS_OWN | SXE_JITSON_TYPE_IS_REF : type; + + if (type & SXE_JITSON_TYPE_IS_REF) { // String references are fixed size, allowing sorted array construction + struct sxe_jitson string_ref; + + sxe_jitson_make_string_ref(&string_ref, string); + string_ref.type |= type & SXE_JITSON_TYPE_IS_OWN; + return sxe_jitson_stack_add_value(stack, 1, SXE_JITSON_TYPE_STRING, &string_ref) != SXE_JITSON_STACK_ERROR; + } + + if (sxe_jitson_stack_add_value(stack, 0, SXE_JITSON_TYPE_STRING, NULL) == SXE_JITSON_STACK_ERROR) + return false; /* COVERAGE EXCLUSION: Out of memory */ + + stack->last = stack->count; // Keep track of the last value loaded on the stack + + if (!sxe_jitson_stack_push_string(stack, string, type)) { + stack->jitsons[stack->open - 1].len--; // On error, remove the partial element added above in _add_value + return false; + } + + sxe_jitson_stack_value_added(stack, sxe_jitson_flags); + return true; +} + +/** + * Add a null value to the array or object being constructed on the stack + * + * @param stack The jitson stack + * + * @return true on success, false if memory allocation failed + */ +bool +sxe_jitson_stack_add_null(struct sxe_jitson_stack *stack) +{ + return sxe_jitson_stack_add_value(stack, 1, SXE_JITSON_TYPE_NULL, sxe_jitson_null) != SXE_JITSON_STACK_ERROR; +} + +/** + * Add a boolean value to the array or object being constructed on the stack + * + * @param stack The jitson stack + * @param boolean The boolean value + * + * @return true on success, false if memory allocation failed + */ +bool +sxe_jitson_stack_add_bool(struct sxe_jitson_stack *stack, bool boolean) +{ + return sxe_jitson_stack_add_value(stack, 1, SXE_JITSON_TYPE_BOOL, boolean ? 
sxe_jitson_true : sxe_jitson_false) + != SXE_JITSON_STACK_ERROR; +} + +/** + * Add a number to the array or object being constructed on the stack + * + * @param stack The jitson stack + * @param number The numeric value + * + * @return true on success, false if memory allocation failed + */ +bool +sxe_jitson_stack_add_number(struct sxe_jitson_stack *stack, double number) +{ + struct sxe_jitson value; + + sxe_jitson_make_number(&value, number); + + return sxe_jitson_stack_add_value(stack, 1, SXE_JITSON_TYPE_NUMBER, &value) != SXE_JITSON_STACK_ERROR; +} + +/** + * Add an unsigned integer to the array or object being constructed on the stack + * + * @param stack The jitson stack + * @param uint The unsigned integer value + * + * @return true on success, false if memory allocation failed + */ +bool +sxe_jitson_stack_add_uint(struct sxe_jitson_stack *stack, uint64_t uint) +{ + struct sxe_jitson value; + + sxe_jitson_make_uint(&value, uint); + + return sxe_jitson_stack_add_value(stack, 1, SXE_JITSON_TYPE_NUMBER, &value) != SXE_JITSON_STACK_ERROR; +} + +/** + * Add a reference value to the array or object being constructed on the stack + * + * @param stack The jitson stack + * @param to The jitson to add a reference to + * + * @return true on success, false if memory allocation failed + */ +bool +sxe_jitson_stack_add_reference(struct sxe_jitson_stack *stack, const struct sxe_jitson *to) +{ + struct sxe_jitson value; + + sxe_jitson_make_reference(&value, to); + + return sxe_jitson_stack_add_value(stack, 1, to->type, &value) != SXE_JITSON_STACK_ERROR; +} + +/** + * Add a duplicate of a value to the array or object being constructed on the stack + * + * @param stack The jitson stack + * @param to The jitson to add a duplicate of + * + * @return true on success, false if memory allocation failed + */ +bool +sxe_jitson_stack_add_dup(struct sxe_jitson_stack *stack, const struct sxe_jitson *value) +{ + unsigned size = sxe_jitson_size(value); + + if 
(!sxe_jitson_stack_dup_at_index(stack, stack->count, value, size)) + return false; + + SXEA1(sxe_jitson_stack_add_value(stack, size, value->type, NULL) != SXE_JITSON_STACK_ERROR, "Can't fail in this case"); + sxe_jitson_stack_value_added(stack, sxe_jitson_flags); + return true; +} + +/** + * Add duplicates of all members of an object to the object being constructed on the stack + * + * @param stack The jitson stack + * @param jitson The object whose members are to be duplicated + * + * @return true on success, false if memory allocation failed + */ +bool +sxe_jitson_stack_add_dup_members(struct sxe_jitson_stack *stack, const struct sxe_jitson *jitson) +{ + unsigned len, object = stack->open - 1; + uint32_t idx, size; + + SXEA1(stack->open, "Can't add members when no object is under construction"); + SXEA1((stack->jitsons[object].type & ~SXE_JITSON_TYPE_IS_LOCAL) == SXE_JITSON_TYPE_OBJECT, "Members can only be added to an object"); + SXEA1(!stack->jitsons[object].partial.no_value, "Member name already added without a value"); + + jitson = sxe_jitson_is_reference(jitson) ? jitson->jitref : jitson; + SXEA1((jitson->type & SXE_JITSON_TYPE_MASK) == SXE_JITSON_TYPE_OBJECT, "Can't add members from JSON type %s", + sxe_jitson_get_type_as_str(jitson)); + + if ((len = jitson->len) == 0) + return true; + + size = sxe_jitson_size(jitson) - 1; // Don't include the object itself + + if ((idx = sxe_jitson_stack_expand(stack, size)) == SXE_JITSON_STACK_ERROR) + return false; + + memcpy(&stack->jitsons[idx], jitson + 1, size * sizeof(*jitson)); + + if (!sxe_jitson_object_clone_members(jitson, &stack->jitsons[idx - 1], len)) + return false; + + stack->jitsons[object].len += len; + return true; +} + +/** + * Finish construction of an object or array on a stack + * + * @note Aborts if the object is not a collection under construction, has an incomplete nested object, or is an object and has a + * member name without a matching value. + * + * @return true. 
This is returned to allow further calls (e.g. sxe_jitson_stack_get_jitson) to be chained with &&. + */ +bool +sxe_jitson_stack_close_collection(struct sxe_jitson_stack *stack) +{ + SXEA1(stack->open, "There must be an open collection on the stack"); + + unsigned idx = stack->open - 1; + struct sxe_jitson *collection = &stack->jitsons[idx]; // Safe to use a pointer because no stack expansion is done + + SXEA1(collection->type != SXE_JITSON_TYPE_OBJECT || !collection->partial.no_value, + "Index %u is an object with a member name with no value", idx); + SXEA1(collection->partial.collection < stack->open, "Previous collection is not < the one being closed"); + + stack->open = collection->partial.collection; + + if (collection->len && (collection->type & (SXE_JITSON_TYPE_MASK | SXE_JITSON_TYPE_IS_UNIF)) + == (SXE_JITSON_TYPE_ARRAY | SXE_JITSON_TYPE_IS_UNIF)) { + collection->uniform.size = sizeof(struct sxe_jitson) * (stack->count - (idx + 1)) / collection->len; + collection->uniform.type = collection->type & SXE_JITSON_TYPE_IS_HOMO ? (SXE_JITSON_TYPE_MASK & collection[1].type) + : SXE_JITSON_TYPE_INVALID; // Not flagged homogeneous: a mixed list + } + else + collection->integer = stack->count - idx; // Store the size in jitsons (the offset past the end of the object or array) + + if (stack->open) + sxe_jitson_stack_value_added(stack, sxe_jitson_flags); + + return true; +} + +/** + * Push a concatenation of two arrays to the stack.
+ * + * @param stack The jitson stack + * @param array1 The first array + * @param array2 The second array + * @param type SXE_JITSON_TYPE_IS_REF if the concatenation refers to the two arrays, SXE_JITSON_TYPE_IS_OWN if it owns their storage + * + * @return true on success, false on out of memory (ENOMEM) or if concatenation's length exceeds 4294967295 (EOVERFLOW) + */ +bool +sxe_jitson_stack_push_concat_array(struct sxe_jitson_stack *stack, const struct sxe_jitson *array1, + const struct sxe_jitson *array2, uint32_t type) +{ + size_t len; + unsigned idx; + + SXEA6(sxe_jitson_get_type(array1) == SXE_JITSON_TYPE_ARRAY && sxe_jitson_get_type(array2) == SXE_JITSON_TYPE_ARRAY, + "Array arguments must be arrays, not %s and %s", sxe_jitson_get_type_as_str(array1), sxe_jitson_get_type_as_str(array2)); + SXEA6(type == SXE_JITSON_TYPE_IS_REF || type == SXE_JITSON_TYPE_IS_OWN, + "Type must be SXE_JITSON_TYPE_IS_REF or SXE_JITSON_TYPE_IS_OWN, not %"PRIu32, type); + + len = sxe_jitson_len_array(array1) + sxe_jitson_len_array(array2); + + if (len != (uint32_t)len) { + SXEL2(": concatenated array len %zu exceeds %u", len, ~0U); + errno = EOVERFLOW; + return false; + } + + if ((idx = sxe_jitson_stack_expand(stack, 2)) == SXE_JITSON_STACK_ERROR) + return false; /* COVERAGE EXCLUSION: Out of memory condition */ + + stack->jitsons[idx].type = SXE_JITSON_TYPE_ARRAY | SXE_JITSON_TYPE_IS_REF | type; + stack->jitsons[idx].len = len; // SonarQube False Positive + (&stack->jitsons[idx].reference)[0] = array1; + (&stack->jitsons[idx].reference)[1] = array2; // SonarQube False Positive + return true; +} + +#if SXE_DEBUG + +/* Recursively format a possibly partial collection on the stack using a string factory + */ +static unsigned +sxe_jitson_stack_to_str_iterator(const struct sxe_jitson_stack *stack, unsigned i, struct sxe_factory *factory) +{ + char *text; + size_t len; + unsigned j; + bool is_object = false; + + if (sxe_jitson_get_type(&stack->jitsons[i]) == SXE_JITSON_TYPE_ARRAY) +
sxe_factory_add(factory, "[", 1); + else if (sxe_jitson_get_type(&stack->jitsons[i]) == SXE_JITSON_TYPE_OBJECT) { + sxe_factory_add(factory, "{", 1); + is_object = true; + } + else { + text = sxe_jitson_to_json(&stack->jitsons[i], &len); + sxe_factory_add(factory, text, len); + kit_free(text); + return i + sxe_jitson_size(&stack->jitsons[i]); + } + + unsigned count = stack->jitsons[i].len; + + for (i++, j = 0; j < count; j++) { // For each element or member of the collection + if (j > 0) + sxe_factory_add(factory, ",", 1); + + if (i >= stack->count) + return i; + + if (is_object) { + SXEA1(sxe_jitson_get_type(&stack->jitsons[i]) == SXE_JITSON_TYPE_STRING + && (stack->jitsons[i].type & SXE_JITSON_TYPE_IS_KEY), "Object members must be preceded by a key"); + i = sxe_jitson_stack_to_str_iterator(stack, i, factory); + sxe_factory_add(factory, ":", 1); + + if (i >= stack->count) + return i; + } + + if ((i = sxe_jitson_stack_to_str_iterator(stack, i, factory)) >= stack->count) + return i; + } + + if (is_object) + sxe_factory_add(factory, "}", 1); + else + sxe_factory_add(factory, "]", 1); + + return i; +} + +/* Diagnostic function that dumps a stack as a string in a static per thread buffer which is overwritten on the next call + */ +const char * +sxe_jitson_stack_to_str(const struct sxe_jitson_stack *stack) +{ + static __thread char buf[4096]; + struct sxe_factory factory; + char *text; + size_t len; + + sxe_factory_alloc_make(&factory, 0, 0); + sxe_jitson_stack_to_str_iterator(stack, 0, &factory); + text = sxe_factory_remove(&factory, &len); + + if (len < sizeof(buf)) { + memcpy(buf, text, len); + buf[len] = '\0'; + } + else { + memcpy(buf, text, sizeof(buf) - 4); + memcpy(&buf[sizeof(buf) - 4], "...", 4); + } + + kit_free(text); + return buf; +} + +#endif diff --git a/lib-sxe-jitson/sxe-jitson-type.c b/lib-sxe-jitson/sxe-jitson-type.c new file mode 100644 index 0000000..16fbb55 --- /dev/null +++ b/lib-sxe-jitson/sxe-jitson-type.c @@ -0,0 +1,979 @@ +/* Copyright (c)
2022 Jim Belton + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "sxe-jitson.h" +#include "sxe-jitson-const.h" +#include "sxe-jitson-oper.h" +#include "sxe-log.h" +#include "sxe-util.h" + +struct sxe_jitson_type { + const char *name; + const void *extra; // Allow the creator of a type to add extra information + void (*free)( struct sxe_jitson *); // Free any memory allocated to the value (e.g. indices) + int (*test)( const struct sxe_jitson *); // Return true (1), false (0) or error (-1) + uint32_t (*size)( const struct sxe_jitson *); // The size in "struct sxe_jitson"s of the value.
+ size_t (*len)( const struct sxe_jitson *); // The logical length (strlen, number of elements, number of members) + bool (*clone)( const struct sxe_jitson *, struct sxe_jitson *); // Clone (deep copy) a value's data + char * (*build_json)(const struct sxe_jitson *, struct sxe_factory *); // Format a value as JSON text using the factory + int (*cmp)( const struct sxe_jitson *, const struct sxe_jitson *); // Compare two values <, =, or > + int (*eq)( const struct sxe_jitson *, const struct sxe_jitson *); // Equal (1) or not (0), -1 on error +}; + +const struct sxe_jitson *sxe_jitson_false = NULL; +const struct sxe_jitson *sxe_jitson_true = NULL; +const struct sxe_jitson *sxe_jitson_null = NULL; + +uint32_t sxe_jitson_flags = 0; // Flags passed at initialization + +static uint32_t type_count = 0; +static struct sxe_jitson_type *jitson_types = NULL; +static struct sxe_factory type_factory[1]; + +/** + * Register a type. + * + * @param name The name of the type. e.g. "bool" is the name of the boolean type whose values are true and false + * @param free Free a value of this type. e.g. sxe_jitson_free_base is often a good default + * @param test Test a value of this type, returning SXE_JITSON_TEST_TRUE, SXE_JITSON_TEST_FALSE, or SXE_JITSON_TEST_ERROR. + * @param size Return the size of a value of this type. e.g. sxe_jitson_size_1 for types guaranteed to fit in one jitson + * @param len Return the length of a value. e.g. sxe_jitson_type_len or NULL for types that don't have a length. + * @param clone Deep copy a value of this type. NULL if the type can be copied without extra work (e.g. numbers, bools) + * @param build_json Convert a value back into JSON using the given factory + * @param cmp Determine whether a value is <, =, or > another value. NULL for types that can't be ordered. + * @param eq Determine whether two values are equal. NULL to use cmp or if no comparisons are supported.
+ */ +uint32_t +sxe_jitson_type_register(const char *name, + void (*free)( struct sxe_jitson *), + int (*test)( const struct sxe_jitson *), + uint32_t (*size)( const struct sxe_jitson *), + size_t (*len)( const struct sxe_jitson *), + bool (*clone)( const struct sxe_jitson *, struct sxe_jitson *), + char * (*build_json)(const struct sxe_jitson *, struct sxe_factory *), + int (*cmp)( const struct sxe_jitson *, const struct sxe_jitson *), + int (*eq)( const struct sxe_jitson *, const struct sxe_jitson *)) +{ + uint32_t type = type_count++; + + SXEA1(jitson_types, ":sxe_jitson_type_init has not been called"); + SXEA1(jitson_types = (struct sxe_jitson_type *)sxe_factory_reserve(type_factory, type_count * sizeof(*jitson_types)), + "Couldn't allocate %u jitson types", type_count); + jitson_types[type].name = name; + jitson_types[type].extra = NULL; + jitson_types[type].free = free; + jitson_types[type].test = test; + jitson_types[type].size = size; + jitson_types[type].len = len; + jitson_types[type].clone = clone; + jitson_types[type].build_json = build_json; + jitson_types[type].cmp = cmp; + jitson_types[type].eq = eq; + + sxe_jitson_oper_increase_num_types(type); + return type; +} + +/* Get extra type info set below + */ +const void * +sxe_jitson_type_get_extra(unsigned type) +{ + return jitson_types[type].extra; +} + +/* Set extra type info used only by the caller + */ +void +sxe_jitson_type_set_extra(unsigned type, const void *extra) +{ + jitson_types[type].extra = extra; +} + +const char * +sxe_jitson_type_to_str(unsigned type) +{ + if (type > type_count) { + errno = ERANGE; + return "ERROR"; + } + + return jitson_types[type].name; +} + +unsigned +sxe_jitson_get_type(const struct sxe_jitson *jitson) +{ + if (sxe_jitson_is_reference(jitson)) + return sxe_jitson_get_type(jitson->jitref); + + return jitson->type & SXE_JITSON_TYPE_MASK; +} + +/* Most types can use this as a free function, and those that don't should call it after any special work they do. 
+ */ +void +sxe_jitson_free_base(struct sxe_jitson *jitson) +{ + if (jitson->type & SXE_JITSON_TYPE_IS_OWN) { // If this jitson contains a reference to a value or index that it owns + // Atomically nullify the reference in case there is a race, though calling code should ensure this is not the case + void *reference = __sync_lock_test_and_set(&jitson->index, NULL); + kit_free(reference); + } + + jitson->type = SXE_JITSON_TYPE_INVALID; +} + +uint32_t +sxe_jitson_size_1(const struct sxe_jitson *jitson) +{ + SXE_UNUSED_PARAMETER(jitson); + return 1; +} + +static int +sxe_jitson_null_test(const struct sxe_jitson *jitson) +{ + SXE_UNUSED_PARAMETER(jitson); + return SXE_JITSON_TEST_FALSE; +} + +static char * +sxe_jitson_null_build_json(const struct sxe_jitson *jitson, struct sxe_factory *factory) +{ + SXE_UNUSED_PARAMETER(jitson); + sxe_factory_add(factory, "null", 4); + return sxe_factory_look(factory, NULL); +} + +static int +sxe_jitson_null_eq(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + SXE_UNUSED_PARAMETER(left); + SXE_UNUSED_PARAMETER(right); + return SXE_JITSON_TEST_TRUE; // There's only one value, so nulls always equal each other +} + +uint32_t +sxe_jitson_type_register_null(void) +{ + return sxe_jitson_type_register("null", sxe_jitson_free_base, sxe_jitson_null_test, sxe_jitson_size_1, NULL, NULL, + sxe_jitson_null_build_json, NULL, sxe_jitson_null_eq); +} + +static int +sxe_jitson_bool_test(const struct sxe_jitson *jitson) +{ + return jitson->boolean ? SXE_JITSON_TEST_TRUE : SXE_JITSON_TEST_FALSE; +} + +static char * +sxe_jitson_bool_build_json(const struct sxe_jitson *jitson, struct sxe_factory *factory) +{ + if (jitson->boolean) + sxe_factory_add(factory, "true", 4); + else + sxe_factory_add(factory, "false", 5); + + return sxe_factory_look(factory, NULL); +} + +static int +sxe_jitson_bool_eq(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + return left->boolean == right->boolean ?
SXE_JITSON_TEST_TRUE : SXE_JITSON_TEST_FALSE; +} + +uint32_t +sxe_jitson_type_register_bool(void) +{ + return sxe_jitson_type_register("bool", sxe_jitson_free_base, sxe_jitson_bool_test, sxe_jitson_size_1, NULL, NULL, + sxe_jitson_bool_build_json, NULL, sxe_jitson_bool_eq); +} + +static int +sxe_jitson_number_test(const struct sxe_jitson *jitson) +{ + return jitson->number != 0.0 ? SXE_JITSON_TEST_TRUE : SXE_JITSON_TEST_FALSE; +} + +#define NUMBER_MAX_LEN 24 // Enough space for the largest double and the largest uint64_t + +static char * +sxe_jitson_number_build_json(const struct sxe_jitson *jitson, struct sxe_factory *factory) +{ + char *ret; + unsigned len; + + if ((ret = sxe_factory_reserve(factory, NUMBER_MAX_LEN)) == NULL) + return NULL; + + if (jitson->type & SXE_JITSON_TYPE_IS_UINT) { + len = snprintf(ret, NUMBER_MAX_LEN + 1, "%"PRIu64, sxe_jitson_get_uint(jitson)); + SXEA6(len <= NUMBER_MAX_LEN, "As a string, numeric value %"PRIu64" is more than %u characters long", + sxe_jitson_get_uint(jitson), NUMBER_MAX_LEN); + } else { + /* The format %.17G comes from: + * https://stackoverflow.com/questions/16839658/printf-width-specifier-to-maintain-precision-of-floating-point-value + * + * Using .17G reveals rounding errors in the encoding of fractions. For example: + * got: "pi":3.1415899999999999,"number":1.1415900000000001 + * expected: "pi":3.14159, "number":1.14159 + * + * Reducing the format to .16G rounds the numbers back to what was originally parsed.
+ */ + len = snprintf(ret, NUMBER_MAX_LEN + 1, "%.16G", sxe_jitson_get_number(jitson)); + SXEA6(len <= NUMBER_MAX_LEN, "As a string, numeric value %.16G is more than %u characters long", + sxe_jitson_get_number(jitson), NUMBER_MAX_LEN); + } + + sxe_factory_commit(factory, len); + return sxe_factory_look(factory, NULL); +} + +static int +sxe_jitson_number_cmp(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + uint64_t cast_uint; + double cast_float; + + if (left->type & SXE_JITSON_TYPE_IS_UINT) { + if (right->type & SXE_JITSON_TYPE_IS_UINT) + return sxe_uint64_cmp(left->integer, right->integer); + + if ((double)(cast_uint = (uint64_t)right->number) == right->number) + return sxe_uint64_cmp(left->integer, cast_uint); + + if ((uint64_t)(cast_float = (double)left->integer) == left->integer) + return cast_float < right->number ? -1 : cast_float == right->number ? 0 : 1; + + SXEL2(": Cannot compare %"PRIu64" with %.16g", left->integer, right->number); + return SXE_JITSON_CMP_ERROR; + } + + if (!(right->type & SXE_JITSON_TYPE_IS_UINT)) + return left->number < right->number ? -1 : left->number == right->number ? 0 : 1; + + if ((double)(cast_uint = (uint64_t)left->number) == left->number) + return sxe_uint64_cmp(cast_uint, right->integer); + + if ((uint64_t)(cast_float = (double)right->integer) == right->integer) + return left->number < cast_float ? -1 : left->number == cast_float ?
0 : 1; + + SXEL2(": Cannot compare %.16g with %"PRIu64, left->number, right->integer); + return SXE_JITSON_CMP_ERROR; +} + +uint32_t +sxe_jitson_type_register_number(void) +{ + return sxe_jitson_type_register("number", sxe_jitson_free_base, sxe_jitson_number_test, sxe_jitson_size_1, NULL, NULL, + sxe_jitson_number_build_json, sxe_jitson_number_cmp, NULL); +} + +size_t +sxe_jitson_string_len(const struct sxe_jitson *jitson) +{ + size_t len; + + if (jitson->type & SXE_JITSON_TYPE_IS_KEY) // Object keys use the len field to store a link offset + return jitson->type & SXE_JITSON_TYPE_IS_REF ? strlen(jitson->reference) : strlen(jitson->string); // SonarQube False Positive + + if (jitson->len == 0 && (jitson->type & SXE_JITSON_TYPE_IS_REF)) { + len = strlen(jitson->reference); // SonarQube False Positive + + if ((uint32_t)len == len) // Can't cache the length if > 4294967295 + /* This is thread safe because the assignment is atomic and the referenced string is immutable + */ + ((struct sxe_jitson *)(uintptr_t)jitson)->len = (uint32_t)len; + + return len; + } + + return jitson->len; +} + +static uint32_t +sxe_jitson_string_size(const struct sxe_jitson *jitson) +{ + if (jitson->type & SXE_JITSON_TYPE_IS_REF) + return 1; + + return 1 + + (uint32_t)((sxe_jitson_string_len(jitson) + SXE_JITSON_TOKEN_SIZE - SXE_JITSON_STRING_SIZE) / SXE_JITSON_TOKEN_SIZE); +} + +int +sxe_jitson_string_test(const struct sxe_jitson *jitson) +{ + if (jitson->type & SXE_JITSON_TYPE_IS_REF ?
*(const uint8_t *)jitson->reference : jitson->len) + return SXE_JITSON_TEST_TRUE; + else + return SXE_JITSON_TEST_FALSE; +} + +static bool +sxe_jitson_string_clone(const struct sxe_jitson *jitson, struct sxe_jitson *clone) +{ + /* If the jitson owns the string it's referring to, it must be duplicated + */ + if ((jitson->type & SXE_JITSON_TYPE_IS_OWN) + && (clone->reference = MOCKERROR(MOCK_FAIL_STRING_CLONE, NULL, ENOMEM, kit_strdup(jitson->reference))) == NULL) { + SXEL2("Failed to duplicate a %zu byte string", strlen(jitson->reference)); + return false; + } + + return true; +} + +static char * +sxe_jitson_string_build_json(const struct sxe_jitson *jitson, struct sxe_factory *factory) +{ + const char *string; + char *buffer; + size_t len; + unsigned first, i; + + len = sxe_jitson_len(jitson); + string = sxe_jitson_get_string(jitson, NULL); + sxe_factory_add(factory, "\"", 1); + + for (first = i = 0; i < len; i++) + /* If the character is a control character or " or \, encode it as a unicode escape sequence. + * (unsigned char) casts are used to allow any UTF8 encoded string. + */ + if ((unsigned char)string[i] <= 0x1F || string[i] == '"' || string[i] == '\\') { + if (first < i) + sxe_factory_add(factory, &string[first], i - first); + + if ((buffer = sxe_factory_reserve(factory, sizeof("\\u0000"))) == NULL) + return NULL; /* COVERAGE EXCLUSION: Memory allocation failure */ + + snprintf(buffer, sizeof("\\u0000"), "\\u00%02x", (unsigned char)string[i]); + SXEA6(strlen(buffer) == sizeof("\\u0000") - 1, "Unicode escape sequence should always be 6 characters long"); + sxe_factory_commit(factory, sizeof("\\u0000") - 1); + first = i + 1; + } + + if (first < len) + sxe_factory_add(factory, &string[first], len - first); + + sxe_factory_add(factory, "\"", 1); + return sxe_factory_look(factory, NULL); +} + +int +sxe_jitson_string_cmp(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + const char *left_str = left->type & SXE_JITSON_TYPE_IS_REF ?
left->reference : left->string; + const char *right_str = right->type & SXE_JITSON_TYPE_IS_REF ? right->reference : right->string; + int ret = strcmp(left_str, right_str); + + return ret < 0 ? -1 : ret == 0 ? 0 : 1; +} + +static int +sxe_jitson_string_eq(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + if (left->len && right->len && left->len != right->len) // If both lengths are known and different, not equal! + return SXE_JITSON_TEST_FALSE; + + const char *left_str = left->type & SXE_JITSON_TYPE_IS_REF ? left->reference : left->string; + const char *right_str = right->type & SXE_JITSON_TYPE_IS_REF ? right->reference : right->string; + return strcmp(left_str, right_str) == 0 ? SXE_JITSON_TEST_TRUE : SXE_JITSON_TEST_FALSE; +} + +uint32_t +sxe_jitson_type_register_string(void) +{ + return sxe_jitson_type_register("string", sxe_jitson_free_base, sxe_jitson_string_test, sxe_jitson_string_size, + sxe_jitson_string_len, sxe_jitson_string_clone, sxe_jitson_string_build_json, + sxe_jitson_string_cmp, sxe_jitson_string_eq); +} + +static uint32_t +sxe_jitson_size_indexed(const struct sxe_jitson *jitson) +{ + if (jitson->type & SXE_JITSON_TYPE_INDEXED) // Once an array or object is indexed, its size is at the end of the index + return jitson->index[jitson->len]; + + return (uint32_t)jitson->integer; // Prior to indexing, the offset past the end of the object/array is stored here +} + +/** + * Determine the size of an array or array-like jitson (e.g.
a range) + * + * @param jitson An array or array-like jitson + * + * @return The size in jitsons + */ +uint32_t +sxe_jitson_array_size(const struct sxe_jitson *jitson) +{ + if (jitson->type & SXE_JITSON_TYPE_IS_REF) // Concatenations are always 2 jitsons in size + return 2; + else if (jitson->type & SXE_JITSON_TYPE_IS_UNIF) // Uniform arrays have their element sizes in bytes; round up to jitsons + return 1 + jitson->len * ((jitson->uniform.size + sizeof(*jitson) - 1) / sizeof(*jitson)); + + return sxe_jitson_size_indexed(jitson); +} + +int +sxe_jitson_test_len(const struct sxe_jitson *jitson) +{ + return jitson->len != 0 ? SXE_JITSON_TEST_TRUE : SXE_JITSON_TEST_FALSE; +} + +/** + * Length function for types that store their lengths in the len field + * + * @param jitson Jitson of a type that stores its length in the len field (e.g. an array) + */ +size_t +sxe_jitson_len_base(const struct sxe_jitson *jitson) +{ + return jitson->len; +} + +/** + * Free a jitson array or array-like jitson (e.g. a range) + * + * @param jitson An array or array-like jitson + * + * @note The free is done without using the index so that referenced data owned by its members will be freed.
+ */ +void +sxe_jitson_array_free(struct sxe_jitson *jitson) +{ + struct sxe_jitson *element; + unsigned i; + uint32_t offset, size; + + if ((jitson->type & SXE_JITSON_TYPE_IS_REF) == 0) { // If the array is not a concatenation of two arrays + for (i = 0, offset = 1; i < jitson->len; i++, offset += size) { + size = sxe_jitson_size(element = jitson + offset); + jitson_types[element->type & SXE_JITSON_TYPE_MASK].free(element); + } + } else if (jitson->type & SXE_JITSON_TYPE_IS_OWN) { // If the array is a concatenation and owns the subarray's storage + sxe_jitson_free((&jitson->jitref)[0]); + sxe_jitson_free((&jitson->jitref)[1]); // SonarQube False Positive + jitson->type &= ~SXE_JITSON_TYPE_IS_OWN; // Remove ownership so free_base won't try to free the first reference + } + + sxe_jitson_free_base(jitson); +} + +/** + * Clone an array or array-like jitson (e.g. a range) + * + * @param jitson An array or array-like jitson + * + * @return true on success, false if the index could not be allocated or an element could not be cloned + */ +bool +sxe_jitson_array_clone(const struct sxe_jitson *jitson, struct sxe_jitson *clone) +{ + size_t size; + unsigned i, len; + + if ((len = jitson->len) == 0) + return true; + + if (jitson->type & SXE_JITSON_TYPE_INDEXED) { + if (!(clone->index = MOCKERROR(MOCK_FAIL_ARRAY_CLONE, NULL, ENOMEM, kit_malloc((size = (len + 1) * sizeof(jitson->index[0])))))) + { + SXEL2("Failed to allocate %zu bytes to clone an array", (len + 1) * sizeof(jitson->index[0])); + return false; + } + + memcpy(clone->index, jitson->index, size); + } + + for (i = 0; i < len; i++) + if (!sxe_jitson_clone(sxe_jitson_array_get_element(jitson, i), + (struct sxe_jitson *)(uintptr_t)sxe_jitson_array_get_element(clone, i))) { + while (i > 0) // On error, free any allocations done + sxe_jitson_free(SXE_CAST_NOCONST(struct sxe_jitson *, sxe_jitson_array_get_element(clone, --i))); + + if (clone->type & SXE_JITSON_TYPE_INDEXED) + kit_free(clone->index); + + return false; + } + + return true; +} + +/** + * Convert an array or array-like jitson (e.g.
a range) to a JSON string + * + * @param jitson An array or array-like jitson + * + * @return A pointer to the string + */ +char * +sxe_jitson_array_build_json(const struct sxe_jitson *jitson, struct sxe_factory *factory) +{ + unsigned i, len; + + sxe_factory_add(factory, "[", 1); + + if ((len = jitson->len) > 0) { + for (i = 0; i < len - 1; i++) { + sxe_jitson_build_json(sxe_jitson_array_get_element(jitson, i), factory); + sxe_factory_add(factory, ",", 1); + } + + sxe_jitson_build_json(sxe_jitson_array_get_element(jitson, len - 1), factory); + } + + sxe_factory_add(factory, "]", 1); + return sxe_factory_look(factory, NULL); +} + +static int +sxe_jitson_array_cmp(const struct sxe_jitson *left, const struct sxe_jitson *right) +{ + unsigned i; + int ret; + + for (i = 0; i < left->len; i++) { + if (i >= right->len) // Right is a prefix of left + return 1; + + if ((ret = sxe_jitson_cmp(sxe_jitson_array_get_element(left, i), sxe_jitson_array_get_element(right, i)))) + return ret; + } + + if (i < right->len) // Left is a prefix of right + return -1; + + return 0; +} + +/** + * Test two arrays or array-like jitsons (e.g. ranges) for equality + * + * @param left/right Arrays or array-like jitsons to compare + * + * @return SXE_JITSON_TEST_TRUE if the same, otherwise SXE_JITSON_TEST_FALSE or the first element result that isn't true.
+ */
+int
+sxe_jitson_array_eq(const struct sxe_jitson *left, const struct sxe_jitson *right)
+{
+    unsigned pos;
+    int      verdict;
+
+    if (left->len != right->len)    // Arrays of different lengths are never equal
+        return SXE_JITSON_TEST_FALSE;
+
+    for (pos = 0; pos < left->len; pos++) {
+        verdict = sxe_jitson_eq(sxe_jitson_array_get_element(left, pos), sxe_jitson_array_get_element(right, pos));
+
+        if (verdict != SXE_JITSON_TEST_TRUE)    // Pass on the first result that isn't "equal"
+            return verdict;
+    }
+
+    return SXE_JITSON_TEST_TRUE;
+}
+
+/* Register the "array" type along with its handler functions, returning whatever sxe_jitson_type_register returns
+ */
+uint32_t
+sxe_jitson_type_register_array(void)
+{
+    uint32_t type;
+
+    type = sxe_jitson_type_register("array", sxe_jitson_array_free, sxe_jitson_test_len, sxe_jitson_array_size,
+                                    sxe_jitson_len_base, sxe_jitson_array_clone, sxe_jitson_array_build_json,
+                                    sxe_jitson_array_cmp, sxe_jitson_array_eq);
+    return type;
+}
+
+/**
+ * Free any data allocated by a jitson value that is contained in a larger value
+ *
+ * @param containee The contained value, whose type's free function is called
+ *
+ * @note This should only be called on a contained jitson when the container is being freed; the containee itself isn't freed
+ */
+void
+sxe_jitson_free_containee(struct sxe_jitson *containee)
+{
+    uint32_t type = containee->type & SXE_JITSON_TYPE_MASK;
+
+    jitson_types[type].free(containee);
+}
+
+/* Free a jitson object without using the index so that referenced data owned by its members will be freed.
+ */
+static void
+sxe_jitson_free_object(struct sxe_jitson *jitson)
+{
+    struct sxe_jitson *element;
+    unsigned           i, len;
+    uint32_t           offset, size;
+
+    len = jitson->len * 2;    // Objects are pairs of member name/value
+
+    for (i = 0, offset = 1; i < len; i++, offset += size) {
+        size = sxe_jitson_size(element = jitson + offset);    // Measure the name/value before its contents are freed
+        sxe_jitson_free_containee(element);
+    }
+
+    sxe_jitson_free_base(jitson);
+}
+
+/* Helper for cloning the members of one object into another
+ *
+ * @param jitson The object whose members are to be cloned
+ * @param clone  The object being turned into a clone; members are cloned in place at the same offsets
+ * @param len    The number of members (name/value pairs) to clone
+ *
+ * @return true on success; false if a name or value could not be cloned, in which case everything allocated for the
+ *         members cloned so far has been released
+ */
+bool
+sxe_jitson_object_clone_members(const struct sxe_jitson *jitson, struct sxe_jitson *clone, unsigned len)
+{
+    const struct sxe_jitson *content;
+    size_t                   size;
+    unsigned                 i;
+    uint32_t                 index, stop, value_size;
+    bool                     key;
+
+    for (i = 0, index = 1; i < len; i++, index += size + sxe_jitson_size(&clone[index + size])) {    // For each member
+        content = &jitson[index];
+        size    = sxe_jitson_size(content);
+
+        /* Clone the key and the value
+         */
+        if (!(key = sxe_jitson_clone(content, &clone[index])) || !sxe_jitson_clone(content + size, &clone[index + size])) {
+            stop = index;
+
+            /* For each member already processed. Members live inside the clone's storage, so they are never flagged
+             * SXE_JITSON_TYPE_ALLOCED and sxe_jitson_free would silently skip them; free them as containees instead.
+             */
+            for (i = 0, index = 1; index < stop; i++, index += size + value_size) {
+                SXEA6(i < len, "We missed our stop!");
+                size       = sxe_jitson_size(&clone[index]);
+                value_size = sxe_jitson_size(&clone[index + size]);    // Must be measured before the value is freed
+                sxe_jitson_free_containee(&clone[index]);
+                sxe_jitson_free_containee(&clone[index + size]);
+            }
+
+            SXEA6(index == stop, "We skipped our stop!");
+
+            if (key)    // If the last key was cloned, free it
+                sxe_jitson_free_containee(&clone[index]);
+
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Clone the index (if the object has one) and then the members of an object
+ *
+ * @return true on success (including for an empty object), false on failure to allocate the index or clone a member
+ */
+static bool
+sxe_jitson_object_clone(const struct sxe_jitson *jitson, struct sxe_jitson *clone)
+{
+    size_t   size;
+    unsigned len;
+
+    if ((len = jitson->len) == 0)
+        return true;
+
+    if (jitson->type & SXE_JITSON_TYPE_INDEXED) {    // If the object is already indexed, must clone its index
+        size = (len + 1) * sizeof(jitson->index[0]);
+
+        if (!(clone->index = MOCKERROR(MOCK_FAIL_OBJECT_CLONE, NULL, ENOMEM, kit_malloc(size)))) {
+            SXEL2("Failed to allocate %zu bytes to clone an object", (len + 1) * sizeof(jitson->index[0]));
+            return false;
+        }
+
+        SXEA6(clone->type & SXE_JITSON_TYPE_INDEXED, "Clone of object should already be marked indexed");
+        memcpy(clone->index, jitson->index, size);
+    }
+
+    if (!sxe_jitson_object_clone_members(jitson, clone, len)) {
+        if (clone->type & SXE_JITSON_TYPE_INDEXED) {    // If the index was cloned, free it up
+            kit_free(clone->index);
+            clone->index = NULL;
+        }
+
+        return false;
+    }
+
+    return true;
+}
+
+/* Add the JSON encoding of an object to a factory. Members are output bucket by bucket, i.e. in index order.
+ *
+ * @return A pointer to the string, or NULL if the object had to be indexed and the index could not be built
+ */
+static char *
+sxe_jitson_object_build_json(const struct sxe_jitson *jitson, struct sxe_factory *factory)
+{
+    unsigned i, index, len;
+    bool     first;
+
+    if ((len = jitson->len) == 0) {
+        sxe_factory_add(factory, "{}", 2);
+        return sxe_factory_look(factory, NULL);
+    }
+
+    if (!(jitson->type & SXE_JITSON_TYPE_INDEXED)) {    // Force indexing. Would it be better to walk the unindexed object?
+        sxe_jitson_object_get_member(jitson, "", 0);
+
+        if (!(jitson->type & SXE_JITSON_TYPE_INDEXED))    // Indexing failed, so there are no buckets to walk
+            return NULL;
+    }
+
+    sxe_factory_add(factory, "{", 1);
+
+    for (first = true, i = 0; i < len; i++) {                                  // For each bucket
+        for (index = jitson->index[i]; index; index = jitson[index].link) {    // For each member in the bucket
+            if (!first)
+                sxe_factory_add(factory, ",", 1);
+
+            sxe_jitson_build_json(&jitson[index], factory);                                      // Output the member name
+            sxe_factory_add(factory, ":", 1);
+            sxe_jitson_build_json(&jitson[index] + sxe_jitson_size(&jitson[index]), factory);    // Output the value
+            first = false;
+        }
+    }
+
+    sxe_factory_add(factory, "}", 1);
+    return sxe_factory_look(factory, NULL);
+}
+
+/**
+ * Register the JSON object type
+ *
+ * @note Objects cannot currently be compared
+ */
+uint32_t
+sxe_jitson_type_register_object(void)
+{
+    return sxe_jitson_type_register("object", sxe_jitson_free_object, sxe_jitson_test_len, sxe_jitson_size_indexed,
+                                    sxe_jitson_len_base, sxe_jitson_object_clone, sxe_jitson_object_build_json, NULL, NULL);
+}
+
+static int
+sxe_jitson_reference_test(const struct sxe_jitson *jitson)
+{
+    return
jitson_types[sxe_jitson_get_type(jitson->jitref)].test(jitson->jitref); +} + +static size_t +sxe_jitson_reference_len(const struct sxe_jitson *jitson) +{ + return jitson_types[sxe_jitson_get_type(jitson->jitref)].len(jitson->jitref); +} + +char * +sxe_jitson_reference_build_json(const struct sxe_jitson *jitson, struct sxe_factory *factory) +{ + return jitson_types[sxe_jitson_get_type(jitson->jitref)].build_json(jitson->jitref, factory); +} + +uint32_t +sxe_jitson_type_register_reference(void) +{ + return sxe_jitson_type_register("reference", sxe_jitson_free_base, sxe_jitson_reference_test, sxe_jitson_size_1, + sxe_jitson_reference_len, NULL, sxe_jitson_reference_build_json, NULL, NULL); +} + +/** + * Initialize the types and register INVALID (0) and all JSON types: null, bool, number, string, array, object, and pointer. + * + * @param mintypes The minimum number of types to preallocate. Should be SXE_JITSON_MIN_TYPES + the number of additional types + * @param flags SXE_JITSON_FLAG_STRICT for standard JSON or a combination of: + * SXE_JITSON_FLAG_ALLOW_HEX to allow hexadecimal (and octal) unsigned integers + * SXE_JITSON_FLAG_OPTIMIZE to optimize while parsing, slowing it, but reducing space and speeding evaluation + */ +void +sxe_jitson_initialize(uint32_t mintypes, uint32_t flags) +{ + sxe_factory_alloc_make(type_factory, mintypes < SXE_JITSON_MIN_TYPES ? 
SXE_JITSON_MIN_TYPES : mintypes, 256); + jitson_types = (struct sxe_jitson_type *)sxe_factory_reserve(type_factory, mintypes * sizeof(*jitson_types)); + SXEA1(sxe_jitson_type_register("INVALID", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) == 0, + "Type 0 is the 'INVALID' type"); + SXEA1(sxe_jitson_type_register_null() == SXE_JITSON_TYPE_NULL, "Type 1 is 'null'"); + SXEA1(sxe_jitson_type_register_bool() == SXE_JITSON_TYPE_BOOL, "Type 2 is 'bool'"); + SXEA1(sxe_jitson_type_register_number() == SXE_JITSON_TYPE_NUMBER, "Type 3 is 'number'"); + SXEA1(sxe_jitson_type_register_string() == SXE_JITSON_TYPE_STRING, "Type 4 is 'string'"); + SXEA1(sxe_jitson_type_register_array() == SXE_JITSON_TYPE_ARRAY, "Type 5 is 'array'"); + SXEA1(sxe_jitson_type_register_object() == SXE_JITSON_TYPE_OBJECT, "Type 6 is 'object'"); + SXEA1(sxe_jitson_type_register_reference() == SXE_JITSON_TYPE_REFERENCE, "Type 7 is 'reference'"); + sxe_jitson_flags |= flags; + + sxe_jitson_builtins_initialize_private(); + + /* The following constant values are used to avoid malloc/free of temporary values + */ + sxe_jitson_true = sxe_jitson_const_get(SXE_JITSON_FLAG_STRICT, "true", sizeof("true") - 1); + sxe_jitson_false = sxe_jitson_const_get(SXE_JITSON_FLAG_STRICT, "false", sizeof("false") - 1); + sxe_jitson_null = sxe_jitson_const_get(SXE_JITSON_FLAG_STRICT, "null", sizeof("null") - 1); +} + +/** + * Finalize memory used for type and clear the types variables + */ +void +sxe_jitson_finalize(void) +{ + kit_free(sxe_factory_remove(type_factory, NULL)); + jitson_types = NULL; + type_count = 0; + + sxe_jitson_builtins_finalize_private(); +} + +bool +sxe_jitson_is_init(void) +{ + return jitson_types ? true : false; +} + +/** + * Free a jitson object if it was allocated + * + * @note All other threads that might access the object must remove their references to it before this function is called, + * unless all just in time operations (i.e. 
string length of a referenced string, construction of indeces) can be + * guaranteed to have already happened. + */ +void +sxe_jitson_free(const struct sxe_jitson *jitson) +{ + if (jitson && (jitson->type & SXE_JITSON_TYPE_ALLOCED)) { // If it was allocated, it's safe to free it + struct sxe_jitson *mutable = SXE_CAST_NOCONST(struct sxe_jitson *, jitson); + + jitson_types[jitson->type & SXE_JITSON_TYPE_MASK].free(mutable); + mutable->type = SXE_JITSON_TYPE_INVALID; + kit_free(mutable); + } +} + +/** + * Test a jitson object + * + * @return SXE_JITSON_TEST_TRUE, SXE_JITSON_TEST_FALSE, or SXE_JITSON_TEST_ERROR + * + * @note For standard JSON types, tests never return SXE_JITSON_TEST_ERROR. + */ +int +sxe_jitson_test(const struct sxe_jitson *jitson) +{ + return jitson_types[jitson->type & SXE_JITSON_TYPE_MASK].test(jitson); +} + +/** + * Determine the size in jitson tokens of a jitson object + */ +uint32_t +sxe_jitson_size(const struct sxe_jitson *jitson) +{ + return jitson_types[jitson->type & SXE_JITSON_TYPE_MASK].size(jitson); +} + +/** + * Determine whether a jitson supports taking its length + */ +bool +sxe_jitson_supports_len(const struct sxe_jitson *jitson) +{ + return jitson_types[jitson->type & SXE_JITSON_TYPE_MASK].len != NULL; +} + +/** + * Determine the length of a jitson value. For strings, this is the string length, for collections, the number of members. 
+ */
+size_t
+sxe_jitson_len(const struct sxe_jitson *jitson)
+{
+    SXEA1(sxe_jitson_supports_len(jitson), "Type %s does not support taking its length", sxe_jitson_get_type_as_str(jitson));
+    return jitson_types[jitson->type & SXE_JITSON_TYPE_MASK].len(jitson);
+}
+
+/**
+ * Clone the contents of a jitson value by calling its type's clone function, if the type has one
+ *
+ * @param jitson The value to clone
+ * @param clone  The value to clone into
+ *
+ * @return true if the type has no clone function or the clone function succeeded, false if the clone function failed
+ */
+bool
+sxe_jitson_clone(const struct sxe_jitson *jitson, struct sxe_jitson *clone)
+{
+    uint32_t type = jitson->type & SXE_JITSON_TYPE_MASK;
+    bool     success;
+
+    SXEE6("(jitson=%p,clone=%p) // type=%u", jitson, clone, type);
+    success = !jitson_types[type].clone || jitson_types[type].clone(jitson, clone);    // Succeeds if no clone or clone succeeds
+    SXER6("return %s;", success ? "true" : "false");
+    return success;
+}
+
+/**
+ * Build a JSON string from a jitson object
+ *
+ * @param jitson  The jitson object to encode in JSON
+ * @param factory The factory object used to build the JSON string
+ *
+ * @return Whatever the type's build_json function returns
+ */
+char *
+sxe_jitson_build_json(const struct sxe_jitson *jitson, struct sxe_factory *factory)
+{
+    return jitson_types[jitson->type & SXE_JITSON_TYPE_MASK].build_json(jitson, factory);
+}
+
+/**
+ * Compare two jitson objects
+ *
+ * @param left/right The objects to compare
+ *
+ * @return -1 if left < right, 0 if left == right, 1 if left > right, or SXE_JITSON_CMP_ERROR if the values can't be compared
+ *
+ * @note SXE_JITSON_CMP_ERROR will be returned if either jitson is NULL, the dereferenced jitsons have different types, or the
+ *       type doesn't support comparisons
+ */
+int
+sxe_jitson_cmp(const struct sxe_jitson *left, const struct sxe_jitson *right)
+{
+    if (!left || !right)    // Garbage in, error out
+        return SXE_JITSON_CMP_ERROR;
+
+    left  = sxe_jitson_is_reference(left)  ? left->jitref  : left;     // OK because refs to refs are not allowed
+    right = sxe_jitson_is_reference(right) ? right->jitref : right;    // OK because refs to refs are not allowed
+
+    uint32_t type = left->type & SXE_JITSON_TYPE_MASK;
+
+    if (type != (right->type & SXE_JITSON_TYPE_MASK) || jitson_types[type].cmp == NULL)
+        return SXE_JITSON_CMP_ERROR;
+
+    return jitson_types[type].cmp(left, right);
+}
+
+/**
+ * Determine whether two jitson values are equal
+ *
+ * @param left/right The values to compare
+ *
+ * @return SXE_JITSON_TEST_TRUE if equal, SXE_JITSON_TEST_FALSE if not equal, or SXE_JITSON_TEST_ERROR if the type is incomparable
+ *
+ * @note Unlike sxe_jitson_cmp, NULL values are asserted against, and values of different types are simply not equal
+ */
+int
+sxe_jitson_eq(const struct sxe_jitson *left, const struct sxe_jitson *right)
+{
+    int ret;
+
+    SXEA1(left && right, "both left and right values must be provided");
+    left  = sxe_jitson_is_reference(left)  ? left->jitref  : left;     // OK because refs to refs are not allowed
+    right = sxe_jitson_is_reference(right) ? right->jitref : right;    // OK because refs to refs are not allowed
+
+    uint32_t type = left->type & SXE_JITSON_TYPE_MASK;
+
+    if (type != (right->type & SXE_JITSON_TYPE_MASK))
+        return SXE_JITSON_TEST_FALSE;
+
+    if (jitson_types[type].eq)    // If there is an eq, favor it, because it may be optimized over cmp
+        return jitson_types[type].eq(left, right);
+
+    if (jitson_types[type].cmp) {    // Otherwise, fall back to cmp, mapping its result onto a test result
+        if ((ret = jitson_types[type].cmp(left, right)) == SXE_JITSON_CMP_ERROR)
+            return SXE_JITSON_TEST_ERROR;
+
+        return ret == 0 ? SXE_JITSON_TEST_TRUE : SXE_JITSON_TEST_FALSE;
+    }
+
+    return SXE_JITSON_TEST_ERROR;
+}
diff --git a/lib-sxe-jitson/sxe-jitson.c b/lib-sxe-jitson/sxe-jitson.c
new file mode 100644
index 0000000..4dfbefc
--- /dev/null
+++ b/lib-sxe-jitson/sxe-jitson.c
@@ -0,0 +1,420 @@
+/* Copyright (c) 2021 Jim Belton
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "kit-alloc.h"
+#include "kit-mockfail.h"
+#include "sxe-hash.h"
+#include "sxe-jitson.h"
+#include "sxe-log.h"
+#include "sxe-spinlock.h"
+#include "sxe-unicode.h"
+
+static pthread_mutex_t type_indexing = PTHREAD_MUTEX_INITIALIZER;    // Lock around (slow) just in time indexing
+
+/**
+ * Allocate a jitson object and parse a JSON string into it.
+ *
+ * @param json A '\0' terminated JSON string.
+ *
+ * @return A jitson object or NULL with errno ENOMEM, EINVAL, EILSEQ, EMSGSIZE, ENODATA, ENOTUNIQ, or EOVERFLOW
+ */
+struct sxe_jitson *
+sxe_jitson_new(const char *json)
+{
+    struct sxe_jitson_stack *stack = sxe_jitson_stack_get_thread();
+
+    if (!stack)
+        return NULL;
+
+    if (!sxe_jitson_stack_parse_json(stack, json)) {
+        sxe_jitson_stack_clear(stack);    // Discard whatever was parsed before the failure
+        return NULL;
+    }
+
+    return sxe_jitson_stack_get_jitson(stack);
+}
+
+/**
+ * Get the unsigned integer value of a jitson whose type is SXE_JITSON_TYPE_NUMBER
+ *
+ * @param jitson A number, or a reference to one
+ *
+ * @return The numeric value as a uint64_t or ~0ULL if the value can't be represented as a uint64_t
+ *
+ * @note If the value can't be represented as a uint64_t (it's negative, fractional, not a number, or too big), errno is
+ *       set to EOVERFLOW
+ */
+uint64_t
+sxe_jitson_get_uint(const struct sxe_jitson *jitson)
+{
+    jitson = sxe_jitson_is_reference(jitson) ? jitson->jitref : jitson;    // OK because refs to refs are not allowed
+    SXEA6(sxe_jitson_get_type(jitson) == SXE_JITSON_TYPE_NUMBER,
+          "Can't get the numeric value of a %s", sxe_jitson_type_to_str(jitson->type));
+
+    if (!(jitson->type & SXE_JITSON_TYPE_IS_UINT)) {
+        double   number = jitson->number;
+        uint64_t uint;
+
+        /* Converting a double that is NaN, negative, or >= 2^64 to a uint64_t is undefined behaviour in C, so the range
+         * must be checked before the conversion is done. The conversion is then only used to detect a fractional part.
+         */
+        if (!(number >= 0.0 && number < 18446744073709551616.0) || (double)(uint = (uint64_t)number) != number) {
+            errno = EOVERFLOW;
+            return ~0ULL;
+        }
+
+        return uint;
+    }
+
+    return jitson->integer;
+}
+
+/**
+ * Get the numeric (double) value of a jitson whose type is SXE_JITSON_TYPE_NUMBER
+ *
+ * @param jitson A number, or a reference to one
+ *
+ * @return The numeric value as a double or NAN if the value can't be represented as a double
+ *
+ * @note If the value can't be represented as a double (it's an unsigned integer that a double can't hold exactly), errno is
+ *       set to EOVERFLOW
+ */
+double
+sxe_jitson_get_number(const struct sxe_jitson *jitson)
+{
+    jitson = sxe_jitson_is_reference(jitson) ? jitson->jitref : jitson;    // OK because refs to refs are not allowed
+    SXEA6(sxe_jitson_get_type(jitson) == SXE_JITSON_TYPE_NUMBER,
+          "Can't get the numeric value of a %s", sxe_jitson_type_to_str(jitson->type));
+
+    if (jitson->type & SXE_JITSON_TYPE_IS_UINT) {
+        double number = (double)jitson->integer;
+
+        /* Integers just below 2^64 round up to 2^64, which can't be converted back to a uint64_t without invoking
+         * undefined behaviour, and which can't be equal to the integer anyway.
+         */
+        if (number >= 18446744073709551616.0 || (uint64_t)number != jitson->integer) {
+            errno = EOVERFLOW;
+            return NAN;
+        }
+
+        return number;
+    }
+
+    return jitson->number;
+}
+
+/**
+ * Get the string value of a jitson whose type is SXE_JITSON_TYPE_STRING or SXE_JITSON_TYPE_MEMBER_NAME
+ *
+ * @param jitson  Pointer to the jitson
+ * @param len_out NULL or a pointer to a variable of type size_t to return the length of the string in
+ *
+ * @return The string value; if the jitson is a non string, results are undefined
+ */
+const char *
+sxe_jitson_get_string(const struct sxe_jitson *jitson, size_t *len_out)
+{
+    jitson = sxe_jitson_is_reference(jitson) ? jitson->jitref : jitson;    // OK because refs to refs are not allowed
+    SXEA6(sxe_jitson_get_type(jitson) == SXE_JITSON_TYPE_STRING,
+          "Can't get the string value of a %s", sxe_jitson_type_to_str(jitson->type));
+
+    if (len_out)
+        *len_out = sxe_jitson_len(jitson);    // If it's a string_ref, may need to compute the length
+
+    return jitson->type & SXE_JITSON_TYPE_IS_REF ? jitson->reference : jitson->string;
+}
+
+/**
+ * Get the boolean value of a jitson whose type is SXE_JITSON_TYPE_BOOL
+ *
+ * @return The bool value; if the jitson is not a boolean, results are undefined
+ */
+bool
+sxe_jitson_get_bool(const struct sxe_jitson *jitson)
+{
+    jitson = sxe_jitson_is_reference(jitson) ?
jitson->jitref : jitson;    // OK because refs to refs are not allowed
+    SXEA6(sxe_jitson_get_type(jitson) == SXE_JITSON_TYPE_BOOL,
+          "Can't get the boolean value of a %s", sxe_jitson_type_to_str(jitson->type));
+    return jitson->boolean;
+}
+
+/**
+ * Get a member's value from an object
+ *
+ * @param jitson An object
+ * @param name   The member name
+ * @param len    Length of the member name or 0 if not known
+ *
+ * @return The member's value or NULL on error (ENOMEM) or if the member name was not found (ENOKEY).
+ *
+ * @note The first lookup in an object builds a hash index of its member names just in time. Unless the object is local
+ *       (see sxe_jitson_is_local), the index is built while holding the indexing lock.
+ */
+const struct sxe_jitson *
+sxe_jitson_object_get_member(const struct sxe_jitson *jitson, const char *name, size_t len)
+{
+    volatile struct sxe_jitson *vol_jit;
+    struct sxe_jitson          *member;
+    const struct sxe_jitson    *con_memb;
+    uint32_t                   *bucket, *index;
+    const char                 *memname;
+    size_t                      memlen;
+    unsigned                    i;
+    bool                        is_race, do_lock;
+
+    jitson  = sxe_jitson_is_reference(jitson) ? jitson->jitref : jitson;    // OK because refs to refs are not allowed
+    vol_jit = SXE_CAST_NOCONST(volatile struct sxe_jitson *, jitson);      // Volatile view, so the type is reread under the lock
+    SXEA1(sxe_jitson_get_type(jitson) == SXE_JITSON_TYPE_OBJECT, "Can't get a member value from a %s",
+          sxe_jitson_type_to_str(vol_jit->type));
+    len = len ?: strlen(name);    // SonarQube False Positive
+
+    if (vol_jit->len == 0) {    // Empty object
+        errno = ENOKEY;
+        return NULL;
+    }
+
+    if (!(vol_jit->type & SXE_JITSON_TYPE_INDEXED)) {    // If not already, index thread safely
+        if ((do_lock = !sxe_jitson_is_local(jitson)))
+            SXEA1(pthread_mutex_lock(&type_indexing) == 0, "Can't take indexing lock");
+
+        if (!(is_race = (vol_jit->type & SXE_JITSON_TYPE_INDEXED))) {    // Recheck under the lock in case of a race
+            /* Allocate an array of len buckets + 1 to store the size in jitsons
+             */
+            if (!(index = MOCKERROR(MOCK_FAIL_OBJECT_GET_MEMBER, NULL, ENOMEM,
+                                    kit_calloc(1, (vol_jit->len + 1) * sizeof(uint32_t))))) {
+                if (do_lock)
+                    pthread_mutex_unlock(&type_indexing);
+                return NULL;
+            }
+
+            /* Walk the name/value pairs, pushing each member name on the front of its bucket's chain
+             */
+            for (member = SXE_CAST(struct sxe_jitson *, vol_jit + 1), i = 0; i < vol_jit->len; i++) {
+                // Only time it's safe to use member->len to get the length of the member name (if it's not a reference)
+                memlen = member->type & SXE_JITSON_TYPE_IS_REF ? strlen(member->reference) : member->len;    // SonarQube False Positive
+                bucket = &index[sxe_hash_sum(sxe_jitson_get_string(member, NULL), memlen) % vol_jit->len];
+                member->link = *bucket;    // link shares storage with len, which is why len can't be used afterward
+                *bucket = (uint32_t)(member - vol_jit);
+                member = member + sxe_jitson_size(member);    // Skip the member name
+                member = member + sxe_jitson_size(member);    // Skip the member value
+            }
+
+            /* ORDER IS IMPORTANT HERE. Must save vol_jit->integer before overwriting it by setting vol_jit->index.
+             * Must set the SXE_JITSON_TYPE_INDEXED flag last or another thread may use the index before it's there.
+             */
+            index[vol_jit->len] = (uint32_t)vol_jit->integer;    // Store the size at the end
+            vol_jit->index = index;
+            vol_jit->type |= SXE_JITSON_TYPE_INDEXED;
+        }
+
+        if (do_lock)
+            pthread_mutex_unlock(&type_indexing);
+
+        if (is_race)
+            SXEL4("Detected a race in just in time object indexing");    /* COVERAGE EXCLUSION: Race condition */
+    }
+
+    /* Walk the chain of member names in the name's bucket; offset 0 marks the end of the chain
+     */
+    for (i = jitson->index[sxe_hash_sum(name, len) % jitson->len]; i != 0; i = con_memb->link) {
+        con_memb = &jitson[i];
+        SXEA6(sxe_jitson_get_type(con_memb) == SXE_JITSON_TYPE_STRING, "Object keys must be strings, not %s",
+              sxe_jitson_type_to_str(con_memb->type));
+        memname = sxe_jitson_get_string(con_memb, &memlen);
+        SXEA6(memname, "A member name should always be a valid string");
+
+        if (memlen == len && memcmp(memname, name, len) == 0)
+            return con_memb + sxe_jitson_size(con_memb);    // Skip the member name, returning the value
+    }
+
+    errno = ENOKEY;
+    return NULL;
+}
+
+/**
+ * Get an element's value from an array or an array-like jitson
+ *
+ * @param jitson An array or array-like jitson
+ * @param idx    The index of the element
+ *
+ * @return The element's value or NULL on error (ENOMEM) or if the index was out of range (ERANGE).
+ *
+ * @note No type checking is done in order to allow this function to be called on array-like types (e.g. ranges)
+ */
+const struct sxe_jitson *
+sxe_jitson_array_get_element(const struct sxe_jitson *jitson, size_t idx)
+{
+    volatile struct sxe_jitson *vol_jit;
+    const struct sxe_jitson    *element;
+    uint32_t                   *index, type;
+    unsigned                    i;
+    bool                        is_race, do_lock;
+
+    vol_jit = SXE_CAST_NOCONST(volatile struct sxe_jitson *, sxe_jitson_is_reference(jitson) ? jitson->jitref : jitson);
+
+    if (idx >= vol_jit->len) {
+        SXEL2("Array element index %zu is not less than len %u", idx, vol_jit->len);
+        errno = ERANGE;
+        return NULL;
+    }
+
+    if ((type = vol_jit->type) & SXE_JITSON_TYPE_IS_REF) {    // If the array is a concatenation of two arrays
+        if (idx < sxe_jitson_len_array((&vol_jit->reference)[0]))    // The element is in the first of the two arrays
+            return sxe_jitson_array_get_element((&vol_jit->reference)[0], idx);
+        else
+            return sxe_jitson_array_get_element((&vol_jit->reference)[1], idx - sxe_jitson_len_array((&vol_jit->reference)[0]));    // SonarQube False Positive
+    }
+
+    if (type & SXE_JITSON_TYPE_IS_UNIF) {    // Elements are all the same size, so the offset can be computed without an index
+        SXEA6(vol_jit->uniform.size % sizeof(*jitson) == 0,
+              "The size of a uniform array element must currently be a multiple of a jitson");
+        return SXE_CAST_NOCONST(const struct sxe_jitson *, &vol_jit[1 + (vol_jit->uniform.size / sizeof(*jitson)) * idx]);
+    }
+
+    if (!(type & SXE_JITSON_TYPE_INDEXED)) {    // If not already, index thread safely
+        // NOTE(review): locality is tested on jitson as passed in, not on the array it may refer to - confirm intended
+        if ((do_lock = !sxe_jitson_is_local(jitson)))
+            SXEA1(pthread_mutex_lock(&type_indexing) == 0, "Can't take indexing lock");
+
+        if (!(is_race = (vol_jit->type & SXE_JITSON_TYPE_INDEXED))) {    // Recheck under the lock in case of a race
+            /* Allocate an array of len offsets + 1 to store the size in jitsons
+             */
+            if (!(index = MOCKERROR(MOCK_FAIL_ARRAY_GET_ELEMENT, NULL, ENOMEM,
+                                    kit_malloc((vol_jit->len + 1) * sizeof(uint32_t))))) {
+                if (do_lock)
+                    pthread_mutex_unlock(&type_indexing);
+                return NULL;
+            }
+
+            element = SXE_CAST_NOCONST(const struct sxe_jitson *, vol_jit + 1);
+
+            for (i = 0; i < vol_jit->len; i++, element += sxe_jitson_size(element))
+                index[i] = (uint32_t)(element - vol_jit);    // Offset of the element from the array, in jitsons
+
+            /* ORDER IS IMPORTANT HERE. Must save jitson->integer before overwriting it by setting jitson->index.
+             * Must set the SXE_JITSON_TYPE_INDEXED flag last or another thread may use the index before it's there.
+             */
+            index[vol_jit->len] = (uint32_t)vol_jit->integer;    // Store the size at the end
+            vol_jit->index = index;
+            vol_jit->type |= SXE_JITSON_TYPE_INDEXED;
+        }
+
+        if (do_lock)
+            pthread_mutex_unlock(&type_indexing);
+
+        if (is_race)
+            SXEL4("Detected a race in just in time indexing");    /* COVERAGE EXCLUSION: Race condition */
+
+        SXEA1(vol_jit->index, "Index flag is set but there is no index");
+    }
+
+    return SXE_CAST_NOCONST(const struct sxe_jitson *, &vol_jit[vol_jit->index[idx]]);
+}
+
+/**
+ * Make a jitson into a null value, returning the jitson
+ */
+struct sxe_jitson *
+sxe_jitson_make_null(struct sxe_jitson *jitson)
+{
+    jitson->type = SXE_JITSON_TYPE_NULL;
+    return jitson;
+}
+
+/**
+ * Make a jitson into a boolean value, returning the jitson
+ */
+struct sxe_jitson *
+sxe_jitson_make_bool(struct sxe_jitson *jitson, bool boolean)
+{
+    jitson->type = SXE_JITSON_TYPE_BOOL;
+    jitson->boolean = boolean;
+    return jitson;
+}
+
+/**
+ * Make a jitson into a number stored as a double, returning the jitson
+ */
+struct sxe_jitson *
+sxe_jitson_make_number(struct sxe_jitson *jitson, double number)
+{
+    jitson->type = SXE_JITSON_TYPE_NUMBER;
+    jitson->number = number;
+    return jitson;
+}
+
+/**
+ * Make a jitson into a number stored as an unsigned integer, returning the jitson
+ */
+struct sxe_jitson *
+sxe_jitson_make_uint(struct sxe_jitson *jitson, uint64_t uint)
+{
+    jitson->type = SXE_JITSON_TYPE_NUMBER | SXE_JITSON_TYPE_IS_UINT;
+    jitson->integer = uint;
+    return jitson;
+}
+
+/**
+ * Create a jitson string value that references an immutable C string
+ */
+struct sxe_jitson *
+sxe_jitson_make_string_ref(struct sxe_jitson *jitson, const char *string)
+{
+    jitson->type = SXE_JITSON_TYPE_STRING | SXE_JITSON_TYPE_IS_REF;
+    jitson->len = 0;    // The length will be computed if and when needed and cached here if <= 4294967295
+    jitson->reference = string;
+    return jitson;
+}
+
+/**
+ * Create a reference to another jitson that will behave exactly like the
original jitson + * + * @param jitson Pointer to jitson to create + * @param to jitson to refer to + * + * @note References are only valid during the lifetime of the jitson they refer to + */ +struct sxe_jitson * +sxe_jitson_make_reference(struct sxe_jitson *jitson, const struct sxe_jitson *to) +{ + jitson->type = SXE_JITSON_TYPE_REFERENCE; + // Don't create references to references + jitson->jitref = (to->type & SXE_JITSON_TYPE_MASK) == SXE_JITSON_TYPE_REFERENCE ? to->jitref : to; + return jitson; +} + +/** + * Duplicate a jitson value in allocated storage, deep cloning all indices/owned content. + * + * @param jitson The jitson value to duplicate + * + * @return The duplicate jitson or NULL on allocation failure + */ +struct sxe_jitson * +sxe_jitson_dup(const struct sxe_jitson *jitson) +{ + struct sxe_jitson *dup; + size_t size; + + jitson = sxe_jitson_is_reference(jitson) ? jitson->jitref : jitson; // OK because refs to refs are not allowed + size = sxe_jitson_size(jitson) * sizeof(*jitson); + + if (!(dup = MOCKERROR(MOCK_FAIL_DUP, NULL, ENOMEM, kit_malloc(size)))) + return NULL; + + memcpy(dup, jitson, size); + + if (!sxe_jitson_clone(jitson, dup)) { // If the type requires a deep clone, do it + kit_free(dup); + return NULL; + } + + dup->type |= SXE_JITSON_TYPE_ALLOCED; + return dup; +} + +char * +sxe_jitson_to_json(const struct sxe_jitson *jitson, size_t *len_out) +{ + struct sxe_factory factory[1]; + char *json; + + SXEE7("(jitson=%p,len_out=%p)", jitson, len_out); + sxe_factory_alloc_make(factory, 0, 0); + json = sxe_jitson_build_json(jitson, factory) ? 
sxe_factory_remove(factory, len_out) : NULL; + SXER7("return json=%p", json); + return json; +} diff --git a/lib-sxe-jitson/sxe-jitson.h b/lib-sxe-jitson/sxe-jitson.h new file mode 100644 index 0000000..e6389ee --- /dev/null +++ b/lib-sxe-jitson/sxe-jitson.h @@ -0,0 +1,515 @@ +/* Copyright (c) 2021 Jim Belton + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef SXE_JITSON_H +#define SXE_JITSON_H + +#include +#include +#include +#include + +#include "kit-alloc.h" +#include "sxe-factory.h" +#include "sxe-log.h" + +#define SXE_JITSON_FLAG_STRICT 0 // Disable all extensions. This is only valid for a sxe_jitson_source. 
+#define SXE_JITSON_FLAG_ALLOW_HEX 0x00000001 // Allow hexadecimal when parsing numbers, which isn't strictly valid JSON +#define SXE_JITSON_FLAG_ALLOW_CONSTS 0x00000002 // Replace parsed constants (default if sxe_jitson_type_init called) +#define SXE_JITSON_FLAG_ALLOW_IDENTS 0x00000004 // Return parsed identifiers (default if sxe_jitson_ident_register called) +#define SXE_JITSON_FLAG_OPTIMIZE 0x00000008 // Slows parsing but allows smaller values and faster operations. +#define SXE_JITSON_FLAG_CHECK_ORDER SXE_JITSON_FLAG_OPTIMIZE // Check whether arrays are ordered (backward compatibility) + +#define SXE_JITSON_MIN_TYPES 8 // The minimum number of types for JSON + +#define SXE_JITSON_TYPE_INVALID 0 +#define SXE_JITSON_TYPE_NULL 1 +#define SXE_JITSON_TYPE_BOOL 2 +#define SXE_JITSON_TYPE_NUMBER 3 +#define SXE_JITSON_TYPE_STRING 4 +#define SXE_JITSON_TYPE_ARRAY 5 +#define SXE_JITSON_TYPE_OBJECT 6 +#define SXE_JITSON_TYPE_REFERENCE 7 // A reference acts like the type the reference points to. 
+ +#define SXE_JITSON_TYPE_MASK 0x0000FFFF // Bits included in the type enumeration +#define SXE_JITSON_TYPE_MK_SORT 0x00010000 // Set to allow insertion in order into a sorted array +#define SXE_JITSON_TYPE_IS_LOCAL 0x00020000 // Flag set if the object is thread-local +#define SXE_JITSON_TYPE_IS_HOMO 0x01000000 // Flag set for arrays that contain homogenously typed elements +#define SXE_JITSON_TYPE_IS_UNIF 0x02000000 // Flag set for arrays that contain uniformly sized elements (so no index needed) +#define SXE_JITSON_TYPE_IS_ORD 0x04000000 // Flag set for arrays that are ordered (element types must be homogenous) +#define SXE_JITSON_TYPE_IS_UINT 0x08000000 // Flag set for numbers that are unsigned integers +#define SXE_JITSON_TYPE_REVERSED 0x08000000 // Flag set for strings that have been reversed +#define SXE_JITSON_TYPE_IS_KEY 0x10000000 // Flag set for types (in JSON, strings) when they are used as keys in an object +#define SXE_JITSON_TYPE_IS_COPY 0 // Flag passed to API to indicate that strings/member names are to be copied +#define SXE_JITSON_TYPE_IS_REF 0x20000000 // Flag set for strings that are references (size == 0 until cached or if empty) + // and for arrays that are concatenations of two arrays +#define SXE_JITSON_TYPE_IS_OWN 0x40000000 // Flag set for values where the reference is owned by the object (to be freed) +#define SXE_JITSON_TYPE_INDEXED 0x40000000 // Flag set for arrays and object if they have been indexed +#define SXE_JITSON_TYPE_ALLOCED 0x80000000 // Flag set for the first jitson token in an allocated jitson + +#define SXE_JITSON_TEST_TRUE 1 +#define SXE_JITSON_TEST_FALSE 0 +#define SXE_JITSON_TEST_ERROR -1 + +#define SXE_JITSON_CMP_ERROR (INT_MAX) + +#define SXE_JITSON_STACK_ERROR (~0U) +#define SXE_JITSON_TOKEN_SIZE sizeof(struct sxe_jitson) +#define SXE_JITSON_STRING_SIZE sizeof(((struct sxe_jitson *)0)->string) + +struct sxe_jitson; // Partial structure decalaration required due to mutually recursive definition +struct 
sxe_jitson_stack; // Partial structure decalaration required due to mutually recursive definition +typedef bool (*sxe_jitson_castfunc_t)(struct sxe_jitson_stack *stack, const struct sxe_jitson *from); + +/* A jitson token. Copied strings of > 7 bytes length continue into the next token. Collections (arrays and objects) may + * initially store the size in jitsons of the entire collection in integer. If so, the size is atomically replaced by the index + * on first access. + */ +struct sxe_jitson { + union { + uint32_t type; // See definitions above + uint16_t shortype; // The bottom 16 bits just for readable gdbing + }; + union { + uint32_t len; // Length of string (if <= 4294967295) or number of elements/members in array/object + uint32_t link; // In an indexed object member name, this is the offset of the next member name in the bucket + }; + union { + uint32_t *index; // Points to offsets (len of them) to elements/members, or 0 for empty buckets + uint64_t integer; // JSON unsigned integer or size of array or object before indexing + double number; // JSON number, stored as a double wide floating point number + bool boolean; // True and false + char string[8]; // First 8 bytes of a string, including NUL. + const void *reference; // Points to a constant external value + const struct sxe_jitson *jitref; // Reference to another sxe_jitson. Type must be SXE_JITSON_TYPE_REFERENCE + sxe_jitson_castfunc_t castfunc; // Pointer to a cast function. 
+        struct {
+            union {
+                bool     no_value;    // Object under construction has a member name with no value
+                uint32_t last;        // If array and it's nonempty and ordered so far, the index of the last element
+            };
+            uint32_t collection;      // Object or array is nested in the object or array at this index - 1 or 0 if it's the root
+        } partial;
+        struct {
+            uint32_t type;    // For an array of uniformly sized and typed values, the type of the elements
+            uint32_t size;    // For an array of uniformly sized values, the size of the elements in bytes
+        } uniform;
+    };
+};
+
+struct sxe_jitson_source {
+    const char *json;     // Pointer into a buffer containing JSON to be parsed
+    const char *next;     // Pointer to the next character to be parsed
+    const char *end;      // Pointer after the last character to be parsed (max pointer value if the buffer is NUL terminated).
+    const char *file;     // File name or NULL
+    void       *value;    // An arbitrary user assigned value
+    unsigned    line;     // Line number or 0 (when 0, sxe_jitson_source_consume does not count lines)
+    uint32_t    flags;    // Specific JSON extensions allowed while parsing this source
+};
+
+struct sxe_jitson_stack {
+    unsigned           maximum;    // Number of jitsons currently allocated to the stack
+    unsigned           count;      // Number of jitsons currently in use on the stack
+    struct sxe_jitson *jitsons;    // The jitson tokens themselves ('maximum' allocated, 'count' in use)
+    unsigned           open;       // Index + 1 of the deepest open collection that's under construction or 0 if none
+    unsigned           last;       // Index of the last jitson object added to the stack
+    unsigned           borrow;     // Point at which this stack was last borrowed
+};
+
+/* Constants.
sxe_jitson_type_initialize must be called before using them + */ +extern const struct sxe_jitson *sxe_jitson_true; +extern const struct sxe_jitson *sxe_jitson_false; +extern const struct sxe_jitson *sxe_jitson_null; + +extern uint32_t sxe_jitson_flags; // JSON extensions allowed by default (override with a sxe_jitson_source) + +#include "sxe-jitson-proto.h" +#include "sxe-jitson-source-proto.h" +#include "sxe-jitson-stack-proto.h" +#include "sxe-jitson-type-proto.h" + +/** + * Determine the type of a jitson without dereferencing it, for performance when a jitson is known to have been dereferenced. + * + * @param jitson + * + * @return type + */ +static inline uint32_t +sxe_jitson_get_type_no_deref(const struct sxe_jitson *jitson) +{ + return jitson->type & SXE_JITSON_TYPE_MASK; +} + +/* Inline functions to create an easier to use interface. + */ + +static inline const char * +sxe_jitson_get_type_as_str(const struct sxe_jitson *jitson) +{ + return sxe_jitson_type_to_str(sxe_jitson_get_type(jitson)); +} + +static inline bool +sxe_jitson_is_allocated(const struct sxe_jitson *jitson) +{ + return jitson->type & SXE_JITSON_TYPE_ALLOCED; +} + +static inline bool +sxe_jitson_is_reference(const struct sxe_jitson *jitson) +{ + return (jitson->type & SXE_JITSON_TYPE_MASK) == SXE_JITSON_TYPE_REFERENCE; +} + +static inline const struct sxe_jitson * +sxe_jitson_dereference(const struct sxe_jitson *jitson) +{ + return sxe_jitson_is_reference(jitson) ? 
jitson->jitref : jitson; // OK because refs to refs are not allowed +} + +static inline bool +sxe_jitson_is_local(const struct sxe_jitson *jitson) +{ + return jitson->type & SXE_JITSON_TYPE_IS_LOCAL; +} + +/** + * Begin construction of an object on a stack + * + * @param stack + * @param name A name used in diagnostic message + * + * @return true on success, false on allocation failure + */ +static inline bool +sxe_jitson_stack_open_object(struct sxe_jitson_stack *stack, const char *name) +{ + SXE_USED_IN_DEBUG(name); + + SXEL7("Opening object %s at %u {", name, stack->count); + return sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT); +} + +/** + * Begin construction of a thread-local object on a stack + * + * @param stack + * @param name A name used in diagnostic message + * + * @return true on success, false on allocation failure + */ +static inline bool +sxe_jitson_stack_open_local_object(struct sxe_jitson_stack *stack, const char *name) +{ + SXE_USED_IN_DEBUG(name); + + SXEL7("Opening thread-local object %s at %u {", name, stack->count); + return sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT | SXE_JITSON_TYPE_IS_LOCAL); +} + +/** + * Begin construction of an array on a stack + * + * @param stack + * @param name A name used in diagnostic message + * + * @return true on success, false on allocation failure + */ +static inline bool +sxe_jitson_stack_open_array(struct sxe_jitson_stack *stack, const char *name) +{ + SXE_USED_IN_DEBUG(name); + + SXEL7("Opening array %s at %u [", name, stack->count); + return sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY); +} + +/** + * Begin construction of a thread-local array on a stack + * + * @param stack + * @param name A name used in diagnostic message + * + * @return true on success, false on allocation failure + */ +static inline bool +sxe_jitson_stack_open_local_array(struct sxe_jitson_stack *stack, const char *name) +{ + SXE_USED_IN_DEBUG(name); + + SXEL7("Opening thread-local 
array %s at %u {", name, stack->count); + return sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY | SXE_JITSON_TYPE_IS_LOCAL); +} + +/** + * End construction of an object on a stack + * + * @param stack + * @param name A name used in diagnostic message + * + * @return true (allow chaining) + */ +static inline bool +sxe_jitson_stack_close_object(struct sxe_jitson_stack *stack, const char *name) +{ + unsigned index = stack->open - 1; + SXE_USED_IN_DEBUG(name); + SXE_USED_IN_DEBUG(index); + + sxe_jitson_stack_close_collection(stack); + SXEL7("} closed object %s at %u", name, index); + return true; +} + +/** + * End construction of an array on a stack + * + * @param stack + * @param name A name used in diagnostic message + * + * @return true (allow chaining) + */ +static inline bool +sxe_jitson_stack_close_array(struct sxe_jitson_stack *stack, const char *name) +{ + unsigned index = stack->open - 1; + SXE_USED_IN_DEBUG(name); + SXE_USED_IN_DEBUG(index); + + sxe_jitson_stack_close_collection(stack); + SXEL7("] closed array %s at %u", name, index); + return true; +} + +static inline bool +sxe_jitson_stack_add_member_string(struct sxe_jitson_stack *stack, const char *name, const char * value, uint32_t type) +{ + return sxe_jitson_stack_add_member_name(stack, name, SXE_JITSON_TYPE_IS_COPY) + && sxe_jitson_stack_add_string( stack, value, type); +} + +static inline bool +sxe_jitson_stack_add_member_null(struct sxe_jitson_stack *stack, const char *name) +{ + return sxe_jitson_stack_add_member_name(stack, name, SXE_JITSON_TYPE_IS_COPY) && sxe_jitson_stack_add_null(stack); +} + +static inline bool +sxe_jitson_stack_add_member_bool(struct sxe_jitson_stack *stack, const char *name, bool boolean) +{ + return sxe_jitson_stack_add_member_name(stack, name, SXE_JITSON_TYPE_IS_COPY) && sxe_jitson_stack_add_bool(stack, boolean); +} + +static inline bool +sxe_jitson_stack_add_member_number(struct sxe_jitson_stack *stack, const char *name, double number) +{ + return 
sxe_jitson_stack_add_member_name(stack, name, SXE_JITSON_TYPE_IS_COPY) && sxe_jitson_stack_add_number(stack, number);
+}
+
+/* Add a member whose value is an unsigned integer; the name is copied. False if either the name or the value can't be added.
+ */
+static inline bool
+sxe_jitson_stack_add_member_uint(struct sxe_jitson_stack *stack, const char *name, uint64_t uint)
+{
+    if (!sxe_jitson_stack_add_member_name(stack, name, SXE_JITSON_TYPE_IS_COPY))
+        return false;
+
+    return sxe_jitson_stack_add_uint(stack, uint);
+}
+
+/* Add a member whose value is a reference to another jitson; the name is copied. False if either step fails.
+ */
+static inline bool
+sxe_jitson_stack_add_member_reference(struct sxe_jitson_stack *stack, const char *name, const struct sxe_jitson *to)
+{
+    if (!sxe_jitson_stack_add_member_name(stack, name, SXE_JITSON_TYPE_IS_COPY))
+        return false;
+
+    return sxe_jitson_stack_add_reference(stack, to);
+}
+
+/* Add a member whose value is a duplicate of another jitson; the name is copied. False if either step fails.
+ */
+static inline bool
+sxe_jitson_stack_add_member_dup(struct sxe_jitson_stack *stack, const char *name, const struct sxe_jitson *value)
+{
+    if (!sxe_jitson_stack_add_member_name(stack, name, SXE_JITSON_TYPE_IS_COPY))
+        return false;
+
+    return sxe_jitson_stack_add_dup(stack, value);
+}
+
+/* Allocate a single token holding null and flag it as allocated. Returns NULL if the allocation fails.
+ */
+static inline struct sxe_jitson *
+sxe_jitson_create_null(void)
+{
+    struct sxe_jitson *jitson = kit_malloc(sizeof(*jitson));
+
+    if (jitson == NULL)
+        return NULL;
+
+    sxe_jitson_make_null(jitson);
+    jitson->type |= SXE_JITSON_TYPE_ALLOCED;
+    return jitson;
+}
+
+/* Allocate a single token holding a boolean and flag it as allocated. Returns NULL if the allocation fails.
+ */
+static inline struct sxe_jitson *
+sxe_jitson_create_bool(bool boolean)
+{
+    struct sxe_jitson *jitson = kit_malloc(sizeof(*jitson));
+
+    if (jitson == NULL)
+        return NULL;
+
+    sxe_jitson_make_bool(jitson, boolean);
+    jitson->type |= SXE_JITSON_TYPE_ALLOCED;
+    return jitson;
+}
+
+/* Allocate a single token holding a number and flag it as allocated. Returns NULL if the allocation fails.
+ */
+static inline struct sxe_jitson *
+sxe_jitson_create_number(double number)
+{
+    struct sxe_jitson *jitson = kit_malloc(sizeof(*jitson));
+
+    if (jitson == NULL)
+        return NULL;
+
+    sxe_jitson_make_number(jitson, number);
+    jitson->type |= SXE_JITSON_TYPE_ALLOCED;
+    return jitson;
+}
+
+/* Allocate a single token holding an unsigned integer and flag it as allocated. Returns NULL if the allocation fails.
+ */
+static inline struct sxe_jitson *
+sxe_jitson_create_uint(uint64_t integer)
+{
+    struct sxe_jitson *jitson = kit_malloc(sizeof(*jitson));
+
+    if (jitson == NULL)
+        return NULL;
+
+    sxe_jitson_make_uint(jitson, integer);
+    jitson->type
|= SXE_JITSON_TYPE_ALLOCED; + return jitson; +} + +/** + * Create a jitson string value that references an immutable C string + */ +static inline struct sxe_jitson * +sxe_jitson_create_string_ref(const char *string) +{ + struct sxe_jitson *jitson; + + if (!(jitson = kit_malloc(sizeof(struct sxe_jitson)))) + return NULL; + + sxe_jitson_make_string_ref(jitson, string); + jitson->type |= SXE_JITSON_TYPE_ALLOCED; + return jitson; +} + +/** + * Create a jitson string value with an owned reference to a duplication of a C string + */ +static inline struct sxe_jitson * +sxe_jitson_create_string_dup(const char *string) +{ + struct sxe_jitson *jitson; + + if (!(jitson = kit_malloc(sizeof(struct sxe_jitson)))) + return NULL; + + if (!(string = kit_strdup(string))) { + kit_free(jitson); + return NULL; + } + + sxe_jitson_make_string_ref(jitson, string); + jitson->type |= SXE_JITSON_TYPE_IS_OWN | SXE_JITSON_TYPE_ALLOCED; + return jitson; +} + +/** + * Create a reference to another jitson that will behave exactly like the original jitson + * + * @param jitson Pointer to jitson to create + * @param to jitson to refer to + * + * @note References are only valid during the lifetime of the jitson they refer to + */ +static inline struct sxe_jitson * +sxe_jitson_create_reference(const struct sxe_jitson *to) +{ + struct sxe_jitson *jitson; + + if (!(jitson = kit_malloc(sizeof(struct sxe_jitson)))) + return NULL; + + sxe_jitson_make_reference(jitson, to); + jitson->type |= SXE_JITSON_TYPE_ALLOCED; + return jitson; +} + +/** + * Consume characters that have been peeked at from a source. 
+ * + * @param source The source to consume from + * @param size The number of bytes to consume + */ +static inline void +sxe_jitson_source_consume(struct sxe_jitson_source *source, size_t size) +{ + if (source->line && *source->next == '\n') + source->line++; + + source->next += size; +} + +static inline uint32_t +sxe_jitson_source_get_flags(const struct sxe_jitson_source *source) +{ + return source->flags; +} + +static inline size_t +sxe_jitson_source_get_consumed(const struct sxe_jitson_source *source) +{ + return source->next - source->json; +} + +static inline bool +sxe_jitson_source_is_exhausted(const struct sxe_jitson_source *source) +{ + return *source->next == '\0' || source->next >= source->end; +} + +/* For optimization; when calling sxe_jitson_len, it will use sxe_jitson_len_base for arrays + */ +static inline size_t +sxe_jitson_len_array(const struct sxe_jitson *jitson) +{ + return jitson->len; +} + +/* For backward compatibility + */ +#define sxe_jitson_type_init(mintypes, flags) sxe_jitson_initialize((mintypes), (flags)) +#define sxe_jitson_type_fini() sxe_jitson_finalize() + +#define MOCK_FAIL_STACK_NEW_OBJECT ((char *)sxe_jitson_new + 0) +#define MOCK_FAIL_STACK_NEW_JITSONS ((char *)sxe_jitson_new + 1) +#define MOCK_FAIL_STACK_EXPAND_AFTER_GET ((char *)sxe_jitson_new + 2) +#define MOCK_FAIL_STACK_DUP ((char *)sxe_jitson_new + 3) +#define MOCK_FAIL_STACK_EXPAND ((char *)sxe_jitson_new + 4) +#define MOCK_FAIL_OBJECT_GET_MEMBER ((char *)sxe_jitson_new + 5) +#define MOCK_FAIL_ARRAY_GET_ELEMENT ((char *)sxe_jitson_new + 6) +#define MOCK_FAIL_DUP ((char *)sxe_jitson_dup + 0) +#define MOCK_FAIL_OBJECT_CLONE ((char *)sxe_jitson_dup + 1) +#define MOCK_FAIL_ARRAY_CLONE ((char *)sxe_jitson_dup + 2) +#define MOCK_FAIL_STRING_CLONE ((char *)sxe_jitson_dup + 3) + +#endif diff --git a/lib-sxe-jitson/sxe-sortedarray.h b/lib-sxe-jitson/sxe-sortedarray.h new file mode 100644 index 0000000..34c3c40 --- /dev/null +++ b/lib-sxe-jitson/sxe-sortedarray.h @@ -0,0 +1,44 @@ 
+/* + * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + +// This interface is deprecated; please use the updated kit-sortedarray interface instead + +#ifndef SXE_SORTEDARRAY_H +#define SXE_SORTEDARRAY_H + +#include "kit-sortedarray.h" + +#define SXE_SORTEDARRAY_DEFAULT KIT_SORTEDARRAY_DEFAULT +#define SXE_SORTEDARRAY_ALLOW_INSERTS KIT_SORTEDARRAY_ALLOW_INSERTS +#define SXE_SORTEDARRAY_ALLOW_GROWTH KIT_SORTEDARRAY_ALLOW_GROWTH +#define SXE_SORTEDARRAY_ZERO_COPY KIT_SORTEDARRAY_ZERO_COPY +#define SXE_SORTEDARRAY_CMP_CAN_FAIL KIT_SORTEDARRAY_CMP_CAN_FAIL + +#define sxe_sortedarray_class kit_sortedarray_class +#define sxe_sortedarray_add kit_sortedarray_add_element +#define sxe_sortedarray_delete kit_sortedarray_delete +#define sxe_sortedarray_find kit_sortedarray_find +#define sxe_sortedarray_get kit_sortedarray_get +#define sxe_sortedarray_intersect kit_sortedarray_intersect + +#endif diff --git a/lib-sxe-jitson/sxe-unicode.c b/lib-sxe-jitson/sxe-unicode.c new file mode 100644 index 0000000..cf770b8 --- /dev/null +++ b/lib-sxe-jitson/sxe-unicode.c @@ -0,0 +1,67 @@ +/* Copyright (c) 2021 Jim Belton + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "sxe-log.h" +#include "sxe-unicode.h" + +/* From https://www.ietf.org/rfc/rfc3629.txt + * + * Char. number range | UTF-8 octet sequence + * (hexadecimal) | (binary) + * --------------------+--------------------------------------------- + * 0000 0000-0000 007F | 0xxxxxxx + * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx + * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + */ + +unsigned +sxe_unicode_to_utf8(unsigned unicode, char *utf_out) +{ + if (unicode <= 0x7F) { + utf_out[0] = (char)unicode; + return 1; + } + + if (unicode <= 0x7FF) { + utf_out[0] = (char)(0xC0 | (unicode >> 6)); + utf_out[1] = (char)(0x80 | (unicode & 0x3F)); + return 2; + } + + if (unicode <= 0xFFFF) { + utf_out[0] = (char)(0xE0 | (unicode >> 12)); + utf_out[1] = (char)(0x80 | ((unicode >> 6) & 0x3F)); + utf_out[2] = (char)(0x80 | (unicode & 0x3F)); + return 3; + } + + if (unicode <= 0x10FFFF) { + utf_out[0] = (char)(0xF0 | (unicode >> 18)); + utf_out[1] = (char)(0x80 | ((unicode >> 12) & 0x3F)); + utf_out[2] = (char)(0x80 | ((unicode >> 6) & 0x3F)); + utf_out[3] = (char)(0x80 | (unicode & 0x3F)); + return 4; + } + + SXEL2("sxe_unicode_to_utf8: %u is not a valid unicode code point", unicode); + return 0; +} diff --git a/lib-sxe-jitson/sxe-unicode.h b/lib-sxe-jitson/sxe-unicode.h new file mode 100644 index 0000000..ff40ad8 --- /dev/null +++ b/lib-sxe-jitson/sxe-unicode.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2021 Jim Belton + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without 
limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SXE_UNICODE_H
+#define SXE_UNICODE_H
+
+#include "sxe-unicode-proto.h"
+
+#endif
diff --git a/lib-sxe-jitson/test/test-sxe-factory.c b/lib-sxe-jitson/test/test-sxe-factory.c
new file mode 100644
index 0000000..637f72f
--- /dev/null
+++ b/lib-sxe-jitson/test/test-sxe-factory.c
@@ -0,0 +1,39 @@
+#include
+#include
+#include
+
+#include "kit-alloc.h"
+#include "kit-mockfail.h"
+#include "sxe-factory.h"
+
+/* Exercise the sxe_factory add/look/remove calls (9 planned tests) and verify that no allocations are leaked.
+ */
+int
+main(void)
+{
+    struct sxe_factory factory[1];
+    size_t len;
+    uint64_t start_allocations;    // Allocation count at start, compared again at the end to detect leaks
+    char * data;
+
+    plan_tests(9);
+
+    start_allocations = kit_memory_allocations();
+    // KIT_ALLOC_SET_LOG(1);    // Turn off when done
+
+    sxe_factory_alloc_make(factory, 0, 0);
+
+    MOCKFAIL_START_TESTS(1, sxe_factory_reserve);    // Presumably makes sxe_factory_reserve fail within this section
+    is(sxe_factory_add(factory, "hello,", 0), -1, "Failed to add 'hello,' to the factory on realloc failure");
+    MOCKFAIL_END_TESTS();
+
+    /* With a length of 0, the whole 6 character string is expected to be added
+     */
+    is(sxe_factory_add(factory, "hello,", 0), 6, "Added 'hello,' to the factory");
+    is_eq(sxe_factory_look(factory, &len), "hello,", "Saw 'hello,' in the factory");
+    is(len, 6, "Look returned correct length");
+    is(sxe_factory_add(factory, " world.xxx",
7), 7, "Added ' world.' to the factory"); + is_eq(sxe_factory_look(factory, NULL), "hello, world.", "Saw 'hello, world.' in the factory"); + is_eq(data = sxe_factory_remove(factory, NULL), "hello, world.", "Removed 'hello, world.' from the factory"); + kit_free(data); + is(sxe_factory_look(factory, NULL), NULL, "Saw no data left in the factory"); + + is(kit_memory_allocations(), start_allocations, "No memory was leaked"); + return exit_status(); +} diff --git a/lib-sxe-jitson/test/test-sxe-jitson-const.c b/lib-sxe-jitson/test/test-sxe-jitson-const.c new file mode 100644 index 0000000..bb5f48e --- /dev/null +++ b/lib-sxe-jitson/test/test-sxe-jitson-const.c @@ -0,0 +1,131 @@ +/* Test the sxe-jitson const extension + */ + +#include +#include + +#include "kit-mockfail.h" +#include "sxe-jitson-const.h" +#include "sxe-thread.h" + +static bool +number_cast_func(struct sxe_jitson_stack *stack, const struct sxe_jitson *from) +{ + const char *string; + char *end; + unsigned long value; + size_t len; + unsigned index; + + if (sxe_jitson_get_type(from) != SXE_JITSON_TYPE_STRING) + return false; + + value = strtoul(string = sxe_jitson_get_string(from, &len), &end, 10); + + if ((size_t)(end - string) != len) + return false; + + if ((index = sxe_jitson_stack_expand(stack, 1)) == SXE_JITSON_STACK_ERROR) + return false; + + sxe_jitson_make_uint(&stack->jitsons[index], value); + return true; +} + +static bool +string_cast_func(struct sxe_jitson_stack *stack, const struct sxe_jitson *from) +{ + unsigned index; + char buf[12]; + + SXEA1(sxe_jitson_get_type(from) == SXE_JITSON_TYPE_NUMBER, "Expected a number"); + SXEA1((index = sxe_jitson_stack_expand(stack, 1)) != SXE_JITSON_STACK_ERROR, "Expected to expand the stack"); + + snprintf(buf, sizeof(buf), "%lu", sxe_jitson_get_uint(from)); + sxe_jitson_make_string_ref(&stack->jitsons[index], kit_strdup(buf)); + stack->jitsons[index].type |= SXE_JITSON_TYPE_IS_OWN; // Set as owned so that sxe_jitson_free_base() will free the string + return 
true;
+}
+
+/* Exercise the constants extension and cast operators (24 planned tests) and verify that no allocations are leaked.
+ */
+int
+main(void)
+{
+    struct sxe_jitson_source source;
+    struct sxe_jitson_stack *stack;
+    struct sxe_jitson *jitson;    // Constructed jitson values are returned as non-const
+    const struct sxe_jitson *element;
+    char *json_out;
+    uint64_t start_allocations;    // Allocation count at start, compared again at the end to detect leaks
+
+    plan_tests(24);
+    start_allocations = kit_memory_allocations();
+    // KIT_ALLOC_SET_LOG(1);    // Turn off when done
+
+    sxe_jitson_initialize(0, 0);    // Initialize the JSON types, and don't enable hexadecimal
+    stack = sxe_jitson_stack_get_thread();
+
+    diag("Test the constants extension");
+    {
+        is(sxe_jitson_new("[NONE,BIT0,BIT1]"), NULL, "Failed to parsed an array containing unknown constants");
+
+        /* Build the object {"NAME": "two-jitson-value", "BIT0": 1, "BIT1": 2} and install it as the set of constants
+         */
+        ok(sxe_jitson_stack_open_collection( stack, SXE_JITSON_TYPE_OBJECT), "Opened an object on the stack");
+        ok(sxe_jitson_stack_add_member_string(stack, "NAME", "two-jitson-value", 0), "Added a name to the object");
+        ok(sxe_jitson_stack_add_member_uint( stack, "BIT0", 1), "Added a bit flag");
+        ok(sxe_jitson_stack_add_member_uint( stack, "BIT1", 2), "Added another bit flag");
+        sxe_jitson_stack_close_collection(stack);
+        ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the object from the stack");
+        sxe_jitson_const_initialize(jitson);
+
+        /* Test the failure case where a duplicated constant value is > 1 jitson and the allocation fails.
+         */
+        stack = sxe_jitson_stack_get_thread();
+        stack->maximum = 1;    // Set initial maximum back to 1
+        MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND);
+        MOCKFAIL_SET_SKIP(1);    // Skip 1st call, but fail thereafter
+        is(sxe_jitson_new("[NAME,BIT0,BIT1]"), NULL, "Failed to parse constants on alloc failure");
+        MOCKFAIL_END_TESTS();
+
+        /* Without the mock failure, each constant name is replaced by its value
+         */
+        ok(jitson = sxe_jitson_new("[NAME,BIT0,BIT1]"), "Parsed array containing constants");
+        is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "[\"two-jitson-value\",1,2]", "Constants were correctly replaced");
+        kit_free(json_out);
+        sxe_jitson_free(jitson);
+
+        is(sxe_jitson_new("bull"), NULL, "Failed to parse a 1st letter misspelling of a keyword");
+
+        sxe_jitson_source_from_string(&source, "[NAME,BIT0,BIT1]", SXE_JITSON_FLAG_STRICT);
+        is(sxe_jitson_stack_load_json(stack, &source), NULL, "Failed to parse constants due to strict mode");
+    }
+
+    diag("Test cast operators");
+    {
+        struct sxe_jitson number_cast, string_cast;    // Storage handed to sxe_jitson_const_register_cast for each cast
+
+        sxe_jitson_const_register_cast(&number_cast, "number", number_cast_func);
+        ok(jitson = sxe_jitson_new("number(\"1\")"), "Cast \"1\" to a number");
+        is_eq(sxe_jitson_get_type_as_str(jitson), "number", "number(\"1\") is a number");
+        is(sxe_jitson_get_uint(jitson), 1, "number(\"1\") is 1");
+        sxe_jitson_free(jitson);
+
+        ok(jitson = sxe_jitson_new("[number(\"1\")]"), "Parsed an array containing a cast");
+        ok(element = sxe_jitson_array_get_element(jitson, 0), "Got its element");
+        is_eq(sxe_jitson_get_type_as_str(element), "number", "Its element is a number");
+        is(sxe_jitson_get_uint(element), 1, "Its element is 1");
+        sxe_jitson_free(jitson);
+
+        /* Malformed casts: each of these must fail to parse
+         */
+        ok(!sxe_jitson_new("number"), "Expect a ( after a cast name");
+        ok(!sxe_jitson_new("number("), "Expect a JSON value after a cast name");
+        ok(!sxe_jitson_new("number(null)"), "Expect cast function to fail if argument is not a string");
+        ok(!sxe_jitson_new("number(\"1\""), "Expect a ) after the JSON value after a cast name");
+
+        /* The following should not leak the
memory allocated by the cast function
+         */
+        sxe_jitson_const_register_cast(&string_cast, "string", string_cast_func);
+        ok(!sxe_jitson_new("string(1"), "Expect a ) after the JSON value after a cast name");
+    }
+
+    sxe_jitson_const_finalize();
+    sxe_jitson_type_fini();
+    sxe_thread_memory_free(SXE_THREAD_MEMORY_ALL);
+    is(kit_memory_allocations(), start_allocations, "No memory was leaked");
+    return exit_status();
+}
diff --git a/lib-sxe-jitson/test/test-sxe-jitson-ident.c b/lib-sxe-jitson/test/test-sxe-jitson-ident.c
new file mode 100644
index 0000000..60521d9
--- /dev/null
+++ b/lib-sxe-jitson/test/test-sxe-jitson-ident.c
@@ -0,0 +1,100 @@
+/* Test the sxe-jitson identifier extension
+ */
+
+#include
+
+#include "kit-mockfail.h"
+#include "sxe-jitson-const.h"
+#include "sxe-jitson-ident.h"
+#include "sxe-thread.h"
+
+static const struct sxe_jitson *my_value = NULL;    // What my_lookup returns; NULL until the test sets it
+
+/* Lookup hook stub: ignores the identifier it's given and returns whatever my_value currently points to.
+ */
+static const struct sxe_jitson *
+my_lookup(const char *ident, size_t len)
+{
+    SXE_UNUSED_PARAMETER(ident);
+    SXE_UNUSED_PARAMETER(len);
+    return my_value;
+}
+
+/* Exercise the identifier extension (27 planned tests) and verify that no allocations are leaked.
+ */
+int
+main(void)
+{
+    struct sxe_jitson_source source;
+    struct sxe_jitson_stack *stack;
+    struct sxe_jitson *jitson;    // Constructed jitson values are returned as non-const
+    const struct sxe_jitson *element;    // Accessed jitson values are returned as const
+    char *string;
+    size_t len;
+    uint64_t start_allocations;    // Allocation count at start, compared again at the end to detect leaks
+
+    plan_tests(27);
+    start_allocations = kit_memory_allocations();
+    // KIT_ALLOC_SET_LOG(1); // Turn off when done
+
+    ok(!sxe_jitson_is_init(), "Not yet initialized");
+    sxe_jitson_type_init(0, 0);    // Initialize the JSON types, and don't enable hexadecimal
+    sxe_jitson_ident_register();
+    sxe_jitson_ident_register();    // Registration is idempotent
+    stack = sxe_jitson_stack_get_thread();
+
+    ok(jitson = sxe_jitson_new("[NONE,length_8,identifier]"), "Parsed an array containing unknown identifiers");
+    ok(element = sxe_jitson_array_get_element(jitson, 0), "Got the first element");
+    is(SXE_JITSON_TYPE_IDENT,
sxe_jitson_get_type(element), "It's an identifier");
+    is_eq(sxe_jitson_ident_get_name(element, &len), "NONE", "It's 'NONE'");
+    is(len, 4, "It's length is 4");
+    sxe_jitson_free(jitson);
+
+    sxe_jitson_source_from_string(&source, "[NONE,length_8,identifier]", SXE_JITSON_FLAG_STRICT);
+    is(sxe_jitson_stack_load_json(stack, &source), NULL, "Failed to parse identifiers due to strict mode");
+
+    /* Build the object {"NONE": 0} and install it as the set of constants
+     */
+    ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened an object on the stack");
+    ok(sxe_jitson_stack_add_member_uint(stack, "NONE", 0), "Added value for NONE to the object");
+    sxe_jitson_stack_close_collection(stack);
+    ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the object from the stack");
+    sxe_jitson_const_initialize(jitson);    // Set the constants
+
+    /* Test the failure case where an identifier is > 7 characters and jitson allocation fails.
+     */
+    stack->maximum = 1;    // Reset the thread stack's initial stack size to 1
+    MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND);
+    MOCKFAIL_SET_FREQ(3);
+    is(sxe_jitson_new("[NONE,BIT0,identifier]"), NULL, "Failed to parse constants on alloc failure");
+    MOCKFAIL_END_TESTS();
+
+    /* NONE is now a constant and is replaced by its value (0); names that aren't constants remain identifiers
+     */
+    ok(jitson = sxe_jitson_new("[NONE,BIT0,identifier]"), "Parsed array of a constant and an unknown identifier");
+    ok(element = sxe_jitson_array_get_element(jitson, 0), "Got the first element");
+    is(sxe_jitson_get_type(element), SXE_JITSON_TYPE_NUMBER, "It's a number");
+    is(sxe_jitson_get_uint(element), 0, "It's 0");
+    ok(element = sxe_jitson_array_get_element(jitson, 2), "Got the third element");
+    is_eq(sxe_jitson_get_type_as_str(element), "identifier", "It's an indentifier");
+    is_eq(string = sxe_jitson_to_json(element, NULL), "identifier", "identifier translated to string correctly");
+    kit_free(string);
+    is_eq(sxe_jitson_ident_get_name(element, &len), "identifier", "It's 'identifier'");
+    is(len, 10, "Its length is 10");
+
+    MOCKFAIL_START_TESTS(1, sxe_factory_reserve);
+    is(string = sxe_jitson_to_json(element, NULL),
NULL, "identifier got null on malloc failure");
+    MOCKFAIL_END_TESTS();
+
+    sxe_jitson_free(jitson);
+
+    diag("Test lookup and test functions");
+    {
+        ok(jitson = sxe_jitson_new("identifier"), "Constructed an identifier");
+        is(sxe_jitson_test(jitson), SXE_JITSON_TEST_ERROR, "With no lookup, test returns SXE_JITSON_TEST_ERROR");
+        is(sxe_jitson_ident_lookup_hook(my_lookup), NULL, "Hooked in the lookup function");
+        is(sxe_jitson_test(jitson), SXE_JITSON_TEST_ERROR, "With not found, test returns SXE_JITSON_TEST_ERROR");
+        my_value = sxe_jitson_true;    // From here on, my_lookup resolves every identifier to true
+        is(sxe_jitson_test(jitson), SXE_JITSON_TEST_TRUE, "With true found, test returns SXE_JITSON_TEST_TRUE");
+        sxe_jitson_free(jitson);
+    }
+
+    sxe_jitson_const_finalize();
+    sxe_jitson_type_fini();
+    sxe_thread_memory_free(SXE_THREAD_MEMORY_ALL);
+    is(kit_memory_allocations(), start_allocations, "No memory was leaked");
+
+    return exit_status();
+}
diff --git a/lib-sxe-jitson/test/test-sxe-jitson-oper.c b/lib-sxe-jitson/test/test-sxe-jitson-oper.c
new file mode 100644
index 0000000..b073ed7
--- /dev/null
+++ b/lib-sxe-jitson/test/test-sxe-jitson-oper.c
@@ -0,0 +1,394 @@
+/* Test the sxe-jitson operator extension
+ */
+
+#include
+#include
+
+#include "kit-mockfail.h"
+#include "sxe-jitson-oper.h"
+#include "sxe-jitson-in.h"
+#include "sxe-jitson-intersect.h"
+#include "sxe-log.h"
+#include "sxe-thread.h"
+
+static unsigned len_op = ~0U;    // Operator number of "len"; ~0U until main registers it
+
+/* Statically initialized results returned by the test operators; not allocated, so never flagged as ALLOCED
+ */
+static struct sxe_jitson jitson_true = {
+    .type = SXE_JITSON_TYPE_BOOL,
+    .boolean = true
+};
+
+static struct sxe_jitson jitson_false = {
+    .type = SXE_JITSON_TYPE_BOOL,
+    .boolean = false
+};
+
+static struct sxe_jitson jitson_null = {
+    .type = SXE_JITSON_TYPE_NULL
+};
+
+/* Default implementation of the binary "&&" operator: false if the left operand tests false, otherwise the truth of the
+ * right operand. NOTE(review): SXE_JITSON_TEST_ERROR is non-zero, so an operand whose test errors counts as true here.
+ */
+static const struct sxe_jitson *
+and_op_default(const struct sxe_jitson *left, const struct sxe_jitson *right)
+{
+    if (!sxe_jitson_test(left))
+        return &jitson_false;
+
+    return sxe_jitson_test(right) ?
&jitson_true : &jitson_false; +} + +static const struct sxe_jitson * +len_op_default(const struct sxe_jitson *arg) /* Default len operator: NULL with errno EOPNOTSUPP when arg does not support len, else a created uint jitson holding its length */ +{ + if (!sxe_jitson_supports_len(arg)) { + SXEL2(": Type %s doesn't support operator '%s'", sxe_jitson_get_type_as_str(arg), sxe_jitson_oper_get_name(len_op)); + errno = EOPNOTSUPP; + return NULL; + } + + return sxe_jitson_create_uint(sxe_jitson_len(arg)); +} + +static const struct sxe_jitson * +in_op_string_caseless(const struct sxe_jitson *left, const struct sxe_jitson *right) /* Caseless IN for strings: NULL with errno EOPNOTSUPP when left is not a string, the local null jitson when left is not found in right, else a string reference to the match inside right */ +{ + if (sxe_jitson_get_type(left) != SXE_JITSON_TYPE_STRING) { + SXEL2(": Can't look for a %s in a string", sxe_jitson_get_type_as_str(left)); + errno = EOPNOTSUPP; + return NULL; + } + + char *found = strcasestr(sxe_jitson_get_string(right, NULL), sxe_jitson_get_string(left, NULL)); + + if (!found) + return &jitson_null; + + return sxe_jitson_create_string_ref(found); +} + +/* Wonky override to take the length of a number: the character count that snprintf reports for its %g formatting. + */ +static const struct sxe_jitson * +len_op_number(const struct sxe_jitson *arg) +{ + char buf[16]; + + return sxe_jitson_create_number(snprintf(buf, sizeof(buf), "%g", sxe_jitson_get_number(arg))); +} + +static const char *string = "this STRING is 33 characters long"; + +int +main(void) +{ + struct sxe_jitson arg, element; + union sxe_jitson_oper_func func; + struct sxe_jitson_stack *stack; + struct sxe_jitson *collection, *coll_rhs; + const struct sxe_jitson *result; + char *json_str; + const char *substring; + uint64_t start_allocations; + unsigned and_op; + + tap_plan(90, TAP_FLAG_LINE_ON_OK, NULL); // Display test line numbers in OK messages (useful for tracing) + start_allocations = kit_memory_allocations(); + // KIT_ALLOC_SET_LOG(1); // Turn off when done + + sxe_jitson_type_init(SXE_JITSON_MIN_TYPES, SXE_JITSON_FLAG_OPTIMIZE); + + diag("Test the operator extension"); + { + func.binary = and_op_default; + is(and_op = sxe_jitson_oper_register("&&", SXE_JITSON_OPER_BINARY, func), 1, "First operator is 1"); + 
is(sxe_jitson_oper_apply_binary(&jitson_true, and_op, &jitson_true), &jitson_true, "true && true is true"); + is(sxe_jitson_oper_apply_binary(&jitson_true, and_op, &jitson_false), &jitson_false, "true && false is false"); + + sxe_jitson_make_string_ref(&arg, string); + func.unary = len_op_default; + is(len_op = sxe_jitson_oper_register("len", SXE_JITSON_OPER_UNARY, func), 2, "Second operator is 2"); + is(sxe_jitson_oper_apply_unary(len_op, &jitson_true), NULL, "len true is invalid"); + ok(result = sxe_jitson_oper_apply_unary(len_op, &arg), "len string is implemented"); + is(sxe_jitson_get_number(result), 33, "len string is 33"); + sxe_jitson_free(result); + + sxe_jitson_make_string_ref(&element, "string"); /* Lower case: the checks below expect a match only after IN is overridden to ignore case */ + sxe_jitson_in_init(); + is(sxe_jitson_oper_in, 3, "Third operator is 3"); + ok(result = sxe_jitson_oper_apply_binary(&element, sxe_jitson_oper_in, &arg), "substring in string is implemented"); + is_eq(sxe_jitson_get_type_as_str(result), "null", "substring not found"); + + /* Override the standard implementation of string IN to ignore case + */ + func.binary = in_op_string_caseless; + sxe_jitson_oper_add_to_type(sxe_jitson_oper_in, SXE_JITSON_TYPE_STRING, func); + ok(result = sxe_jitson_oper_apply_binary(&element, sxe_jitson_oper_in, &arg), "substring in string is implemented"); + is_strncmp(substring = sxe_jitson_get_string(result, NULL), "STRING", 6, "Return begins with the substring"); + is(substring, string + strlen("this "), "And points into the containing string"); + sxe_jitson_free(result); + + sxe_jitson_make_number(&arg, 33); + ok(!sxe_jitson_oper_apply_binary(&element, sxe_jitson_oper_in, &arg), "Substring in a number is not implemented"); + is(errno, EOPNOTSUPP, "Got the expected error (%s)", strerror(errno)); + + sxe_jitson_make_number(&arg, 666); + func.unary = len_op_number; + sxe_jitson_oper_add_to_type(len_op, SXE_JITSON_TYPE_NUMBER, func); + ok(result = sxe_jitson_oper_apply_unary(len_op, &arg), "length of number is now implemented"); + 
is(sxe_jitson_get_uint(result), 3, "Returned length of 666 is 3"); + sxe_jitson_free(result); + + func.unary = NULL; // No default + is(sxe_jitson_oper_register("~", SXE_JITSON_OPER_UNARY, func), 4, "Fourth operator is 4"); + ok(!sxe_jitson_oper_apply_unary(4, &arg), "~ of number is not implemented"); + + func.binary = NULL; // No default + is(sxe_jitson_oper_register("||", SXE_JITSON_OPER_BINARY, func), 5, "Fifth operator is 5"); + ok(!sxe_jitson_oper_apply_binary(sxe_jitson_true, 5, sxe_jitson_false), "|| of bools is not implemented"); + } + + diag("Test the default IN operator"); + { + collection = sxe_jitson_new("[true, 1]"); + ok(sxe_jitson_eq(sxe_jitson_in(sxe_jitson_true, collection), sxe_jitson_true), "Found 'true'"); + sxe_jitson_make_number(&element, 1.1); + ok(sxe_jitson_eq(sxe_jitson_in(&element, collection), sxe_jitson_null), "Didn't find 1.1"); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("{\"one\": 1, \"zero\": 0}"); + is(sxe_jitson_in(&element, collection), NULL, "Can't use a number as a key"); + sxe_jitson_make_string_ref(&element, "one"); + sxe_jitson_make_number(&arg, 1); + ok(sxe_jitson_eq(sxe_jitson_in(&element, collection), &arg), "Found \"one\" in object"); + sxe_jitson_make_string_ref(&element, "zero"); + sxe_jitson_make_number(&arg, 0); + ok(sxe_jitson_eq(sxe_jitson_in(&element, collection), &arg), "Found \"zero\" in object"); + sxe_jitson_free(collection); + + ok(result = sxe_jitson_in(&element, sxe_jitson_null), "Able to look for a number in 'null'"); /* NOTE(review): element holds the string "zero" at this point, not a number */ + is_eq(sxe_jitson_get_type_as_str(result), "null", "And get 'null' back"); + + collection = sxe_jitson_new("[\"a\", \"bsearchable\", \"is\", \"list\", \"this\"]"); + ok(sxe_jitson_eq(sxe_jitson_in(&element, collection), sxe_jitson_null), "Didn't find \"one\" in sorted list"); /* NOTE(review): element still holds "zero", not "one" */ + sxe_jitson_make_string_ref(&element, "is"); + ok(sxe_jitson_test(sxe_jitson_in(&element, collection)), "Found \"is\" in sorted list"); + sxe_jitson_free(collection); + + collection = 
sxe_jitson_new("[]"); + ok(sxe_jitson_eq(sxe_jitson_in(&element, collection), sxe_jitson_null), "Didn't find \"one\" in empty list"); /* NOTE(review): element was last set to "is", not "one" */ + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[0,1,2,3,4,5,6]"); + sxe_jitson_make_number(&element, 1.0); + ok(sxe_jitson_test(sxe_jitson_in(&element, collection)), "Found 1 in [0,1,2,3,4,5,6]"); + sxe_jitson_make_number(&element, 0); + ok(sxe_jitson_test(sxe_jitson_in(&element, collection)), "Found 0 in [0,1,2,3,4,5,6]"); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[0,[1,2,3],[4,5,6]]"); + ok(sxe_jitson_eq(sxe_jitson_in(&element, collection), sxe_jitson_true), "0 IN [0,[1,2,3],[4,5,6]] -> true"); /* element is still the number 0 made for the previous array */ + sxe_jitson_make_uint(&element, 1); + result = sxe_jitson_in(&element, collection); + is_eq(json_str = result ? sxe_jitson_to_json(result, NULL) : NULL, "[1,2,3]", "1 IN [0,[1,2,3],[4,5,6]] -> [1,2,3]"); + kit_free(json_str); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[\"one\",\"two\"]"); + sxe_jitson_make_string_ref(&element, "on"); + ok(sxe_jitson_eq(sxe_jitson_in(&element, collection), sxe_jitson_null), "\"on\" IN [\"one\",\"two\"] -> null"); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[3.14, 6.66]"); + sxe_jitson_make_uint(&element, 18446744073709551615ULL); + result = sxe_jitson_in(&element, collection); + is(result, NULL, "max uint64_t IN [3.14, 6.66] -> error"); + sxe_jitson_free(collection); + } + + diag("Test the default INTERSECT operator"); + { + sxe_jitson_intersect_init(); + ok(!sxe_jitson_intersect(sxe_jitson_null, sxe_jitson_true), "Can't intersect values that aren't arrays"); + + collection = sxe_jitson_new("[]"); + ok(!sxe_jitson_intersect(sxe_jitson_null, collection), "Can't intersect a value that isn't an array"); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[2, 1]"); + ok(!sxe_jitson_intersect(collection, sxe_jitson_true), "Can't intersect a value that's not a array"); + + coll_rhs = sxe_jitson_new("[1, 2]"); + 
ok(!sxe_jitson_intersect(sxe_jitson_true, coll_rhs), "Can't intersect non-array with ordered array"); + MOCKFAIL_START_TESTS(1, SXE_JITSON_INTERSECT_OPEN); + ok(!sxe_jitson_intersect(collection, coll_rhs), "Can't intersect w/ordered on failure to open"); + MOCKFAIL_END_TESTS(); + + MOCKFAIL_START_TESTS(1, SXE_JITSON_INTERSECT_ADD); + ok(!sxe_jitson_intersect(collection, coll_rhs), "Can't intersect w/ordered on failure to add"); + MOCKFAIL_END_TESTS(); + + MOCKFAIL_START_TESTS(1, SXE_JITSON_INTERSECT_GET); + ok(!sxe_jitson_intersect(collection, coll_rhs), "Can't intersect w/ordered on failure to get"); + MOCKFAIL_END_TESTS(); + sxe_jitson_free(coll_rhs); + + coll_rhs = sxe_jitson_new("[1]"); + MOCKFAIL_START_TESTS(1, SXE_JITSON_INTERSECT_OPEN); + ok(!sxe_jitson_intersect(collection, coll_rhs), "Can't intersect on failure to open result"); + MOCKFAIL_END_TESTS(); + + MOCKFAIL_START_TESTS(1, SXE_JITSON_INTERSECT_ADD); + ok(!sxe_jitson_intersect(collection, coll_rhs), "Can't intersect on failure to add to result"); + MOCKFAIL_END_TESTS(); + + MOCKFAIL_START_TESTS(1, SXE_JITSON_INTERSECT_GET); + ok(!sxe_jitson_intersect(collection, coll_rhs), "Can't intersect on failure to get result"); + MOCKFAIL_END_TESTS(); + + stack = sxe_jitson_stack_get_thread(); // Get the thread stack + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Opened an outside collection"); + ok(result = sxe_jitson_intersect(collection, coll_rhs), "Intersected [2, 1] with [1]"); + is_eq(sxe_jitson_get_type_as_str(result), "array", "Result is an array"); + is(sxe_jitson_len(result), 1, "Of one element"); + is(sxe_jitson_get_number(sxe_jitson_array_get_element(result, 0)), 1, "Which is 1"); + sxe_jitson_free(result); + sxe_jitson_free(coll_rhs); + sxe_jitson_stack_close_collection(stack); // Close the outside collection + ok(coll_rhs = sxe_jitson_stack_get_jitson(stack), "Got the outside collection"); + sxe_jitson_free(coll_rhs); + + coll_rhs = collection; /* The [2, 1] array becomes the right hand side */ + collection = 
sxe_jitson_new("[1, 3]"); + ok(result = sxe_jitson_intersect(collection, coll_rhs), "Intersected [1, 3] with [2, 1]"); + is_eq(sxe_jitson_get_type_as_str(result), "array", "Result is an array"); + is(sxe_jitson_len(result), 1, "Of one element"); + is(sxe_jitson_get_number(sxe_jitson_array_get_element(result, 0)), 1, "Which is 1"); + sxe_jitson_free(result); + sxe_jitson_free(coll_rhs); /* coll_rhs is the array that collection pointed to before it was reassigned */ + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[{}]"); + coll_rhs = sxe_jitson_new("[{}]"); + ok(!sxe_jitson_intersect(collection, coll_rhs), "Can't intersect arrays containing objects"); + sxe_jitson_free(coll_rhs); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[1,2,3]"); + coll_rhs = sxe_jitson_new("[2,4]"); + ok(result = sxe_jitson_intersect(collection, coll_rhs), "Intersected ordered arrays [1,2,3] and [2,4]"); + is(1, sxe_jitson_len(result), "Intersection has 1 element"); + is(2, sxe_jitson_get_uint(sxe_jitson_array_get_element(result, 0)), "It's 2"); + sxe_jitson_free(result); + sxe_jitson_free(coll_rhs); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[3,2,1]"); + coll_rhs = sxe_jitson_new("[4,2]"); + MOCKFAIL_START_TESTS(1, SXE_JITSON_INTERSECT_ADD); + ok(!sxe_jitson_intersect(collection, coll_rhs), "Can't intersect unordered arrays if add fails"); + MOCKFAIL_END_TESTS(); + + ok(result = sxe_jitson_intersect(collection, coll_rhs), "Intersected unordered arrays [3,2,1] and [4,2]"); + is(1, sxe_jitson_len(result), "Intersection has 1 element"); + is(2, sxe_jitson_get_uint(sxe_jitson_array_get_element(result, 0)), "It's 2"); + sxe_jitson_free(result); + sxe_jitson_free(coll_rhs); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[0, 18446744073709551615]"); // 2nd element can only be represented as a uint + coll_rhs = sxe_jitson_new("[3.14159, 4.4]"); // 1st element can only be represented as a double + ok(!sxe_jitson_intersect(collection, coll_rhs), "Can't intersect ordered arrays with incomparable types"); + 
sxe_jitson_free(collection); + + collection = sxe_jitson_new("[18446744073709551615, 0]"); // Same as above but LHS list is unordered + ok(!sxe_jitson_intersect(collection, coll_rhs), "Unordered arrays with ordered with incomparable types"); + sxe_jitson_free(coll_rhs); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[{},{}]"); // Technically an unordered array + coll_rhs = sxe_jitson_new("[{},{}]"); // Technically an unordered array + ok(!sxe_jitson_intersect(collection, coll_rhs), "Can't intersect unordered arrays with incomparable types"); + sxe_jitson_free(coll_rhs); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[\"one-long\",\"two\"]"); // A non-uniform sorted array + coll_rhs = sxe_jitson_new("[\"three-long\",\"two\"]"); // Another non-uniform sorted array + ok(result = sxe_jitson_intersect(collection, coll_rhs), "Intersected non-uniform arrays"); + is(1, sxe_jitson_len(result), "Intersection has 1 element"); + is_eq(sxe_jitson_get_string(sxe_jitson_array_get_element(result, 0), NULL), "two", "Got expected value"); + sxe_jitson_free(result); + sxe_jitson_free(coll_rhs); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[\"one-long\",\"two-long\"]"); // A sorted array of size 32 elements + coll_rhs = sxe_jitson_new("[\"three\",\"two\"]"); // A sorted array of size 16 elements + ok(result = sxe_jitson_intersect(collection, coll_rhs), "Intersected uniform arrays of different sized elements"); + is(0, sxe_jitson_len(result), "Intersection has no elements"); + sxe_jitson_free(result); + sxe_jitson_free(coll_rhs); + sxe_jitson_free(collection); + } + + diag("Test the INTERSECT_TEST operator"); // INTERSECT_TEST is optimized to just test whether there is an intersection + { + is(sxe_jitson_intersect_test(sxe_jitson_true, sxe_jitson_false), NULL, "Can't test intersect booleans"); + + collection = sxe_jitson_new("[]"); + is(sxe_jitson_intersect_test(sxe_jitson_true, collection), NULL, "Can't test intersect a boolean with 
an array"); + is(sxe_jitson_intersect_test(collection, collection), sxe_jitson_false, "No common elements"); /* collection is the empty array here, so the expected result is false even against itself */ + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[1]"); + is(sxe_jitson_intersect_test(collection, collection), sxe_jitson_true, "Common elements"); + sxe_jitson_free(collection); + + coll_rhs = sxe_jitson_new("[3.14159, 4.4]"); // 1st element can only be represented as a double + collection = sxe_jitson_new("[0, 18446744073709551615]"); // 2nd element can only be represented as a uint + ok(!sxe_jitson_intersect_test(collection, coll_rhs), "Can't test intersect ordered arrays with incomparable types"); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[18446744073709551615]"); // Element can only be represented as a uint + ok(!sxe_jitson_intersect_test(collection, coll_rhs), "Can't test intersect unsorted/sorted arrays with incomparables"); + sxe_jitson_free(coll_rhs); + + coll_rhs = sxe_jitson_new("[4.4, 3.14159]"); // Unsorted elements can only be represented as doubles + ok(!sxe_jitson_intersect_test(collection, coll_rhs), "Can't test intersect unsorted arrays with incomparables"); + sxe_jitson_free(coll_rhs); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[1, 2]"); + is(sxe_jitson_intersect_test(sxe_jitson_true, collection), NULL, "Can't test intersect boolean and sorted array"); + is(sxe_jitson_intersect_test(collection, collection), sxe_jitson_true, "Common elements in sorted arrays"); + coll_rhs = sxe_jitson_new("[0, 3]"); + is(sxe_jitson_intersect_test(collection, coll_rhs), sxe_jitson_false, "No common elements in sorted arrays"); + sxe_jitson_free(coll_rhs); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[\"short\", \"snort\"]"); // Ordered strings stored in 1 jitson + coll_rhs = sxe_jitson_new("[\"long string\", \"null string\"]"); // Ordered strings stored in 2 jitsons + is(sxe_jitson_intersect_test(collection, coll_rhs), sxe_jitson_false, "Insta-false when sizes differ"); + 
sxe_jitson_free(coll_rhs); + sxe_jitson_free(collection); + + coll_rhs = sxe_jitson_new("[\"null string\", \"snort\"]"); // Different ordered strings, nonuniform sizes + collection = sxe_jitson_new("[\"long string\", \"short\"]"); // Ordered strings, nonuniform sizes + is(sxe_jitson_intersect_test(collection, collection), sxe_jitson_true, "Common elements in nonuniform sorted arrays"); + is(sxe_jitson_intersect_test(collection, coll_rhs), sxe_jitson_false, "No common elems in nonuniform sorted arrays"); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[]"); // Unordered + is(sxe_jitson_intersect_test(collection, coll_rhs), sxe_jitson_false, "No common elems in unsorted/sorted arrays"); + sxe_jitson_free(collection); + + collection = sxe_jitson_new("[\"snort\"]"); // 1 element arrays are always unordered + is(sxe_jitson_intersect_test(collection, coll_rhs), sxe_jitson_true, "Common elems in unsorted/sorted arrays"); + is(sxe_jitson_intersect_test(coll_rhs, collection), sxe_jitson_true, "Sorted array intersected with unsorted"); + sxe_jitson_free(collection); + sxe_jitson_free(coll_rhs); + } + + sxe_jitson_oper_fini(); + sxe_jitson_type_fini(); + sxe_thread_memory_free(SXE_THREAD_MEMORY_ALL); + is(kit_memory_allocations(), start_allocations, "No memory was leaked"); + return exit_status(); +} diff --git a/lib-sxe-jitson/test/test-sxe-jitson-race-check.c b/lib-sxe-jitson/test/test-sxe-jitson-race-check.c new file mode 100644 index 0000000..f6bd585 --- /dev/null +++ b/lib-sxe-jitson/test/test-sxe-jitson-race-check.c @@ -0,0 +1,82 @@ +#include +#include +#include +#include + +#include "sxe-jitson.h" +#include "sxe-util.h" + +static volatile unsigned next_main; /* Incremented by main each time it has created a jitson for the workers */ +static volatile unsigned next_worker_1; /* Written by the first worker with the value of next_main it has finished with */ +static volatile unsigned next_worker_2; /* Written by the second worker with the value of next_main it has finished with */ +static struct sxe_jitson *jitson; +static bool test_objects = false; /* Set by main when the first argument starts with -o: look up object members instead of array elements */ + +#if SXE_DEBUG +static unsigned max_iter = 99999; +#else +static unsigned max_iter = 999999; +#endif + +static void * +worker(void *next_void) +{ + 
unsigned i, last = 0; + volatile unsigned *next = next_void; + + for (i = 1; i <= max_iter; i++) { + while (last >= next_main) + sched_yield(); + + if (test_objects) + sxe_jitson_object_get_member(jitson, "one", 3); + else + sxe_jitson_array_get_element(jitson, 1); + + last = next_main; + *next = next_main; + } + + return NULL; +} + +int +main(int argc, char **argv) +{ + pthread_t worker1, worker2; + unsigned i; + int ret; // Results are tested outside assert() so the calls still happen when NDEBUG is defined + + if (argc > 1) + if (strncmp(argv[1], "-o", 2) == 0) + test_objects = true; + + plan_tests(2); // Only the two pthread_join() results are counted as tests + sxe_jitson_initialize(8, 0); + ret = pthread_create(&worker1, NULL, &worker, SXE_CAST(void *, &next_worker_1)); + assert(ret == 0); + ret = pthread_create(&worker2, NULL, &worker, SXE_CAST(void *, &next_worker_2)); + assert(ret == 0); + (void)ret; // Avoid a set-but-unused warning when assertions are compiled out + + for (i = 1; i <= max_iter; i++) { + jitson = sxe_jitson_new(test_objects ? "{\"one\": 1, \"two\": 2}" : "[1, \"I'm a long string\"]"); + assert(jitson != NULL); + + next_main++; // Release both workers to access the new value + + while (next_worker_1 < next_main) + sched_yield(); + + while (next_worker_2 < next_main) + sched_yield(); + + sxe_jitson_free(jitson); // Both workers have acknowledged this round, so nothing is still reading it + } + + is(pthread_join(worker1, NULL), 0, "Joined worker1"); + is(pthread_join(worker2, NULL), 0, "Joined worker2"); + + sxe_jitson_finalize(); + return exit_status(); +} diff --git a/lib-sxe-jitson/test/test-sxe-jitson-range.c b/lib-sxe-jitson/test/test-sxe-jitson-range.c new file mode 100644 index 0000000..c44f5a0 --- /dev/null +++ b/lib-sxe-jitson/test/test-sxe-jitson-range.c @@ -0,0 +1,98 @@ +/* Test the sxe-jitson range extension + */ + +#include + +#include "kit-mockfail.h" +#include "sxe-jitson-const.h" +#include "sxe-jitson-in.h" +#include "sxe-jitson-oper.h" +#include "sxe-jitson-range.h" +#include "sxe-thread.h" + +int +main(void) +{ + struct sxe_jitson value, to; + struct sxe_jitson *jitson; + struct sxe_jitson_stack *stack; + char *json; + uint64_t start_allocations; + + plan_tests(29); + start_allocations = kit_memory_allocations(); + // KIT_ALLOC_SET_LOG(1); // Turn off when done + + 
sxe_jitson_initialize(0, 0); // Initialize the JSON types, and don't enable hexadecimal by default + sxe_jitson_const_initialize(NULL); // Don't add any constants, but allow casts to be added. + sxe_jitson_in_init(); + sxe_jitson_range_register(); + sxe_jitson_range_register(); // Registration is idempotent + stack = sxe_jitson_stack_get_thread(); + + diag("Happy path cases"); // The same membership checks run on a parsed range and on one built on the stack + { + ok(jitson = sxe_jitson_new("range([1,5])"), "Parsed an range from 'range([1,5])'"); + ok(sxe_jitson_test(jitson), "Ranges always test true"); + is_eq(json = sxe_jitson_to_json(jitson, NULL), "range([1,5])", "Back to expected JSON text"); + is(sxe_jitson_in(sxe_jitson_make_uint(&value, 0), jitson), sxe_jitson_null, "0 is not IN range([1,5])"); + is(sxe_jitson_in(sxe_jitson_make_uint(&value, 1), jitson), sxe_jitson_true, "1 is IN range([1,5])"); + is(sxe_jitson_in(sxe_jitson_make_number(&value, 3.5), jitson), sxe_jitson_true, "3.5 is IN range([1,5])"); // Description names the value tested + is(sxe_jitson_in(sxe_jitson_make_uint(&value, 1), jitson), sxe_jitson_true, "1 is IN range([1,5])"); + is(sxe_jitson_in(sxe_jitson_make_number(&value, 5.0), jitson), sxe_jitson_true, "5.0 is IN range([1,5])"); + is(sxe_jitson_in(sxe_jitson_make_uint(&value, ~0ULL), jitson), sxe_jitson_null, "%llu is not IN range([1,5])", ~0ULL); + kit_free(json); + sxe_jitson_free(jitson); + + sxe_jitson_make_uint( &value, 1); + sxe_jitson_make_number(&to, 5.0); + ok(sxe_jitson_stack_add_range(stack, &value, &to), "Added a range from 'range([1,5.0])'"); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got it from the stack"); + ok(sxe_jitson_test(jitson), "Ranges always test true"); + is_eq(json = sxe_jitson_to_json(jitson, NULL), "range([1,5])", "Back to expected JSON text"); + is(sxe_jitson_in(sxe_jitson_make_uint(&value, 0), jitson), sxe_jitson_null, "0 is not IN range([1,5]"); + is(sxe_jitson_in(sxe_jitson_make_uint(&value, 1), jitson), sxe_jitson_true, "1 is IN range([1,5]"); + is(sxe_jitson_in(sxe_jitson_make_number(&value, 3.5), jitson), 
sxe_jitson_true, "3.5 is IN range([1,5]"); // Description names the value tested + is(sxe_jitson_in(sxe_jitson_make_uint(&value, 1), jitson), sxe_jitson_true, "1 is IN range([1,5]"); + is(sxe_jitson_in(sxe_jitson_make_number(&value, 5.0), jitson), sxe_jitson_true, "5.0 is IN range([1,5]"); + is(sxe_jitson_in(sxe_jitson_make_uint(&value, ~0ULL), jitson), sxe_jitson_null, "%llu is not IN range([1,5]", ~0ULL); + kit_free(json); + sxe_jitson_free(jitson); + } + + diag("Failure cases"); + { + ok(!sxe_jitson_new("range(\"1,5\")"), "Can't parse a range from 'range(\"1,5\")'"); + ok(!sxe_jitson_new("range([1,2,3])"), "Can't parse a range array that has more than 2 elements"); + ok(!sxe_jitson_new("range([5,1])"), "Can't parse a range array whose elements are out of order"); + ok(!sxe_jitson_new("range([1,\"2\"])"), "Can't parse a range array whose elements are incomparable"); + + sxe_jitson_make_string_ref(&value, "one"); + sxe_jitson_make_string_ref(&to, "five"); + ok(!sxe_jitson_stack_add_range(stack, &value, &to), "Can't add a range that isn't ordered"); + + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND_AFTER_GET); + MOCKFAIL_SET_SKIP(1); + ok(!sxe_jitson_new("range([1,5])"), "Failed to parse a range on failure to allocate memory"); + MOCKFAIL_END_TESTS(); + + ok(jitson = sxe_jitson_new("range([1,5])"), "Parsed an range from 'range([1,5])'"); + MOCKFAIL_START_TESTS(1, sxe_factory_reserve); + ok(!sxe_jitson_to_json(jitson, NULL), "Failed to convert to JSON text on failure to allocate memory"); + MOCKFAIL_END_TESTS(); + sxe_jitson_free(jitson); + + sxe_jitson_make_uint( &value, 1); + sxe_jitson_make_number(&to, 5.0); + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND_AFTER_GET); + ok(!sxe_jitson_stack_add_range(stack, &value, &to), "Failed to construct a range on failure to allocate memory"); + MOCKFAIL_END_TESTS(); + } + + sxe_jitson_oper_fini(); + sxe_jitson_const_finalize(); + sxe_jitson_finalize(); + sxe_thread_memory_free(SXE_THREAD_MEMORY_ALL); + is(kit_memory_allocations(), start_allocations, "No memory 
was leaked"); + return exit_status(); +} diff --git a/lib-sxe-jitson/test/test-sxe-jitson-source.c b/lib-sxe-jitson/test/test-sxe-jitson-source.c new file mode 100644 index 0000000..8a9856b --- /dev/null +++ b/lib-sxe-jitson/test/test-sxe-jitson-source.c @@ -0,0 +1,111 @@ +/* Test the sxe-jitson source object + */ + +#include +#include + +#include "sxe-jitson.h" +#include "sxe-thread.h" + +int +main(void) +{ + struct sxe_jitson_source source; + size_t len; + uint64_t start_allocations; + + plan_tests(28); + start_allocations = kit_memory_allocations(); + // KIT_ALLOC_SET_LOG(1); // Turn off when done + + diag("Test line number handling"); + { + sxe_jitson_source_from_buffer(&source, "a\nb\n\nc", 5, SXE_JITSON_FLAG_STRICT); // c is not included in the buffer + sxe_jitson_source_set_file_line(&source, NULL, 1); + is(sxe_jitson_source_get_nonspace(&source), 'a', "Read 'a'"); + is(source.line, 1, "On line 1"); + is(sxe_jitson_source_get_char(&source), '\n', "Read '\\n'"); + is(source.line, 2, "Now on line 2"); + is(sxe_jitson_source_get_nonspace(&source), 'b', "Read 'b'"); + is(sxe_jitson_source_peek_char(&source), '\n', "Peeked at '\\n'"); + is(source.line, 2, "Still on line 2"); + sxe_jitson_source_consume(&source, 1); + is(source.line, 3, "Consumed '\\n' and now on line 3"); + is(sxe_jitson_source_peek_nonspace(&source), '\0', "Skipped final newline and returned EOF"); + is(sxe_jitson_source_get_char(&source), '\0', "Read '\\0' at EOF"); + } + + diag("Test new error case"); + { + struct sxe_jitson_stack *stack = sxe_jitson_stack_get_thread(); + + sxe_jitson_source_from_string(&source, "not a string", SXE_JITSON_FLAG_STRICT); + ok(!sxe_jitson_stack_load_string(stack, &source), "Successfully failed to load a non-string"); + is(EINVAL, errno, "errno is EINVAL"); + } + + diag("Test get literal function"); + { + const char *literal; + + sxe_jitson_source_from_string(&source, "\"boo\"", SXE_JITSON_FLAG_STRICT); + is_eq(literal = sxe_jitson_source_get_literal(&source, 
&len), "\"boo\"", "Got the expected literal string"); + is(len, sizeof("\"boo\"") - 1, "Got the expected length"); + + sxe_jitson_source_from_string(&source, "not a literal", SXE_JITSON_FLAG_STRICT); + ok(!sxe_jitson_source_get_literal(&source, NULL), "Correctly detected non-literal string"); + + sxe_jitson_source_from_string(&source, "\"unterminated", SXE_JITSON_FLAG_STRICT); + ok(!sxe_jitson_source_get_literal(&source, NULL), "Correctly detected unterminated literal string"); + } + + diag("Test diagnostic function"); + { + sxe_jitson_source_from_string(&source, + "01234567890123456789012345678901234567890123456789012345678901234567890123456789", + SXE_JITSON_FLAG_STRICT); + is_eq(sxe_jitson_source_left(&source), "012345678901234567890123456789012345678901234567890123456789...", + "Correctly truncated source left to 60 characters"); + sxe_jitson_source_from_string(&source, "012345678901234567890123456789012345678901234567890123456789012", + SXE_JITSON_FLAG_STRICT); + is_eq(sxe_jitson_source_left(&source), "012345678901234567890123456789012345678901234567890123456789012", + "Correctly didn't truncated source with only 63 characters"); + sxe_jitson_source_from_buffer(&source, "012345678901234567890123456789012345678901234567890123456789012", 63, + SXE_JITSON_FLAG_STRICT); + is_eq(sxe_jitson_source_left(&source), "012345678901234567890123456789012345678901234567890123456789012", + "Correctly didn't truncated source with only 63 characters"); + sxe_jitson_source_from_string(&source, "", SXE_JITSON_FLAG_STRICT); + is_eq(sxe_jitson_source_left(&source), "", "Correctly returned \"\" for empty source"); + } + + diag("Test peek token function"); + { + sxe_jitson_source_from_string(&source, " ?=", SXE_JITSON_FLAG_STRICT); + is_eq(sxe_jitson_source_peek_token(&source, &len), "?=", "Correctly peeked at token in string source"); + is(len, 2, "Cerrectly determined token's length"); + + sxe_jitson_source_from_buffer(&source, " ?=garbage", 3, SXE_JITSON_FLAG_STRICT); + 
is_eq(sxe_jitson_source_peek_token(&source, &len), "?=garbage", "Correctly peeked at token in buffer source"); + is(len, 2, "Cerrectly determined token's length"); + + sxe_jitson_source_from_buffer(&source, " ?=", 1, SXE_JITSON_FLAG_STRICT); + is(sxe_jitson_source_peek_token(&source, &len), NULL, "Correctly detected lack of token in buffer source"); + } + + diag("Test repeated sxe_jitson_source_get_char()"); + { + /* Although nothing currently does this, ensure that repeated calls at end-of-string work */ + unsigned i; + + sxe_jitson_source_from_string(&source, "x\0y", SXE_JITSON_FLAG_STRICT); + is(sxe_jitson_source_get_char(&source), 'x', "Got 'x' from the source"); + for (i = 0; i < 100; i++) + if (sxe_jitson_source_get_char(&source)) + break; + is(i, 100, "Read '\\0' from the end of string 100 times"); + } + + sxe_thread_memory_free(SXE_THREAD_MEMORY_ALL); + is(kit_memory_allocations(), start_allocations, "No memory was leaked"); + return exit_status(); +} diff --git a/lib-sxe-jitson/test/test-sxe-jitson.c b/lib-sxe-jitson/test/test-sxe-jitson.c new file mode 100644 index 0000000..3f81aea --- /dev/null +++ b/lib-sxe-jitson/test/test-sxe-jitson.c @@ -0,0 +1,1214 @@ +#include +#include +#include +#include +#include +#include + +#include "kit-alloc.h" +#include "kit-mockfail.h" +#include "sxe-jitson.h" +#include "sxe-test-memory.h" +#include "sxe-thread.h" + +static void +test_stacked_array_is_ordered(struct sxe_jitson_stack *stack, const char *json, bool expect_ordered) +{ + struct sxe_jitson *jitson; + + if (!(jitson = sxe_jitson_stack_get_jitson(stack))) + fail("Failed to get '%s' from the stack (error == %s)", json, strerror(errno)); + if (sxe_jitson_get_type(jitson) != SXE_JITSON_TYPE_ARRAY) + fail("'%s' is not an array", json); + else + is((bool)(jitson->type & SXE_JITSON_TYPE_IS_ORD), expect_ordered, + "'%s' is %sordered", json, jitson->type & SXE_JITSON_TYPE_IS_ORD ? 
"" : "not "); + + sxe_jitson_free(jitson); +} + +static void +test_parsing_ordered_array(const char *json, bool expect_ordered) +{ + struct sxe_jitson_source source; + struct sxe_jitson_stack *stack = sxe_jitson_stack_get_thread(); + + sxe_jitson_source_from_string(&source, json, SXE_JITSON_FLAG_OPTIMIZE); + + if (!sxe_jitson_stack_load_json(stack, &source)) + fail("Failed to load '%s' (error == %s)", json, strerror(errno)); + else + test_stacked_array_is_ordered(stack, json, expect_ordered); +} + +static void +test_constructing_ordered_array(const char *name, bool expect_ordered, ...) +{ + struct sxe_jitson_stack *stack = sxe_jitson_stack_get_thread(); + va_list varargs; + const char *element; + + if (!sxe_jitson_stack_open_array(stack, name)) { + fail("Failed to open array '%s'", name); + return; + } + + va_start(varargs, expect_ordered); + + while ((element = va_arg(varargs, const char *))) + if (!sxe_jitson_stack_add_string(stack, element, SXE_JITSON_TYPE_IS_REF)) { + fail("Failed to add '%s' to array '%s'", element, name); + va_end(varargs); + return; + } + + va_end(varargs); + sxe_jitson_stack_close_array(stack, name); + test_stacked_array_is_ordered(stack, name, expect_ordered); +} + +int +main(void) +{ + struct sxe_jitson_source source; + struct sxe_jitson prim_left, prim_right; + struct sxe_jitson_stack *stack; + struct sxe_jitson *array, *clone, *jitson; // Constructed jitson values are returned as non-const + const struct sxe_jitson *element, *member; // Accessed jitson values are returned as const + char *json_out; + size_t len; + uint64_t start_allocations; + + tap_plan(520 + 5 * 27, TAP_FLAG_LINE_ON_OK, NULL); // Display test line numbers in OK messages (useful for tracing) + start_allocations = kit_memory_allocations(); + // KIT_ALLOC_SET_LOG(1); // Turn off when done + + ok(!sxe_jitson_is_init(), "Not yet initialized"); + sxe_jitson_type_init(0, 0); // Initialize the JSON types, and don't enable hexadecimal + ok(sxe_jitson_is_init(), "Now 
initialized"); + + diag("Memory allocation failure tests"); + { + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_NEW_OBJECT); + is(sxe_jitson_stack_new(1), NULL, "Failed to allocate a stack object"); + MOCKFAIL_END_TESTS(); + + ok(stack = sxe_jitson_stack_new(1), "Allocated a non-thread stack"); + sxe_jitson_stack_free(stack); + + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_NEW_JITSONS); + is(sxe_jitson_new("0"), NULL, "Failed to allocate the thread stack's initial jitsons"); + MOCKFAIL_END_TESTS(); + + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND); + is(sxe_jitson_new("{\"one\":1}"), NULL, "Failed to realloc the thread stack's jitsons on a string"); + MOCKFAIL_END_TESTS(); + + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND); + is(sxe_jitson_new("\"01234567\""), NULL, "Failed to realloc the thread stack's jitsons on a long string"); + MOCKFAIL_END_TESTS(); + + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND); + is(sxe_jitson_new("[0,1]"), NULL, "Failed to realloc the thread stack's jitsons"); + MOCKFAIL_END_TESTS(); + + jitson = sxe_jitson_new("999999999"); + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND_AFTER_GET); + is(sxe_jitson_new("0"), NULL, "Failed to allocate a new stack after getting the parsed object"); + MOCKFAIL_END_TESTS(); + + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND); + is(sxe_jitson_new("{\"x\": 0}"), NULL, "Failed to realloc the thread stack's jitsons on string inside an object"); + MOCKFAIL_END_TESTS(); + + /* Don't do this at the beginning of the program (need to test failure to allocate above) + */ + stack = sxe_jitson_stack_get_thread(); + + MOCKFAIL_START_TESTS(7, MOCK_FAIL_STACK_EXPAND); + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Opened an array on the stack"); + ok(!sxe_jitson_stack_add_null(stack), "Failed to realloc to add null to an open array"); + ok(!sxe_jitson_stack_add_bool(stack, true), "Failed to realloc to add true to an open array"); + ok(!sxe_jitson_stack_add_number(stack, 0.0), "Failed to realloc to add 
0.0 to an open array"); + ok(!sxe_jitson_stack_add_uint(stack, 0), "Failed to realloc to add 0 to an open array"); + ok(!sxe_jitson_stack_add_reference(stack, jitson), "Failed to realloc to add a reference to an open array"); + ok(!sxe_jitson_stack_add_dup(stack, jitson), "Failed to realloc to add a duplicate to an open array"); + sxe_jitson_stack_clear(stack); + MOCKFAIL_END_TESTS(); + + MOCKFAIL_START_TESTS(1, sxe_factory_reserve); + is(sxe_jitson_to_json(jitson, NULL), NULL, "Failed to encode large to JSON on realloc failure"); + errno = 0; + MOCKFAIL_END_TESTS(); + sxe_jitson_free(jitson); + } + + diag("Happy path parsing"); + { + ok(jitson = sxe_jitson_new("0"), "Parsed '0' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_NUMBER, "'0' is a number"); + ok(sxe_jitson_get_number(jitson) == 0.0 , "Value %f == 0.0", sxe_jitson_get_number(jitson)); + is(sxe_jitson_get_uint(jitson), 0, "Value %"PRIu64" == 0", sxe_jitson_get_uint(jitson)); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new(" 666\t"), "Parsed ' 666\\t' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_NUMBER, "'666' is a number"); + ok(sxe_jitson_get_number(jitson) == 666.0, "Value %f == 666.0", sxe_jitson_get_number(jitson)); + is(sxe_jitson_get_uint(jitson), 666, "Value %"PRIu64" == 666", sxe_jitson_get_uint(jitson)); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new(" -0.1"), "Parsed '-0.1' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_NUMBER, "'-0.1'' is a number"); + ok(sxe_jitson_get_number(jitson) == -0.1, "Value %f == -0.1", sxe_jitson_get_number(jitson)); + is(errno, 0, "Error = '%s'", strerror(errno)); + is(sxe_jitson_get_uint(jitson), ~0ULL, "Value %"PRIu64" == ~OULL", sxe_jitson_get_uint(jitson)); + is(errno, EOVERFLOW, "Error = '%s'", strerror(errno)); + sxe_jitson_free(jitson); + errno = 0; + + ok(jitson = sxe_jitson_new("1E-100"), "Parsed '1E=100' (error %s)", 
strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_NUMBER, "1E100' is a number"); + ok(sxe_jitson_get_number(jitson) == 1E-100, "Value %f == 1E100", sxe_jitson_get_number(jitson)); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new("0xDEADBEEF"), "Parsed '0xDEADBEEF' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_NUMBER, "0xDEADBEEF' is parsed as a number"); + is(sxe_jitson_get_uint(jitson), 0, "0x* == 0 (hex not enabled)"); + sxe_jitson_free(jitson); + + sxe_jitson_flags |= SXE_JITSON_FLAG_ALLOW_HEX; + ok(jitson = sxe_jitson_new("0xDEADBEEF"), "Parsed '0xDEADBEEF' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_NUMBER, "0xDEADBEEF' is a number"); + is(sxe_jitson_get_uint(jitson), 0xDEADBEEF, "0x%"PRIx64" == 0xDEADBEEF", sxe_jitson_get_uint(jitson)); + is(sxe_jitson_get_number(jitson), (double)0xDEADBEEF, "%f == (double)0xDEADBEEF", sxe_jitson_get_number(jitson)); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "3735928559", "0xDEADBEEF is 3735928559 in decimal"); + kit_free(json_out); + sxe_jitson_free(jitson); + + sxe_jitson_source_from_string(&source, "0xDEADBEEF", SXE_JITSON_FLAG_STRICT); + ok(sxe_jitson_stack_load_json(stack, &source), "Loaded '0xDEADBEEF' (error %s)", strerror(errno)); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the loaded value"); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_NUMBER, "0xDEADBEEF' is a number"); + is(sxe_jitson_get_uint(jitson), 0, "0x* == 0 (hex not enabled due to strict mode)"); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new("0xDEADBEEFDEADBEEF"), "Parsed '0xDEADBEEFDEADBEEF' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_NUMBER, "0xDEADBEEFDEADBEEF' is a number"); + is(sxe_jitson_get_uint(jitson), 0xDEADBEEFDEADBEEF, "0x%"PRIx64" == 0xDEADBEEFDEADBEEF", sxe_jitson_get_uint(jitson)); + ok(isnan(sxe_jitson_get_number(jitson)), "%f == NAN", sxe_jitson_get_number(jitson)); 
+ is(errno, EOVERFLOW, "Error = %s", strerror(errno)); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "16045690984833335023", + "0xDEADBEEFDEADBEEF is 16045690984833335023 in decimal"); + kit_free(json_out); + sxe_jitson_free(jitson); + errno = 0; + + ok(jitson = sxe_jitson_new("0xDEADBEEFDEADBEEFDEAD"), "Parsed '0xDEADBEEFDEADBEEFDEAD' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_NUMBER, "0xDEADBEEFDEADBEEFDEAD' is a number"); + is(sxe_jitson_get_uint(jitson), 0xFFFFFFFFFFFFFFFF, "0x%"PRIx64" == 0xFFFFFFFFFFFFFFFF", sxe_jitson_get_uint(jitson)); + is(errno, ERANGE, "Error = %s", strerror(errno)); + sxe_jitson_free(jitson); + errno = 0; + + ok(jitson = sxe_jitson_new("\"\""), "Parsed '\"\"' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_STRING, "'\"\"' is a string"); + is_eq(sxe_jitson_get_string(jitson, NULL), "", "Correct value"); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new(" \"x\"\n"), "Parsed ' \"x\"\\n' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_STRING, "' \"x\"\\n' is a string"); + is_eq(sxe_jitson_get_string(jitson, &len), "x", "Correct value"); + is(len, 1, "Correct length"); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new("\"\\\"\\\\\\/\\b\\f\\n\\r\\t\""), "Parsed '\"\\\"\\\\\\/\\b\\f\\n\\r\\t\"' (error %s)", + strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_STRING, "'\"\\\"\\\\\\/\\b\\f\\n\\r\\t\"' is a string"); + is_eq(sxe_jitson_get_string(jitson, NULL), "\"\\/\b\f\n\r\t", "Correct value"); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "\"\\u0022\\u005c/\\u0008\\u000c\\u000a\\u000d\\u0009\"", + "Control characters are correctly encoded"); + sxe_jitson_free(jitson); + kit_free(json_out); + + ok(jitson = sxe_jitson_new("\"\\u20aC\""), "Parsed '\"\\u20aC\"' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_STRING, "'\"\\u20aC\"' is a string"); + 
is_eq(sxe_jitson_get_string(jitson, NULL), "\xE2\x82\xAC", "Correct UTF-8 value"); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "\"\xE2\x82\xAC\"", "Valid UTC code points are not escaped"); + sxe_jitson_free(jitson); + kit_free(json_out); + + // Checks to ensure some string specific functions are accessible to new types based on strings + ok(jitson = sxe_jitson_new(" \"stuff\"\n"), "Parsed ' \"stuff\"\\n' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_STRING, "' \"stuff\"\\n' is a string"); + is_eq(sxe_jitson_get_string(jitson, &len), "stuff", "Correct value"); + is(len, 5, "Correct length"); + is(sxe_jitson_string_len(jitson), 5, "Correct string length via direct function"); + is(sxe_jitson_string_test(jitson), SXE_JITSON_TEST_TRUE, "Nonempty strings test true via direct function"); + is(sxe_jitson_string_cmp(jitson, jitson), 0, "Correct comparison via direct function"); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new(" {\t} "), "Parsed ' {\\t} ' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_OBJECT, "' {\\t} ' is an object" ); + is(sxe_jitson_len(jitson), 0, "Correct len"); + is(sxe_jitson_size(jitson), 1, "Correct size"); // Test DPT-1404b + ok(!sxe_jitson_test(jitson), "Empty objects test false"); + is(sxe_jitson_object_get_member(jitson, "x", 1), NULL, "Search for any member will fail"); // Test DPT-1404a + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "{}", "Encoded back to JSON correctly"); + kit_free(json_out); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new("{\"key\":\"value\"}"), "Parsed '{\"key\":\"value\"}' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_OBJECT, "'{\"key\":\"value\"}' is an object"); + is(sxe_jitson_len(jitson), 1, "Correct len"); + is(sxe_jitson_size(jitson), 3, "Correct size"); + ok(sxe_jitson_test(jitson), "Nonempty objects test true"); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new("[1, 
2,4]"), "Parsed '[1, 2,4]' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_ARRAY, "'[1, 2,4]' is an array" ); + is(sxe_jitson_len(jitson), 3, "Correct len"); + ok(sxe_jitson_test(jitson), "Nonempty arrays test true"); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new("[]"), "Parsed '[]' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_ARRAY, "'[]' is an array" ); + is(sxe_jitson_len(jitson), 0, "Correct len"); + ok(!sxe_jitson_test(jitson), "Empty arrays test false"); + sxe_jitson_free(jitson); + } + + diag("Test identifiers and parsing of terminals"); + { + const char *ident; + int i; + char test_id[2] = "\0"; + + for (i = -128; i <= 127; i++) { + test_id[0] = (char)i; + sxe_jitson_source_from_string(&source, test_id, 0); + ident = sxe_jitson_source_get_identifier(&source, &len); + + if (i == '.' || i == '_' || (i >= '0' && i <= '9') || (i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z')) { + if (ident != test_id) + break; + } else if (ident != NULL) + break; + } + + is(i, 128, "All identifier characters are correctly classified"); + + sxe_jitson_source_from_string(&source, "@", SXE_JITSON_FLAG_STRICT); + ok(!sxe_jitson_source_peek_identifier(&source, &len), "Peek identifier fails on non-identifier character"); + + sxe_jitson_source_from_string(&source, "identifier +", SXE_JITSON_FLAG_STRICT); // Peeking allowed in STRICT mode + ok(sxe_jitson_source_peek_identifier(&source, &len), "Peek identifier succeeds"); + is(len, sizeof("identifier") - 1, "Length of identifier is correct"); + + ok(jitson = sxe_jitson_new("true"), "Parsed 'true' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_BOOL, "'true' is a boolean" ); + is(sxe_jitson_get_bool(jitson), true, "Correct value"); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new("false"), "Parsed 'false' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_BOOL, "'false' is a boolean" ); + 
is(sxe_jitson_get_bool(jitson), false, "Correct value"); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "false", "Encoded back to JSON correctly"); + kit_free(json_out); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new("null"), "Parsed 'null' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_NULL, "'null' is the null type" ); + sxe_jitson_free(jitson); + } + + diag("Cover edge cases of parsing"); + { + ok(!sxe_jitson_new("{0:1}"), "Failed to parse non-string key '{0:1}' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("\""), "Failed to parse unterminated string '\"' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("\"\\"), "Failed to parse unterminated escape '\"\\' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("\"\\u"), "Failed to parse truncated unicode escape '\"\\u' (error %s)", strerror(errno)); + ok(!sxe_jitson_new(""), "Failed to parse empty string '' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("{\"k\"0}"), "Failed to parse object missing colon '{\"k\"0}' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("{\"k\":}"), "Failed to parse object missing value '{\"k\":}' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("{\"k\":0"), "Failed to parse object missing close '{\"k\":0' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("[0"), "Failed to parse array missing close '[0' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("0."), "Failed to parse invalid fraction '0.' 
(error %s)", strerror(errno)); + ok(!sxe_jitson_new("1.0E"), "Failed to parse invalid exponent '1.0E' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("fathead"), "Failed to parse invalid token 'fathead' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("n"), "Failed to parse invalid token 'n' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("twit"), "Failed to parse invalid token 'twit' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("-x"), "Failed to parse invalid number '-x' (error %s)", strerror(errno)); + ok(!sxe_jitson_new("0xx"), "Failed to parse invalid hex number '0xx' (error %s)", strerror(errno)); + + char json[65562]; // Big enough for an object with a > 64K member name + json[len = 0] = '{'; + json[++len] = '"'; + + while (len < 65538) + json[++len] = 'm'; + + json[++len] = '"'; + ok(!sxe_jitson_new(json), "Failed to parse member name of 64K chanracters (error %s)", strerror(errno)); + errno = 0; + } + + diag("Cover type to string"); + { + is_eq(sxe_jitson_type_to_str(SXE_JITSON_TYPE_INVALID), "INVALID", "INVALID type"); + is_eq(sxe_jitson_type_to_str(SXE_JITSON_TYPE_NUMBER), "number", "number"); + is_eq(sxe_jitson_type_to_str(SXE_JITSON_TYPE_STRING), "string", "string"); + is_eq(sxe_jitson_type_to_str(SXE_JITSON_TYPE_OBJECT), "object", "object"); + is_eq(sxe_jitson_type_to_str(SXE_JITSON_TYPE_ARRAY), "array", "array"); + is_eq(sxe_jitson_type_to_str(SXE_JITSON_TYPE_BOOL), "bool", "bool"); + is_eq(sxe_jitson_type_to_str(SXE_JITSON_TYPE_NULL), "null", "null"); + is_eq(sxe_jitson_type_to_str(SXE_JITSON_TYPE_MASK), "ERROR", "out of range type is an ERROR"); + is(errno, ERANGE, "Errno is ERANGE"); + errno = 0; + } + + diag("Test object membership function, object duplication, and reencoding"); + { + ok(jitson = sxe_jitson_new("{\"a\": 1, \"biglongname\": \"B\", \"c\": [2, 3], \"d\" : {\"e\": 4}, \"f\": true}"), + "Parsed complex object (error %s)", strerror(errno)); + is(sxe_jitson_size(jitson), 16, "Object is %zu bytes", sizeof(struct 
sxe_jitson) * sxe_jitson_size(jitson)); + + MOCKFAIL_START_TESTS(1, MOCK_FAIL_OBJECT_GET_MEMBER); + ok(!sxe_jitson_object_get_member(jitson, "a", 0), "Can't access object on failure to calloc index"); + MOCKFAIL_END_TESTS(); + + ok(member = sxe_jitson_object_get_member(jitson, "a", 0), "Object has a member 'a'"); + is(sxe_jitson_get_number(member), 1, "Member is the number 1"); + ok(!sxe_jitson_object_get_member(jitson, "biglongname", 1), "Object has no member 'b'"); + ok(member = sxe_jitson_object_get_member(jitson, "biglongname", 0), "Object has a member 'biglongname'"); + is_eq(sxe_jitson_get_string(member, NULL), "B", "Member is the string 'B'"); + ok(member = sxe_jitson_object_get_member(jitson, "c", 0), "Object has a member 'c'"); + is(sxe_jitson_len(member), 2, "Member is an array of 2 elements"); + ok(member = sxe_jitson_object_get_member(jitson, "d", 1), "Object has a member 'd'"); + is(sxe_jitson_len(member), 1, "Member is an object with 1 member"); + ok(member = sxe_jitson_object_get_member(jitson, "f", 0), "Object has a member 'f'"); + ok(sxe_jitson_get_bool(member), "Member is 'true'"); + + /* Test duplication + */ + ok(clone = sxe_jitson_dup(jitson), "Duplicated the object"); + is(5, sxe_jitson_len(clone), "Clone has 5 members too"); + ok(member = sxe_jitson_object_get_member(clone, "c", 0), "One of the members is 'c'"); + ok(element = sxe_jitson_array_get_element(member, 1), "Got second element of array member 'c'"); + is(sxe_jitson_get_uint(element), 3, "It's the unsigned integer 3"); + is_eq(json_out = sxe_jitson_to_json(clone, NULL), + "{\"f\":true,\"a\":1,\"c\":[2,3],\"biglongname\":\"B\",\"d\":{\"e\":4}}", + "Encoder spat out same JSON as we took in"); + sxe_jitson_free(clone); + + is(sxe_jitson_size(jitson), 16, "Objects can be sized once indexed"); + sxe_jitson_free(jitson); + kit_free(json_out); + } + + diag("Test array element function and reencoding"); + { + ok(jitson = sxe_jitson_new("[0, \"anotherlongstring\", {\"member\": null}, true]"), 
+ "Parsed complex array (error %s)", strerror(errno)); + + MOCKFAIL_START_TESTS(1, MOCK_FAIL_ARRAY_GET_ELEMENT); + ok(!sxe_jitson_array_get_element(jitson, 0), "Can't access array on failure to malloc index"); + MOCKFAIL_END_TESTS(); + + ok(element = sxe_jitson_array_get_element(jitson, 0), "Array has a element 0"); + is(sxe_jitson_get_number(element), 0, "Element is the number 0"); + ok(element = sxe_jitson_array_get_element(jitson, 1), "Array has a element 1"); + is_eq(sxe_jitson_get_string(element, NULL), "anotherlongstring", "Element is the string 'anotherlongstring'"); + ok(element = sxe_jitson_array_get_element(jitson, 2), "Array has a element 2"); + is(sxe_jitson_get_type(element), SXE_JITSON_TYPE_OBJECT, "Elememt is an object"); + ok(element = sxe_jitson_array_get_element(jitson, 3), "Array has a element 3"); + ok(sxe_jitson_get_bool(element), "Element is 'true'"); + ok(!sxe_jitson_array_get_element(jitson, 4), "Object has no element 4"); + + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "[0,\"anotherlongstring\",{\"member\":null},true]", + "Encoder spat out same JSON as we took in"); + is(sxe_jitson_size(jitson), 8, "Arrays can be sized once indexed"); + sxe_jitson_free(jitson); + kit_free(json_out); + } + + diag("Test bug fixes against regressions"); + { + ok(jitson = sxe_jitson_new("{\"A.D.\": 1, \"x\": 0}"), "Parsed problem member name (error %s)", strerror(errno)); + ok(member = sxe_jitson_object_get_member(jitson, "A.D.", 0), "Object has a member 'A.D.'"); + is(sxe_jitson_get_type(member), SXE_JITSON_TYPE_NUMBER, "A.D.'s value is a number"); + sxe_jitson_free(jitson); + + // Test DPT-1404b (an object that contains an empty object) + ok(jitson = sxe_jitson_new("{\"catalog\":{\"osversion-current\":{}, \"version\": 1}}"), "Parsed object containing {}"); + ok(!sxe_jitson_object_get_member(jitson, "osversion-current", 0), "Object has no member 'osversion-current'"); + sxe_jitson_free(jitson); + + // Test DPT-1408.1 (sxe_jitson_stack_clear should clear 
the open collection index) + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened an object on the stack"); + sxe_jitson_stack_clear(stack); + is(stack->open , 0, "Open collection flag was cleared"); + + // Test DPT-1408.2 (sxe_jitson_stack_parse_string should clear the stack on failure) + ok(!sxe_jitson_stack_parse_json(stack, "\""), "Failed to parse an unterminated string"); + is(stack->count, 0, "Stack was cleared"); + + // Test DPT-1408.3 (sxe_jitson_stack_parse_string should clear the stack on failure) + ok(!sxe_jitson_stack_parse_json(stack, "{"), "Failed to parse a truncated object"); + is(stack->count, 0, "Stack was cleared"); + + // Test DPT-1408.4 (It should be possible to construct an object with an array as a member) + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened an object on the stack"); + ok(sxe_jitson_stack_add_member_name(stack, "endpoint.certificates", SXE_JITSON_TYPE_IS_COPY), "Add a member"); + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Member's value is an empty array"); + ok(sxe_jitson_stack_close_collection(stack), "Close array - can't fail"); + ok(sxe_jitson_stack_close_collection(stack), "Close outer object - can't fail"); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the object from the stack"); + sxe_jitson_free(jitson); + + // Test DPT-1408.5 (sxe_jitson_object_get_member should allow non-NUL terminated member names) + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened an object on the stack"); + ok(sxe_jitson_stack_add_member_number(stack, "a", 1), "Add member 'a' value 1"); + sxe_jitson_stack_close_collection(stack); // Close object + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the object from the stack"); + ok(sxe_jitson_object_get_member(jitson, "a+", 1), "Got the member with non-terminated name"); + + // Test DPT-1408.8 (Need to be able to duplicate into a collection) + ok(sxe_jitson_stack_open_collection(stack, 
SXE_JITSON_TYPE_ARRAY), "Opened an array on the stack"); + ok(sxe_jitson_stack_add_dup(stack, jitson), "Duplicated previous test object in array"); + ok(sxe_jitson_stack_add_null(stack), "Followed by a null"); + sxe_jitson_stack_close_collection(stack); // Close array + ok(clone = sxe_jitson_stack_get_jitson(stack), "Got the array from the stack"); + ok(element = sxe_jitson_array_get_element(clone, 0), "Got the cloned first element"); + ok(member = sxe_jitson_object_get_member(element, "a", 0), "Got the member 'a' from it"); + is(sxe_jitson_get_number(member), 1, "It's value is correct"); + ok(!sxe_jitson_is_allocated(element), "Cloned object isn't marked allocated even tho the object cloned was"); + sxe_jitson_free(clone); + + // Test DPT-1408.9 (sxe_dup should duplicate referred to jitson values) + struct sxe_jitson reference[1]; + sxe_jitson_make_reference(reference, jitson); + clone = sxe_jitson_dup(reference); + ok(!sxe_jitson_is_reference(clone), "The duped reference is not itself a reference"); + sxe_jitson_free(clone); + + sxe_jitson_free(jitson); + + // Test bug found by Dejan in DPT-1422 (more than one unicode escape sequence) + ok(jitson = sxe_jitson_new("\"\\u000AONE\\u000ATWO\""), "Parsed '\"\\u000AONE\\u000ATWO\"' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_STRING, "'\"\\u000AONE\\u000ATWO\"' is a string"); + is_eq(sxe_jitson_get_string(jitson, NULL), "\nONE\nTWO", "Correct UTF-8 value"); + sxe_jitson_free(jitson); + + // Test DPT-1702 (can't create a reference to an allocated reference) + struct sxe_jitson *ref; + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Opened an array on the stack"); + ok(sxe_jitson_stack_add_string(stack, "hello", SXE_JITSON_TYPE_IS_COPY), "Added \"hello\" to it"); + sxe_jitson_stack_close_collection(stack); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the array from the stack"); + ok(ref = sxe_jitson_create_reference(jitson), "Created a reference to the 
JSON [\"hello\"]"); + ok(clone = sxe_jitson_create_reference(ref), "Created a clone of the reference"); + ok(json_out = sxe_jitson_to_json(clone, NULL), "Converted the cloned reference to JSON text"); + is_eq(json_out, "[\"hello\"]", "It's '[\"hello\"]'"); + kit_free(json_out); + sxe_jitson_free(clone); + sxe_jitson_free(ref); + sxe_jitson_free(jitson); + } + + diag("Test stack boundaries"); + { + struct sxe_jitson *j2; + + // Verify that our NUL doesn't overflow + ok(jitson = sxe_jitson_new("\"12345678\""), "Parsed '\"12345678\"' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_STRING, "'\"12345678\"' is a string"); + is_eq(sxe_jitson_get_string(jitson, NULL), "12345678", "Correct value"); + + ok(j2 = sxe_jitson_new("\"2nd\""), "Parsed '\"2nd\"' (error %s)", strerror(errno)); + is(sxe_jitson_get_type(j2), SXE_JITSON_TYPE_STRING, "'\"2nd\"' is a string"); + is_eq(sxe_jitson_get_string(j2, NULL), "2nd", "Correct value"); + + is_eq(sxe_jitson_get_string(jitson, NULL), "12345678", "The first one is still the correct value"); + + sxe_jitson_free(jitson); + sxe_jitson_free(j2); + } + + diag("Test simple construction"); + { + struct sxe_jitson primitive[1]; + + sxe_jitson_make_null(primitive); + is(sxe_jitson_get_type(primitive), SXE_JITSON_TYPE_NULL, "null is null"); + ok(!sxe_jitson_test(primitive), "null tests false"); + + sxe_jitson_make_bool(primitive, true); + is(sxe_jitson_get_type(primitive), SXE_JITSON_TYPE_BOOL, "true is a bool"); + ok(sxe_jitson_test(primitive), "true tests true"); + + sxe_jitson_make_number(primitive, 1.3E100); + is(sxe_jitson_get_type(primitive), SXE_JITSON_TYPE_NUMBER, "1.3E100 is a number"); + ok(sxe_jitson_test(primitive), "1.3E100 tests true"); + + sxe_jitson_make_number(primitive, 1.0); + is(sxe_jitson_get_uint(primitive), 1, "Number 1.0 is uint 1"); + + sxe_jitson_make_string_ref(primitive, "hello, world"); + is(sxe_jitson_get_type(primitive), SXE_JITSON_TYPE_STRING, "A string_ref is a string"); + 
is_eq(sxe_jitson_get_string(primitive, NULL), "hello, world", "String_refs values can be retrieved"); + is(primitive->len, 0, "String_refs don't know their lengths on creation"); + ok(sxe_jitson_test(primitive), "Non-empty string ref is true"); + is(sxe_jitson_len(primitive), 12, "String_ref is 12 characters"); + is(primitive->len, 12, "String_refs cache their lengths"); + sxe_jitson_make_string_ref(primitive, ""); + ok(!sxe_jitson_test(primitive), "Empty string_ref tests false"); + } + + diag("Test object construction and duplication failures"); + { + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened an object on the stack"); + sxe_jitson_stack_close_collection(stack); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the object from the stack"); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "{}", "Look, it's an empty object"); + kit_free(json_out); + + ok(clone = sxe_jitson_dup(jitson), "Cloned an empty array"); + is(sxe_jitson_get_type(clone), SXE_JITSON_TYPE_OBJECT, "Clone is an '%s' (expected 'object')", + sxe_jitson_get_type_as_str(clone)); + is(sxe_jitson_len(clone), 0, "Clone has 0 length (it is empty)"); + sxe_jitson_free(clone); + + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened another object on the stack"); + ok(sxe_jitson_stack_add_dup_members(stack, jitson), "Added duplicates of members of the empty object"); + ok(sxe_jitson_stack_add_member_number(stack, "pi", 3.14159), "Added member 'pi'"); + sxe_jitson_stack_close_collection(stack); + ok(clone = sxe_jitson_stack_get_jitson(stack), "Got the extended clone from the stack"); + is(sxe_jitson_len(clone), 1, "It has only 1 member"); + ok(sxe_jitson_object_get_member(clone, "pi", sizeof("pi") - 1), "It has the additional member"); + sxe_jitson_free(clone); + sxe_jitson_free(jitson); + + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened an object on the stack"); + ok(sxe_jitson_stack_add_member_name(stack, "null", 
SXE_JITSON_TYPE_IS_REF), "Added a member name reference"); + ok(sxe_jitson_stack_add_null(stack), "Added a null value"); + ok(sxe_jitson_stack_add_member_bool(stack, "bool", true), "Added a member 'bool' value true"); + ok(sxe_jitson_stack_add_member_number(stack, "number", 1.14159), "Added a member number in one call"); + ok(sxe_jitson_stack_add_member_name(stack, "hello.world", SXE_JITSON_TYPE_IS_REF), "Added long member name reference"); + ok(sxe_jitson_stack_add_string(stack, kit_strdup("hello, world"), SXE_JITSON_TYPE_IS_OWN), + "Added long string owned reference"); + sxe_jitson_stack_close_collection(stack); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the object from the stack"); + ok(element = sxe_jitson_object_get_member(jitson, "bool", 0), "Got the 'bool' member"); + ok(clone = sxe_jitson_dup(element), "Cloned the bool member"); + ok(sxe_jitson_is_allocated(clone), "Clone is allocated even though value wasn't"); + sxe_jitson_free(clone); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), + "{\"hello.world\":\"hello, world\",\"number\":1.14159,\"bool\":true,\"null\":null}", "Got the expected object"); + kit_free(json_out); + + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened another object on the stack"); + ok(sxe_jitson_stack_add_dup_members(stack, jitson), "Added duplicates of members"); + ok(sxe_jitson_stack_add_member_number(stack, "pi", 3.14159), "Added member 'pi'"); + sxe_jitson_stack_close_collection(stack); + ok(clone = sxe_jitson_stack_get_jitson(stack), "Got the extended clone from the stack"); + ok(sxe_jitson_object_get_member(clone, "hello.world", 0), "It has a member from the cloned object"); + ok(sxe_jitson_object_get_member(clone, "pi", sizeof("pi") - 1), "It has the additional member"); + is_eq(json_out = sxe_jitson_to_json(clone, NULL), + "{\"bool\":true,\"null\":null,\"pi\":3.14159,\"hello.world\":\"hello, world\",\"number\":1.14159}", + "Got the expected object"); + kit_free(json_out); + 
sxe_jitson_free(clone); + + MOCKFAIL_START_TESTS(1, MOCK_FAIL_DUP); + ok(!sxe_jitson_dup(jitson), "Can't duplicate an object if malloc fails"); + MOCKFAIL_END_TESTS(); + MOCKFAIL_START_TESTS(1, MOCK_FAIL_OBJECT_CLONE); + ok(!sxe_jitson_dup(jitson), "Can't clone an object if malloc fails"); + MOCKFAIL_END_TESTS(); + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STRING_CLONE); + ok(!sxe_jitson_dup(jitson), "Can't clone an object that contains an owned string if strdup returns NULL"); + MOCKFAIL_END_TESTS(); + + stack = sxe_jitson_stack_get_thread(); + stack->maximum = 1; // Reset the initial maximum stack size to allow testing the following out of memory conditions + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened yet another object on the stack"); + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND); + ok(!sxe_jitson_stack_add_dup_members(stack, jitson), "Can't duplicate members if stack can't be expanded"); + MOCKFAIL_END_TESTS(); + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STRING_CLONE); + ok(!sxe_jitson_stack_add_dup_members(stack, jitson), "Can't dup members that include an owned string if strdup fails"); + MOCKFAIL_END_TESTS(); + sxe_jitson_stack_clear(stack); + + sxe_jitson_free(jitson); + } + + diag("Test array construction and string edge cases"); + { + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Opened an array on the stack"); + sxe_jitson_stack_close_collection(stack); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the array from the stack"); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "[]", "Look, it's an empty array"); + kit_free(json_out); + + ok(clone = sxe_jitson_dup(jitson), "Cloned an empty array"); + is_eq(sxe_jitson_get_type_as_str(clone), "array", "Clone is an array"); + is(sxe_jitson_len(clone), 0, "Clone has 0 length (it is empty)"); + sxe_jitson_free(clone); + sxe_jitson_free(jitson); + + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Opened an array on the stack"); + 
ok(sxe_jitson_stack_add_string(stack, "shortly", SXE_JITSON_TYPE_IS_COPY), "Added a copy of a short string"); + ok(sxe_jitson_stack_add_string(stack, "longerly", SXE_JITSON_TYPE_IS_COPY), "Added a copy of a longer string"); + ok(sxe_jitson_stack_add_string(stack, "longer than 23 characters", SXE_JITSON_TYPE_IS_COPY), + "Added a copy of a long string needing more than 2 tokens"); + sxe_jitson_stack_close_collection(stack); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the array from the stack"); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "[\"shortly\",\"longerly\",\"longer than 23 characters\"]", + "Got the expected JSON"); + kit_free(json_out); + + MOCKFAIL_START_TESTS(1, MOCK_FAIL_ARRAY_CLONE); + ok(!sxe_jitson_dup(jitson), "Can't clone an array if malloc fails"); + MOCKFAIL_END_TESTS(); + + ok(clone = sxe_jitson_dup(jitson), "Cloned a non-empty array"); + sxe_jitson_free(jitson); + is_eq(sxe_jitson_get_type_as_str(clone), "array", "Clone is an array"); + is(sxe_jitson_len(clone), 3, "Clone has expected length"); + sxe_jitson_free(clone); + + stack->maximum = 1; // Set initial maximum back to 1 + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Opened an array on the stack"); + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND); + ok(!sxe_jitson_stack_add_string(stack, "", SXE_JITSON_TYPE_IS_REF), "Failed to add a string ref expanding in add_value"); + MOCKFAIL_END_TESTS(); + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND); + ok(!sxe_jitson_stack_add_string(stack, "", SXE_JITSON_TYPE_IS_COPY), "Failed to add a string copy expanding in add_value"); + MOCKFAIL_END_TESTS(); + MOCKFAIL_START_TESTS(1, MOCK_FAIL_STACK_EXPAND); + MOCKFAIL_SET_FREQ(2); // Fail on the second attempt to expand the stack + ok(!sxe_jitson_stack_add_string(stack, "i'm too long", SXE_JITSON_TYPE_IS_COPY), "Failed to add a long string"); + MOCKFAIL_END_TESTS(); + ok(sxe_jitson_stack_add_null(stack), "Added null after failed string adds"); + 
sxe_jitson_stack_close_collection(stack); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the array from the stack"); + is_eq(sxe_jitson_get_type_as_str(sxe_jitson_array_get_element(jitson, 0)), "null", "Able to retreive null from array"); + sxe_jitson_free(jitson); + } + + diag("Test references and cloning a string that's an owned reference"); + { + struct sxe_jitson reference[1]; + + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Opened an array on the stack"); + ok(sxe_jitson_stack_add_string(stack, "one", SXE_JITSON_TYPE_IS_COPY), "Added a copy of a string"); + ok(sxe_jitson_stack_add_string(stack, "two", SXE_JITSON_TYPE_IS_REF), "Added a weak reference to a string"); + ok(sxe_jitson_stack_add_string(stack, kit_strdup("three"), SXE_JITSON_TYPE_IS_OWN), "Strong (ownership) ref to a string"); + sxe_jitson_stack_close_collection(stack); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the array from the stack"); + is(sxe_jitson_size(jitson), 4, "Array of 3 short strings is 4 jitsons"); + + sxe_jitson_make_reference(reference, jitson); + is(sxe_jitson_get_type(reference), SXE_JITSON_TYPE_ARRAY, "A reference to an array has type array"); + ok(sxe_jitson_test(reference), "A reference to a non-empty array tests true"); + is(sxe_jitson_size(reference), 1, "A reference to a non-empty array requires only 1 jitson"); + is(sxe_jitson_len(reference), 3, "A reference to an array has len of the array"); + json_out = sxe_jitson_to_json(reference, NULL); + is_eq(json_out, "[\"one\",\"two\",\"three\"]", "A reference to an array's json the array's"); + kit_free(json_out); + ok(member = sxe_jitson_array_get_element(reference, 2), "Can get a element of an array from a reference"); + is_eq(sxe_jitson_get_string(member, NULL), "three", "Got the correct element"); + + MOCKFAIL_START_TESTS(2, MOCK_FAIL_STRING_CLONE); + ok(!sxe_jitson_dup(member), "Can't clone a string if strdup returns NULL"); + ok(!sxe_jitson_dup(jitson), "Can't clone an array that 
contains an owned string if strdup returns NULL"); + MOCKFAIL_END_TESTS(); + + ok(clone = sxe_jitson_dup(member), "Cloned the owned string"); + is_eq(sxe_jitson_get_string(member, NULL), "three", "Got the correct content"); + ok(sxe_jitson_get_string(member, NULL) != sxe_jitson_get_string(clone, NULL), "The owned string is duplicated"); + sxe_jitson_free(clone); + sxe_jitson_free(reference); + + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Opened another array on the stack"); + ok(sxe_jitson_stack_add_reference(stack, sxe_jitson_array_get_element(jitson, 2)), "Added a reference to an array"); + sxe_jitson_stack_close_collection(stack); + ok(array = sxe_jitson_stack_get_jitson(stack), "Got the array from the stack"); + ok(member = sxe_jitson_array_get_element(array, 0), "Got the reference from the array"); + is_eq(sxe_jitson_get_string(member, NULL), "three", "Got the correct element"); + is(member->type, SXE_JITSON_TYPE_REFERENCE, "Raw type of reference is %s (expected REFERENCE)", + sxe_jitson_type_to_str(member->type)); + sxe_jitson_free(array); + is(sxe_jitson_get_type(jitson), SXE_JITSON_TYPE_ARRAY, "After freeing all references, array is still an array"); + sxe_jitson_free(jitson); + } + + diag("Test a non-NUL terminated source"); + { + sxe_jitson_source_from_buffer(&source, "{}", 1, SXE_JITSON_FLAG_STRICT); + ok(!sxe_jitson_stack_load_json(stack, &source), "Length is respected, truncating valid JSON to invalid"); + } + + diag("Test cmp (comparison) functions"); + { + sxe_jitson_make_uint(&prim_left, 0); + sxe_jitson_make_uint(&prim_right, 1); + is(sxe_jitson_cmp(&prim_left, &prim_right), -1, "0 cmp 1 == -1"); + + sxe_jitson_make_number(&prim_right, 0.0); + is(sxe_jitson_cmp(&prim_left, &prim_right), 0, "0 cmp 0.0 == 0"); + + sxe_jitson_make_number(&prim_right, 0.1); + is(sxe_jitson_cmp(&prim_left, &prim_right), -1, "0 cmp 0.1 == -1"); + + sxe_jitson_make_uint(&prim_left, ~0ULL); + is(sxe_jitson_cmp(&prim_left, &prim_right), 
SXE_JITSON_CMP_ERROR, "Cannot compare 18446744073709551615 with 0.1"); + + sxe_jitson_make_number(&prim_left, 0.2); + is(sxe_jitson_cmp(&prim_left, &prim_right), 1, "0.2 cmp 0.1 == 1"); + + sxe_jitson_make_uint(&prim_right, 1); + is(sxe_jitson_cmp(&prim_left, &prim_right), -1, "0.2 cmp 1 == -1"); + + sxe_jitson_make_number(&prim_left, 2.0); + is(sxe_jitson_cmp(&prim_left, &prim_right), 1, "2.0 cmp 1 == 1"); + + sxe_jitson_make_number(&prim_left, -1); + sxe_jitson_make_uint( &prim_right, ~0ULL); + is(sxe_jitson_cmp(&prim_left, &prim_right), SXE_JITSON_CMP_ERROR, "Cannot compare -1 with 18446744073709551615"); + + sxe_jitson_make_string_ref(&prim_left, "10"); + sxe_jitson_make_string_ref(&prim_right, "5"); + is(sxe_jitson_cmp(&prim_left, &prim_right), -1, "\"10\" cmp \"5\" == -1"); + + clone = sxe_jitson_new("[1]"); + jitson = sxe_jitson_new("[]"); + is(sxe_jitson_cmp(clone, jitson), 1, "[1] is prefixed by []"); + + sxe_jitson_free(jitson); + jitson = sxe_jitson_new("[0,\"x\"]"); + is(sxe_jitson_cmp(clone, jitson), 1, "[1] cmp [0,\"x\"] == 1"); + + sxe_jitson_free(clone); + clone = sxe_jitson_new("[0]"); + is(sxe_jitson_cmp(clone, jitson), -1, "[0] prefixes [0,\"x\"]"); + + sxe_jitson_free(jitson); + jitson = sxe_jitson_new("[0]"); + is(sxe_jitson_cmp(clone, jitson), 0, "[0] cmp [0] == 0"); + is(sxe_jitson_cmp(clone, &prim_right), SXE_JITSON_CMP_ERROR, "Cannot compare an array to a string"); + is(sxe_jitson_cmp(clone, NULL), SXE_JITSON_CMP_ERROR, "Cannot compare an array if there is nothing to compare to"); + + sxe_jitson_free(jitson); + sxe_jitson_free(clone); + } + + diag("Test orderred arrays"); + { + test_parsing_ordered_array("[]", false); // Trivial arrays are not considered ordered + test_parsing_ordered_array("[null]", false); // Trivial arrays are not considered ordered + test_parsing_ordered_array("[0,1]", true); + test_parsing_ordered_array("[0,0]", true); + test_parsing_ordered_array("[1,0]", false); + test_parsing_ordered_array("[1,\"2\"]", false); + + 
sxe_jitson_flags |= SXE_JITSON_FLAG_OPTIMIZE; // Check arrays for order by default + + test_constructing_ordered_array("[]", true, NULL); + test_constructing_ordered_array("[\"0\"]", true, "0", NULL); + test_constructing_ordered_array("[\"0\",\"1\"]", true, "0", "1", NULL); + test_constructing_ordered_array("[\"0\",\"0\"]", true, "0", "0", NULL); + test_constructing_ordered_array("[\"1\",\"0\"]", false, "1", "0", NULL); + + SXEA1(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Failed to open tuple"); + SXEA1(sxe_jitson_stack_add_uint(stack, 2), "Failed to add org to tuple"); + SXEA1(sxe_jitson_stack_add_uint(stack, 0), "Failed to add parent to tuple"); + SXEA1(sxe_jitson_stack_add_string(stack, "0:1", SXE_JITSON_TYPE_IS_REF), "Failed to add list id to tuple"); + sxe_jitson_stack_close_collection(stack); + SXEA1((jitson = sxe_jitson_stack_get_jitson(stack)), "Failed to get tuple from stack"); + ok(!(jitson->type & SXE_JITSON_TYPE_IS_ORD), "Mixed tuple is not ordered"); + ok(!(jitson->type & SXE_JITSON_TYPE_IS_HOMO), "Mixed tuple is not homogenous"); + ok(jitson->type & SXE_JITSON_TYPE_IS_UNIF, "Mixed tuple is uniform"); + sxe_jitson_free(jitson); + + SXEA1(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Failed to open tuple"); + SXEA1(sxe_jitson_stack_add_string(stack, "short", SXE_JITSON_TYPE_IS_COPY), "Failed to add short to tuple"); + SXEA1(sxe_jitson_stack_add_string(stack, "long_string", SXE_JITSON_TYPE_IS_COPY), "Failed to add long_string to tuple"); + sxe_jitson_stack_close_collection(stack); + SXEA1((jitson = sxe_jitson_stack_get_jitson(stack)), "Failed to get tuple from stack"); + ok(!(jitson->type & SXE_JITSON_TYPE_IS_ORD), "Non-uniform tuple is not ordered"); + ok(jitson->type & SXE_JITSON_TYPE_IS_HOMO, "Non-uniform tuple is homogenous"); + ok(!(jitson->type & SXE_JITSON_TYPE_IS_UNIF), "Non-uniform tuple is not uniform"); + sxe_jitson_free(jitson); + } + + diag("Test eq method"); + { + sxe_jitson_make_null(&prim_left); + 
is(sxe_jitson_eq(&prim_left, sxe_jitson_null), SXE_JITSON_TEST_TRUE, "Constructed null eq builtin null"); + + sxe_jitson_make_bool(&prim_left, false); + is(sxe_jitson_eq(&prim_left, sxe_jitson_true), SXE_JITSON_TEST_FALSE, "Constructed false !eq builtin true"); + + sxe_jitson_make_string_ref(&prim_left, "not"); + sxe_jitson_make_string_ref(&prim_right, "equal"); + is(sxe_jitson_eq(&prim_left, &prim_right), SXE_JITSON_TEST_FALSE, "Compared two strings with unknown lengths"); + isnt(sxe_jitson_len(&prim_left), sxe_jitson_len(&prim_right), "Their lengths differ"); + is(sxe_jitson_eq(&prim_left, &prim_right), SXE_JITSON_TEST_FALSE, "Equality of string of diff lengths is optimized"); + + clone = sxe_jitson_new("[null, 1]"); + jitson = sxe_jitson_new("[]"); + is(sxe_jitson_eq(clone, jitson), SXE_JITSON_TEST_FALSE, "Compare two arrays of differing lengths"); + + sxe_jitson_free(jitson); + jitson = sxe_jitson_new("[null, 1.0]"); + is(sxe_jitson_eq(clone, jitson), SXE_JITSON_TEST_TRUE, "Compare arrays with unorderable types and mixed float/unsigned"); + sxe_jitson_free(jitson); + + jitson = sxe_jitson_new("[false, 1.0]"); + is(sxe_jitson_eq(clone, jitson), SXE_JITSON_TEST_FALSE, "Inequality of arrays with incompatible types"); + sxe_jitson_free(jitson); + sxe_jitson_free(clone); + + clone = sxe_jitson_new("{\"x\": 1}"); + jitson = sxe_jitson_new("{}"); + is(sxe_jitson_eq(clone, jitson), SXE_JITSON_TEST_ERROR, "Compare two objects of differing lengths isn't supported"); + sxe_jitson_free(jitson); + sxe_jitson_free(clone); + + clone = sxe_jitson_new("[-1]"); // Negative numbers are stored as doubles + jitson = sxe_jitson_new("[18446744073709551615]"); // ULONG_MAX can't be converted to a double + is(sxe_jitson_eq(clone, jitson), SXE_JITSON_TEST_ERROR, "Compare two arrays containing incompatible types"); + sxe_jitson_free(jitson); + sxe_jitson_free(clone); + } + + diag("Test insertion sorted arrays"); + { + sxe_jitson_flags &= ~SXE_JITSON_FLAG_OPTIMIZE; // Make sure insertion 
sorting works without enabling optimization + + SXEA1(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY | SXE_JITSON_TYPE_MK_SORT), "Opened an array"); + is(stack->maximum, 4, "Stack has expected maximum size"); + for (unsigned i = 32; i >= 1; i--) { // This forces every insert to do a memmove and guarantees the stack has to grow several times (DPT-2004) + SXEA1(sxe_jitson_stack_add_uint(stack, i), "Failed to add %u to array", i); + } + is(stack->maximum, 64, "Stack has expected maximum size after growth"); + sxe_jitson_stack_close_collection(stack); + SXEA1((jitson = sxe_jitson_stack_get_jitson(stack)), "Failed to get array from stack"); + ok(jitson->type & SXE_JITSON_TYPE_IS_ORD, "Array is ordered"); + is(sxe_jitson_get_uint(sxe_jitson_array_get_element(jitson, 0)), 1, "First element is 1"); + is(sxe_jitson_get_uint(sxe_jitson_array_get_element(jitson, 1)), 2, "Second element is 2"); + sxe_jitson_free(jitson); + + SXEA1(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY | SXE_JITSON_TYPE_MK_SORT), "Opened an array"); + SXEA1(sxe_jitson_stack_add_string(stack, "alpha", SXE_JITSON_TYPE_IS_REF), "Failed to add 'alpha' to array"); + SXEA1(sxe_jitson_stack_add_string(stack, "beta", SXE_JITSON_TYPE_IS_REF), "Failed to add 'beta' to array"); + SXEA1(sxe_jitson_stack_add_string(stack, "gamma", SXE_JITSON_TYPE_IS_REF), "Failed to add 'gamma' to array"); + SXEA1(sxe_jitson_stack_add_string(stack, "delta", SXE_JITSON_TYPE_IS_REF), "Failed to add 'delta' to array"); + sxe_jitson_stack_close_collection(stack); + SXEA1((jitson = sxe_jitson_stack_get_jitson(stack)), "Failed to get array from stack"); + ok(jitson->type & SXE_JITSON_TYPE_IS_ORD, "Array is ordered"); + is_eq(sxe_jitson_get_string(sxe_jitson_array_get_element(jitson, 2), NULL), "delta", "Third element is 'delta'"); + is_eq(sxe_jitson_get_string(sxe_jitson_array_get_element(jitson, 3), NULL), "gamma", "Fourth element is 'gamma'"); + is(sxe_jitson_size(jitson), 5, "Array is 5 jitsons long"); + 
sxe_jitson_free(jitson); + } + + diag("Test stuff with optimization enabled"); + { + sxe_jitson_flags |= SXE_JITSON_FLAG_OPTIMIZE; // Reenable optimization + + ok(sxe_jitson_stack_open_collection( stack, SXE_JITSON_TYPE_OBJECT), "Opened object"); + ok(sxe_jitson_stack_add_member_string(stack, "endpoint.os.type", "win", SXE_JITSON_TYPE_IS_COPY), "Added os.type"); + ok(sxe_jitson_stack_add_member_string(stack, "endpoint.os.version", "10", SXE_JITSON_TYPE_IS_COPY), "Added version"); + sxe_jitson_stack_close_collection(stack); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the constructed object from the stack"); + sxe_jitson_free(jitson); + + /* Make sure sorted array construction works if the sorted array isn't the first thing on the stack + */ + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Opened outer array"); + ok(sxe_jitson_stack_add_string(stack, "I am a very very very very long string", SXE_JITSON_TYPE_IS_COPY), + "Added a copy of a verrrry long string"); + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY | SXE_JITSON_TYPE_MK_SORT), "Opened inner array"); + ok(sxe_jitson_stack_add_uint(stack, 2), "Added 2 to array"); + ok(sxe_jitson_stack_add_uint(stack, 1), "Added 1 to array"); + sxe_jitson_stack_close_collection(stack); + sxe_jitson_stack_close_collection(stack); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got array from stack"); + ok(!(jitson->type & SXE_JITSON_TYPE_IS_ORD), "Outer array is not ordered"); + ok(element = sxe_jitson_array_get_element(jitson, 1), "Got inner array"); + + is(element->type & (SXE_JITSON_TYPE_IS_ORD | SXE_JITSON_TYPE_IS_UNIF | SXE_JITSON_TYPE_IS_HOMO), + SXE_JITSON_TYPE_IS_ORD | SXE_JITSON_TYPE_IS_UNIF | SXE_JITSON_TYPE_IS_HOMO, + "Inner array is ordered with uniformly sized, homogenously typed elements"); + is(element->uniform.type, SXE_JITSON_TYPE_NUMBER, "And contains numbers"); + sxe_jitson_free(jitson); + + struct { + const char *name; + unsigned count; + unsigned in[6]; + 
unsigned out[6]; + } tests[] = { + { "umbrella.source.organization_ids", 1, { 42 }, { 42 } }, + { "umbrella.source.identity_ids", 0, { }, { } }, + { "umbrella.source.identity_type_ids", 6, { 100000, 99999, 10001, 0, 1 << 31, 1 << 30 }, { 0, 10001, 99999, 100000, 1 << 30, 1 << 31 } }, + }; + unsigned i, t; + + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened a collection"); + for (t = 0; t < sizeof(tests) / sizeof(*tests); t++) { + sxe_jitson_stack_add_member_name(stack, tests[t].name, SXE_JITSON_TYPE_IS_REF); + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY | SXE_JITSON_TYPE_MK_SORT), "test %u: Opened a self-sorting array", t); + for (i = 0; i < tests[t].count; i++) + ok(sxe_jitson_stack_add_number(stack, tests[t].in[i]), "Added %u to the array", tests[t].in[i]); + sxe_jitson_stack_close_collection(stack); + } + sxe_jitson_stack_close_collection(stack); + + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the jitson"); + is(jitson->len, t, "The jitson contains %u elements", t); + + for (t = 0; t < sizeof(tests) / sizeof(*tests); t++) { + const struct sxe_jitson *entry = sxe_jitson_object_get_member(jitson, tests[t].name, 0); + ok(entry, "Got object '%s'", tests[t].name); + ok(entry->type & SXE_JITSON_TYPE_IS_ORD, "The array is ordered"); + for (i = 0; i < tests[t].count; i++) + is(sxe_jitson_get_number(sxe_jitson_array_get_element(entry, i)), tests[t].out[i], + "The '%s' value %u is correct", tests[t].name, i); + } + + sxe_jitson_free(jitson); + + jitson = sxe_jitson_new("[0,[1,2,3],[4,5,6]]"); + is(jitson->type & (SXE_JITSON_TYPE_IS_ORD | SXE_JITSON_TYPE_IS_UNIF | SXE_JITSON_TYPE_IS_HOMO), 0, + "Array is not ordered, uniformly sized, or homogenously typed"); + sxe_jitson_free(jitson); + } + + diag("Test stuff with optimization enabled"); + { + sxe_jitson_flags |= SXE_JITSON_FLAG_OPTIMIZE; // Reenable optimization + + ok(sxe_jitson_stack_open_collection( stack, SXE_JITSON_TYPE_OBJECT), "Opened object"); + 
ok(sxe_jitson_stack_add_member_string(stack, "endpoint.os.type", "win", SXE_JITSON_TYPE_IS_COPY), "Added os.type"); + ok(sxe_jitson_stack_add_member_string(stack, "endpoint.os.version", "10", SXE_JITSON_TYPE_IS_COPY), "Added version"); + sxe_jitson_stack_close_collection(stack); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the constructed object from the stack"); + sxe_jitson_free(jitson); + } + + diag("Test stack to string diagnostic function"); + { +#if !SXE_DEBUG + skip(9, "Skipping test of debug function test_stack_to_str"); +#else + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_OBJECT), "Opened an object"); + is_eq(sxe_jitson_stack_to_str(stack), "{}", "Stack dump's right"); + ok(sxe_jitson_stack_add_member_name(stack, "endpoint.os.type", SXE_JITSON_TYPE_IS_COPY), "Added member name 1"); + is_eq(sxe_jitson_stack_to_str(stack), "{}", "Stack dump's right"); + ok(sxe_jitson_stack_add_string(stack, "win", SXE_JITSON_TYPE_IS_COPY), "Added member value"); + is_eq(sxe_jitson_stack_to_str(stack), "{\"endpoint.os.type\":\"win\"", "Stack dump's right"); + ok(sxe_jitson_stack_add_member_name(stack, "endpoint.os.version", SXE_JITSON_TYPE_IS_REF), "Added member name 2"); + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Opened inner array"); + is_eq(sxe_jitson_stack_to_str(stack),"{\"endpoint.os.type\":\"win\",\"endpoint.os.version\":[]", "Stack dump's right"); + sxe_jitson_stack_clear(stack); +#endif + } + + diag("Test ability to extend types and construct complex types"); + { + is(NULL, sxe_jitson_type_get_extra(SXE_JITSON_TYPE_NULL), "Initially, types have no extra data"); + sxe_jitson_type_set_extra(SXE_JITSON_TYPE_NULL, "NULL"); + is_eq("NULL", sxe_jitson_type_get_extra(SXE_JITSON_TYPE_NULL), "Type has expected extra data"); + + SXEA1(sxe_jitson_stack_open_array(stack, "pair"), "Failed to open"); + SXEA1(sxe_jitson_stack_expand(stack, 1) != SXE_JITSON_STACK_ERROR, "Failed to grow"); + 
SXEA1(sxe_jitson_make_uint(&stack->jitsons[stack->count - 1], 2), "Failed to make 2"); + SXEA1(sxe_jitson_stack_add_value(stack, 1, SXE_JITSON_TYPE_NUMBER, NULL) != SXE_JITSON_STACK_ERROR, "Failed to add 2"); + SXEA1(sxe_jitson_stack_expand(stack, 1) != SXE_JITSON_STACK_ERROR, "Failed to grow"); + SXEA1(sxe_jitson_make_uint(&stack->jitsons[stack->count - 1], 1), "Failed to make 1"); + SXEA1(sxe_jitson_stack_add_value(stack, 1, SXE_JITSON_TYPE_NUMBER, NULL) != SXE_JITSON_STACK_ERROR, "Failed to add 1"); + SXEA1(sxe_jitson_stack_close_array(stack, "pair"), "Failed to close"); + SXEA1(jitson = sxe_jitson_stack_get_jitson(stack), "Failed to get"); + + ok(!(jitson->type & SXE_JITSON_TYPE_IS_ORD), "Pair is not ordered"); + ok(jitson->type & SXE_JITSON_TYPE_IS_HOMO, "Pair is homogenous"); + ok(jitson->type & SXE_JITSON_TYPE_IS_UNIF, "Pair is uniform"); + sxe_jitson_free(jitson); + } + + diag("Test for the bugs that led to DPT-3052"); + { + ok(jitson = sxe_jitson_new("[\"I-fujitsu\\u30b5\\u30a4\\u30c8\",\"x\"]"), "Parsed a string with unicode characters"); + ok(element = sxe_jitson_array_get_element(jitson, 0), "Got the first element"); + is_eq(sxe_jitson_get_string(element, NULL), "I-fujitsu\u30b5\u30a4\u30c8", "It looks right"); + ok(element = sxe_jitson_array_get_element(jitson, 1), "Got the next element"); + is_eq(sxe_jitson_get_type_as_str(element), "string", "It's a string"); + is_eq(sxe_jitson_get_string(element, NULL), "x", "It's the right one"); + sxe_jitson_free(jitson); + + /* Test that if a JSON string includes UTF-8 characters (technically not valid JSON) we continue to behave "as expected" + */ + SXEA1(strlen("Navegaci\u00f3n expl\u00edcita") == 22, "Expected length"); + ok(jitson = sxe_jitson_new("[\"Navegaci\u00f3n expl\u00edcita\",\"x\"]"), "Parsed a string with UTF-8 characters"); + ok(element = sxe_jitson_array_get_element(jitson, 0), "Got the first element"); + is_eq(sxe_jitson_get_string(element, NULL), "Navegaci\u00f3n expl\u00edcita", "It looks 
right"); + is(sxe_jitson_size(element), 2, "23 byte string fits in 2 jitsons"); + ok(element = sxe_jitson_array_get_element(jitson, 1), "Got the next element"); + is_eq(sxe_jitson_get_type_as_str(element), "string", "It's a string"); + is_eq(sxe_jitson_get_string(element, NULL), "x", "It's the right one"); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new("[\"\\u624d\\u8302\\u3055\\u3093\\u30c6\\u30b9\\u30c8\\u30a4\\u30f3\\u30bf\\u30fc\\u30cd\\u30c3\\u30c82\",\"x\",\"y\"]"), + "Parsed another string with unicode characters"); + ok(element = sxe_jitson_array_get_element(jitson, 0), "Got the first element"); + is_eq(sxe_jitson_get_string(element, NULL), "\u624d\u8302\u3055\u3093\u30c6\u30b9\u30c8\u30a4\u30f3\u30bf\u30fc\u30cd\u30c3\u30c82", "It looks right"); + ok(element = sxe_jitson_array_get_element(jitson, 1), "Got the next element"); + is_eq(sxe_jitson_get_type_as_str(element), "string", "It's a string"); + is_eq(sxe_jitson_get_string(element, NULL), "x", "It's the right one"); + sxe_jitson_free(jitson); + } + + diag("Test thread-local indexing"); + { + sxe_jitson_flags &= ~SXE_JITSON_FLAG_OPTIMIZE; // Make sure optimization not enabled to test indexing + + ok(sxe_jitson_stack_open_local_object(stack, "obj"), "Opened an object on the stack"); + ok(sxe_jitson_stack_add_member_name(stack, "ids", SXE_JITSON_TYPE_IS_COPY), "Add a member"); + ok(sxe_jitson_stack_open_collection(stack, SXE_JITSON_TYPE_ARRAY), "Member's value is an array"); + ok(sxe_jitson_stack_add_uint(stack, 11), "Added 11 to the array"); + ok(sxe_jitson_stack_add_uint(stack, 12), "Added 12 to the array"); + ok(sxe_jitson_stack_close_collection(stack), "Close array - can't fail"); + ok(sxe_jitson_stack_close_collection(stack), "Close outer object - can't fail"); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the object from the stack"); + ok(sxe_jitson_is_local(jitson), "Object is thread-local"); + ok(!(jitson->type & SXE_JITSON_TYPE_INDEXED), "Object is not indexed"); + ok(member = 
sxe_jitson_object_get_member(jitson, "ids", 0), "Got first member of the object 'ids'"); + ok(jitson->type & SXE_JITSON_TYPE_INDEXED, "Object is now indexed"); + is(sxe_jitson_len(member), 2, "Member is an array of 2 elements"); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "{\"ids\":[11,12]}", "Object is right"); + kit_free(json_out); + sxe_jitson_stack_clear(stack); + is(stack->count, 0, "Stack was cleared"); + sxe_jitson_free(jitson); + + ok(sxe_jitson_stack_open_local_array(stack, "ids"), "Opened an array on the stack"); + ok(sxe_jitson_stack_add_uint(stack, 21), "Added 21 to the array"); + ok(sxe_jitson_stack_add_uint(stack, 22), "Added 22 to the array"); + ok(sxe_jitson_stack_close_collection(stack), "Close array - can't fail"); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the array from the stack"); + ok(sxe_jitson_is_local(jitson), "Array is thread-local"); + ok(!(jitson->type & SXE_JITSON_TYPE_INDEXED), "Array is not indexed"); + ok(element = sxe_jitson_array_get_element(jitson, 0), "Got first element of array 'ids'"); + ok(jitson->type & SXE_JITSON_TYPE_INDEXED, "Array is now indexed"); + is(sxe_jitson_get_uint(element), 21, "It's the unsigned integer 21"); + ok(element = sxe_jitson_array_get_element(jitson, 1), "Got second element of array 'ids'"); + is(sxe_jitson_get_uint(element), 22, "It's the unsigned integer 22"); + is_eq(json_out = sxe_jitson_to_json(jitson, NULL), "[21,22]", "Array is right"); + kit_free(json_out); + sxe_jitson_stack_clear(stack); + is(stack->count, 0, "Stack was cleared"); + sxe_jitson_free(jitson); + } + + diag("Tests for DPT-3141 (yet another unicode bug)"); + { + ok(jitson = sxe_jitson_new("[\"Lagerh\\u00e4user ALLOW WLAN\",666]"), "Parsed [23 character UTF-8 string,666]"); + ok(element = sxe_jitson_array_get_element(jitson, 0), "Got the first element"); + is(sxe_jitson_len(element), 23, "It's length is correct"); + ok(element = sxe_jitson_array_get_element(jitson, 1), "Got the second element"); + 
is(sxe_jitson_get_uint(element), 666, "It's the number of the beast"); + sxe_jitson_free(jitson); + + ok(jitson = sxe_jitson_new("[\"Lagerhxxuser ALLOW WLAN\",667]"), "Parsed [23 character string,667]"); + ok(element = sxe_jitson_array_get_element(jitson, 0), "Got the first element"); + is(sxe_jitson_len(element), 23, "It's length is correct"); + ok(element = sxe_jitson_array_get_element(jitson, 1), "Got the second element"); + is(sxe_jitson_get_uint(element), 667, "It's the number of the beast + 1"); + sxe_jitson_free(jitson); + + unsigned i; + char json_buf[64]; + const char head[] = "[\"abcdefghijklmnopqrstuvwxyz"; +# define HEAD_AND_UNICODE_LEN (sizeof(head) - 1 + sizeof("\\u00e4") - 1) + + /* Test the above string with \u00e4 inserted in every possible place + */ + for (i = 2; i < sizeof(head); i++) { + memcpy(json_buf, head, i); + memcpy(json_buf + i, "\\u00e4", sizeof("\\u00e4") - 1); + strcpy(json_buf + i + sizeof("\\u00e4") - 1, head + i); + snprintf(json_buf + HEAD_AND_UNICODE_LEN, sizeof(json_buf) - HEAD_AND_UNICODE_LEN, "\",%u]", i); + + ok(jitson = sxe_jitson_new(json_buf), "Parsed string with inserted 2 byte UTF-8"); + ok(element = sxe_jitson_array_get_element(jitson, 0), "Got the first element"); + is(sxe_jitson_len(element), 28, "It's length is correct"); + ok(element = sxe_jitson_array_get_element(jitson, 1), "Got the second element"); + is(sxe_jitson_get_uint(element), i, "It's the correct number"); + sxe_jitson_free(jitson); + } + } + + diag("Tests for DPT-3261 - sxe_jitson_stack_push_string_reversed"); + { + ok(sxe_jitson_stack_open_array(stack, "pair"), "Opened array"); + ok(sxe_jitson_stack_add_value(stack, 0, SXE_JITSON_TYPE_STRING, NULL) != SXE_JITSON_STACK_ERROR, + "Prepared to add the value"); + ok(sxe_jitson_stack_push_string_reversed(stack, "reverse", 0), "Pushed reversed string"); + SXEA1(sxe_jitson_stack_value_added(stack, sxe_jitson_flags), "Failed to add a value that's already pushed"); + ok(sxe_jitson_stack_add_uint(stack, 777), 
"Added a number"); + SXEA1(sxe_jitson_stack_close_array(stack, "pair"), "Failed to close the array"); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the array from the stack"); + ok(element = sxe_jitson_array_get_element(jitson, 0), "Got the first element"); + is_eq(sxe_jitson_get_type_as_str(element), "string", "Reversed string is a string"); + ok(sxe_jitson_get_type(element) | SXE_JITSON_TYPE_REVERSED, "Reversed flag is set"); + is_eq(sxe_jitson_get_string(element, &len), "esrever", "Reversed string is 'esrever'"); + is(len, 7, "Length of 'esrever' is 7"); + is(sxe_jitson_get_uint(sxe_jitson_array_get_element(jitson, 1)), 777, "Got expected guard value"); + sxe_jitson_free(jitson); + + ok(sxe_jitson_stack_open_array(stack, "pair"), "Opened array"); + ok(sxe_jitson_stack_add_value(stack, 0, SXE_JITSON_TYPE_STRING, NULL) != SXE_JITSON_STACK_ERROR, + "Prepared to add the value"); + ok(sxe_jitson_stack_push_string_reversed(stack, "i'm backwards, you wooly", 24), "Pushed the reversed string"); + SXEA1(sxe_jitson_stack_value_added(stack, sxe_jitson_flags), "Failed to add value"); + ok(sxe_jitson_stack_add_uint(stack, 888), "Added a number"); + SXEA1(sxe_jitson_stack_close_array(stack, "pair"), "Failed to close the array"); + ok(jitson = sxe_jitson_stack_get_jitson(stack), "Got the array from the stack"); + ok(element = sxe_jitson_array_get_element(jitson, 0), "Got the first element"); + is_eq(sxe_jitson_get_string(element, &len), "yloow uoy ,sdrawkcab m'i", "Reversed string is correct"); + is(len, 24, "Length is 24"); + is(sxe_jitson_get_uint(sxe_jitson_array_get_element(jitson, 1)), 888, "Got expected guard value"); + sxe_jitson_free(jitson); + } + + diag("Tests for DPT-3064 - Concatenate arrays"); + { + ok(array = sxe_jitson_new("[1, 2]"), "Created array 1"); + ok(jitson = sxe_jitson_new("[3, 4]"), "Created array 2"); + ok(sxe_jitson_stack_push_concat_array(stack, array, jitson, SXE_JITSON_TYPE_IS_OWN), "Pushed the concatenation"); + ok(clone = 
sxe_jitson_stack_get_jitson(stack), "Got concatenated array from stack"); + is(sxe_jitson_len(clone), 4, "It's got 4 elements"); + is(sxe_jitson_get_uint(sxe_jitson_array_get_element(clone, 0)), 1, "Got the correct first element"); + is(sxe_jitson_get_uint(sxe_jitson_array_get_element(clone, 3)), 4, "Got the correct fourth element"); + is(sxe_jitson_size(clone), 2, "Concatenations take 2 jitsons"); + is_eq(json_out = sxe_jitson_to_json(clone, NULL), "[1,2,3,4]", "Array is right"); + is_eq(sxe_jitson_get_type_as_str(*(struct sxe_jitson **)(clone + 1)), "array", "Memory layout looks correct"); + kit_free(json_out); + sxe_jitson_free(clone); + + prim_left.type = prim_right.type = SXE_JITSON_TYPE_ARRAY; + prim_left.len = prim_right.len = ~0; + ok(!sxe_jitson_stack_push_concat_array(stack, &prim_left, &prim_right, SXE_JITSON_TYPE_IS_REF), "Overflow error"); + is(errno, EOVERFLOW, "It's EOVERFLOW"); + } + + sxe_jitson_type_fini(); + sxe_thread_memory_free(SXE_THREAD_MEMORY_ALL); + is(kit_memory_allocations(), start_allocations, "No memory was leaked"); + + return exit_status(); +} diff --git a/lib-sxe-jitson/test/test-sxe-unicode.c b/lib-sxe-jitson/test/test-sxe-unicode.c new file mode 100644 index 0000000..b6ad4dc --- /dev/null +++ b/lib-sxe-jitson/test/test-sxe-unicode.c @@ -0,0 +1,29 @@ +#include +#include + +#include "sxe-test-memory.h" +#include "sxe-unicode.h" + +int +main(void) +{ + size_t memory; + char utf8[8]; + + plan_tests(10); + memset(utf8, 0, sizeof(utf8)); + memory = test_memory(); + + is(sxe_unicode_to_utf8('A', utf8), 1, "'A' is encoded in one byte"); + is_eq(utf8, "A", "'A' is encoded as 'A'"); + is(sxe_unicode_to_utf8(0xA2, utf8), 2, "Non-ASCII ISO 8851 characters take 2 bytes"); // Cent sign + is_eq(utf8, "\xC2\xA2", "0xA2 is encoded as 'A'"); + is(sxe_unicode_to_utf8(0x20AC, utf8), 3, "'\u20AC' is encoded as 3 bytes"); + is_eq(utf8, "\xE2\x82\xAC", "0x20AC is encoded as 0xE2 0x82 0xAC"); + is(sxe_unicode_to_utf8(0x10348, utf8), 4, "Gothic letter 
hwair is encoded as 4 bytes"); + is_eq(utf8, "\xF0\x90\x8D\x88", "'\U00010348' is encoded as 0xF0 0x90 0x8D 0x88"); + is(sxe_unicode_to_utf8(0xFFFFFFFF, utf8), 0, "0xFFFFFFFF is an invalid code point"); + + is(test_memory(), memory, "No memory was leaked"); + return exit_status(); +} diff --git a/lib-sxe-list/GNUmakefile b/lib-sxe-list/GNUmakefile new file mode 100644 index 0000000..2e58a50 --- /dev/null +++ b/lib-sxe-list/GNUmakefile @@ -0,0 +1,3 @@ +LIBRARIES = sxe-list + +include ../dependencies.mak diff --git a/lib-sxe-list/sxe-list-walker.c b/lib-sxe-list/sxe-list-walker.c new file mode 100644 index 0000000..8400a7a --- /dev/null +++ b/lib-sxe-list/sxe-list-walker.c @@ -0,0 +1,109 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "sxe-list.h" +#include "sxe-log.h" +#include "sxe-util.h" + +/** + * Construct a list walker + * + * @param walker Pointer to the list walker + * @param list Pointer to the list to walk + */ +void +sxe_list_walker_construct(SXE_LIST_WALKER * walker, SXE_LIST * list) +{ + SXEE6("sxe_list_walker_construct(walker=%p,list=%p)", walker, list); + walker->list = list; + walker->back = &list->sentinel; /* The sentinel in 'back' means there is no position to go back to */ + walker->node = &list->sentinel; /* Start on the sentinel; the first sxe_list_walker_step moves to the node after it */ + SXER6("return"); +} + +/** + * Go back to the previous node that the walker was pointing to + * + * @param walker Pointer to the list walker + * + * @return Pointer to the current object or NULL if the walker was previously at the list head or has already been moved back. + */ +void * +sxe_list_walker_back(SXE_LIST_WALKER * walker) +{ + void * result = NULL; + + SXEE6("sxe_list_walker_back(walker=%p)", walker); + + if (walker->back != &walker->list->sentinel) { + walker->node = walker->back; + walker->back = &walker->list->sentinel; /* Only one step back is remembered, so forget it once used */ + result = (char *)walker->node - walker->list->offset; /* Object address = node address less the node's offset within the object */ + } + + SXER6("return %p", result); + return result; +} +/** + * Find the current node that the walker is pointing to + * + * @param walker Pointer to the list walker + * + * @return Pointer to the current object or NULL if the walker is at the list head. + */ +void * +sxe_list_walker_find(SXE_LIST_WALKER * walker) +{ + void * result = NULL; + + SXEE6("sxe_list_walker_find(walker=%p)", walker); + + if (walker->node != &walker->list->sentinel) { + result = (char *)walker->node - walker->list->offset; /* Object address = node address less the node's offset within the object */ + } + + SXER6("return %p", result); + return result; +} + +/** + * Step to the next object in the list + * + * @param walker Pointer to the list walker + * + * @return Pointer to the next object or NULL if the end of the list has been reached. 
+ */ +void * +sxe_list_walker_step(SXE_LIST_WALKER * walker) +{ + void * result = NULL; + + SXEE6("sxe_list_walker_step(walker=%p)", walker); + walker->back = walker->node; /* Remember the current position so that sxe_list_walker_back can return to it */ + walker->node = SXE_PTR_FIX(walker->list, SXE_LIST_NODE *, walker->node->next); /* NOTE(review): SXE_PTR_FIX presumably turns the stored link into a usable pointer - confirm against its definition */ + + if (walker->node != &walker->list->sentinel) { + result = (char *)walker->node - walker->list->offset; /* Object address = node address less the node's offset within the object */ + } + + SXER6("return %p", result); + return result; +} diff --git a/lib-sxe-list/sxe-list.c b/lib-sxe-list/sxe-list.c new file mode 100644 index 0000000..5428dfd --- /dev/null +++ b/lib-sxe-list/sxe-list.c @@ -0,0 +1,236 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "sxe-list.h" +#include "sxe-log.h" +#include "sxe-util.h" + +/* Define some macros to hopefully simplify the position independence + */ +#define NULL_REL SXE_PTR_REL(list, void *, NULL) /* NULL in the form in which links are stored; uses the caller's 'list' variable */ +#define NODE_PTR_FIX(node) SXE_PTR_FIX(list, SXE_LIST_NODE *, node) /* Stored link to usable pointer; uses the caller's 'list' variable */ +#define SENTINEL_PTR_REL(list) SXE_PTR_REL(list, SXE_LIST_NODE *, &list->sentinel) /* The sentinel's address in stored form */ + +/* For readability, define these pseudo members for the list structure. + */ +#define HEAD sentinel.next +#define TAIL sentinel.prev + +/* List link prev pointers point headwards, and next pointers point tailwards. */ + +/** + * Construct a list object: an empty list whose head and tail links both refer to the sentinel. + * @param list/id/offset The list to set up, the id to give its sentinel, and the byte offset of the SXE_LIST_NODE within each object + * @note Trace logged at dump level to prevent flooding the logs at trace level on startup + */ +void +sxe_list_construct_impl(SXE_LIST * list, unsigned id, size_t offset) +{ + SXEE7("sxe_list_construct_impl(list=%p, id=%u, offset=%zu)", list, id, offset); + list->HEAD = SENTINEL_PTR_REL(list); + list->TAIL = SENTINEL_PTR_REL(list); + list->sentinel.id = id; + list->length = 0; + list->offset = offset; + SXER7("return"); +} + +/** + * Push an object onto the tail of a list; the object must embed an SXE_LIST_NODE at list->offset, which is given the list's id + */ +void +sxe_list_push(SXE_LIST * list, void * object) +{ + SXE_LIST_NODE * node; + + SXEE6("sxe_list_push(list=%p,object=%p)", list, object); + node = (SXE_LIST_NODE *)(void *)((char *)object + list->offset); /* Locate the link node embedded in the object */ + node->next = SENTINEL_PTR_REL(list); + node->prev = list->TAIL; + list->TAIL = SXE_PTR_REL(list, SXE_LIST_NODE *, node); + NODE_PTR_FIX(node->prev)->next = SXE_PTR_REL(list, SXE_LIST_NODE *, node); /* If the list was empty, this is the sentinel's next, i.e. the head */ + node->id = list->sentinel.id; + list->length++; + SXER6("return"); +} + +/** + * Unshift an object onto the head of a list; the object must embed an SXE_LIST_NODE at list->offset, which is given the list's id + */ +void +sxe_list_unshift(SXE_LIST * list, void * object) +{ + SXE_LIST_NODE * node; + + SXEE6("sxe_list_unshift(list=%p,object=%p)", list, object); + node = (SXE_LIST_NODE *)(void *)((char *)object + list->offset); /* Locate the link node embedded in the object */ + node->prev = SENTINEL_PTR_REL(list); + node->next = list->HEAD; + list->HEAD = SXE_PTR_REL(list, SXE_LIST_NODE *, node); + 
NODE_PTR_FIX(node->next)->prev = SXE_PTR_REL(list, SXE_LIST_NODE *, node); /* If the list was empty, this is the sentinel's prev, i.e. the tail */ + node->id = list->sentinel.id; + list->length++; + SXER6("return"); +} + +/** + * Remove an object from a list (returning a pointer to it); asserts that the list is not empty and that the object's node is linked + */ +void * +sxe_list_remove(SXE_LIST * list, void * object) +{ + SXE_LIST_NODE * node; + + SXEE6("sxe_list_remove(list=%p,object=%p)", list, object); + SXEA1((list->HEAD != SENTINEL_PTR_REL(list)) || (list->TAIL != SENTINEL_PTR_REL(list)), + "Can't remove an object from an empty list; list->HEAD %p, list->TAIL %p, SENTINEL_PTR_REL(list) %p", + list->HEAD, list->TAIL, SENTINEL_PTR_REL(list)); /* Fails only if both ends are the sentinel, i.e. the list is empty */ + SXEA1((list->HEAD != SENTINEL_PTR_REL(list)) && (list->TAIL != SENTINEL_PTR_REL(list)), + "List is in an inconsistent state; list->HEAD %p, list->TAIL %p, SENTINEL_PTR_REL(list) %p", + list->HEAD, list->TAIL, SENTINEL_PTR_REL(list)); /* Reached with a non-empty list, so neither end may be the sentinel */ + node = (SXE_LIST_NODE *)(void *)((char *)object + list->offset); + SXEA6(node->id == list->sentinel.id, "Node %p on list %p has id %u but list has %u", node, list, node->id, list->sentinel.id); + SXEA1((node->next != NULL_REL) && (node->prev != NULL_REL), "Node is not on a list"); + + NODE_PTR_FIX(node->next)->prev = node->prev; + NODE_PTR_FIX(node->prev)->next = node->next; + node->prev = NULL_REL; + node->next = NULL_REL; /* Mark the node as unlinked so that removing it again trips the "Node is not on a list" assertion */ + list->length--; + SXER6("return object=%p", object); + return object; +} + +/** + * Pop an object off the tail of a list, returning it or NULL if the list is empty + */ +void * +sxe_list_pop(SXE_LIST * list) +{ + SXE_LIST_NODE * node = NODE_PTR_FIX(list->TAIL); + void * result; + + SXEE6("sxe_list_pop(list=%p)", list); + + if (node == &list->sentinel) { + SXEA1(list->HEAD == SENTINEL_PTR_REL(list), "List has no element at its tail, but has %p at its head", + NODE_PTR_FIX(list->HEAD)); + result = NULL; + goto SXE_ERROR_OUT; + } + + result = sxe_list_remove(list, (char *)node - list->offset); /* Convert the node address back to the containing object's address */ + +SXE_ERROR_OUT: + SXER6("return %p", result); + return result; +} + +/** + * Shift an object off the head of a list, returning it or NULL if the list is empty + */ +void * +sxe_list_shift(SXE_LIST * list) 
+{ + SXE_LIST_NODE * node = NODE_PTR_FIX(list->HEAD); + void * result; + + SXEE6("sxe_list_shift(list=%p)", list); + + if (node == &list->sentinel) { + SXEA1(list->TAIL == SENTINEL_PTR_REL(list), "List has no element at its head, but has %p at its tail", + NODE_PTR_FIX(list->TAIL)); + result = NULL; + goto SXE_ERROR_OUT; + } + + result = sxe_list_remove(list, (char *)node - list->offset); /* Convert the node address back to the containing object's address */ + +SXE_ERROR_OUT: + SXER6("return %p", result); + return result; +} + +/** + * Peek at the object at the head of a list without removing it, returning it or NULL if the list is empty + */ +void * +sxe_list_peek_head(SXE_LIST * list) +{ + SXE_LIST_NODE * node; + void * result; + + SXEE6("sxe_list_peek_head(list=%p)", list); + + if (list->HEAD == SENTINEL_PTR_REL(list)) { + SXEA1(list->TAIL == SENTINEL_PTR_REL(list), "List head is the sentinel but tail is not the sentinel"); + result = NULL; + goto SXE_ERROR_OUT; + } + + SXEA1(list->TAIL != SENTINEL_PTR_REL(list), "List head is not the sentinel but tail is the sentinel"); + SXEA1(NODE_PTR_FIX(list->HEAD)->prev == SENTINEL_PTR_REL(list), "List's head object is not the first object"); + node = NODE_PTR_FIX(list->HEAD); + SXEA6(node->id == list->sentinel.id, "Node %p on list %p has id %u but list has %u", node, list, + node->id, list->sentinel.id); + + /* Catch a bug seen in pool testing. + */ + SXEA6(NODE_PTR_FIX(list->TAIL)->next == SENTINEL_PTR_REL(list), "List's tail object is not the last object"); + result = (char *)node - list->offset; + +SXE_ERROR_OUT: + SXER6("return object=%p%s", result, (result == 0) ? 
" // NULL" : ""); + return result; +} + +/** + * Peek at the object at the tail of a list without removing it, returning it or NULL if the list is empty + */ +void * +sxe_list_peek_tail(SXE_LIST * list) +{ + SXE_LIST_NODE * node; + void * result; + + SXEE6("sxe_list_peek_tail(list=%p)", list); + + if (list->TAIL == SENTINEL_PTR_REL(list)) { + SXEA1(list->HEAD == SENTINEL_PTR_REL(list), "List tail is the sentinel but head is not the sentinel"); + result = NULL; + goto SXE_ERROR_OUT; + } + + SXEA1(list->HEAD != SENTINEL_PTR_REL(list), "List tail is not the sentinel but head is the sentinel"); + SXEA1(NODE_PTR_FIX(list->TAIL)->next == SENTINEL_PTR_REL(list), "List's tail object is not the last object"); + node = NODE_PTR_FIX(list->TAIL); + SXEA6(node->id == list->sentinel.id, "Node %p on list %p has id %u but list has %u", node, list, + node->id, list->sentinel.id); + + /* Catch a bug seen in pool testing. + */ + SXEA6(NODE_PTR_FIX(list->HEAD)->prev == SENTINEL_PTR_REL(list), "List's head object is not the first object"); + result = (char *)node - list->offset; + +SXE_ERROR_OUT: + SXER6("return object=%p%s", result, (result == 0) ? " // NULL" : ""); + return result; +} diff --git a/lib-sxe-list/sxe-list.h b/lib-sxe-list/sxe-list.h new file mode 100644 index 0000000..be2d233 --- /dev/null +++ b/lib-sxe-list/sxe-list.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __SXE_LIST_H__ +#define __SXE_LIST_H__ + +#include +#include + +#define SXE_LIST_GET_LENGTH(list) ((list)->length) +#define SXE_LIST_IS_EMPTY(list) ((list)->length == 0) +#define SXE_LIST_NODE_GET_ID(node) ((node)->id) + +#define SXE_LIST_CONSTRUCT(list, id, type, node) sxe_list_construct_impl((list), (id), offsetof(type, node)) /* 'node' names the SXE_LIST_NODE member of 'type' */ + +typedef struct SXE_LIST_NODE { + struct SXE_LIST_NODE * next; /* Link towards the tail; NOTE(review): apparently kept in the stored form produced by SXE_PTR_REL - confirm */ + struct SXE_LIST_NODE * prev; /* Link towards the head, kept in the same form as next */ + unsigned id; /* Id copied from the list's sentinel when the node is pushed or unshifted */ +} SXE_LIST_NODE; + +typedef struct SXE_LIST { + SXE_LIST_NODE sentinel; /* sentinel.next is the head, sentinel.prev is the tail and sentinel.id is the list's id */ + unsigned length; /* Number of objects currently on the list */ + size_t offset; /* Byte offset of the SXE_LIST_NODE within each listed object */ +} SXE_LIST; + +typedef struct SXE_LIST_WALKER { + SXE_LIST * list; /* The list being walked */ + SXE_LIST_NODE * back; /* Node that sxe_list_walker_back returns to, or the list's sentinel if there is none */ + SXE_LIST_NODE * node; /* Current node; the list's sentinel before the first step and after the last */ +} SXE_LIST_WALKER; + +#include "lib-sxe-list-proto.h" + +#endif /* __SXE_LIST_H__ */ diff --git a/lib-sxe-list/test/test-sxe-list.c b/lib-sxe-list/test/test-sxe-list.c new file mode 100644 index 0000000..a300bb0 --- /dev/null +++ b/lib-sxe-list/test/test-sxe-list.c @@ -0,0 +1,149 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include + +#include "kit-alloc.h" +#include "sxe-list.h" +#include "sxe-log.h" + +static struct blob { + SXE_LIST list; + struct list_obj { + unsigned id; + SXE_LIST_NODE node; + } list_obj[4]; +} blob; /* The list and its objects live in one struct so that the whole lot can be duplicated with a single memcpy */ + +int +main(void) +{ + unsigned i; + struct list_obj * obj_ptr; + struct blob * blob_copy; + struct blob * blob_ptr; + SXE_LIST_WALKER walker; + struct list_obj * last_obj_ptr = NULL; /* STFU gcc */ + uint64_t start_allocations; + + for (i = 0; i < 4; i++) { + blob.list_obj[i].id = i; + } + + plan_tests(47); /* 6 tests before the loop, 20 for each of its 2 passes, and 1 leak check after it */ + start_allocations = kit_memory_allocations(); + // KIT_ALLOC_SET_LOG(1); // Turn off when done + + SXE_LIST_CONSTRUCT(&blob.list, 1, struct list_obj, node); + is(SXE_LIST_GET_LENGTH(&blob.list), 0, "List has 0 elements"); + is(sxe_list_peek_head( &blob.list), NULL, "Can't peek at head of empty list"); + is(sxe_list_peek_tail( &blob.list), NULL, "Can't peek at tail of empty list"); + + sxe_list_push(&blob.list, &blob.list_obj[0]); + obj_ptr = sxe_list_pop(&blob.list); + ok(obj_ptr != NULL, "Popped an object back from list"); + is(obj_ptr->id, 0, "Popped object 0 back from list"); + obj_ptr = sxe_list_pop(&blob.list); + ok(obj_ptr == NULL, "Pop detected that list is empty"); + + sxe_list_push(&blob.list, &blob.list_obj[0]); + sxe_list_unshift(&blob.list, &blob.list_obj[1]); + sxe_list_push(&blob.list, &blob.list_obj[2]); + sxe_list_unshift(&blob.list, &blob.list_obj[3]); /* List now contains: 3, 1, 0, 2 */ + + SXEA1((blob_copy = kit_malloc(sizeof(blob))) != NULL, "Couldn't allocate memory for the list and all the objects"); + memcpy(blob_copy, &blob, sizeof(blob)); /* NOTE(review): the copy is exercised exactly like the original below, which presumably relies on the links being position independent - confirm */ + + for (blob_ptr = &blob; blob_ptr != NULL; blob_ptr = (blob_ptr == &blob ? 
blob_copy : NULL)) { + is(sxe_list_peek_head(&blob_ptr->list), &blob_ptr->list_obj[3], "Peek at object 3 on head of list"); + is(sxe_list_peek_tail(&blob_ptr->list), &blob_ptr->list_obj[2], "Peek at object 2 on tail of list"); + is(SXE_LIST_GET_LENGTH(&blob_ptr->list), 4, "List has 4 elements"); + sxe_list_walker_construct(&walker, &blob_ptr->list); + + for (i = 0; (obj_ptr = (struct list_obj *)sxe_list_walker_step(&walker)) != NULL; i++) { + is(obj_ptr, (struct list_obj *)sxe_list_walker_find(&walker), "Walker find agrees with next"); + + if (obj_ptr->id == 1) { + i++; /* Count the matching element too, since break skips the loop's own increment */ + break; + } + } + + ok(obj_ptr != NULL, "Found an element with id == 1"); + is(i, 2, "Two elements visited"); + + sxe_list_walker_construct(&walker, &blob_ptr->list); + + for (i = 0; (obj_ptr = (struct list_obj *)sxe_list_walker_step(&walker)) != NULL; i++) { + last_obj_ptr = obj_ptr; /* Remember the last object visited for the sxe_list_walker_back check below */ + + if (obj_ptr->id == 4) { + i++; + break; + } + } + + ok(obj_ptr == NULL, "Found no element with id == 4"); + is(i, 4, "All four elements visited"); + obj_ptr = sxe_list_walker_back(&walker); + is(obj_ptr->id, last_obj_ptr->id, "Backed up to element %u is last element %u", + obj_ptr->id, last_obj_ptr->id); + obj_ptr = sxe_list_remove(&blob_ptr->list, &blob_ptr->list_obj[1]); + is(obj_ptr, &blob_ptr->list_obj[1], "Removed object 1"); + obj_ptr = sxe_list_shift(&blob_ptr->list); + ok(obj_ptr != NULL, "Shifted an object from the list"); + is(obj_ptr->id, 3, "Shifted object 3 from the list"); + obj_ptr = sxe_list_pop(&blob_ptr->list); + ok(obj_ptr != NULL, "Popped another object back from list"); + is(obj_ptr->id, 2, "Popped object 2 back from list"); + obj_ptr = sxe_list_shift(&blob_ptr->list); + ok(obj_ptr != NULL, "Shifted the last object from the list"); + is(obj_ptr->id, 0, "Shifted object 0 from the list"); + obj_ptr = sxe_list_shift(&blob_ptr->list); + ok(obj_ptr == NULL, "Shift detected that list is empty"); + + sxe_list_unshift(&blob_ptr->list, &blob_ptr->list_obj[1]); + sxe_list_push(&blob_ptr->list, 
&blob_ptr->list_obj[3]); + sxe_list_remove(&blob_ptr->list, &blob_ptr->list_obj[3]); + sxe_list_unshift(&blob_ptr->list, &blob_ptr->list_obj[3]); + sxe_list_remove(&blob_ptr->list, &blob_ptr->list_obj[3]); + + obj_ptr = sxe_list_shift(&blob_ptr->list); + ok(obj_ptr != NULL, "Shifted the last object from the list"); + is(obj_ptr->id, 1, "Shifted object 1 from the list"); + + /* Try to reproduce problem found while developing pools. + */ + SXE_LIST_CONSTRUCT(&blob_ptr->list, 2, struct list_obj, node); + sxe_list_push(&blob_ptr->list, &blob_ptr->list_obj[0]); + sxe_list_push(&blob_ptr->list, &blob_ptr->list_obj[1]); + sxe_list_remove(&blob_ptr->list, &blob_ptr->list_obj[0]); + sxe_list_push(&blob_ptr->list, &blob_ptr->list_obj[0]); + + memset(blob_ptr, 0xBE, sizeof(blob)); /* Scribble over the blob that has just been tested */ + } + + kit_free(blob_copy); + is(kit_memory_allocations(), start_allocations, "No memory was leaked"); + return exit_status(); +} diff --git a/lib-sxe-log/GNUmakefile b/lib-sxe-log/GNUmakefile new file mode 100644 index 0000000..a927ac8 --- /dev/null +++ b/lib-sxe-log/GNUmakefile @@ -0,0 +1,7 @@ +LIBRARIES = sxe-log + +include ../dependencies.mak + +ifneq ($(OS),Windows_NT) + LINK_FLAGS += -lpthread +endif diff --git a/lib-kit/kit-time.c b/lib-sxe-log/kit-time.c similarity index 55% rename from lib-kit/kit-time.c rename to lib-sxe-log/kit-time.c index 5cc3f78..f3e2027 100644 --- a/lib-kit/kit-time.c +++ b/lib-sxe-log/kit-time.c @@ -22,11 +22,13 @@ */ #include -#include "kit.h" +#include -/* Cache time for fast access */ -static __thread uint32_t cached_seconds; -static __thread uint64_t cached_nanoseconds; +#include "kit-time.h" + +static time_t start_seconds; // Starting value for seconds, set on first call to kit_time_get +static __thread uint32_t cached_seconds; // Cached seconds since start_seconds for fast access +static __thread uint64_t cached_nanoseconds; // Cached nanoseconds for fast access const char * kit_clocktype(void) @@ -38,72 +40,79 @@ kit_clocktype(void) #endif } -/* Calculate and 
return current seconds and nanoseconds */ +/* Calculate and return current seconds and nanoseconds + */ static void -kit_time_get(uint32_t *seconds, uint64_t *nanoseconds) +kit_time_get(uint32_t *seconds_out, uint64_t *nanoseconds_out) { + time_t seconds; + #ifdef CLOCK_MONOTONIC struct timespec ts; - if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) - return; /* COVERAGE EXCLUSION: todo: test clock_gettime() failure... can it fail? */ - - if (seconds != NULL) - *seconds = ts.tv_sec; - - if (nanoseconds != NULL) - *nanoseconds = ts.tv_sec * 1000000000ULL + ts.tv_nsec; + clock_gettime(CLOCK_MONOTONIC, &ts); + seconds = ts.tv_sec; + *nanoseconds_out = ts.tv_sec * 1000000000ULL + ts.tv_nsec; #else struct timeval tv; - if (gettimeofday(&tv, NULL) != 0) - return; + gettimeofday(&tv, NULL); + seconds = tv.tv_sec; + *nanoseconds_out = tv.tv_sec * 1000000000ULL + tv.tv_usec * 1000; +#endif - if (seconds != NULL) - *seconds = tv.tv_sec; + if (!start_seconds) + start_seconds = seconds - 1; - if (nanoseconds != NULL) - *nanoseconds = tv.tv_sec * 1000000000ULL + tv.tv_usec * 1000; -#endif + *seconds_out = (uint32_t)(seconds - start_seconds); // This shouldn't roll over until the box has been up for 136 years } +/* Return cached nanoseconds + */ uint64_t kit_time_cached_nsec(void) { return cached_nanoseconds; } +/* Return cached seconds since first called +1 + */ uint32_t kit_time_cached_sec(void) { return cached_seconds; } -/* Update the cached time values */ +/* Update the cached time + */ void kit_time_cached_update(void) { kit_time_get(&cached_seconds, &cached_nanoseconds); } -/* Return current nanoseconds */ +/* Return current nanoseconds + */ uint64_t kit_time_nsec(void) { - uint64_t nanoseconds = 0; - - kit_time_get(NULL, &nanoseconds); + uint32_t seconds; + uint64_t nanoseconds; + kit_time_get(&seconds, &nanoseconds); return nanoseconds; } -/* Return current seconds */ +/* Return current seconds since first called +1 + * + * @note On the first call, the value 1 is returned. 
Subsequent calls will be >= 1 + */ uint32_t kit_time_sec(void) { - uint32_t seconds = 0; - - kit_time_get(&seconds, NULL); + uint32_t seconds; + uint64_t nanoseconds; + kit_time_get(&seconds, &nanoseconds); return seconds; } diff --git a/lib-sxe-log/kit-time.h b/lib-sxe-log/kit-time.h new file mode 100644 index 0000000..97204db --- /dev/null +++ b/lib-sxe-log/kit-time.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +#ifndef KIT_TIME_H +#define KIT_TIME_H + +#include + +#include "kit-time-proto.h" + +static inline unsigned long +kit_time_ms(void) +{ + return kit_time_nsec() / 1000000ULL; +} + +#endif diff --git a/lib-sxe-log/kit-timestamp.c b/lib-sxe-log/kit-timestamp.c new file mode 100644 index 0000000..4d37e1c --- /dev/null +++ b/lib-sxe-log/kit-timestamp.c @@ -0,0 +1,56 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include
+#include
+#include
+
+#include "kit-mock.h"
+#include "kit-timestamp.h"
+
+#define TIMESTAMP_SECONDS_SIZE sizeof("YYYYmmDDHHMMSS")
+
+/* Since these functions will be called from the log package, they should not themselves log */
+
+/* Get the current time of day as a kit timestamp
+ *
+ * @return The time obtained from gettimeofday(), converted by kit_timestamp_from_timeval()
+ *
+ * @note gettimeofday() is deliberately called outside of assert(), so that the clock is still read when NDEBUG is defined
+ */
+kit_timestamp_t
+kit_timestamp_get(void)
+{
+    struct timeval tv;
+    int            ret;
+
+    ret = gettimeofday(&tv, NULL);
+    assert(ret >= 0);
+    (void)ret;    /* Only read by the assert(), which expands to nothing when NDEBUG is defined */
+    return kit_timestamp_from_timeval(&tv);
+}
+
+/* Format a kit timestamp into a buffer as "YYYYmmDDHHMMSS.uuuuuu", using UTC
+ *
+ * @param timestamp Kit timestamp to format
+ * @param buffer    Buffer to format into
+ * @param size      Size of the buffer in bytes; must be at least KIT_TIMESTAMP_STRING_SIZE
+ *
+ * @return buffer
+ *
+ * @note gmtime_r() and strftime() are called outside of assert(), so that the buffer is still filled in when NDEBUG
+ *       is defined
+ */
+char *
+kit_timestamp_to_buf(kit_timestamp_t timestamp, char * buffer, unsigned size)
+{
+    time_t      unix_time;
+    struct tm   broken_time;
+    struct tm * broken_ptr;
+    size_t      length;
+
+    assert(size >= KIT_TIMESTAMP_STRING_SIZE);
+    unix_time  = kit_timestamp_to_unix_time(timestamp);
+    broken_ptr = gmtime_r(&unix_time, &broken_time);
+    assert(broken_ptr != NULL);
+    length = strftime(buffer, size, "%Y%m%d%H%M%S", &broken_time);
+    assert(length != 0);
+    (void)broken_ptr;    /* Only read by the assert()s, which expand to nothing when NDEBUG is defined */
+    (void)length;
+
+    /* Overwrite the NUL written by strftime() with the decimal point, then append the microseconds. The fraction is
+     * always less than 1000000, so the cast to match the %lu conversion cannot truncate it.
+     */
+    buffer[TIMESTAMP_SECONDS_SIZE - 1] = '.';
+    snprintf(&buffer[TIMESTAMP_SECONDS_SIZE], size - TIMESTAMP_SECONDS_SIZE, "%06lu",
+             (unsigned long)((timestamp & ((1 << KIT_TIMESTAMP_BITS_IN_FRACTION) - 1)) * 1000000 / KIT_TIMESTAMP_1_SEC));
+    return buffer;
+}
diff --git a/lib-sxe-log/kit-timestamp.h b/lib-sxe-log/kit-timestamp.h
new file mode 100644
index 0000000..091c32b
--- /dev/null
+++ b/lib-sxe-log/kit-timestamp.h
@@ -0,0 +1,62 @@
+/* Copyright (c) 2010 Sophos Group.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __KIT_TIMESTAMP_H__
+#define __KIT_TIMESTAMP_H__
+
+#include
+#include
+
+#define KIT_TIMESTAMP_STRING_SIZE               sizeof("YYYYmmDDHHMMSS.uuuuuu")
+#define KIT_TIMESTAMP_BITS_IN_FRACTION          20
+#define KIT_TIMESTAMP_1_SEC                     ((kit_timestamp_t)(1UL << KIT_TIMESTAMP_BITS_IN_FRACTION))
+#define KIT_TIMESTAMP_FROM_UNIX_TIME(unix_time) ((kit_timestamp_t)(unix_time) << KIT_TIMESTAMP_BITS_IN_FRACTION)
+
+/* kit_timestamp_t is a single continuously increasing 64 bit timestamp that replaces SXE_TIME in the libkit library.
+ *
+ * SXE_TIME was 32bits of seconds since 1970 (time_t) followed by 32bits representing the fraction of the second in nanoseconds.
+ * kit_timestamp is 44 bits of the time_t followed by 20 bits needed to represent the fraction of the second in microseconds.
+ */
+typedef uint64_t kit_timestamp_t;
+
+/* Convert a kit timestamp to unix time by discarding the fraction bits
+ *
+ * @param timestamp Kit timestamp to convert
+ *
+ * @return The whole seconds held above the KIT_TIMESTAMP_BITS_IN_FRACTION fraction bits
+ */
+static inline time_t
+kit_timestamp_to_unix_time(kit_timestamp_t timestamp)
+{
+    kit_timestamp_t seconds = timestamp >> KIT_TIMESTAMP_BITS_IN_FRACTION;
+
+    return (time_t)seconds;
+}
+
+/* Convert a timeval to a kit timestamp
+ *
+ * @param tv Timeval whose tv_sec and tv_usec members are to be converted
+ *
+ * @return tv_sec shifted above the fraction bits, plus tv_usec rescaled from millionths of a second to
+ *         KIT_TIMESTAMP_1_SEC'ths of a second (truncated by the integer division)
+ */
+static inline kit_timestamp_t
+kit_timestamp_from_timeval(const struct timeval *tv)
+{
+    kit_timestamp_t seconds  = (kit_timestamp_t)tv->tv_sec << KIT_TIMESTAMP_BITS_IN_FRACTION;
+    kit_timestamp_t fraction = (kit_timestamp_t)tv->tv_usec * KIT_TIMESTAMP_1_SEC / 1000000;
+
+    return seconds + fraction;
+}
+
+/* Convert a kit timestamp to a timeval
+ *
+ * @param timestamp Kit timestamp to convert
+ * @param tv_out    Timeval whose tv_sec and tv_usec members are overwritten with the converted time
+ *
+ * @note The fraction is rescaled to microseconds with an integer division, so the result is truncated
+ */
+static inline void
+kit_timestamp_to_timeval(kit_timestamp_t timestamp, struct timeval *tv_out)
+{
+    kit_timestamp_t fraction = timestamp & ((1 << KIT_TIMESTAMP_BITS_IN_FRACTION) - 1);
+
+    tv_out->tv_usec = fraction * 1000000 / KIT_TIMESTAMP_1_SEC;
+    tv_out->tv_sec  = timestamp >> KIT_TIMESTAMP_BITS_IN_FRACTION;
+}
+
+#include "kit-timestamp-proto.h"
+
+#endif
diff --git a/lib-sxe-log/sxe-atomic.h b/lib-sxe-log/sxe-atomic.h
new file mode 100644
index 0000000..c6ab1c3
--- /dev/null
+++ b/lib-sxe-log/sxe-atomic.h
@@ -0,0 +1,89 @@
+/* Copyright (c) 2022 Jim Belton
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __SXE_ATOMIC__
+#define __SXE_ATOMIC__
+
+/* Atomic add and subtract operations on 32 and 64 bit unsigned values. In both implementations below, every
+ * function returns the value after the operation.
+ */
+
+/* If using Windows
+ */
+#ifdef _WIN32
+
+#include
+#include
+
+/* See https://docs.microsoft.com/en-us/windows/win32/api/winnt/nf-winnt-interlockedexchangeadd
+ *
+ * InterlockedExchangeAdd and InterlockedExchangeAdd64 return the value from before the addition, so the amount is
+ * applied to their result to give the value after the operation, matching the gcc implementations below. Amounts
+ * to subtract are negated as unsigned values, which is well defined for every amount.
+ */
+
+/* Atomically add amount to *to and return the new value
+ */
+static inline uint32_t
+sxe_atomic_add32(uint32_t volatile *to, uint32_t amount)
+{
+    return (uint32_t)InterlockedExchangeAdd((LONG volatile *)to, (LONG)amount) + amount;
+}
+
+/* Atomically add amount to *to and return the new value
+ */
+static inline uint64_t
+sxe_atomic_add64(uint64_t volatile *to, uint64_t amount)
+{
+    return (uint64_t)InterlockedExchangeAdd64((LONG64 volatile *)to, (LONG64)amount) + amount;
+}
+
+/* Atomically subtract amount from *from and return the new value
+ */
+static inline uint32_t
+sxe_atomic_sub32(uint32_t volatile *from, uint32_t amount)
+{
+    return (uint32_t)InterlockedExchangeAdd((LONG volatile *)from, (LONG)(0 - amount)) - amount;
+}
+
+/* Atomically subtract amount from *from and return the new value
+ */
+static inline uint64_t
+sxe_atomic_sub64(uint64_t volatile *from, uint64_t amount)
+{
+    return (uint64_t)InterlockedExchangeAdd64((LONG64 volatile *)from, (LONG64)(0 - amount)) - amount;
+}
+
+/* Otherwise, assume gcc
+ */
+#else
+
+/* See https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html#g_t_005f_005fsync-Builtins */
+
+/* Atomically add amount to *to and return the new value
+ */
+static inline uint32_t
+sxe_atomic_add32(uint32_t volatile *to, uint32_t amount)
+{
+    return __sync_add_and_fetch(to, amount);
+}
+
+/* Atomically add amount to *to and return the new value
+ */
+static inline uint64_t
+sxe_atomic_add64(uint64_t volatile *to, uint64_t amount)
+{
+    return __sync_add_and_fetch(to, amount);
+}
+
+/* Atomically subtract amount from *from and return the new value
+ */
+static inline uint32_t
+sxe_atomic_sub32(uint32_t volatile *from, uint32_t amount)
+{
+    return __sync_sub_and_fetch(from, amount);
+}
+
+/* Atomically subtract amount from *from and return the new value
+ */
+static inline uint64_t
+sxe_atomic_sub64(uint64_t volatile *from, uint64_t amount)
+{
+    return __sync_sub_and_fetch(from, amount);
+}
+
+#endif
+#endif
diff --git a/lib-sxe-log/sxe-log-legacy.h b/lib-sxe-log/sxe-log-legacy.h
new file mode 100644
index
0000000..a37a6f2 --- /dev/null +++ b/lib-sxe-log/sxe-log-legacy.h @@ -0,0 +1,768 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef __SXE_LOG_LEGACY_H__ +#define __SXE_LOG_LEGACY_H__ + +/* Legacy levels 8-9 are mapped to 6-7 */ + +#define SXEL8 SXEL6 +#define SXEE8 SXEE6 +#define SXER8 SXER6 +#define SXEA8 SXEA6 +#define SXEV8 SXEV6 +#define SXED8 SXED6 +#define SXEL8I SXEL6I +#define SXEE8I SXEE6I +#define SXER8I SXER6I +#define SXEA8I SXEA6I +#define SXEV8I SXEV6I +#define SXED8I SXED6I + +#define SXEL9 SXEL7 +#define SXEE9 SXEE7 +#define SXER9 SXER7 +#define SXEA9 SXEA7 +#define SXEV9 SXEV7 +#define SXED9 SXED7 +#define SXEL9I SXEL7I +#define SXEE9I SXEE7I +#define SXER9I SXER7I +#define SXEA9I SXEA7I +#define SXEV9I SXEV7I +#define SXED9I SXED7I + +#define SXEL10 SXEL1 +#define SXEL11 SXEL1 +#define SXEL12 SXEL1 +#define SXEL13 SXEL1 +#define SXEL14 SXEL1 +#define SXEL15 SXEL1 +#define SXEL16 SXEL1 +#define SXEL17 SXEL1 +#define SXEL18 SXEL1 +#define SXEL19 SXEL1 +#define SXEL20 SXEL2 +#define SXEL21 SXEL2 +#define SXEL22 SXEL2 +#define SXEL23 SXEL2 +#define SXEL24 SXEL2 +#define SXEL25 SXEL2 +#define SXEL26 SXEL2 +#define SXEL27 SXEL2 +#define SXEL28 SXEL2 +#define SXEL29 SXEL2 +#define SXEL30 SXEL3 +#define SXEL31 SXEL3 +#define SXEL32 SXEL3 +#define SXEL33 SXEL3 +#define SXEL34 SXEL3 +#define SXEL35 SXEL3 +#define SXEL36 SXEL3 +#define SXEL37 SXEL3 +#define SXEL38 SXEL3 +#define SXEL39 SXEL3 +#define SXEL40 SXEL4 +#define SXEL41 SXEL4 +#define SXEL42 SXEL4 +#define SXEL43 SXEL4 +#define SXEL44 SXEL4 +#define SXEL45 SXEL4 +#define SXEL46 SXEL4 +#define SXEL47 SXEL4 +#define SXEL48 SXEL4 +#define SXEL49 SXEL4 +#define SXEL50 SXEL5 +#define SXEL51 SXEL5 +#define SXEL52 SXEL5 +#define SXEL53 SXEL5 +#define SXEL54 SXEL5 +#define SXEL55 SXEL5 +#define SXEL56 SXEL5 +#define SXEL57 SXEL5 +#define SXEL58 SXEL5 +#define SXEL59 SXEL5 + +#define SXEA10 SXEA1 +#define SXEA11 SXEA1 +#define SXEA12 SXEA1 +#define SXEA13 SXEA1 +#define SXEA14 SXEA1 +#define SXEA15 SXEA1 +#define SXEA16 SXEA1 +#define SXEA17 SXEA1 +#define SXEA18 SXEA1 +#define SXEA19 SXEA1 +#define SXEA20 SXEA2 +#define 
SXEA21 SXEA2 +#define SXEA22 SXEA2 +#define SXEA23 SXEA2 +#define SXEA24 SXEA2 +#define SXEA25 SXEA2 +#define SXEA26 SXEA2 +#define SXEA27 SXEA2 +#define SXEA28 SXEA2 +#define SXEA29 SXEA2 +#define SXEA30 SXEA3 +#define SXEA31 SXEA3 +#define SXEA32 SXEA3 +#define SXEA33 SXEA3 +#define SXEA34 SXEA3 +#define SXEA35 SXEA3 +#define SXEA36 SXEA3 +#define SXEA37 SXEA3 +#define SXEA38 SXEA3 +#define SXEA39 SXEA3 +#define SXEA40 SXEA4 +#define SXEA41 SXEA4 +#define SXEA42 SXEA4 +#define SXEA43 SXEA4 +#define SXEA44 SXEA4 +#define SXEA45 SXEA4 +#define SXEA46 SXEA4 +#define SXEA47 SXEA4 +#define SXEA48 SXEA4 +#define SXEA49 SXEA4 +#define SXEA50 SXEA5 +#define SXEA51 SXEA5 +#define SXEA52 SXEA5 +#define SXEA53 SXEA5 +#define SXEA54 SXEA5 +#define SXEA55 SXEA5 +#define SXEA56 SXEA5 +#define SXEA57 SXEA5 +#define SXEA58 SXEA5 +#define SXEA59 SXEA5 + +#define SXEV10 SXEV1 +#define SXEV11 SXEV1 +#define SXEV12 SXEV1 +#define SXEV13 SXEV1 +#define SXEV14 SXEV1 +#define SXEV15 SXEV1 +#define SXEV16 SXEV1 +#define SXEV17 SXEV1 +#define SXEV18 SXEV1 +#define SXEV19 SXEV1 +#define SXEV20 SXEV2 +#define SXEV21 SXEV2 +#define SXEV22 SXEV2 +#define SXEV23 SXEV2 +#define SXEV24 SXEV2 +#define SXEV25 SXEV2 +#define SXEV26 SXEV2 +#define SXEV27 SXEV2 +#define SXEV28 SXEV2 +#define SXEV29 SXEV2 +#define SXEV30 SXEV3 +#define SXEV31 SXEV3 +#define SXEV32 SXEV3 +#define SXEV33 SXEV3 +#define SXEV34 SXEV3 +#define SXEV35 SXEV3 +#define SXEV36 SXEV3 +#define SXEV37 SXEV3 +#define SXEV38 SXEV3 +#define SXEV39 SXEV3 +#define SXEV40 SXEV4 +#define SXEV41 SXEV4 +#define SXEV42 SXEV4 +#define SXEV43 SXEV4 +#define SXEV44 SXEV4 +#define SXEV45 SXEV4 +#define SXEV46 SXEV4 +#define SXEV47 SXEV4 +#define SXEV48 SXEV4 +#define SXEV49 SXEV4 +#define SXEV50 SXEV5 +#define SXEV51 SXEV5 +#define SXEV52 SXEV5 +#define SXEV53 SXEV5 +#define SXEV54 SXEV5 +#define SXEV55 SXEV5 +#define SXEV56 SXEV5 +#define SXEV57 SXEV5 +#define SXEV58 SXEV5 +#define SXEV59 SXEV5 + +#define SXED10 SXED1 +#define SXED20 SXED2 
+#define SXED30 SXED3 +#define SXED40 SXED4 +#define SXED50 SXED5 + +#define SXEE50 SXEE5 +#define SXEE51 SXEE5 +#define SXEE52 SXEE5 +#define SXEE53 SXEE5 +#define SXEE54 SXEE5 +#define SXEE55 SXEE5 +#define SXEE56 SXEE5 +#define SXEE57 SXEE5 +#define SXEE58 SXEE5 +#define SXEE59 SXEE5 +#define SXER50 SXER5 +#define SXER51 SXER5 +#define SXER52 SXER5 +#define SXER53 SXER5 +#define SXER54 SXER5 +#define SXER55 SXER5 +#define SXER56 SXER5 +#define SXER57 SXER5 +#define SXER58 SXER5 +#define SXER59 SXER5 + +#define SXEL60 SXEL6 +#define SXEL61 SXEL6 +#define SXEL62 SXEL6 +#define SXEL63 SXEL6 +#define SXEL64 SXEL6 +#define SXEL65 SXEL6 +#define SXEL66 SXEL6 +#define SXEL67 SXEL6 +#define SXEL68 SXEL6 +#define SXEL69 SXEL6 +#define SXEE60 SXEE6 +#define SXEE61 SXEE6 +#define SXEE62 SXEE6 +#define SXEE63 SXEE6 +#define SXEE64 SXEE6 +#define SXEE65 SXEE6 +#define SXEE66 SXEE6 +#define SXEE67 SXEE6 +#define SXEE68 SXEE6 +#define SXEE69 SXEE6 +#define SXER60 SXER6 +#define SXER61 SXER6 +#define SXER62 SXER6 +#define SXER63 SXER6 +#define SXER64 SXER6 +#define SXER65 SXER6 +#define SXER66 SXER6 +#define SXER67 SXER6 +#define SXER68 SXER6 +#define SXER69 SXER6 +#define SXEA60 SXEA6 +#define SXEA61 SXEA6 +#define SXEA62 SXEA6 +#define SXEA63 SXEA6 +#define SXEA64 SXEA6 +#define SXEA65 SXEA6 +#define SXEA66 SXEA6 +#define SXEA67 SXEA6 +#define SXEA68 SXEA6 +#define SXEA69 SXEA6 +#define SXEV60 SXEV6 +#define SXEV61 SXEV6 +#define SXEV62 SXEV6 +#define SXEV63 SXEV6 +#define SXEV64 SXEV6 +#define SXEV65 SXEV6 +#define SXEV66 SXEV6 +#define SXEV67 SXEV6 +#define SXEV68 SXEV6 +#define SXEV69 SXEV6 +#define SXED60 SXED6 + +#define SXEL70 SXEL7 +#define SXEL71 SXEL7 +#define SXEL72 SXEL7 +#define SXEL73 SXEL7 +#define SXEL74 SXEL7 +#define SXEL75 SXEL7 +#define SXEL76 SXEL7 +#define SXEL77 SXEL7 +#define SXEL78 SXEL7 +#define SXEL79 SXEL7 +#define SXEE70 SXEE7 +#define SXEE71 SXEE7 +#define SXEE72 SXEE7 +#define SXEE73 SXEE7 +#define SXEE74 SXEE7 +#define SXEE75 SXEE7 +#define 
SXEE76 SXEE7 +#define SXEE77 SXEE7 +#define SXEE78 SXEE7 +#define SXEE79 SXEE7 +#define SXER70 SXER7 +#define SXER71 SXER7 +#define SXER72 SXER7 +#define SXER73 SXER7 +#define SXER74 SXER7 +#define SXER75 SXER7 +#define SXER76 SXER7 +#define SXER77 SXER7 +#define SXER78 SXER7 +#define SXER79 SXER7 +#define SXEA70 SXEA7 +#define SXEA71 SXEA7 +#define SXEA72 SXEA7 +#define SXEA73 SXEA7 +#define SXEA74 SXEA7 +#define SXEA75 SXEA7 +#define SXEA76 SXEA7 +#define SXEA77 SXEA7 +#define SXEA78 SXEA7 +#define SXEA79 SXEA7 +#define SXEV70 SXEV7 +#define SXEV71 SXEV7 +#define SXEV72 SXEV7 +#define SXEV73 SXEV7 +#define SXEV74 SXEV7 +#define SXEV75 SXEV7 +#define SXEV76 SXEV7 +#define SXEV77 SXEV7 +#define SXEV78 SXEV7 +#define SXEV79 SXEV7 +#define SXED70 SXED7 + +#define SXEL10I SXEL1I +#define SXEL11I SXEL1I +#define SXEL12I SXEL1I +#define SXEL13I SXEL1I +#define SXEL14I SXEL1I +#define SXEL15I SXEL1I +#define SXEL16I SXEL1I +#define SXEL17I SXEL1I +#define SXEL18I SXEL1I +#define SXEL19I SXEL1I +#define SXEL20I SXEL2I +#define SXEL21I SXEL2I +#define SXEL22I SXEL2I +#define SXEL23I SXEL2I +#define SXEL24I SXEL2I +#define SXEL25I SXEL2I +#define SXEL26I SXEL2I +#define SXEL27I SXEL2I +#define SXEL28I SXEL2I +#define SXEL29I SXEL2I +#define SXEL30I SXEL3I +#define SXEL31I SXEL3I +#define SXEL32I SXEL3I +#define SXEL33I SXEL3I +#define SXEL34I SXEL3I +#define SXEL35I SXEL3I +#define SXEL36I SXEL3I +#define SXEL37I SXEL3I +#define SXEL38I SXEL3I +#define SXEL39I SXEL3I +#define SXEL40I SXEL4I +#define SXEL41I SXEL4I +#define SXEL42I SXEL4I +#define SXEL43I SXEL4I +#define SXEL44I SXEL4I +#define SXEL45I SXEL4I +#define SXEL46I SXEL4I +#define SXEL47I SXEL4I +#define SXEL48I SXEL4I +#define SXEL49I SXEL4I +#define SXEL50I SXEL5I +#define SXEL51I SXEL5I +#define SXEL52I SXEL5I +#define SXEL53I SXEL5I +#define SXEL54I SXEL5I +#define SXEL55I SXEL5I +#define SXEL56I SXEL5I +#define SXEL57I SXEL5I +#define SXEL58I SXEL5I +#define SXEL59I SXEL5I + +#define SXEA10I SXEA1I +#define 
SXEA11I SXEA1I +#define SXEA12I SXEA1I +#define SXEA13I SXEA1I +#define SXEA14I SXEA1I +#define SXEA15I SXEA1I +#define SXEA16I SXEA1I +#define SXEA17I SXEA1I +#define SXEA18I SXEA1I +#define SXEA19I SXEA1I +#define SXEA20I SXEA2I +#define SXEA21I SXEA2I +#define SXEA22I SXEA2I +#define SXEA23I SXEA2I +#define SXEA24I SXEA2I +#define SXEA25I SXEA2I +#define SXEA26I SXEA2I +#define SXEA27I SXEA2I +#define SXEA28I SXEA2I +#define SXEA29I SXEA2I +#define SXEA30I SXEA3I +#define SXEA31I SXEA3I +#define SXEA32I SXEA3I +#define SXEA33I SXEA3I +#define SXEA34I SXEA3I +#define SXEA35I SXEA3I +#define SXEA36I SXEA3I +#define SXEA37I SXEA3I +#define SXEA38I SXEA3I +#define SXEA39I SXEA3I +#define SXEA40I SXEA4I +#define SXEA41I SXEA4I +#define SXEA42I SXEA4I +#define SXEA43I SXEA4I +#define SXEA44I SXEA4I +#define SXEA45I SXEA4I +#define SXEA46I SXEA4I +#define SXEA47I SXEA4I +#define SXEA48I SXEA4I +#define SXEA49I SXEA4I +#define SXEA50I SXEA5I +#define SXEA51I SXEA5I +#define SXEA52I SXEA5I +#define SXEA53I SXEA5I +#define SXEA54I SXEA5I +#define SXEA55I SXEA5I +#define SXEA56I SXEA5I +#define SXEA57I SXEA5I +#define SXEA58I SXEA5I +#define SXEA59I SXEA5I + +#define SXEV10I SXEV1I +#define SXEV11I SXEV1I +#define SXEV12I SXEV1I +#define SXEV13I SXEV1I +#define SXEV14I SXEV1I +#define SXEV15I SXEV1I +#define SXEV16I SXEV1I +#define SXEV17I SXEV1I +#define SXEV18I SXEV1I +#define SXEV19I SXEV1I +#define SXEV20I SXEV2I +#define SXEV21I SXEV2I +#define SXEV22I SXEV2I +#define SXEV23I SXEV2I +#define SXEV24I SXEV2I +#define SXEV25I SXEV2I +#define SXEV26I SXEV2I +#define SXEV27I SXEV2I +#define SXEV28I SXEV2I +#define SXEV29I SXEV2I +#define SXEV30I SXEV3I +#define SXEV31I SXEV3I +#define SXEV32I SXEV3I +#define SXEV33I SXEV3I +#define SXEV34I SXEV3I +#define SXEV35I SXEV3I +#define SXEV36I SXEV3I +#define SXEV37I SXEV3I +#define SXEV38I SXEV3I +#define SXEV39I SXEV3I +#define SXEV40I SXEV4I +#define SXEV41I SXEV4I +#define SXEV42I SXEV4I +#define SXEV43I SXEV4I +#define 
SXEV44I SXEV4I +#define SXEV45I SXEV4I +#define SXEV46I SXEV4I +#define SXEV47I SXEV4I +#define SXEV48I SXEV4I +#define SXEV49I SXEV4I +#define SXEV50I SXEV5I +#define SXEV51I SXEV5I +#define SXEV52I SXEV5I +#define SXEV53I SXEV5I +#define SXEV54I SXEV5I +#define SXEV55I SXEV5I +#define SXEV56I SXEV5I +#define SXEV57I SXEV5I +#define SXEV58I SXEV5I +#define SXEV59I SXEV5I + +#define SXED10I SXED1I +#define SXED20I SXED2I +#define SXED30I SXED3I +#define SXED40I SXED4I +#define SXED50I SXED5I + +#define SXEL60I SXEL6I +#define SXEL61I SXEL6I +#define SXEL62I SXEL6I +#define SXEL63I SXEL6I +#define SXEL64I SXEL6I +#define SXEL65I SXEL6I +#define SXEL66I SXEL6I +#define SXEL67I SXEL6I +#define SXEL68I SXEL6I +#define SXEL69I SXEL6I +#define SXEE60I SXEE6I +#define SXEE61I SXEE6I +#define SXEE62I SXEE6I +#define SXEE63I SXEE6I +#define SXEE64I SXEE6I +#define SXEE65I SXEE6I +#define SXEE66I SXEE6I +#define SXEE67I SXEE6I +#define SXEE68I SXEE6I +#define SXEE69I SXEE6I +#define SXER60I SXER6I +#define SXER61I SXER6I +#define SXER62I SXER6I +#define SXER63I SXER6I +#define SXER64I SXER6I +#define SXER65I SXER6I +#define SXER66I SXER6I +#define SXER67I SXER6I +#define SXER68I SXER6I +#define SXER69I SXER6I +#define SXEA60I SXEA6I +#define SXEA61I SXEA6I +#define SXEA62I SXEA6I +#define SXEA63I SXEA6I +#define SXEA64I SXEA6I +#define SXEA65I SXEA6I +#define SXEA66I SXEA6I +#define SXEA67I SXEA6I +#define SXEA68I SXEA6I +#define SXEA69I SXEA6I +#define SXEV60I SXEV6I +#define SXEV61I SXEV6I +#define SXEV62I SXEV6I +#define SXEV63I SXEV6I +#define SXEV64I SXEV6I +#define SXEV65I SXEV6I +#define SXEV66I SXEV6I +#define SXEV67I SXEV6I +#define SXEV68I SXEV6I +#define SXEV69I SXEV6I +#define SXED60I SXED6I + +#define SXEL70I SXEL7I +#define SXEL71I SXEL7I +#define SXEL72I SXEL7I +#define SXEL73I SXEL7I +#define SXEL74I SXEL7I +#define SXEL75I SXEL7I +#define SXEL76I SXEL7I +#define SXEL77I SXEL7I +#define SXEL78I SXEL7I +#define SXEL79I SXEL7I +#define SXEE70I SXEE7I +#define 
SXEE71I SXEE7I +#define SXEE72I SXEE7I +#define SXEE73I SXEE7I +#define SXEE74I SXEE7I +#define SXEE75I SXEE7I +#define SXEE76I SXEE7I +#define SXEE77I SXEE7I +#define SXEE78I SXEE7I +#define SXEE79I SXEE7I +#define SXER70I SXER7I +#define SXER71I SXER7I +#define SXER72I SXER7I +#define SXER73I SXER7I +#define SXER74I SXER7I +#define SXER75I SXER7I +#define SXER76I SXER7I +#define SXER77I SXER7I +#define SXER78I SXER7I +#define SXER79I SXER7I +#define SXEA70I SXEA7I +#define SXEA71I SXEA7I +#define SXEA72I SXEA7I +#define SXEA73I SXEA7I +#define SXEA74I SXEA7I +#define SXEA75I SXEA7I +#define SXEA76I SXEA7I +#define SXEA77I SXEA7I +#define SXEA78I SXEA7I +#define SXEA79I SXEA7I +#define SXEV70I SXEV7I +#define SXEV71I SXEV7I +#define SXEV72I SXEV7I +#define SXEV73I SXEV7I +#define SXEV74I SXEV7I +#define SXEV75I SXEV7I +#define SXEV76I SXEV7I +#define SXEV77I SXEV7I +#define SXEV78I SXEV7I +#define SXEV79I SXEV7I +#define SXED70I SXED7I + +#define SXEL80 SXEL8 +#define SXEL81 SXEL8 +#define SXEL82 SXEL8 +#define SXEL83 SXEL8 +#define SXEL84 SXEL8 +#define SXEL85 SXEL8 +#define SXEL86 SXEL8 +#define SXEL87 SXEL8 +#define SXEL88 SXEL8 +#define SXEL89 SXEL8 +#define SXEE80 SXEE8 +#define SXEE81 SXEE8 +#define SXEE82 SXEE8 +#define SXEE83 SXEE8 +#define SXEE84 SXEE8 +#define SXEE85 SXEE8 +#define SXEE86 SXEE8 +#define SXEE87 SXEE8 +#define SXEE88 SXEE8 +#define SXEE89 SXEE8 +#define SXER80 SXER8 +#define SXER81 SXER8 +#define SXER82 SXER8 +#define SXER83 SXER8 +#define SXER84 SXER8 +#define SXER85 SXER8 +#define SXER86 SXER8 +#define SXER87 SXER8 +#define SXER88 SXER8 +#define SXER89 SXER8 +#define SXEA80 SXEA8 +#define SXEA81 SXEA8 +#define SXEA82 SXEA8 +#define SXEA83 SXEA8 +#define SXEA84 SXEA8 +#define SXEA85 SXEA8 +#define SXEA86 SXEA8 +#define SXEA87 SXEA8 +#define SXEA88 SXEA8 +#define SXEA89 SXEA8 +#define SXEV80 SXEV8 +#define SXEV81 SXEV8 +#define SXEV82 SXEV8 +#define SXEV83 SXEV8 +#define SXEV84 SXEV8 +#define SXEV85 SXEV8 +#define SXEV86 SXEV8 +#define 
SXEV87 SXEV8 +#define SXEV88 SXEV8 +#define SXEV89 SXEV8 +#define SXED80 SXED8 + +#define SXEL90 SXEL9 +#define SXEL91 SXEL9 +#define SXEL92 SXEL9 +#define SXEL93 SXEL9 +#define SXEL94 SXEL9 +#define SXEL95 SXEL9 +#define SXEL96 SXEL9 +#define SXEL97 SXEL9 +#define SXEL98 SXEL9 +#define SXEL99 SXEL9 +#define SXEE90 SXEE9 +#define SXEE91 SXEE9 +#define SXEE92 SXEE9 +#define SXEE93 SXEE9 +#define SXEE94 SXEE9 +#define SXEE95 SXEE9 +#define SXEE96 SXEE9 +#define SXEE97 SXEE9 +#define SXEE98 SXEE9 +#define SXEE99 SXEE9 +#define SXER90 SXER9 +#define SXER91 SXER9 +#define SXER92 SXER9 +#define SXER93 SXER9 +#define SXER94 SXER9 +#define SXER95 SXER9 +#define SXER96 SXER9 +#define SXER97 SXER9 +#define SXER98 SXER9 +#define SXER99 SXER9 +#define SXED90 SXED9 + +#define SXEL80I SXEL8I +#define SXEL81I SXEL8I +#define SXEL82I SXEL8I +#define SXEL83I SXEL8I +#define SXEL84I SXEL8I +#define SXEL85I SXEL8I +#define SXEL86I SXEL8I +#define SXEL87I SXEL8I +#define SXEL88I SXEL8I +#define SXEL89I SXEL8I +#define SXEE80I SXEE8I +#define SXEE81I SXEE8I +#define SXEE82I SXEE8I +#define SXEE83I SXEE8I +#define SXEE84I SXEE8I +#define SXEE85I SXEE8I +#define SXEE86I SXEE8I +#define SXEE87I SXEE8I +#define SXEE88I SXEE8I +#define SXEE89I SXEE8I +#define SXER80I SXER8I +#define SXER81I SXER8I +#define SXER82I SXER8I +#define SXER83I SXER8I +#define SXER84I SXER8I +#define SXER85I SXER8I +#define SXER86I SXER8I +#define SXER87I SXER8I +#define SXER88I SXER8I +#define SXER89I SXER8I +#define SXEA80I SXEA8I +#define SXEA81I SXEA8I +#define SXEA82I SXEA8I +#define SXEA83I SXEA8I +#define SXEA84I SXEA8I +#define SXEA85I SXEA8I +#define SXEA86I SXEA8I +#define SXEA87I SXEA8I +#define SXEA88I SXEA8I +#define SXEA89I SXEA8I +#define SXEV80I SXEV8I +#define SXEV81I SXEV8I +#define SXEV82I SXEV8I +#define SXEV83I SXEV8I +#define SXEV84I SXEV8I +#define SXEV85I SXEV8I +#define SXEV86I SXEV8I +#define SXEV87I SXEV8I +#define SXEV88I SXEV8I +#define SXEV89I SXEV8I +#define SXED80I SXED8I + +#define 
SXEL90I SXEL9I +#define SXEL91I SXEL9I +#define SXEL92I SXEL9I +#define SXEL93I SXEL9I +#define SXEL94I SXEL9I +#define SXEL95I SXEL9I +#define SXEL96I SXEL9I +#define SXEL97I SXEL9I +#define SXEL98I SXEL9I +#define SXEL99I SXEL9I +#define SXEE90I SXEE9I +#define SXEE91I SXEE9I +#define SXEE92I SXEE9I +#define SXEE93I SXEE9I +#define SXEE94I SXEE9I +#define SXEE95I SXEE9I +#define SXEE96I SXEE9I +#define SXEE97I SXEE9I +#define SXEE98I SXEE9I +#define SXEE99I SXEE9I +#define SXER90I SXER9I +#define SXER91I SXER9I +#define SXER92I SXER9I +#define SXER93I SXER9I +#define SXER94I SXER9I +#define SXER95I SXER9I +#define SXER96I SXER9I +#define SXER97I SXER9I +#define SXER98I SXER9I +#define SXER99I SXER9I +#define SXED90I SXED9I + +#endif /* __SXE_LOG_LEGACY_H__ */ diff --git a/lib-sxe-log/sxe-log.c b/lib-sxe-log/sxe-log.c new file mode 100644 index 0000000..d21e232 --- /dev/null +++ b/lib-sxe-log/sxe-log.c @@ -0,0 +1,958 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include /* for PATH_MAX */ +#include /* for va_list */ +#include +#include /* for exit() & getenv() */ +#include +#include + +#ifdef _WIN32 +#include +#include /* for GetCurrentThreadId() */ +#include /* for _set_error_mode() */ +#else +#include /* for openlog(), syslog() */ +#include /* for syscall(), SYS_gettid */ +#include /* for gettimeofday() */ +#ifdef __FreeBSD__ +# include +#endif +#include /* for getpid() */ +#include "kit-mock.h" /* allow mocking openlog() and syslog() */ +#endif + +#include "sxe-log.h" + +/* Undefine mocks for timeGetTime() and gettimeofday(), so that we always use the real ones. + * This prevents an infinite loop in other tests, which call SXE[RLEA]... macros inside of their + * mocked timeGetTime and getimeofday functions! 
+ */
+#ifdef _WIN32
+#undef timeGetTime
+#else
+#undef gettimeofday
+#endif
+
+/* Size of the line buffer used by sxe_log_assert(); currently the same as the normal log line buffer */
+#define SXE_LOG_BUFFER_SIZE_ASSERT SXE_LOG_BUFFER_SIZE
+
+/* Smallest and largest levels that are valid for a log line, derived from the sentinel enumerands */
+#define SXE_LOG_LEVEL_MINIMUM (SXE_LOG_LEVEL_UNDER_MINIMUM + 1)
+#define SXE_LOG_LEVEL_MAXIMUM (SXE_LOG_LEVEL_OVER_MAXIMUM - 1)
+
+/* Level used when neither the program nor the environment has set one; may be overridden at build time */
+#ifndef SXE_LOG_LEVEL_DEFAULT
+#define SXE_LOG_LEVEL_DEFAULT SXE_LOG_LEVEL_MAXIMUM
+#endif
+
+/* Assert (via sxe_log_assert(), which aborts) unless SXE_LOG_LEVEL_MINIMUM <= level <= maximum_level.
+ * Both arguments are stringified into the assertion text, so the spelling of the arguments is part of the message.
+ */
+#define SXE_LOG_LEVEL_VALIDATE(level, maximum_level) \
+    do { \
+        if (!((level) >= SXE_LOG_LEVEL_MINIMUM && (level) <= (maximum_level))) { \
+            sxe_log_assert(NULL, __FILE__, ~0U, __LINE__, #level " >= SXE_LOG_LEVEL_MINIMUM && " #level " <= " #maximum_level, \
+                           "%s(): Detected invalid level %d", __func__, (level)); \
+        } \
+    } while (0)
+
+/* Global variables used by header in-lines for performance */
+
+#ifdef __APPLE__
+pthread_key_t sxe_log_stack_key;
+pthread_key_t sxe_log_indent_maximum_key;
+unsigned sxe_log_transaction_id = SXE_LOG_NO_ID; // Apple should support __thread. For now, global on Apple.
+#else
+__thread SXE_LOG_STACK sxe_log_stack;
+__thread unsigned sxe_log_indent_maximum = 0; // To allow release mode tests to link, always define
+__thread unsigned sxe_log_transaction_id = SXE_LOG_NO_ID; // Value displayed in id column if no id passed.
+#endif
+
+/* Local variables */
+
+/* Global level; SXE_LOG_LEVEL_OVER_MAXIMUM means it has not been set or read from the environment yet */
+static volatile SXE_LOG_LEVEL sxe_log_level = SXE_LOG_LEVEL_OVER_MAXIMUM;
+/* Head of the list of per source file control objects that have been used at least once */
+static volatile SXE_LOG_CONTROL * sxe_log_control_list = NULL;
+/* Incremented by sxe_log_set_level(); compared against each stack's era in sxe_log_get_indent() */
+static volatile unsigned sxe_log_setting_era = 0;
+/* Flags stored by sxe_log_set_options() */
+static unsigned sxe_log_options = 0;
+
+/* Expands to a switch case that returns the enumerand's name without its SXE_RETURN_ prefix */
+#define SXE_RETURN_CASE(ret) case SXE_RETURN_ ## ret: return #ret
+
+void sxe_log_assert_cb_default(void) { /* do nothing in the name of code coverage */ }
+
+/* NOTE(review): the comment below allows NULL, but sxe_log_assert() calls through this pointer unconditionally - confirm */
+SXE_LOG_ASSERT_CB sxe_log_assert_cb = &sxe_log_assert_cb_default; /* NULL or function to call before abort() in sxe_log_assert() */
+
+/**
+ * Set options for logging
+ *
+ * @param options Flags that control the log.
+ */ +void +sxe_log_set_options(unsigned options) +{ + sxe_log_options = options; +} + +/** + * Set the id field per thread + * + * @param id An integer used as an id that identifies a transaction (connection, etc) or SXE_LOG_NO_ID to clear it + * + * @note The SXE[ELR]#I macros allow the identifier to be overridden with a more specific value. + */ +void +sxe_log_set_thread_id(unsigned id) +{ + sxe_log_transaction_id = id; +} + +const char * +sxe_return_to_string(SXE_RETURN ret) +{ + /* Convert to string. If any enumerand is missing, should get a compiler error (works with gcc). + */ + switch (ret) { + SXE_RETURN_CASE(OK); + SXE_RETURN_CASE(NO_SUCH_PROCESS); + SXE_RETURN_CASE(ERROR_INTERRUPTED); + SXE_RETURN_CASE(ERROR_WRITE_FAILED); + SXE_RETURN_CASE(ERROR_COMMAND_NOT_RUN); + SXE_RETURN_CASE(WARN_ALREADY_CLOSED); + SXE_RETURN_CASE(WARN_WOULD_BLOCK); + SXE_RETURN_CASE(ERROR_ALLOC); + SXE_RETURN_CASE(ERROR_INVALID); + SXE_RETURN_CASE(OUT_OF_RANGE); + SXE_RETURN_CASE(DEADLOCK_WOULD_OCCUR); + SXE_RETURN_CASE(ERROR_LOCK_NOT_TAKEN); + SXE_RETURN_CASE(END_OF_FILE); + SXE_RETURN_CASE(ERROR_BAD_MESSAGE); + SXE_RETURN_CASE(ERROR_INCORRECT_STATE); + SXE_RETURN_CASE(ERROR_ADDRESS_IN_USE); + SXE_RETURN_CASE(NO_UNUSED_ELEMENTS); + SXE_RETURN_CASE(ERROR_ALREADY_CONNECTED); + SXE_RETURN_CASE(ERROR_NO_CONNECTION); + SXE_RETURN_CASE(ERROR_TIMED_OUT); + SXE_RETURN_CASE(WARN_ALREADY_INITIALIZED); + SXE_RETURN_CASE(IN_PROGRESS); + SXE_RETURN_CASE(ERROR_NOT_INITIALIZED); + SXE_RETURN_CASE(UNCATEGORIZED); + SXE_RETURN_CASE(EXPIRED_VALUE); + SXE_RETURN_CASE(ERROR_INTERNAL); + SXE_RETURN_CASE(INVALID_VALUE); /* Required to make compiler happy */ + } + + return NULL; // Could fall back to errno enums as strings if strerror_name_np was available +} + +static bool +sxe_log_safe_append(char * log_buffer, unsigned * index_ptr, int appended) +{ + /* If output was truncated, Win32 returns negative result while Linux returns positive result! 
+ */ + if ((appended < 0) || ((unsigned)appended >= SXE_LOG_BUFFER_SIZE - *index_ptr)) + { + log_buffer[SXE_LOG_BUFFER_SIZE - 4] = '.'; + log_buffer[SXE_LOG_BUFFER_SIZE - 3] = '.'; + log_buffer[SXE_LOG_BUFFER_SIZE - 2] = '\n'; + log_buffer[SXE_LOG_BUFFER_SIZE - 1] = '\0'; + return false; + } + + *index_ptr += appended; + return true; +} + +static void +sxe_log_line_out_default(SXE_LOG_LEVEL level, const char * line) +{ + SXE_UNUSED_PARAMETER(level); + + fputs(line, stderr); + fflush(stderr); +} + +/* Log level indicators, corresponding to the SXE_LOG_LEVEL enum + */ +static const char *level_to_str[] = { "???", "FAT", "ERR", "WAR", "INF", "DEB", "TRA", "DUM" }; + +static unsigned +sxe_log_prefix_default(char * log_buffer, unsigned id, SXE_LOG_LEVEL level) +{ + size_t length; + int ret; +#if defined(WIN32) + SYSTEMTIME st; + DWORD ThreadId; +#else + struct timeval mytv; + struct tm mytm; +# if defined(__APPLE__) + pid_t ThreadId; + pid_t ProcessId; +# elif defined(__FreeBSD__) + long ThreadId; + pid_t ProcessId; +#else + pid_t ThreadId; +#endif +#endif + +#if defined(WIN32) + GetSystemTime(&st); + ThreadId = GetCurrentThreadId(); + + ret = snprintf(log_buffer, SXE_LOG_BUFFER_SIZE, "%04d%02d%02d %02d%02d%02d.%03d T%08x ", st.wYear, st.wMonth, st.wDay, + st.wHour, st.wMinute, st.wSecond, st.wMilliseconds, ThreadId); +#else + gettimeofday(&mytv, NULL); + gmtime_r(&mytv.tv_sec, &mytm); +# if defined(__APPLE__) + ThreadId = syscall(SYS_thread_selfid); + ProcessId = getpid(); + ret = snprintf(log_buffer, SXE_LOG_BUFFER_SIZE, "%04d%02d%02d %02d%02d%02d.%03ld P% 10d T% 10d ", mytm.tm_year + 1900, + mytm.tm_mon + 1, mytm.tm_mday, mytm.tm_hour, mytm.tm_min, mytm.tm_sec, (long)mytv.tv_usec / 1000, + ProcessId, ThreadId); +# elif defined(__FreeBSD__) + thr_self(&ThreadId); + ProcessId = getpid(); + ret = snprintf(log_buffer, SXE_LOG_BUFFER_SIZE, "%04d%02d%02d %02d%02d%02d.%03ld P% 10d T% 10ld ", + mytm.tm_year + 1900, mytm.tm_mon + 1, mytm.tm_mday, mytm.tm_hour, mytm.tm_min, 
mytm.tm_sec, + (long)mytv.tv_usec / 1000, ProcessId, ThreadId); +#else + ThreadId = gettid(); + ret = snprintf(log_buffer, SXE_LOG_BUFFER_SIZE, "%04d%02d%02d %02d%02d%02d.%03ld T% 10d ", mytm.tm_year + 1900, + mytm. tm_mon + 1, mytm.tm_mday, mytm.tm_hour, mytm.tm_min, mytm.tm_sec, mytv.tv_usec / 1000, ThreadId); +#endif /* !__APPLE__ && !__FreeBSD__ */ +#endif + + SXEA6(ret >= 0 && ret < SXE_LOG_BUFFER_SIZE, "Error or timestamp and thread id overflowed the log line buffer"); + + if (id == SXE_LOG_NO_ID || id == (~0U - 1)) { + if (sxe_log_options & SXE_LOG_OPTION_ID_HEX) { + length = strlcpy(&log_buffer[ret], "-------- ", SXE_LOG_BUFFER_SIZE - ret); + SXEA6(length == sizeof("-------- ") - 1, "Id overflowed the log line buffer"); + } + else { + length = strlcpy(&log_buffer[ret], "------ ", SXE_LOG_BUFFER_SIZE - ret); + SXEA6(length == sizeof("------ ") - 1, "Id overflowed the log line buffer"); + } + } + else { + length = ret; + + if (sxe_log_options & SXE_LOG_OPTION_ID_HEX) + ret = snprintf(&log_buffer[length], SXE_LOG_BUFFER_SIZE - length, "%08x ", id); + else + ret = snprintf(&log_buffer[length], SXE_LOG_BUFFER_SIZE - length, "%6u ", id); + + SXEA6(ret >= 0 && length + ret < SXE_LOG_BUFFER_SIZE, "Error or id overflowed the log line buffer"); + } + + length += ret; + + if (sxe_log_options & SXE_LOG_OPTION_LEVEL_TEXT) { + ret = snprintf(&log_buffer[length], SXE_LOG_BUFFER_SIZE - length, "%s ", + level_to_str[level < SXE_LOG_LEVEL_OVER_MAXIMUM ? 
level : 0]); + } + else { + ret = snprintf(&log_buffer[length], SXE_LOG_BUFFER_SIZE - length, "%c ", '0' + level); + } + + SXEA6(ret >= 0 && length + ret < SXE_LOG_BUFFER_SIZE, "Error or level overflowed the log line buffer"); + return (unsigned)(length + ret); +} + +static SXE_LOG_LINE_OUT_PTR sxe_log_line_out = &sxe_log_line_out_default; +static SXE_LOG_BUFFER_PREFIX_PTR sxe_log_buffer_prefix = &sxe_log_prefix_default; + +/** + * Override the default logging (to stderr) function + * + * @param line_out Pointer to a function that takes two arguments, a log level and a pointer to a log line + * + * @return Pointer to the previous logging function + */ +SXE_LOG_LINE_OUT_PTR +sxe_log_hook_line_out(void (*line_out)(SXE_LOG_LEVEL level, const char * line)) +{ + SXE_LOG_LINE_OUT_PTR previous_line_out = sxe_log_line_out; + + if (line_out == NULL) { + sxe_log_line_out = sxe_log_line_out_default; + } + else { + sxe_log_line_out = line_out; + } + + return previous_line_out; +} + +/** + * Override the default buffer prefix function + * + * @param line_out = Pointer to a function that takes three arguments, a log buffer, an object id, and a log level and returns + * the number of bytes saved in the buffer + * + * @return Pointer to the previous buffer prefix function + */ +SXE_LOG_BUFFER_PREFIX_PTR +sxe_log_hook_buffer_prefix(unsigned (*buffer_prefix)(char * log_buffer, unsigned id, SXE_LOG_LEVEL level)) +{ + SXE_LOG_BUFFER_PREFIX_PTR previous_buffer_prefix = sxe_log_buffer_prefix; + + if (buffer_prefix == NULL) { + sxe_log_buffer_prefix = sxe_log_prefix_default; + } + else { + sxe_log_buffer_prefix = buffer_prefix; + } + + return previous_buffer_prefix; +} + +static void +sxe_log_line_out_escaped(SXE_LOG_LEVEL level, const char * line) +{ + unsigned from; + unsigned to; + char line_escaped[SXE_LOG_BUFFER_SIZE * 4]; /* Worse case, all hex encoded */ + + for (from = 0; isprint(line[from]); from++) + /* Skip printable characters */; + + if (line[from] != '\0') { + 
memcpy(line_escaped, line, from); + + for (to = from; line[from] != '\0'; from++) { + if (line[from] == '\\' ) { + line_escaped[to++] = '\\'; + line_escaped[to++] = '\\'; + } + else if (line[from] == '\n' ) { + line_escaped[to++] = '\\'; + line_escaped[to++] = 'n' ; + } + else if (line[from] == '\r' ) { + line_escaped[to++] = '\\'; + line_escaped[to++] = 'r' ; + } + else if (isprint((unsigned char)line[from])) { + line_escaped[to++] = line[from]; + } + else { + to += snprintf(&line_escaped[to], 5, "\\x%02X", (unsigned)(unsigned char)line[from]); + } + } + + if (to > SXE_LOG_BUFFER_SIZE) { + to = SXE_LOG_BUFFER_SIZE; + line_escaped[to - 4] = '.'; + line_escaped[to - 3] = '.'; + } + + line_escaped[to - 2] = '\n'; + line_escaped[to - 1] = '\0'; + line = line_escaped; + } + + (*sxe_log_line_out)(level, line); +} + +/* Set all control levels back to OVER_MAXIMUM (i.e. unknown); do this after any change to log levels + */ +void +sxe_log_control_forget_all_levels(void) +{ + volatile SXE_LOG_CONTROL * control = sxe_log_control_list; + + if (control == NULL) { + return; + } + + for (;;) { + if (!(control->next != NULL)) { + sxe_log_assert(NULL, __FILE__, ~0U, __LINE__, "control->next != NULL", /* Coverage Exclusion: Can't happen */ + "sxe_log_control_forget_all_levels(): Log control object in list is not fully initialized"); + } + + SXE_LOG_LEVEL_VALIDATE(control->level, SXE_LOG_LEVEL_OVER_MAXIMUM); + control->level = SXE_LOG_LEVEL_OVER_MAXIMUM; + + if (control == control->next) { + break; + } + + control = control->next; + } +} + +/** + * Set the global log level + * + * @param level = Level to set (e.g. 
SXE_LOG_LEVEL_INFO)
+ *
+ * @return Previous global log level or SXE_LOG_LEVEL_OVER_MAXIMUM if log level has not been set/read from environment
+ */
+
+SXE_LOG_LEVEL
+sxe_log_set_level(SXE_LOG_LEVEL level)
+{
+    SXE_LOG_LEVEL level_previous = sxe_log_level;
+
+    /* SXE_LOG_LEVEL_OVER_MAXIMUM is accepted for both, so a value returned earlier can be passed back to restore it */
+    SXE_LOG_LEVEL_VALIDATE(level_previous, SXE_LOG_LEVEL_OVER_MAXIMUM);
+    SXE_LOG_LEVEL_VALIDATE(level, SXE_LOG_LEVEL_OVER_MAXIMUM);
+    sxe_log_level = level;
+    sxe_log_setting_era++;                   /* Makes sxe_log_get_indent() recompute the indent of each stack frame */
+    sxe_log_control_forget_all_levels();     /* Cached per file levels must be learned again */
+    return level_previous;
+}
+
+/**
+ * Get the global log level
+ *
+ * @return Global log level or SXE_LOG_LEVEL_OVER_MAXIMUM if log level has not been set/read from environment
+ */
+
+SXE_LOG_LEVEL
+sxe_log_get_level(void)
+{
+    return sxe_log_level;
+}
+
+/**
+ * Decrease the global log level
+ *
+ * @param level = Level to decrease to (e.g. SXE_LOG_LEVEL_WARN); If log level is already less than this value, no action is taken
+ *
+ * @return Previous global log level or SXE_LOG_LEVEL_OVER_MAXIMUM if log level has not been set/read from environment
+ *
+ * @note To restore the log level, set it to the value returned by this function.
+ */ +SXE_LOG_LEVEL +sxe_log_decrease_level(SXE_LOG_LEVEL level) +{ + SXE_LOG_LEVEL level_previous = sxe_log_level; + + SXE_LOG_LEVEL_VALIDATE(level_previous, SXE_LOG_LEVEL_OVER_MAXIMUM); + SXE_LOG_LEVEL_VALIDATE(level, SXE_LOG_LEVEL_MAXIMUM); + + if (level < level_previous) { + sxe_log_level = level; + sxe_log_control_forget_all_levels(); + } + + return level_previous; +} + +/* TODO: Add a global function that allows programmatic setting of fine grained levels */ + +static SXE_RETURN +sxe_log_level_getenv(SXE_LOG_LEVEL * out_level, const char * variable_name) +{ + SXE_RETURN result = SXE_RETURN_ERROR_INTERNAL; + SXE_LOG_LEVEL level; + const char * level_string; + + if ((level_string = getenv(variable_name)) == NULL) { + goto SXE_ERROR_OUT; + } + + level = atoi(level_string); + + if (level < SXE_LOG_LEVEL_MINIMUM || level > SXE_LOG_LEVEL_MAXIMUM) { + /* TODO: Should log an error about the bad level value here */ + goto SXE_ERROR_OUT; /* COVERAGE EXCLUSION: todo: Nothing tests setting bad SXE_LOG_LEVEL values */ + } + + *out_level = level; + result = SXE_RETURN_OK; + +SXE_ERROR_OUT: + return result; +} + +#ifdef _WIN32 +static void +sxe_log_suppress_crash_handlers(void) +{ + _set_error_mode(_OUT_TO_STDERR); /* pre-msvcrt80 mechanism */ + +#ifdef MAKE_MINGW + // mingw doesn't support msvcrt80? +#else + // When the app crashes, don't print the abort message and don't call Dr. Watson to make a crash dump + _set_abort_behavior( 0, _WRITE_ABORT_MSG | _CALL_REPORTFAULT ); +#endif +} + +static bool sxe_log_crash_handlers_supression_initialized = false; +#endif + +/* Determine the log level based on the source file name. Levels can be controlled via environment variables. 
+ */
+static SXE_LOG_LEVEL
+sxe_log_control_learn_level(volatile SXE_LOG_CONTROL * control)
+{
+    SXE_LOG_LEVEL              level;
+    volatile SXE_LOG_CONTROL * control_head;
+    char                       variable_name[PATH_MAX];
+    unsigned                   i;
+    char                     * end_of_component = NULL;
+    char                     * end_of_package   = NULL;
+
+    /* If this is the first time this control structure has been accessed, link it into the list
+     */
+    if (control->next == NULL) {
+        /* Keep trying to make our control structure the first in the list until we don't race with another thread.
+         */
+        do {
+            control_head = sxe_log_control_list;
+        } while (__sync_val_compare_and_swap(&sxe_log_control_list, control_head, control) != control_head);
+
+        /* The first control ever linked points to itself; sxe_log_control_forget_all_levels() stops at such a control */
+        control->next = control_head == NULL ? control : control_head;
+
+#ifdef _WIN32
+        /* This is a great place to do a one-time disabling of the crash message-box on Windows
+         */
+        if (!sxe_log_crash_handlers_supression_initialized) {
+            sxe_log_crash_handlers_supression_initialized = true;
+
+            if (getenv("SXE_WINNT_ASSERT_MSGBOX") == NULL) {
+                sxe_log_suppress_crash_handlers();
+            }
+        }
+#endif
+    }
+
+    if ((level = control->level) <= SXE_LOG_LEVEL_MAXIMUM) {    /* Has the control's level already been learned? */
+        return level;
+    }
+
+    strlcpy(variable_name, "SXE_LOG_LEVEL_", sizeof(variable_name));
+
+    /* Construct an environment variable name from the component/package/file name
+     *  - if SXE_FILE is supplied by the make: e.g. libsxe/lib-sxe/sxe.c
+     *  - if not, __FILE__ is used:            e.g. sxe.c
+     *
+     * '-' and '/' become '_', other characters are upper cased, and the name ends at the first '.' or at the end of
+     * the file name. The positions of the first and second '/' are remembered so the name can be shortened below.
+     */
+    for (i = 0; sizeof("SXE_LOG_LEVEL_") + i < sizeof(variable_name); i++) {
+        if (control->file[i] == '-' || control->file[i] == '/') {
+            variable_name[sizeof("SXE_LOG_LEVEL_") - 1 + i] = '_';
+
+            if (control->file[i] == '/') {
+                if (end_of_component == NULL) {
+                    end_of_component = &variable_name[sizeof("SXE_LOG_LEVEL_") - 1 + i];
+                }
+                else if (end_of_package == NULL) {
+                    end_of_package = &variable_name[sizeof("SXE_LOG_LEVEL_") - 1 + i];
+                }
+            }
+        }
+        else if (control->file[i] == '.' || control->file[i] == '\0') {
+            variable_name[sizeof("SXE_LOG_LEVEL_") - 1 + i] = '\0';
+            break;
+        }
+        else {
+            variable_name[sizeof("SXE_LOG_LEVEL_") - 1 + i] = toupper(control->file[i]);
+        }
+    }
+
+    /* TODO: Print a warning if end_of_component or end_of_package is not set */
+
+    variable_name[sizeof(variable_name) - 1] = '\0';
+
+    /* The settings are tried from most to least specific: file, package, component, programmatic global level, the
+     * SXE_LOG_LEVEL environment variable and finally the default. The file specific variable is only tried if the scan
+     * above did not stop at the end of the file name.
+     */
+    if (control->file[i] != '\0' && sxe_log_level_getenv(&level, variable_name) == SXE_RETURN_OK) {    /* Has a file specific level been set? */
+        goto SXE_EARLY_OUT;    /* COVERAGE EXCLUSION: TODO: add coverage for this line */
+    }
+
+    if (end_of_package != NULL) {
+        *end_of_package = '\0';
+
+        if (sxe_log_level_getenv(&level, variable_name) == SXE_RETURN_OK) {    /* Has a package specific level been set? */
+            goto SXE_EARLY_OUT;
+        }
+    }
+
+    if (end_of_component != NULL) {
+        *end_of_component = '\0';
+
+        if (sxe_log_level_getenv(&level, variable_name) == SXE_RETURN_OK) {    /* Has a component specific level been set? */
+            goto SXE_EARLY_OUT;
+        }
+    }
+
+    /* TODO: Check more specific level settings */
+
+    if ((level = sxe_log_level) <= SXE_LOG_LEVEL_MAXIMUM) {    /* Has a global level has been set programmatically? */
+        goto SXE_EARLY_OUT;
+    }
+
+    if (sxe_log_level_getenv(&level, "SXE_LOG_LEVEL") == SXE_RETURN_OK) {    /* Has a global level has been set in the environment? */
+        sxe_log_level = level;
+        goto SXE_EARLY_OUT;
+    }
+
+    level = SXE_LOG_LEVEL_DEFAULT;    /* Otherwise, use the default level.*/
+
+SXE_EARLY_OUT:
+    control->level = level;    /* Cache the level until sxe_log_control_forget_all_levels() is next called */
+    return level;
+}
+
+/* Get the indent stored in the top frame of the calling thread's log stack. If the setting era has changed since the
+ * stack was last examined, first recompute the indent of every frame using the control's current level.
+ */
+static unsigned
+sxe_log_get_indent(volatile SXE_LOG_CONTROL * control, unsigned id, int line)
+{
+    SXE_LOG_STACK * stack = sxe_log_get_stack(control->file, id, line);
+    unsigned        depth = 0;
+    SXE_LOG_FRAME * frame;
+
+    SXE_UNUSED_PARAMETER(id);
+    SXE_UNUSED_PARAMETER(line);
+
+#if SXE_DEBUG
+    if (stack->era > sxe_log_setting_era) {
+        sxe_log_assert(NULL, control->file, id, line, "stack->era <= sxe_log_setting_era",
+                       "Previous stack era %u exceeds current setting era %u", stack->era, sxe_log_setting_era);
+    }
+#endif
+
+    /* A loop rather than an if, so the work is repeated if the era changes again while it is in progress */
+    while (stack->era < sxe_log_setting_era) {
+        stack->era = sxe_log_setting_era;
+
+        /* First pass: count the frames whose level is currently enabled */
+        for (frame = stack->top; frame != &stack->bottom; frame = frame->caller) {
+            if (frame->level <= sxe_log_control_learn_level(control)) {
+                depth++;
+            }
+        }
+
+        /* Second pass: from the top frame down, store the count and reduce it after each enabled frame */
+        for (frame = stack->top; frame != &stack->bottom; frame = frame->caller) {
+            frame->indent = depth;
+
+            if (frame->level <= sxe_log_control_learn_level(control)) {
+                depth--;
+            }
+        }
+    }
+
+    return stack->top->indent;
+}
+
+/**
+ * Output a line to the log. Normally, this is called via one of the SXEL#[I] wrapper macros in sxe-log.h
+ *
+ * @param control Per source file control block. These are automatically created by the sxe-log.h file for each source file.
+ * @param func    Function name. These are passed by the macros using the __func__ builtin.
+ * @param id      Transaction identifier or SXE_LOG_NO_ID to use the per thread id. If both are SXE_LOG_NO_ID, uses '------'.
+ * @param line    Line number. These are passed by the macros using the __LINE__ builtin.
+ * @param level   SXE_LOG_LEVEL value. One of ERROR, WARNING, INFORMATION, DEBUG, TRACE, or DUMP.
+ * @param format  Printf-like format string followed by optional arguments
+ *
+ * @note In previous versions, file was passed instead of func.
It is now required to be set in the source file control object. + */ +__printflike(6, 7) void +sxe_log(volatile SXE_LOG_CONTROL * control, const char * func, unsigned id, int line, SXE_LOG_LEVEL level, const char * format, ...) +{ + char log_buffer[SXE_LOG_BUFFER_SIZE]; + va_list ap; + unsigned i; + + if (level > sxe_log_control_learn_level(control)) { + return; + } + + if (id == SXE_LOG_NO_ID) // If no id specified, use the per thread value + id = sxe_log_transaction_id; + + i = (*sxe_log_buffer_prefix)(log_buffer, id, level); + va_start(ap, format); + + if (sxe_log_safe_append(log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, "%*s%s", + 2 * sxe_log_get_indent(control, id, line), "", (id == ~0U - 1) ? "" : "- ")) + && ((format[0] != ':' && format[0] != '(') + || sxe_log_safe_append(log_buffer, &i, (int)strlcpy(&log_buffer[i], func, SXE_LOG_BUFFER_SIZE - i))) + && sxe_log_safe_append(log_buffer, &i, vsnprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, format, ap))) + { + sxe_log_safe_append( log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, "\n")); + } + + va_end(ap); + sxe_log_line_out_escaped(level, log_buffer); +} + +__printflike(5, 6) void +sxe_log_entry(SXE_LOG_FRAME * frame, volatile SXE_LOG_CONTROL * control, unsigned id, SXE_LOG_LEVEL level, + const char * format, ...) 
+{ + char log_buffer[SXE_LOG_BUFFER_SIZE]; + va_list ap; + unsigned i; + unsigned prefix_length; + + if (level > sxe_log_control_learn_level(control)) { + sxe_log_frame_push(frame, false); + return; + } + + if (id == SXE_LOG_NO_ID) // If no id specified, use the per thread value + id = sxe_log_transaction_id; + + sxe_log_frame_push(frame, true); + prefix_length = (*sxe_log_buffer_prefix)(log_buffer, id, level); + i = prefix_length; + va_start(ap, format); + + if (sxe_log_safe_append(log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, "%*s+ ", + 2 * sxe_log_get_indent(control, id, frame->line) - 2, "")) + && (format[0] != '(' + || sxe_log_safe_append(log_buffer, &i, (int)strlcpy(&log_buffer[i], frame->function, SXE_LOG_BUFFER_SIZE - i))) + && sxe_log_safe_append(log_buffer, &i, vsnprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, format, ap))) + { + sxe_log_safe_append(log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, "\n")); + } + + va_end(ap); + sxe_log_line_out_escaped(level, log_buffer); + i = prefix_length; + sxe_log_safe_append(log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, "%*s { // %s:%d\n", + 2 * sxe_log_get_indent(control, id, frame->line) - 2, "", frame->file, frame->line)); + sxe_log_line_out_escaped(level, log_buffer); +} + +void +sxe_log_return(volatile SXE_LOG_CONTROL * control, const SXE_LOG_FRAME * frame, SXE_LOG_LEVEL level) +{ + char log_buffer[SXE_LOG_BUFFER_SIZE]; + unsigned id; + unsigned i; + + sxe_log_frame_pop(frame); + + if (level > sxe_log_control_learn_level(control)) { + return; + } + + id = frame->id == SXE_LOG_NO_ID ? 
sxe_log_transaction_id : frame->id; // If no id specified, use the per thread value + i = (*sxe_log_buffer_prefix)(log_buffer, id, level); + sxe_log_safe_append(log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, "%*s} // %s:%d\n", + 2 * sxe_log_get_indent(control, id, frame->line) + 2, "", frame->file, + frame->line)); + sxe_log_line_out_escaped(level, log_buffer); +} + +/** + * Output a fatal error and abort. This is normally called via the SXEA#[I] macros defined in sxe-log.h + * + * @param control Per source file control object (not used) + * @param file Full file name. __FILE__ is passed by the macros. + * @param id Transaction identifier or SXE_LOG_NO_ID to use the per thread id. If both are SXE_LOG_NO_ID, uses '------'. + * @param line Line number. These are passed by the macros using the __LINE__ builtin. + * @param con Text of the conditional expression that failed + * @param format Printf-like format string followed by optional arguments + */ +__printflike(6, 7) __noreturn void +sxe_log_assert(const volatile SXE_LOG_CONTROL * control, const char * file, unsigned id, int line, const char * con, const char * format, ...) 
+{ + char log_buffer[SXE_LOG_BUFFER_SIZE_ASSERT]; + va_list ap; + unsigned length = 0; + const char * where = "in"; + SXE_LOG_STACK * stack; + SXE_LOG_FRAME * frame; + + SXE_UNUSED_PARAMETER(control); + + (*sxe_log_assert_cb)(); + + if (id == SXE_LOG_NO_ID) // If no id specified, use the per thread value + id = sxe_log_transaction_id; + + length = (*sxe_log_buffer_prefix)(log_buffer, id, 1); + va_start(ap, format); + + if (sxe_log_safe_append(log_buffer, &length, snprintf(&log_buffer[length], SXE_LOG_BUFFER_SIZE - length, + "ERROR: assertion '%s' failed at %s:%d; ", con, file, line)) + && sxe_log_safe_append(log_buffer, &length, vsnprintf(&log_buffer[length], SXE_LOG_BUFFER_SIZE - length, format, ap))) { + sxe_log_safe_append(log_buffer, &length, snprintf(&log_buffer[length], SXE_LOG_BUFFER_SIZE - length, "\n")); + } + + va_end(ap); + +SXE_EARLY_OR_ERROR_OUT: + + sxe_log_line_out_escaped(SXE_LOG_LEVEL_FATAL, log_buffer); + stack = sxe_log_get_stack(file, id, line); + + /* Stack traceback + */ + for (frame = stack->top; frame && frame != &stack->bottom; frame = frame->caller) { + length = 0; + sxe_log_safe_append(log_buffer, &length, snprintf(&log_buffer[length], SXE_LOG_BUFFER_SIZE - length, + " %s function %s() at %s:%d\n", where, frame->function, frame->file, frame->line)); + sxe_log_line_out_escaped(SXE_LOG_LEVEL_FATAL, log_buffer); + where = "called from"; + } + +#ifdef _WIN32 + if (getenv("SXE_WINNT_ASSERT_MSGBOX") == NULL) { + length = 0; + sxe_log_safe_append(log_buffer, &length, snprintf(&log_buffer[length], SXE_LOG_BUFFER_SIZE - length, + "NOTE: set SXE_WINNT_ASSERT_MSGBOX=1 to assert into the Microsoft visual debugger\n")); + sxe_log_line_out_escaped(SXE_LOG_LEVEL_FATAL, log_buffer); + } +#endif + + abort(); +} + +void +sxe_log_dump_memory(volatile SXE_LOG_CONTROL * control, const char * func, unsigned id, int line, SXE_LOG_LEVEL level, + const void * pointer, unsigned length) +{ + char log_buffer[SXE_LOG_BUFFER_SIZE]; + unsigned prefix_length; + const 
unsigned char * memory; + unsigned c; + unsigned i; + unsigned j; + unsigned k; + + SXE_UNUSED_PARAMETER(func); + + if (level > sxe_log_control_learn_level(control)) { + return; + } + + prefix_length = (*sxe_log_buffer_prefix)(log_buffer, id, level); + memory = (const unsigned char *)pointer; + + /* TODO: pretty up dumping :-) */ + /* - 0x12345678 56 78 12 34 56 78 12 34 56 78 .......... */ + /* - 0x12345678 12 34 56 78 12 34 56 78 12 34 56 78 12 34 56 78 ................ */ + /* - 0x12345678 12 34 56 78 .... */ + + for (k = 0; k <= length / 16; k++) + { + i = prefix_length; + + if (!sxe_log_safe_append( log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, "%*s- ", + 2 * sxe_log_get_indent(control, id, line), "")) + || !sxe_log_safe_append( log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, + sizeof(unsigned long) > 4 ? "%016lx" : "%08lx", + (unsigned long)(memory + (k * 16))))) + { + goto SXE_EARLY_OUT; /* COVERAGE EXCLUSION: Line truncation in sxe_log_dump_memory function */ + } + + /* print 16 bytes unless we get to the end */ + for(j = 0; j < 16; j++) { + if (((k * 16) + j) < length) + { + if (!sxe_log_safe_append( log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, " %02x", memory[(k * 16) + j]))) { + goto SXE_EARLY_OUT; /* COVERAGE EXCLUSION: Line truncation in sxe_log_dump_memory function */ + } + } + else + { + if (!sxe_log_safe_append( log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, " "))) { + goto SXE_EARLY_OUT; /* COVERAGE EXCLUSION: Line truncation in sxe_log_dump_memory function */ + } + } + } + + if (!sxe_log_safe_append( log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, " "))) { + goto SXE_EARLY_OUT; /* COVERAGE EXCLUSION: Line truncation in sxe_log_dump_memory function */ + } + + for (j = 0; (j < 16) && (((k * 16) + j) < length); j++) { + c = memory[(k * 16) + j]; + c = c < 0x20 ? '.' : c; + c = c > 0x7E ? '.' 
: c; + + if (!sxe_log_safe_append( log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, "%c", c))) { + goto SXE_EARLY_OUT; /* COVERAGE EXCLUSION: Line truncation in sxe_log_dump_memory function */ + } + } + + sxe_log_safe_append( log_buffer, &i, snprintf(&log_buffer[i], SXE_LOG_BUFFER_SIZE - i, "\n")); + +SXE_EARLY_OR_ERROR_OUT: + sxe_log_line_out_escaped(level, log_buffer); + } /* for (k = ... */ +} /* sxe_debug_dump_memory() */ + +#ifndef _WIN32 +static void +sxe_log_line_to_syslog(SXE_LOG_LEVEL level, const char *line) +{ + int syslog_level; + + switch (level) { + case SXE_LOG_LEVEL_FATAL: syslog_level = LOG_ERR; break; + case SXE_LOG_LEVEL_ERROR: syslog_level = LOG_WARNING; break; + case SXE_LOG_LEVEL_WARNING: syslog_level = LOG_NOTICE; break; + case SXE_LOG_LEVEL_INFORMATION: syslog_level = LOG_INFO; break; + default: syslog_level = LOG_DEBUG; break; + } + + syslog(syslog_level, "%s", line); +} + +static unsigned +sxe_log_buffer_prefix_syslog(char * log_buffer, unsigned id, SXE_LOG_LEVEL level) +{ + size_t length; +#if defined(__APPLE__) + pid_t ThreadId = syscall(SYS_thread_selfid); + + snprintf(log_buffer, SXE_LOG_BUFFER_SIZE, "T=%d ", ThreadId); +#elif defined(__FreeBSD__) + long ThreadId; + thr_self(&ThreadId); + + snprintf(log_buffer, SXE_LOG_BUFFER_SIZE, "T=%ld ", ThreadId); +#else + pid_t ThreadId = gettid(); + + snprintf(log_buffer, SXE_LOG_BUFFER_SIZE, "T=%d ", ThreadId); +#endif + + if (id == ~0U || id == (~0U - 1)) { + strlcat(log_buffer, "------ ", SXE_LOG_BUFFER_SIZE); + } + else { + length = strlen(log_buffer); + snprintf(&log_buffer[length], SXE_LOG_BUFFER_SIZE - length, "%6u ", id); + } + + if ((length = strlen(log_buffer)) < SXE_LOG_BUFFER_SIZE - 2) { + log_buffer[length++] = (char)('0' + level); + log_buffer[length++] = ' '; + log_buffer[length] = '\0'; + } + + return (unsigned)length; +} + +void +sxe_log_use_syslog(const char *ident, int option, int facility) +{ + openlog(ident, option, facility); + 
sxe_log_hook_buffer_prefix(sxe_log_buffer_prefix_syslog); + sxe_log_hook_line_out(sxe_log_line_to_syslog); +} +#endif + +/* TODO: refactor to form: sxe_debug_map_enum_to_string_ */ diff --git a/lib-sxe-log/sxe-log.h b/lib-sxe-log/sxe-log.h new file mode 100644 index 0000000..b0ca7c2 --- /dev/null +++ b/lib-sxe-log/sxe-log.h @@ -0,0 +1,526 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* TODO: Have exit automatically use the same level as entry, or error on mismatch */ +/* TODO: Auto add "return " on exit calls if not already there? 
/* The following log levels are supported
 *
 * The enumerators take consecutive values starting from zero, so the release mode levels FATAL to DEBUG have the values 1 to 5
 * and the debug only levels TRACE and DUMP have the values 6 and 7. These are the digits that appear in the names of the
 * logging macros (e.g. SXEL1 logs at SXE_LOG_LEVEL_FATAL). A line is logged when its level is less than or equal to the level
 * in the per file control structure, so larger values mean more verbose output.
 */
typedef enum SXE_LOG_LEVEL {
    SXE_LOG_LEVEL_UNDER_MINIMUM,    /* Guard value: do not use this                                  */
    SXE_LOG_LEVEL_FATAL,            /* Release mode assertions                                       */
    SXE_LOG_LEVEL_ERROR,
    SXE_LOG_LEVEL_WARNING,
    SXE_LOG_LEVEL_INFORMATION,      /* e.g. SXLd transactions                                        */
    SXE_LOG_LEVEL_DEBUG,            /* e.g. External function entry/return                           */
/* -------------------------------- only compiled into debug build below this line -------- */
    SXE_LOG_LEVEL_TRACE,            /* e.g. Internal function entry/return and further logging       */
    SXE_LOG_LEVEL_DUMP,             /* e.g. Packet/structure dumps, periodic idle behaviour          */
    SXE_LOG_LEVEL_OVER_MAXIMUM      /* Guard value: do not use this                                  */
} SXE_LOG_LEVEL;
/* Private type used to allow per file dynamic modification of log levels
 *
 * Every file that includes this header gets its own static instance (sxe_log_control), initialised with the level
 * SXE_LOG_LEVEL_OVER_MAXIMUM, a NULL next pointer and the file name SXE_FILE.
 */
typedef struct SXE_LOG_CONTROL {
    SXE_LOG_LEVEL                     level;    /* Lines whose level is <= this value are logged (see SXE_IF_LEVEL_GE) */
    volatile struct SXE_LOG_CONTROL * next;     /* presumably links the control objects into a list - TODO confirm     */
    const char *                      file;     /* Name of the source file that owns this control object              */
} SXE_LOG_CONTROL;

/* Private type used to declare space for the log package on the stack of each function that calls SXEE##
 *
 * SXE_LOG_FRAME_CREATE fills in level, file, line, function and id; caller and indent are filled in when the frame is pushed
 * onto the per thread log stack by sxe_log_frame_push.
 */
typedef struct SXE_LOG_FRAME {
    struct SXE_LOG_FRAME * caller;      /* Frame that was on top of the stack when this one was pushed       */
    unsigned               indent;      /* Caller's indent, plus one if this frame's entry is visible        */
    SXE_LOG_LEVEL          level;       /* Level given to the entry macro that created the frame             */
    const char *           file;        /* Source file containing the entry macro                            */
    unsigned               line;        /* Source line of the entry macro                                    */
    const char *           function;    /* Name of the function containing the entry macro                   */
    unsigned               id;          /* SXE identifier */
} SXE_LOG_FRAME;
+ */ +typedef struct SXE_LOG_STACK { + SXE_LOG_FRAME bottom; + SXE_LOG_FRAME * top; + unsigned era; +} SXE_LOG_STACK; + +/* Types of pointers to functions which can be hooked + */ +typedef void (*SXE_LOG_LINE_OUT_PTR)( SXE_LOG_LEVEL level, const char *); /* Log line out function */ +typedef unsigned (*SXE_LOG_BUFFER_PREFIX_PTR)(char * log_buffer, unsigned id, SXE_LOG_LEVEL level); /* Buffer prefix function */ +typedef void (*SXE_LOG_ASSERT_CB)( void); + +/* Globals used by macros/inlines + */ +#ifdef __APPLE__ +extern pthread_key_t sxe_log_stack_key; /* Top of stack; initialized to NULL in sxe-log.c */ +#else +extern __thread SXE_LOG_STACK sxe_log_stack; /* Top of stack; initialized to NULL in sxe-log.c */ +#endif + +extern SXE_LOG_ASSERT_CB sxe_log_assert_cb; /* NULL or function to call before abort() in sxe_log_assert() */ + +#define SXE_LOG_ASSERT_SET_CALLBACK(cb) sxe_log_assert_cb = cb + +#ifndef SXE_FILE +# define SXE_FILE __FILE__ /* Normally, the make system defines this as //.c */ +#endif + +/* Per module (file) log control structure + */ +static volatile SXE_LOG_CONTROL sxe_log_control = {SXE_LOG_LEVEL_OVER_MAXIMUM, NULL, SXE_FILE}; + +/** + * - Logging macros + * - SXE + * - Where: + * - is one of: + * - 'E' procedure (e)ntry + * - 'L' regular (l)og line + * - 'R' procedure (r)eturn + * - 'A' (a)ssertion + * - 'V' (v)erify (assertion whose side effects are honored in release mode) + * - is one of: + * - 1 fatal (critical errors, assertions) + * - 2 error (high severity errors) + * - 3 warning (medium severity errors) + * - 4 info (transactions, low severity errors) + * - 5 debug (release mode debug, used for OEM integration in SXL2) + * - 6 trace (function entry/exit, tell the "story" of what is going on) + * - 7 dump (packet dumps, more detailed and verbose debug information) + * - is one of: + * - 1 to 9 + * - Notes: + * - Use 'make SXE_DEBUG=1' to use instrumentation macros + * - Log type Verify keeps the condition if 'make SXE_DEBUG=0' + */ + +/** +
/* Guard used by the logging macros: the statement that follows is executed only if a line at 'line_level' should be logged
 * for this file, i.e. if line_level is no greater than the level in the file's sxe_log_control. It may be predefined before
 * this header is included to substitute a different test.
 */
#ifndef SXE_IF_LEVEL_GE
#define SXE_IF_LEVEL_GE(line_level) if ((line_level) <= sxe_log_control.level)
#endif

/* NOTE(review): SXE_FILE appears to be given a default earlier in this header (before sxe_log_control is declared); if so,
 * this fallback, including the MAK_FILE branch, can never take effect - confirm.
 */
#ifndef SXE_FILE
#   ifdef MAK_FILE
#       define SXE_FILE MAK_FILE    // Normally, the mak system defines this as //.c
#   else
#       define SXE_FILE __FILE__
#   endif
#endif

/* Declare a local SXE_LOG_FRAME called 'frame' and fill in the members known at the point of entry. The id defaults to ~0U
 * (no identifier); caller and indent are left unset here and are assigned by sxe_log_frame_push.
 */
#define SXE_LOG_FRAME_CREATE(entry_level) SXE_LOG_FRAME frame; frame.level=(entry_level); frame.file=SXE_FILE; \
                                          frame.line=__LINE__; frame.function=__func__; frame.id=~0U;
SXE_LOG_FRAME_SET_ID(this) frame.id=((this) != NULL ? (this)->id : ~0U) +#define SXE_LOG_NO &sxe_log_control, __func__, ~0U, __LINE__ +#define SXE_LOG_NO_ASSERT &sxe_log_control, __FILE__, ~0U, __LINE__ +#define SXE_LOG_FRAME_NO &frame, &sxe_log_control, ~0U + +#define SXEL1(...) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_FATAL ) {sxe_log(SXE_LOG_NO,SXE_LOG_LEVEL_FATAL ,__VA_ARGS__);} } while (0) +#define SXEL2(...) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_ERROR ) {sxe_log(SXE_LOG_NO,SXE_LOG_LEVEL_ERROR ,__VA_ARGS__);} } while (0) +#define SXEL3(...) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_WARNING ) {sxe_log(SXE_LOG_NO,SXE_LOG_LEVEL_WARNING ,__VA_ARGS__);} } while (0) +#define SXEL4(...) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_INFORMATION) {sxe_log(SXE_LOG_NO,SXE_LOG_LEVEL_INFORMATION,__VA_ARGS__);} } while (0) +#define SXEL5(...) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_DEBUG ) {sxe_log(SXE_LOG_NO,SXE_LOG_LEVEL_DEBUG ,__VA_ARGS__);} } while (0) + +#if SXE_DEBUG +#define SXEL1A6(...) SXEA6(__VA_ARGS__) +#define SXEL2A6(...) SXEA6(__VA_ARGS__) +#define SXEL3A6(...) SXEA6(__VA_ARGS__) +#define SXEL4A6(...) SXEA6(__VA_ARGS__) +#define SXEL5A6(...) SXEA6(__VA_ARGS__) +#else +#define SXEL1A6(expr, ...) do { if (!(expr)) SXEL1(__VA_ARGS__); } while (0) +#define SXEL2A6(expr, ...) do { if (!(expr)) SXEL2(__VA_ARGS__); } while (0) +#define SXEL3A6(expr, ...) do { if (!(expr)) SXEL3(__VA_ARGS__); } while (0) +#define SXEL4A6(expr, ...) do { if (!(expr)) SXEL4(__VA_ARGS__); } while (0) +#define SXEL5A6(expr, ...) do { if (!(expr)) SXEL5(__VA_ARGS__); } while (0) +#endif + +#define SXEA1(con,...) do { if(( con)==0){sxe_log_assert(SXE_LOG_NO_ASSERT,#con,__VA_ARGS__ );} } while (0) +#define SXEV1(func,con,...) 
/* Release mode function entry/return at debug level (5). SXEE5 opens a block that SXER5 closes, so they must be used as a pair
 * in the same function. Both macros gate on SXE_LOG_LEVEL_DEBUG, the level the frame is created with and the level passed to
 * sxe_log_entry/sxe_log_return: previously entry was gated on TRACE and return on DUMP, so with the file's level set to TRACE
 * the entry line was logged without a matching return line.
 */
#define SXEE5(...) do {SXE_LOG_FRAME_CREATE(SXE_LOG_LEVEL_DEBUG); do {SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_DEBUG) {sxe_log_entry(SXE_LOG_FRAME_NO,SXE_LOG_LEVEL_DEBUG,__VA_ARGS__);} else {sxe_log_frame_push(&frame,false);}} while (0)
#define SXER5(...) SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_DEBUG) {sxe_log(SXE_LOG_NO,SXE_LOG_LEVEL_DEBUG,__VA_ARGS__); sxe_log_return(&sxe_log_control,&frame,SXE_LOG_LEVEL_DEBUG);} else {sxe_log_frame_pop(&frame);}} while(0)
/* Debug build function entry at trace (6) and dump (7) level. Each opens a block that the matching SXER6/SXER7 closes. The
 * level test uses the enumerators rather than the bare numbers 6 and 7 (which have the same values), matching SXEE6I/SXEE7I.
 */
#define SXEE6(...) {SXE_LOG_FRAME_CREATE(SXE_LOG_LEVEL_TRACE); SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_TRACE) {sxe_log_entry(SXE_LOG_FRAME_NO,SXE_LOG_LEVEL_TRACE,__VA_ARGS__);} else {sxe_log_frame_push(&frame,false);}
#define SXEE7(...) {SXE_LOG_FRAME_CREATE(SXE_LOG_LEVEL_DUMP ); SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_DUMP ) {sxe_log_entry(SXE_LOG_FRAME_NO,SXE_LOG_LEVEL_DUMP ,__VA_ARGS__);} else {sxe_log_frame_push(&frame,false);}
do { ((void)(func)); } while (0) + +#define SXED6(ptr,len) do { } while (0) +#define SXED7(ptr,len) do { } while (0) + +#endif + +#define SXE_LOG_ID &sxe_log_control, __func__, ((this) != NULL ? (this)->id : ~0U), __LINE__ +#define SXE_LOG_ID_ASSERT &sxe_log_control, __FILE__, ((this) != NULL ? (this)->id : ~0U), __LINE__ +#define SXE_LOG_FRAME_ID &frame, &sxe_log_control, ((this) != NULL ? (this)->id : ~0U) + +#define SXEL1I(...) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_FATAL ) {sxe_log(SXE_LOG_ID,SXE_LOG_LEVEL_FATAL ,__VA_ARGS__);} } while (0) +#define SXEL2I(...) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_ERROR ) {sxe_log(SXE_LOG_ID,SXE_LOG_LEVEL_ERROR ,__VA_ARGS__);} } while (0) +#define SXEL3I(...) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_WARNING ) {sxe_log(SXE_LOG_ID,SXE_LOG_LEVEL_WARNING ,__VA_ARGS__);} } while (0) +#define SXEL4I(...) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_INFORMATION) {sxe_log(SXE_LOG_ID,SXE_LOG_LEVEL_INFORMATION,__VA_ARGS__);} } while (0) +#define SXEL5I(...) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_DEBUG ) {sxe_log(SXE_LOG_ID,SXE_LOG_LEVEL_DEBUG ,__VA_ARGS__);} } while (0) + +#define SXEA1I(con,...) do { if(( con)==0){sxe_log_assert(SXE_LOG_ID_ASSERT, #con,__VA_ARGS__);} } while (0) +#define SXEV1I(func,con,...) 
/* Dump 'len' bytes of memory starting at 'ptr' at levels 1 to 5, tagged with the id of the object 'this'. A non-positive
 * length is reported instead of dumped; that report now carries the same id as the dump would have (it previously used
 * SXE_LOG_NO, dropping the id). The macro arguments are parenthesised so that expressions can be passed safely; note that
 * 'len' is still evaluated twice.
 */
#define SXED1I(ptr,len) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_FATAL      ) {if ((len) > 0) {sxe_log_dump_memory(SXE_LOG_ID,SXE_LOG_LEVEL_FATAL      ,(ptr),(len));} else {sxe_log(SXE_LOG_ID, SXE_LOG_LEVEL_FATAL,       "Attempted to dump memory with len <= 0");}} } while (0)
#define SXED2I(ptr,len) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_ERROR      ) {if ((len) > 0) {sxe_log_dump_memory(SXE_LOG_ID,SXE_LOG_LEVEL_ERROR      ,(ptr),(len));} else {sxe_log(SXE_LOG_ID, SXE_LOG_LEVEL_ERROR,       "Attempted to dump memory with len <= 0");}} } while (0)
#define SXED3I(ptr,len) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_WARNING    ) {if ((len) > 0) {sxe_log_dump_memory(SXE_LOG_ID,SXE_LOG_LEVEL_WARNING    ,(ptr),(len));} else {sxe_log(SXE_LOG_ID, SXE_LOG_LEVEL_WARNING,     "Attempted to dump memory with len <= 0");}} } while (0)
#define SXED4I(ptr,len) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_INFORMATION) {if ((len) > 0) {sxe_log_dump_memory(SXE_LOG_ID,SXE_LOG_LEVEL_INFORMATION,(ptr),(len));} else {sxe_log(SXE_LOG_ID, SXE_LOG_LEVEL_INFORMATION, "Attempted to dump memory with len <= 0");}} } while (0)
#define SXED5I(ptr,len) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_DEBUG      ) {if ((len) > 0) {sxe_log_dump_memory(SXE_LOG_ID,SXE_LOG_LEVEL_DEBUG      ,(ptr),(len));} else {sxe_log(SXE_LOG_ID, SXE_LOG_LEVEL_DEBUG,       "Attempted to dump memory with len <= 0");}} } while (0)
/* Debug build memory dumps at trace (6) and dump (7) level, tagged with the id of the object 'this'. A non-positive length is
 * reported instead of dumped; that report now carries the same id as the dump would have (it previously used SXE_LOG_NO,
 * dropping the id). The macro arguments are parenthesised so that expressions can be passed safely; note that 'len' is still
 * evaluated twice.
 */
#define SXED6I(ptr,len) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_TRACE) {if ((len) > 0) {sxe_log_dump_memory(SXE_LOG_ID,SXE_LOG_LEVEL_TRACE,(ptr),(len));} else {sxe_log(SXE_LOG_ID, SXE_LOG_LEVEL_TRACE, "Attempted to dump memory with len <= 0");}} } while (0)
#define SXED7I(ptr,len) do { SXE_IF_LEVEL_GE(SXE_LOG_LEVEL_DUMP ) {if ((len) > 0) {sxe_log_dump_memory(SXE_LOG_ID,SXE_LOG_LEVEL_DUMP ,(ptr),(len));} else {sxe_log(SXE_LOG_ID, SXE_LOG_LEVEL_DUMP,  "Attempted to dump memory with len <= 0");}} } while (0)
do { ((void)(func)); } while (0) + +#define SXED6I(ptr,len) do { } while (0) +#define SXED7I(ptr,len) do { } while (0) + +#endif + +#include "sxe-log-proto.h" +#include "sxe-log-legacy.h" +#include "sxe-str-encode-proto.h" + +#ifdef _WIN32 // Platforms that don't provide the BSD string functions need these + size_t strlcpy(char * destination, const char * source, size_t size); + size_t strlcat(char * destination, const char * source, size_t size); + char *strnstr(const char * buf, const char * str, size_t n); +#endif + +/* Deprecated. These macros are provided for backward compatibility + */ +#define sxe_strlcpy(destination, source, size) strlcpy((destination), (source), (size)) +#define sxe_strlcat(destination, source, size) strlcat((destination), (source), (size)) + +/* Pushing and popping function call information is done inline for performance */ + +static inline SXE_LOG_STACK * +sxe_log_get_stack(const char * file, unsigned id, int line) +{ + SXE_LOG_STACK * stack; + +#ifdef __APPLE__ + if (!sxe_log_stack_key) { + pthread_key_create(&sxe_log_stack_key, free); // CONVENTION EXCLUSION: Allow libc free function here + } + + stack = pthread_getspecific(sxe_log_stack_key); + if (stack == NULL) { + if ((stack = calloc(1, sizeof(SXE_LOG_STACK))) == NULL) { // CONVENTION EXCLUSION: Allow libc free function here + sxe_log_assert(NULL, file, id, line, "calloc(1, sizeof(SXE_LOG_STACK) != NULL", "Failed to allocate thread's log stack"); + } + + pthread_setspecific(sxe_log_stack_key, stack); + } + + stack = pthread_getspecific(sxe_log_stack_key); +#else + SXE_UNUSED_PARAMETER(file); + SXE_UNUSED_PARAMETER(id); + SXE_UNUSED_PARAMETER(line); + + stack = &sxe_log_stack; +#endif + + if (stack->top == NULL) { + stack->top = &stack->bottom; + } + + return stack; +} + + +static inline void +sxe_log_frame_push(SXE_LOG_FRAME * frame, bool visible) +{ + SXE_LOG_STACK * stack = sxe_log_get_stack(frame->file, frame->id, frame->line); + +/* CAREFUL!! 
things such as lib-sxe-log/test/test-log-levels.c define SXE_DEBUG to 1 before including us!! */ +#if !defined(SXE_RELEASE) || !SXE_RELEASE || (defined(SXE_COVERAGE) && SXE_COVERAGE) + /* Only checked for debug & coverage builds, to allow us to use -O2 with -pie for release builds. With both of these flags, + * the optimizer can place lower-level stack frames at higher memory addresses!! + */ + if (frame >= stack->top && stack->top != &stack->bottom) { + sxe_log_assert(NULL, frame->file, frame->id, frame->line, "frame < sxe_stack.top", + "New stack frame %p is not lower than previous %p; maybe SXEE## was called and SXER## was not?", + frame, stack->top); + } +#endif + + frame->indent = stack->top->indent; + + /* If this is a visible entry, increase the indent + */ + if (visible) + frame->indent++; + + frame->caller = stack->top; + stack->top = frame; +} + +static inline void +sxe_log_frame_pop(const SXE_LOG_FRAME * frame) +{ + SXE_LOG_STACK * stack = sxe_log_get_stack(frame->file, frame->id, frame->line); + + if (stack->top != frame) { + sxe_log_assert(NULL, frame->file, frame->id, frame->line, "sxe_log_stack.top != frame", + "%s: The frame is not on the top of the log stack!\n", __func__); + } + + stack->top = stack->top->caller; +} + +#endif /* __SXE_LOG_H__ */ diff --git a/lib-sxe-log/sxe-str-encode.c b/lib-sxe-log/sxe-str-encode.c new file mode 100644 index 0000000..37dc23d --- /dev/null +++ b/lib-sxe-log/sxe-str-encode.c @@ -0,0 +1,67 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
/**
 * Encode a string so that it contains no spaces, control or non-printable characters
 *
 * Each space is encoded as '_'. Each '_', '=', other whitespace or non-printable byte is encoded as '=' followed by the two
 * digit upper case hexadecimal value of the byte. All other characters are copied unchanged.
 *
 * @param buffer Buffer to write the NUL terminated encoded string to
 * @param size   Size of the buffer in bytes, including space for the terminating NUL
 * @param string String to encode
 * @param length Maximum number of bytes of string to encode; encoding stops early at a NUL
 *
 * @return buffer if the whole string was encoded or NULL if it did not fit. Unless size is 0, the buffer always holds the NUL
 *         terminated encoding of as many leading characters as fit.
 */
char *
sxe_strn_encode(char * buffer, unsigned size, const char * string, unsigned length)
{
    unsigned      i = 0;    /* Number of bytes written to buffer; while size > 0, i < size always holds */
    unsigned      j = 0;    /* Number of bytes of string consumed                                      */
    unsigned      need;
    unsigned char c;
    bool          escape;

    SXEE6("sxe_strn_encode(buffer=%p,size=%u,string='%.*s',length=%u)", (void *)buffer, size, (int)length, string, length);

    if (size > 0) {
        for (; (j < length) && (string[j] != '\0'); j++) {
            /* The <ctype.h> functions and %X need the value of the byte as an unsigned char: a negative plain char is
             * undefined behaviour for the former and would be sign extended by the latter.
             */
            c      = (unsigned char)string[j];
            escape = (c != ' ') && ((c == '_') || (c == '=') || isspace(c) || !isprint(c));
            need   = escape ? 3 : 1;

            if (need >= size - i) {    /* Check before writing: room is needed for the encoding and the terminating NUL */
                break;
            }

            if (escape) {
                snprintf(&buffer[i], 4, "=%02X", c);
            }
            else {
                buffer[i] = c == ' ' ? '_' : (char)c;
            }

            i += need;
        }

        buffer[i] = '\0';
    }

    /* Anything left unconsumed means the encoding was truncated. A string that exactly fills the buffer is not truncated.
     */
    if ((size == 0) || ((j < length) && (string[j] != '\0'))) {
        buffer = NULL;
    }

    SXER6("return buffer=%s", buffer != NULL ? buffer : "(null)");
    return buffer;
}
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* This module implements strlcpy, strlcat, and strnstr functions equivalent to those from FreeBSD libc and linux libbsd
+ * - See the rationale for these functions here: http://www.gratisoft.us/todd/papers/strlcpy.html
+ * - These functions are implemented here due to their use in the log package
+ */
+
+#ifdef _WIN32
+
+#include
+#include "sxe-log.h"
+
+/**
+ * Length limited string copy that sucks less than strncpy
+ *
+ * @param destination Pointer to destination memory
+ * @param source Pointer to NUL terminated source string to copy
+ * @param size Size of destination memory
+ *
+ * @return The length of the source string; if this is greater than or equal to size, the string has been truncated to
+ *         length (size - 1)
+ */
+size_t
+strlcpy(char * destination, const char * source, size_t size)
+{
+    size_t i;
+
+    for (i = 0; i < size; i++) {
+        destination[i] = source[i];
+
+        if (destination[i] == '\0') {
+            goto SXE_EARLY_OUT;
+        }
+    }
+
+    if (size)
+        destination[size - 1] = '\0';
+
+    while (source[i])    /* Source didn't fit: keep counting so that the full source length is returned */
+        i++;
+
+SXE_EARLY_OUT:
+    return i;
+}
+
+/**
+ * Length limited string concatenation that sucks less than strncat
+ *
+ * @param destination Pointer to destination memory
+ * @param source Pointer to NUL terminated source string to concatenate
+ * @param size Size of destination memory
+ *
+ * @return The length of the resulting string, if this is less than size; otherwise, the length the concatenated string would
+ *         have been if it wasn't truncated
+ */
+size_t
+strlcat(char * destination, const char * source, size_t size)
+{
+    size_t length;
+    size_t i;
+
+    for (length = 0; length < size && destination[length] != '\0'; length++)
+        /* Skip over the existing string */;
+
+    for (i = 0; i < size - length; i++) {    /* length <= size here, so the subtraction cannot wrap */
+        destination[i + length] = source[i];
+
+        if (source[i] == '\0') {
+            goto SXE_EARLY_OUT;
+        }
+    }
+
+    if (length < size)
+        destination[size - 1] = '\0';
+
+    while (source[i])    /* Source didn't fit: keep counting so that the untruncated length is returned */
+        i++;
+
+SXE_EARLY_OUT:
+    return length + i;
+}
+
+/**
+ * Length limited substring search
+ *
+ * @param buf Pointer to the string to search; the search stops at its NUL terminator or after n characters
+ * @param str Pointer to the NUL terminated string to search for
+ * @param n Maximum number of characters of buf to search
+ *
+ * @return A pointer to the first occurrence of str that lies entirely within the first n characters of buf, or NULL if
+ *         there is none; an empty str always matches at buf
+ */
+char *
+strnstr(const char * buf, const char * str, size_t n)
+{
+    size_t length = strlen(str); // SonarQube False Positive
+    const char *end;
+
+    if (n < length)
+        return NULL;
+
+    for (end = &buf[n - length]; buf <= end; buf++) {
+        /* Compare before testing for the end of buf so that an empty str matches an empty buf, as it does in FreeBSD */
+        if (strncmp(buf, str, length) == 0)
+            return SXE_CAST_NOCONST(char *, buf);
+
+        if (*buf == '\0')
+            break;
+    }
+
+    return NULL;
+}
+
+#endif
diff --git a/lib-sxe-log/sxe-test-leak.h b/lib-sxe-log/sxe-test-leak.h
new file mode 100644
index 0000000..ee4a3b3
--- /dev/null
+++ b/lib-sxe-log/sxe-test-leak.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __SXE_TEST_LEAK__
+#define __SXE_TEST_LEAK__ 1
+
+/* Growable array of pointers handed to test_leak(); everything in it is freed by test_plug()
+ */
+static struct {
+    unsigned num;    /* Number of pointers currently held in mem */
+    void **mem;      /* Array of num tracked pointers, or NULL when empty */
+} test_bucket;
+
+/**
+ * Track an allocation so that test_plug() frees it later
+ *
+ * @param v Pointer to remember; it is passed to free() by test_plug()
+ *
+ * @return v, so that the call can be wrapped around the expression that allocates it
+ *
+ * @note If the bucket can't be grown, the existing bucket is left intact and v is returned untracked
+ */
+static inline void *
+test_leak(void *v)
+{
+    void **grown = realloc(test_bucket.mem, sizeof(*test_bucket.mem) * (test_bucket.num + 1)); // CONVENTION EXCLUSION: OK to use in test code
+
+    if (grown == NULL)    /* Don't overwrite mem with NULL: that would lose every pointer tracked so far */
+        return v;
+
+    test_bucket.mem = grown;
+    return test_bucket.mem[test_bucket.num++] = v;
+}
+
+/**
+ * Free every pointer tracked by test_leak(), then the bucket itself, leaving the bucket empty and reusable
+ */
+static inline void
+test_plug(void)
+{
+    unsigned i;
+
+    for (i = 0; i < test_bucket.num; i++)
+        free(test_bucket.mem[i]); // CONVENTION EXCLUSION: OK to use in test code
+    free(test_bucket.mem); // CONVENTION EXCLUSION: OK to use in test code
+    test_bucket.mem = NULL;
+    test_bucket.num = 0;
+}
+
+#endif
diff --git a/lib-sxe-log/test/sxe-log-levels.c b/lib-sxe-log/test/sxe-log-levels.c
new file mode 100644
index 0000000..e0052b8
--- /dev/null
+++ b/lib-sxe-log/test/sxe-log-levels.c
@@ -0,0 +1,60 @@
+/* Copyright (c) 2010 Sophos Group.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* These must be separate functions from main and each other so they have their own "frame" objects; see the SXEE## macros.
+ * In addition, they are separated into this helper module to give them a separate sxe_log_control object.
+ */
+
+#if SXE_DEBUG != 0
+#define LOCAL_SXE_DEBUG 1
+#endif
+
+#undef SXE_DEBUG /* Since we are testing diagnostic functions, this test program forces debug mode */
+#define SXE_DEBUG 1
+
+#include "sxe-log.h"
+#include "tap.h"
+
+void test_level_six(SXE_LOG_LEVEL level);
+
+/* Log entry at level 7; when called with SXE_LOG_LEVEL_DUMP, log a level 2 line, switch the log level to DEBUG and log
+ * another level 2 line; then set the log level to the requested level, log that at level 2 and log the level 7 return.
+ */
+static void
+test_level_seven(SXE_LOG_LEVEL level)
+{
+    SXEE7("(level=%u)", level); /* Function name should be automatically inserted */
+
+    if (level == SXE_LOG_LEVEL_DUMP) {
+        SXEL2("No indent");
+        sxe_log_set_level(SXE_LOG_LEVEL_DEBUG);
+        SXEL2("Indented by 2");
+    }
+
+    sxe_log_set_level(level);
+    SXEL2("Set log level to %u", level);
+    SXER7("return // seven");
+}
+
+/* Log entry at level 6, call test_level_seven() with the same level, then log the level 6 return. This is the only
+ * function in this helper module with external linkage.
+ */
+void
+test_level_six(SXE_LOG_LEVEL level)
+{
+    SXEE6("(level=%u)", level); /* Function name should be automatically inserted */
+    test_level_seven(level);
+    SXER6("return // six");
+}
diff --git a/lib-kit/test/test-kit-time.c b/lib-sxe-log/test/test-kit-time.c
similarity index 93%
rename from lib-kit/test/test-kit-time.c
rename to lib-sxe-log/test/test-kit-time.c
index d7084df..869d75f 100644
--- a/lib-kit/test/test-kit-time.c
+++ b/lib-sxe-log/test/test-kit-time.c
@@ -21,19 +21,16 @@
  * SPDX-License-Identifier: MIT
  */
 
-#include
+#include
 #include
 
-#include "kit.h"
+#include "kit-time.h"
 
 int
-main(int argc, char **argv)
+main(void)
 {
-    int64_t nsec1, nsec2;
-    int32_t sec1, sec2;
-
-    SXE_UNUSED_PARAMETER(argc);
-    SXE_UNUSED_PARAMETER(argv);
+    int64_t nsec1, nsec2;
+    uint32_t sec1, sec2;
 
     plan_tests(10);
 
diff --git
a/lib-sxe-log/test/test-kit-timestamp.c b/lib-sxe-log/test/test-kit-timestamp.c new file mode 100644 index 0000000..926251b --- /dev/null +++ b/lib-sxe-log/test/test-kit-timestamp.c @@ -0,0 +1,103 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include
+#include
+#include
+#include
+
+#include "kit-mock.h"
+#include "kit-timestamp.h"
+
+static struct timeval test_tv;
+
+/* Mock for gettimeofday(): ignores tz, copies test_tv into the caller's timeval and reports success */
+static int
+test_mock_gettimeofday(struct timeval * __restrict tv, __timezone_ptr_t tz)
+{
+    (void)tz;
+    memcpy(tv, &test_tv, sizeof(test_tv));
+    return 0;
+}
+
+/* Convert a timeval to a kit timestamp and back again, then check the resulting seconds and microseconds (2 tests) */
+static void
+test_timeval_conversions(unsigned tv_sec, unsigned tv_usec, unsigned tv_sec_expected, unsigned tv_usec_expected)
+{
+    kit_timestamp_t timestamp;
+
+    test_tv.tv_sec = tv_sec;
+    test_tv.tv_usec = tv_usec;
+    timestamp = kit_timestamp_from_timeval(&test_tv);
+    kit_timestamp_to_timeval(timestamp, &test_tv);
+    is(test_tv.tv_sec , tv_sec_expected , "Convert to and from timeval: timeval seconds is %u", tv_sec );
+    is(test_tv.tv_usec, tv_usec_expected, "Convert to and from timeval: timeval useconds is %u", tv_usec);
+}
+
+int
+main(void)
+{
+    time_t expected;
+    time_t actual;
+    kit_timestamp_t timestamp;
+    char buffer[KIT_TIMESTAMP_STRING_SIZE];
+
+    plan_tests(21);
+
+    time(&expected);
+    actual = kit_timestamp_to_unix_time(kit_timestamp_get());
+    ok((actual == expected) || (actual == expected + 1), "Actual time (%lu) is as expected (%lu)", actual, expected);
+
+    is_eq(kit_timestamp_to_buf(0xFFFFFUL, buffer, sizeof(buffer)), "19700101000000.999999",
+          "Formatted kit timestamp as expected (%s)", buffer);
+    is_eq(kit_timestamp_to_buf(0x7FFFFUL, buffer, sizeof(buffer)), "19700101000000.499999",
+          "Formatted kit timestamp as expected (%s)", buffer);
+    kit_timestamp_to_buf(987654UL << KIT_TIMESTAMP_BITS_IN_FRACTION, buffer, sizeof(buffer));
+    is_eq(&buffer[sizeof("YYYYmmDDHHMMSS") - 1], ".000000",
+          "Zeroed fractional part formatted as expected (string=%s, timestamp=%" PRIx64 ")", buffer,
+          987654UL << KIT_TIMESTAMP_BITS_IN_FRACTION);
+
+    MOCK_SET_HOOK(gettimeofday, test_mock_gettimeofday);    /* From here on, kit_timestamp_get() is expected to see test_tv */
+
+    test_tv.tv_sec = 0;
+    test_tv.tv_usec = 0;
+    timestamp = kit_timestamp_get();
+    is(timestamp, 0, "tv(0,0) -> timestamp 0");
+
+    test_tv.tv_usec = 999999;
+    timestamp = kit_timestamp_get();
+    ok(KIT_TIMESTAMP_1_SEC - 1 - timestamp < KIT_TIMESTAMP_1_SEC,
+       "tv(0,999999) -> timestamp ~%" PRIu64 " (got %" PRIu64 ", epsilon %" PRIu64 ")",
+       KIT_TIMESTAMP_1_SEC - 1, timestamp, KIT_TIMESTAMP_1_SEC);
+
+    test_tv.tv_sec = 1;
+    test_tv.tv_usec = 0;
+    timestamp = kit_timestamp_get();
+    is(timestamp, KIT_TIMESTAMP_1_SEC, "tv(1, 0) -> timestamp 2^20");
+
+    test_timeval_conversions(1, 500000, 1, 500000);
+    test_timeval_conversions(2, 0, 2, 0);
+    test_timeval_conversions(3, 1, 3, 0); /* rounds down; presumably each conversion truncates between usec and 2^-20 sec units - TODO confirm */
+    test_timeval_conversions(4, 999999, 4, 999998); /* rounds down; challenge for you, figure out why! */
+    test_timeval_conversions(5, 5, 5, 4); /* rounds down; challenge for you, figure out why! */
+    test_timeval_conversions(6, 4294, 6, 4293); /* rounds down; challenge for you, figure out why! */
+    test_timeval_conversions(7, 4295, 7, 4294); /* rounds down; challenge for you, figure out why! */
+
+    return exit_status();
+}
diff --git a/lib-sxe-log/test/test-sxe-log-format.c b/lib-sxe-log/test/test-sxe-log-format.c
new file mode 100644
index 0000000..23fddcb
--- /dev/null
+++ b/lib-sxe-log/test/test-sxe-log-format.c
@@ -0,0 +1,111 @@
+#include
+#include
+
+#include "sxe-log.h"
+#include "tap.h"
+
+#if defined(__APPLE__) || defined(__FreeBSD__)
+# define PREFIX_FORMAT "YYYYMMDD HHmmSS.mmm P01234567789 T0123456789"
+#else // Linux and Windows
+# define PREFIX_FORMAT "YYYYMMDD HHmmSS.mmm T0123456789"
+#endif
+
+static SXE_LOG_LEVEL last_level;
+static char * last_line;
+
+/**
+ * Log line output hook: records the level and a copy of the line with the prefix and trailing newline removed
+ *
+ * @param level Level the line was logged at; saved in last_level
+ * @param line The formatted log line; the text after the PREFIX_FORMAT sized prefix is copied and exposed via last_line
+ */
+static void
+test_log_line_out(SXE_LOG_LEVEL level, const char * line)
+{
+    static char buf[SXE_LOG_BUFFER_SIZE];
+    size_t length;
+
+    last_level = level;
+    line += sizeof(PREFIX_FORMAT);    /* sizeof includes the NUL, so this also skips the character after the prefix */
+    last_line = buf;
+    strlcpy(buf, line, sizeof(buf));
+    length = strlen(buf);
+    SXEA1(length == 0 || buf[length - 1] == '\n', "Line is not newline terminated");
+
+    if (length > 0)    /* The assertion allows an empty line; don't write before the start of buf in that case */
+        buf[length - 1] = '\0';
+}
+
+/**
+ * Check that the next blank delimited token in a line is as expected and advance past it
+ *
+ * @param line In/out pointer to the remaining line; leading blanks are skipped and the token is NUL terminated in place
+ * @param token The expected token
+ * @param message The test description
+ */
+static void
+test_token(char **line, const char *token, const char *message)
+{
+    char * end;
+
+    if (**line == ' ') // Skip leading blanks
+        *line += strspn(*line, " ");
+
+    if ((end = strchr(*line, ' '))) {
+        *end = '\0';
+        is_eq(*line, token, "%s", message);
+        *line = end + 1;
+    }
+    else
+        fail("No blanks left in line '%s'", *line);
+}
+
+/* Thread start routine: logs one SXEL3 line from a thread that never calls sxe_log_set_thread_id() */
+static void *
+other_thread_log_warning(void *dummy)
+{
+    SXE_UNUSED_PARAMETER(dummy);
+    SXEL3("No id expected");
+    return NULL;
+}
+
+int
+main(void)
+{
+    pthread_t thread;
+    void * retval;
+
+    plan_tests(18);
+    sxe_log_hook_line_out(test_log_line_out);
+
+    SXEL1("booga, booga!");
+    is(last_level, SXE_LOG_LEVEL_FATAL, "SXEL1 level is FATAL");
+    test_token(&last_line, "------", "No transaction id set");
+    test_token(&last_line, "1", "Level indicator is 1 (FATAL)");
+    is_eq(last_line, "- booga, booga!", "Line is as expected");
+
+    sxe_log_set_options(SXE_LOG_OPTION_LEVEL_TEXT);
+    sxe_log_set_thread_id(666);
+    SXEL1("booga, booga!");
+    is(last_level, SXE_LOG_LEVEL_FATAL, "SXEL1 level is FATAL");
+    test_token(&last_line, "666", "Transaction id is 666");
+    test_token(&last_line, "FAT", "Level indicator is FAT (FATAL)");
+    is_eq(last_line, "- booga, booga!", "Line is as expected");
+
+    sxe_log_set_options(SXE_LOG_OPTION_LEVEL_TEXT | SXE_LOG_OPTION_ID_HEX);
+    sxe_log_set_thread_id(0x666);
+    SXEL2(": booga, booga!");
+    is(last_level, SXE_LOG_LEVEL_ERROR, "SXEL2 level is ERROR");
+    test_token(&last_line, "00000666", "Transaction id is 00000666");
+    test_token(&last_line, "ERR", "Level indicator is ERR (ERROR)");
+    is_eq(last_line, "- main: booga, booga!", "Line is as expected");
+
+    is(pthread_create(&thread, NULL, other_thread_log_warning, NULL), 0, "Created a thread");
+    is(pthread_join(thread, &retval), 0, "Joined the thread");
+    is(last_level, SXE_LOG_LEVEL_WARNING, "SXEL3 level is WARNING");
+    test_token(&last_line, "--------", "No transaction id");
+    test_token(&last_line, "WAR", "Level indicator is WAR (WARNING)");
+    is_eq(last_line, "- No id expected", "Line is as expected");
+
+    return exit_status();
+}
diff --git
a/lib-sxe-log/test/test-sxe-log-levels.c b/lib-sxe-log/test/test-sxe-log-levels.c new file mode 100644 index 0000000..4c6e639 --- /dev/null +++ b/lib-sxe-log/test/test-sxe-log-levels.c @@ -0,0 +1,181 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#undef SXE_DEBUG /* Since we are testing diagnostic functions, this test program forces debug mode */
+#define SXE_DEBUG 1
+
+#include "sxe-log.h"
+
+static const char * test_program_name;
+static const char * test_program_arg;
+static FILE * test_output_pipe = NULL;
+
+/* Log line output hook used by the child process: writes each line to stdout and flushes it so the parent can read it */
+static void
+test_log_line_out_to_stdout(SXE_LOG_LEVEL level, const char * line)
+{
+    SXE_UNUSED_PARAMETER(level);
+    fputs(line, stdout);
+    fflush(stdout);
+}
+
+/**
+ * Read the next line of output from the child process started by test_log_first()
+ *
+ * @return A pointer to a static buffer holding the line, or NULL at end of output, in which case the pipe has been closed
+ */
+static char *
+test_log_next(void)
+{
+    static char line[512];
+    char * result;
+
+    SXEA1(test_output_pipe != NULL, "test_log_next: No test program is running: trying running '%s %s' manually",
+          test_program_name, test_program_arg);
+
+    if ((result = fgets(line, sizeof(line), test_output_pipe)) == NULL) {
+        SXEA1(pclose(test_output_pipe) >= 0, "%s: Failed to pclose: %s", test_program_name, strerror(errno));
+        test_output_pipe = NULL;
+    }
+
+    return result;
+}
+
+/**
+ * Run this test program as a child process with a single argument and read the first line it writes
+ *
+ * @param arg The argument to pass to the child; selects which logging scenario the child runs
+ *
+ * @return The first line of the child's output (see test_log_next), or NULL if it wrote nothing
+ */
+static char *
+test_log_first(const char * arg)
+{
+    char buffer[512];
+
+    if (test_output_pipe != NULL) {
+        SXEA1(pclose(test_output_pipe) >= 0, "%s: Failed to pclose: %s", test_program_name, strerror(errno));
+    }
+
+    test_program_arg = arg;
+
+    /* snprintf bounds the whole command by the buffer size and always NUL terminates it. The strncat calls this replaces
+     * passed the buffer size as the maximum number of characters to append, which could overflow the buffer.
+     */
+    snprintf(buffer, sizeof(buffer), "%s %s", test_program_name, arg);
+    SXEA1((test_output_pipe = popen(buffer, "r")) != NULL, "Failed to popen '%s': %s", buffer, strerror(errno));
+    return test_log_next();
+}
+
+#define TEST_LINES_EXPECTED (sizeof(test_expected) / sizeof(test_expected[0]))
+
+/* Substrings expected, in order, in the lines that the indentation test in main() checks in the output of child "2" */
+static const char * test_expected[] = {
+    "2 - No indent",
+    "2 - Indented by 2",
+    "2 - Set log level to 7",
+    "7 - return // seven",
+    "6 - return // six",
+    "5 - return // five",
+    "5 + level_five()",
+    "6 + test_level_six(",
+    "7 + test_level_seven(",
+    "2 - Set log level to 3"
+};
+
+void test_level_six(SXE_LOG_LEVEL level); /* Function defined in the test/sxe-log-levels.c helper module */
+
+/* With an argument, act as the child: log according to the argument ("1" or "2") to stdout and exit. With no argument,
+ * act as the parent: run the children under various environment settings and check what they log.
+ */
+int
+main(int argc, char ** argv)
+{
+    const char * line;
+    unsigned i;
+
+    if (argc > 1) {
+        test_program_arg = argv[1];
+        sxe_log_hook_line_out(test_log_line_out_to_stdout);
+        SXEA1(sxe_log_hook_line_out(test_log_line_out_to_stdout) == test_log_line_out_to_stdout,
+              "sxe_log_hook_line_out failed to hook test_log_line_out_to_stdout");
+
+        if (strcmp(argv[1], "1") == 0) {
+            if (getenv("SXE_LOG_LEVEL") != NULL) {
+                SXED6("should not see this (level too high)", strlen("should not see this (level too high)")); /* Cover early out in dump */
+            }
+
+            SXEL4("BOO");
+        }
+        else if (strcmp(argv[1], "2") == 0) {
+            sxe_log_set_level(SXE_LOG_LEVEL_WARNING);
+            SXEE5("level_five()");
+            test_level_six(SXE_LOG_LEVEL_DUMP);
+            SXER5("return // five");
+            SXEE5("level_five()");
+            test_level_six(SXE_LOG_LEVEL_WARNING);
+            SXER5("return // five");
+            SXEL2("that's all, folks");
+        }
+
+        exit(0);
+    }
+
+    test_program_name = argv[0];
+    plan_tests(3 * TEST_LINES_EXPECTED + 3);
+    SXEL6("Testing, 1, 2, 3"); // Cover the default log line output function
+
+    /* Tests for different log level settings */
+
+    tap_test_case_name("Level settings");
+    ok((line = test_log_first("1")) != NULL, "Test log at default level wrote a line");
+    diag("line = %s", line);
+
+    SXEA1(putenv((char *)(intptr_t)"SXE_LOG_LEVEL=2") >= 0, "%s: Failed to putenv: %s", test_program_name, strerror(errno));
+    ok(test_log_first("1") == NULL, "Test log with SXE_LOG_LEVEL=2 failed to write a line");
+
+    /* TODO: Replace putenvs with calls to the TBD function that allows setting fine grained levels programmatically.
+     */
+
+    SXEA1(putenv((char *)(intptr_t)"SXE_LOG_LEVEL_LIBKIT=5") >= 0, "%s: Failed to putenv: %s", test_program_name, strerror(errno));
+    ok((line = test_log_first("1")) != NULL, "Test log with SXE_LOG_LEVEL_LIBKIT=5 wrote a line");
+    diag("line = %s", line);
+    SXEA1(putenv((char *)(intptr_t)"SXE_LOG_LEVEL_LIBKIT_LIB_SXE_LOG=2") >= 0, "%s: Failed to putenv: %s", test_program_name, strerror(errno));
+    ok(test_log_first("1") == NULL, "Test log with SXE_LOG_LEVEL_LIBKIT_LIB_SXE_LOG=2 failed to write a line");
+    SXEA1(putenv((char *)(intptr_t)"SXE_LOG_LEVEL_LIBKIT_LIB_SXE_LOG_TEST_TEST_SXE_LOG_LEVELS=6") >= 0, "%s: Failed to putenv: %s", test_program_name, strerror(errno));
+    ok((line = test_log_first("1")) != NULL, "Test log with SXE_LOG_LEVEL_LIBKIT_LIB_SXE_LOG_TEST_TEST_SXE_LOG_LEVELS=6 wrote a line");
+    diag("line = %s", line);
+
+    /* Remove the more specific environment variables
+     */
+    SXEA1(unsetenv("SXE_LOG_LEVEL_LIBKIT") == 0, "%s: unsetenv failed: %s", test_program_name, strerror(errno));
+    SXEA1(unsetenv("SXE_LOG_LEVEL_LIBKIT_LIB_SXE_LOG") == 0, "%s: unsetenv failed: %s", test_program_name, strerror(errno));
+    SXEA1(unsetenv("SXE_LOG_LEVEL_LIBKIT_LIB_SXE_LOG_TEST_TEST_SXE_LOG_LEVELS") == 0, "%s: unsetenv failed: %s", test_program_name, strerror(errno));
+
+    /* Tests for indentation interacting with log level */
+
+    tap_test_case_name("Indentation");
+    line = test_log_first("2");
+
+    for (i = 0; i < TEST_LINES_EXPECTED; i++) {
+        ok(line != NULL, "Got line %u", 2 * i + 1);
+        skip_if(!line, 1 + !!(i > 2), "No line") {
+            ok(strstr(line, test_expected[i]) != NULL, "Line %u: Found '%s' in '%.*s'", 2 * i + 1, test_expected[i], (int)strlen(line) - 1, line);
+
+            if (i > 2)    /* After the first three lines, only every second line of output is checked */
+                ok(test_log_next() != NULL, "Got line %u", 2 * i + 2);
+
+            line = test_log_next();
+        }
+    }
+
+    ok(line == NULL, "Got EOF");
+    return exit_status();
+}
diff --git a/lib-sxe-log/test/test-sxe-log-syslog.c b/lib-sxe-log/test/test-sxe-log-syslog.c
new file mode 100644
index 0000000..b5af97f
---
/dev/null +++ b/lib-sxe-log/test/test-sxe-log-syslog.c @@ -0,0 +1,200 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include
+#include
+#include
+#include
+
+#ifndef _WIN32
+#include
+#endif
+
+#if defined(__APPLE__)
+#include
+#elif defined(__FreeBSD__)
+#include
+#endif
+
+#undef SXE_DEBUG /* Since we are testing diagnostic functions, the test program forces debug mode */
+#define SXE_DEBUG 1
+#include "sxe-log.h"
+#include "sxe-test-leak.h"
+
+#include "kit-mock.h"
+
+#ifdef _WIN32
+/* Stub buffer prefix hook: ignores all of its arguments and reports a prefix length of 0 */
+static unsigned
+test_sxe_log_buffer_prefix(char * log_buffer, unsigned id, SXE_LOG_LEVEL level)
+{
+    unsigned length = 0;
+    SXE_UNUSED_PARAMETER(log_buffer);
+    SXE_UNUSED_PARAMETER(id);
+    SXE_UNUSED_PARAMETER(level);
+    return length;
+}
+
+/* Windows build: skip all the tests, but still call the prefix hook setter for coverage */
+int main(void)
+{
+    plan_skip_all("No syslog() on windows");
+
+    sxe_log_hook_buffer_prefix(NULL); /* For coverage */
+    sxe_log_hook_buffer_prefix(test_sxe_log_buffer_prefix); /* For coverage */
+    return exit_status();
+}
+#else
+
+static tap_ev_queue q_syslog;    /* Queue of events pushed by the mocked openlog() and syslog() */
+struct object
+{
+    unsigned id;
+};
+
+/* Mock for openlog(): pushes an "openlog" event onto q_syslog carrying a duplicate of ident (registered with
+ * test_leak() so that test_plug() frees it), the option and the facility
+ */
+static void
+t_openlog(const char * ident, int option, int facility)
+{
+    tap_ev_queue_push(q_syslog, "openlog", 3,
+                      "ident", test_leak(tap_dup(ident, strlen(ident))),
+                      "option", option,
+                      "facility", facility);
+}
+
+/* Mock for syslog(): asserts that the format is exactly "%s", then pushes a "syslog" event onto q_syslog carrying the
+ * priority and a duplicate of the single string argument (registered with test_leak() so that test_plug() frees it)
+ */
+static void
+t_syslog(int priority, const char * format, ...)
+{
+    va_list ap;
+    const char *logline;
+
+    assert(strcmp(format, "%s") == 0);
+
+    va_start(ap, format);
+    logline = va_arg(ap, const char *);
+    tap_ev_queue_push(q_syslog, "syslog", 2,
+                      "priority", priority,
+                      "logline", test_leak(tap_dup(logline, strlen(logline))));
+    va_end(ap);
+}
+
+/* Hook openlog() and syslog(), switch logging to syslog, then for each logging macro check the event that the mock
+ * captured: its identifier, its syslog priority and the exact log line
+ */
+int
+main(int argc, char *argv[]) {
+    tap_ev ev;
+    struct object self;
+    struct object * this = &self;    /* The SXEL1I/SXEL6I calls below are expected to log this->id - TODO confirm in sxe-log.h */
+    char expected[1024];
+#if defined(__FreeBSD__)
+    long tid;
+    thr_self(&tid);
+#elif defined(__APPLE__)
+    pid_t tid = syscall(SYS_thread_selfid);
+#else
+    pid_t tid = getpid();
+#endif
+
+    plan_tests(28);
+
+    MOCK_SET_HOOK(openlog, t_openlog);
+    MOCK_SET_HOOK(syslog, t_syslog);
+    q_syslog = tap_ev_queue_new();
+
+    sxe_log_use_syslog("my-program", LOG_NDELAY|LOG_PID, LOG_USER);
+    ev = tap_ev_queue_shift(q_syslog);
+    is_eq(tap_ev_identifier(ev), "openlog", "sxe_log_use_syslog() calls openlog()");
+    is_eq(tap_ev_arg(ev, "ident"), "my-program", "sxe_log_use_syslog() calls openlog() with correct 'ident' parameter");
+    is((int)(uintptr_t)tap_ev_arg(ev, "option"), LOG_NDELAY|LOG_PID, "sxe_log_use_syslog() calls openlog() with correct 'option' parameter");
+    is((int)(uintptr_t)tap_ev_arg(ev, "facility"), LOG_USER, "sxe_log_use_syslog() calls openlog() with correct 'facility' parameter");
+    tap_ev_free(ev);
+
+    SXEL1("SXEL1");
+    snprintf(expected, sizeof expected, "T=%ld ------ 1 - SXEL1\n", (long)tid);
+    ev = tap_ev_queue_shift(q_syslog);
+    is_eq(tap_ev_identifier(ev), "syslog", "SXEL1() calls syslog()");
+    is(tap_ev_arg(ev, "priority"), LOG_ERR, "SXEL1() maps to LOG_ERR syslog level");
+    is_eq(tap_ev_arg(ev, "logline"), expected, "SXEL1() is logged correctly");
+    tap_ev_free(ev);
+
+    SXEL2("SXEL2(%s)", "arg1");
+    snprintf(expected, sizeof expected, "T=%ld ------ 2 - SXEL2(arg1)\n", (long)tid);
+    ev = tap_ev_queue_shift(q_syslog);
+    is_eq(tap_ev_identifier(ev), "syslog", "SXEL2() calls syslog()");
+    is(tap_ev_arg(ev, "priority"), LOG_WARNING, "SXEL2() maps to LOG_WARNING syslog level");
+    is_eq(tap_ev_arg(ev, "logline"), expected, "SXEL2() is logged correctly");
+    tap_ev_free(ev);
+
+    SXEL3("SXEL3(%s,%d)", "arg1", 22);
+    snprintf(expected, sizeof expected, "T=%ld ------ 3 - SXEL3(arg1,22)\n", (long)tid);
+    ev = tap_ev_queue_shift(q_syslog);
+    is_eq(tap_ev_identifier(ev), "syslog", "SXEL3() calls syslog()");
+    is(tap_ev_arg(ev, "priority"), LOG_NOTICE, "SXEL3() maps to LOG_NOTICE syslog level");
+    is_eq(tap_ev_arg(ev, "logline"), expected, "SXEL3() is logged correctly");
+    tap_ev_free(ev);
+
+    SXEL4("SXEL4(%s,%d,%u)", "arg1", 22, 44);
+    snprintf(expected, sizeof expected, "T=%ld ------ 4 - SXEL4(arg1,22,44)\n", (long)tid);
+    ev = tap_ev_queue_shift(q_syslog);
+    is_eq(tap_ev_identifier(ev), "syslog", "SXEL4() calls syslog()");
+    is(tap_ev_arg(ev, "priority"), LOG_INFO, "SXEL4() maps to LOG_INFO syslog level");
+    is_eq(tap_ev_arg(ev, "logline"), expected, "SXEL4() is logged correctly");
+    tap_ev_free(ev);
+
+    SXEL5("SXEL5(%s,%d,%u,%x)", "arg1", 22, 44, 64);
+    snprintf(expected, sizeof expected, "T=%ld ------ 5 - SXEL5(arg1,22,44,40)\n", (long)tid);
+    ev = tap_ev_queue_shift(q_syslog);
+    is_eq(tap_ev_identifier(ev), "syslog", "SXEL5() calls syslog()");
+    is(tap_ev_arg(ev, "priority"), LOG_DEBUG, "SXEL5() maps to LOG_DEBUG syslog level");
+    is_eq(tap_ev_arg(ev, "logline"), expected, "SXEL5() is logged correctly");
+    tap_ev_free(ev);
+
+    SXEL6("SXEL6(%s,%d,%u,%x,%.2f)", "arg1", 22, 44, 64, 3.1415926);
+    snprintf(expected, sizeof expected, "T=%ld ------ 6 - SXEL6(arg1,22,44,40,3.14)\n", (long)tid);
+    ev = tap_ev_queue_shift(q_syslog);
+    is_eq(tap_ev_identifier(ev), "syslog", "SXEL6() calls syslog()");
+    is(tap_ev_arg(ev, "priority"), LOG_DEBUG, "SXEL6() maps to LOG_DEBUG syslog level");
+    is_eq(tap_ev_arg(ev, "logline"), expected, "SXEL6() is logged correctly");
+    tap_ev_free(ev);
+
+    this->id = 99;
+    SXEL1I("SXEL1I");
+    snprintf(expected, sizeof expected, "T=%ld 99 1 - SXEL1I\n", (long)tid);
+    ev = tap_ev_queue_shift(q_syslog);
+    is_eq(tap_ev_identifier(ev), "syslog", "SXEL1I() calls syslog()");
+    is(tap_ev_arg(ev, "priority"), LOG_ERR, "SXEL1I() maps to LOG_ERR syslog level");
+    is_eq(tap_ev_arg(ev, "logline"), expected, "SXEL1I() is logged correctly");
+    tap_ev_free(ev);
+
+    this->id = 98;
+    SXEL6I("SXEL6I(%s,%d,%u,%x,%.2f)", "arg1", 22, 44, 64, 3.1415926);
+    snprintf(expected, sizeof expected, "T=%ld 98 6 - SXEL6I(arg1,22,44,40,3.14)\n", (long)tid);
+    ev = tap_ev_queue_shift(q_syslog);
+    is_eq(tap_ev_identifier(ev), "syslog", "SXEL6I() calls syslog()");
+    is(tap_ev_arg(ev, "priority"), LOG_DEBUG, "SXEL6I() maps to LOG_DEBUG syslog level");
+    is_eq(tap_ev_arg(ev, "logline"), expected, "SXEL6I() is logged correctly");
+    tap_ev_free(ev);
+
+    sxe_log_hook_buffer_prefix(NULL); /* For coverage */
+    (void)argc;
+    (void)argv;
+
+    test_plug();    /* Free the strings duplicated by the mocks */
+
+    return exit_status();
+}
+
+#endif /* _WIN32 */
diff --git a/lib-sxe-log/test/test-sxe-log.c b/lib-sxe-log/test/test-sxe-log.c
new file mode 100644
index 0000000..ecc1ab7
--- /dev/null
+++ b/lib-sxe-log/test/test-sxe-log.c
@@ -0,0 +1,264 @@
+/* Copyright (c) 2010 Sophos Group.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#if SXE_DEBUG != 0 +#define LOCAL_SXE_DEBUG 1 +#endif + +#undef SXE_DEBUG /* Since we are testing diagnostic functions, the test program forces debug mode */ +#define SXE_DEBUG 1 + +#include "sxe-log.h" + +#define SXE_LOG_BUFFER_SIZE 1024 /* Copied from sxe-log.c */ +#define TEST_MAX_LINE (SXE_LOG_BUFFER_SIZE - 2) /* 2 == '\r\0' */ + +/* Make putenv STFU about const strings + */ +#define PUTENV(string) putenv((char *)(uintptr_t)(string)) + +struct object +{ + unsigned id; +}; + +static unsigned test_state = 0; +static const char entering[] = "Entering the log test"; +static const char escape[] = "Line 1: high bit character \x80\r\nLine 2: This is a hex character (BS): \b\r\nLine 3: This is a backslash: \\\r\n"; +static const char escaped[] = "Line 1: high bit character \\x80\\r\\nLine 2: This is a hex character (BS): \\x08\\r\\nLine 3: This is a backslash: \\\\\\r\\n"; +#define LOGGING ": Logging a normal line with function name prefix" +static const char verylong[] = "0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789" + "0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789" + "0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789" + "0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789" + "0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789" + "0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789" + 
"0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789" + "0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789" + "0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789" + "0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789" + "0123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789"; +static const char hextrunc[] = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" + "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" + "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" + "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" + "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" + "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; +static const char idlevin[] = " 99 6 - "; +static const char dumpdata[] = "test"; +static const char exiting[] = "Exiting the log test"; +static const char dumphex[] = "74 65 73 74"; + +static void +log_line(SXE_LOG_LEVEL level, const char * line_ro) +{ + char * line = (char *)(uintptr_t)line_ro; + char buf[256]; + + if (line[strlen(line) - 1] != '\n') { + fail("Expected 0x0a at end of line, not 0x%02x", line[strlen(line) - 1]); + exit(1); + } + + line[strlen(line) - 1] = '\0'; + + switch (test_state) { + case 0: + is(level, SXE_LOG_LEVEL_TRACE, "Line was logged at TRACE level"); + is_strncmp(&line[strlen(line) - strlen(entering)], entering, strlen(entering), "Test line 0 contains '%s': '%s'", entering, line); + break; + + case 1: + ok(strstr(line, __FILE__) != NULL, "Test line 1 includes file name 
'%s': '%s'", __FILE__, line); + break; + + case 2: + is_strncmp(&line[strlen(line) - strlen("main" LOGGING)], "main" LOGGING, strlen(LOGGING), "Test line 2 ends with '%s': '%s'", "main" LOGGING, line); + is_strncmp(&line[strlen(line) - strlen("main" LOGGING) - strlen(idlevin)], idlevin, strlen(idlevin), "Test line 2 has id 99, level 6 and indent 2"); + break; + + case 3: + ok(strstr(line, dumpdata) != NULL, + "Test line 3 contains '%s'", dumpdata); + ok(strstr(line, dumphex) != NULL, + "Test line 3 contains hex '%s'", dumphex); + break; + + case 4: + is_strncmp(&line[strlen(line) - strlen(exiting)], exiting, strlen(exiting), "Test line 4 ends with '%s': '%s'", exiting, line); + break; + + case 5: + strcpy(buf, "} // "); + strcat(buf, SXE_FILE); + ok(strstr(line, buf) != NULL, "Test line 5 includes end of block with file name '%s': '%s'", buf, line); + break; + + case 6: + is(strlen(line), TEST_MAX_LINE, "Very long log line truncated"); + is_eq(&line[TEST_MAX_LINE - 2], "..", "Truncation indicator found"); + break; + + case 7: + is(strlen(line), TEST_MAX_LINE, "Very long entry line truncated"); + break; + + case 8: + ok(strstr(line, __FILE__) != NULL, "Test line 8 includes file name '%s': '%s'", __FILE__, line); + break; + + case 9: + is_eq(&line[strlen(line) - strlen(escaped)], escaped, "Test line 9 ends with '%.*s': '%s'", (int)strlen(escaped) - 1, + escaped, line); + break; + + case 10: + is(strlen(line), TEST_MAX_LINE, "Line of 300 backspaces escaped and truncated"); + break; + + case 11: + /* Doesn't crash */ + break; + + case 12: + is(level, SXE_LOG_LEVEL_FATAL, "Assertions are logged at level FATAL"); + ok(strstr(line, "ERROR: assertion 'this != &self' failed at ") != NULL, + "Assertion line includes expected stringized test: '%s'", line); + break; + + case 13: + is(level, SXE_LOG_LEVEL_FATAL, "Second line of assertion is also logged at level FATAL"); + ok(strstr(line, "in function main() at libkit/lib-sxe-log/test/test-sxe-log.c:") != NULL, + "Second 
assertion line has expected function 'main()' and file name 'libkit/lib-sxe-log/test/test-sxe-log.c': '%s'", + line); +#ifdef WINDOWS_NT + diag("info: You can expect and ignore a message saying:"); + diag(" > This application has requested the Runtime to terminate it in an unusual way."); + diag(" > Please contact the application's support team for more information."); +#endif + break; + +#ifdef WINDOWS_NT + case 14: + /* ignore line: NOTE: set SXE_WINNT_ASSERT_MSGBOX=1 to assert into the Microsoft visual debugger*/ + break; +#endif + + default: + diag("Unexpected test sequence number %u.\n", test_state); + exit(1); + } + + test_state++; +} + +static void +test_abort_handler(int sig) +{ + (void)sig; +#ifdef WINDOWS_NT + is(test_state, 15, "All log lines/signals received"); +#else + is(test_state, 14, "All log lines/signals received"); +#endif + exit(exit_status()); +} + +#define TEST_CASE_RETURN_TO_STRING(id) is_eq(sxe_return_to_string(SXE_RETURN_##id), #id, "test return string " #id) + +int +main(void) { + struct object self; + struct object * this = &self; + + plan_tests(53); + + /* Test sxe_return_to_string() + */ + is_eq(sxe_return_to_string(SXE_RETURN_OK), "OK" , "sxe_return_to_string(SXE_RETURN_OK) eq \"OK\""); + is_eq(sxe_return_to_string(SXE_RETURN_ERROR_INTERNAL), "ERROR_INTERNAL", "sxe_return_to_string(SXE_RETURN_ERROR_INTERNAL) eq \"ERROR_INTERNAL\""); + is( sxe_return_to_string(~0U), NULL, "sxe_return_to_string(~0U) == NULL"); + TEST_CASE_RETURN_TO_STRING(DEADLOCK_WOULD_OCCUR); + TEST_CASE_RETURN_TO_STRING(EXPIRED_VALUE); + TEST_CASE_RETURN_TO_STRING(OUT_OF_RANGE); + TEST_CASE_RETURN_TO_STRING(NO_SUCH_PROCESS); + TEST_CASE_RETURN_TO_STRING(NO_UNUSED_ELEMENTS); + TEST_CASE_RETURN_TO_STRING(IN_PROGRESS); + TEST_CASE_RETURN_TO_STRING(UNCATEGORIZED); + TEST_CASE_RETURN_TO_STRING(END_OF_FILE); + TEST_CASE_RETURN_TO_STRING(WARN_ALREADY_INITIALIZED); + TEST_CASE_RETURN_TO_STRING(WARN_WOULD_BLOCK); + TEST_CASE_RETURN_TO_STRING(WARN_ALREADY_CLOSED); + 
TEST_CASE_RETURN_TO_STRING(ERROR_NOT_INITIALIZED); + TEST_CASE_RETURN_TO_STRING(ERROR_ALLOC); + TEST_CASE_RETURN_TO_STRING(ERROR_NO_CONNECTION); + TEST_CASE_RETURN_TO_STRING(ERROR_ALREADY_CONNECTED); + TEST_CASE_RETURN_TO_STRING(ERROR_INVALID); + TEST_CASE_RETURN_TO_STRING(ERROR_BAD_MESSAGE); + TEST_CASE_RETURN_TO_STRING(ERROR_ADDRESS_IN_USE); + TEST_CASE_RETURN_TO_STRING(ERROR_INTERRUPTED); + TEST_CASE_RETURN_TO_STRING(ERROR_COMMAND_NOT_RUN); + TEST_CASE_RETURN_TO_STRING(ERROR_LOCK_NOT_TAKEN); + TEST_CASE_RETURN_TO_STRING(ERROR_INCORRECT_STATE); + TEST_CASE_RETURN_TO_STRING(ERROR_TIMED_OUT); + TEST_CASE_RETURN_TO_STRING(ERROR_WRITE_FAILED); + TEST_CASE_RETURN_TO_STRING(INVALID_VALUE); /* Just for coverage */ + + ok(signal(SIGABRT, test_abort_handler) != SIG_ERR, "Caught abort signal"); + sxe_log_hook_line_out(NULL); /* for coverage */ + sxe_log_hook_line_out(log_line); + PUTENV("SXE_LOG_LEVEL=6"); /* Trigger processing of the level in the first call to the log */ + SXEE6(entering); + PUTENV("SXE_LOG_LEVEL=1"); /* This should be ignored. 
If it is not, the tests will fail */ + this->id = 99; + SXEL6I(LOGGING); + SXEA6(1, "Asserting true"); + SXED6(dumpdata, 4); + SXER6(exiting); + SXEL6(verylong); + SXEE6("really long entry message: %s", verylong); + SXEL6(escape); + SXEL6(hextrunc); + SXED6(dumpdata, 0); /* Edge case */ + SXEA6(1, "We should not get this, because level 8 is too low!"); + + is(sxe_log_decrease_level(SXE_LOG_LEVEL_ERROR), SXE_LOG_LEVEL_TRACE, "Level decreased to ERROR (2) from TRACE (6)"); + is(sxe_log_set_level( SXE_LOG_LEVEL_INFORMATION), SXE_LOG_LEVEL_ERROR, "Level set to INFO, was ERROR"); + is(sxe_log_get_level(), SXE_LOG_LEVEL_INFORMATION, "level is INFO"); + is(sxe_log_decrease_level(SXE_LOG_LEVEL_TRACE), SXE_LOG_LEVEL_INFORMATION, "Level was INFO, TRACE is not a decrease"); + +#if defined(_WIN32) && defined(LOCAL_SXE_DEBUG) + skip(5, "Can't test aborts in a Windows debug build, due to pop up Window stopping the build"); +#else + SXEA6(this != &self, "This is not self"); /* Abort - must be the last thing we do*/ + fail("Did not catch an abort signal"); +#endif + + } /* Oog! Close the brace opened in the SXEE6 macro above */ + return exit_status(); +} diff --git a/lib-sxe-log/test/test-sxe-str-encode.c b/lib-sxe-log/test/test-sxe-str-encode.c new file mode 100644 index 0000000..bfc9315 --- /dev/null +++ b/lib-sxe-log/test/test-sxe-str-encode.c @@ -0,0 +1,40 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "sxe-log.h" +#include "tap.h" + +int +main(void) +{ + char buffer[80]; + + plan_tests(8); + is_eq(sxe_strn_encode(buffer, 8, "", 0), "", "Returns an empty encoded string on empty input string"); + ok(sxe_strn_encode(buffer, 8, "Too long for buffer", 32) == NULL, "Correctly returns NULL if string too long"); + is_eq(buffer, "Too_lon", "Correctly truncates string if too long"); + ok(sxe_strn_encode(buffer, 8, "Too long for buffer", 8) == NULL, "String limit (strNishness) respected"); + is(sxe_strn_encode(buffer, sizeof(buffer), "blank tab\tdel\x7Funderscore_equals=0xABCD", 64), buffer, + "Correctly returns buffer if string not too long"); + is_eq(buffer, "blank_tab=09del=7Funderscore=5Fequals=3D0xABCD", "Correctly encodes all special cases"); + ok(sxe_strn_encode(buffer, 8, "\t\x7f_=", 32) == NULL, "Correctly returns NULL if encoded string too long"); + is_eq(buffer, "=09=7F", "Correctly truncates encoded string at end of a character"); +} diff --git a/lib-sxe-log/test/test-sxe-strlcpy.c b/lib-sxe-log/test/test-sxe-strlcpy.c new file mode 100644 index 0000000..d78d35a --- /dev/null +++ b/lib-sxe-log/test/test-sxe-strlcpy.c @@ -0,0 +1,61 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This program is only useful for testing the strlcpy/strlcat implementation on platforms like _WIN32 that don't provide native + * implementations. Otherwise, it tests the native implementations, which presumably work. + */ + +#include + +#include "sxe-log.h" +#include "tap.h" + +int +main(void) +{ + char buffer[16]; + + tap_plan(14, 0, NULL); + + tap_test_case_name("sxe_strlcpy"); + is(sxe_strlcpy(buffer, "Hello, world", sizeof(buffer)), sizeof("Hello, world") - 1, "Length of 'Hello, world' returned"); + is(strlen(buffer), sizeof("Hello, world") - 1, "Length of buffer correct"); + is(sxe_strlcpy(buffer, "Goodbye, cruel world", sizeof(buffer)), sizeof("Goodbye, cruel world") - 1, "Length of 'Goodbye, cruel world' returned"); + is(strlen(buffer), sizeof(buffer) - 1, "Length of buffer shows truncation occurred"); + strcpy(buffer, "XXX"); + is(sxe_strlcpy(buffer + 1, "nothing", 0), sizeof("nothing") - 1, "Length of 'nothing' returned"); + is_eq(buffer, "XXX", "Buffer content is the same"); + + tap_test_case_name("sxe_strlcat"); + sxe_strlcpy(buffer, "Hello, ", sizeof(buffer)); + is(sxe_strlcat(buffer, "world", sizeof(buffer)), sizeof("Hello, world") - 1, "Length of 'Hello, world' returned"); + is(strlen(buffer), sizeof("Hello, world") - 1, "Length of buffer correct"); + is(sxe_strlcat(buffer, ". Goodbye", sizeof(buffer)), sizeof("Hello, world. Goodbye") - 1, "Length of 'Hello, world. 
Goodbye' returned"); + is(strlen(buffer), sizeof(buffer) - 1, "Length of buffer shows truncation occurred"); + strcpy(buffer, "XXX"); + is(sxe_strlcat(buffer + 1, "nothing", 1), sizeof("nothing"), "Length of 'X' + 'nothing' returned"); + is_eq(buffer, "XXX", "Buffer content is the same"); + strcpy(buffer, "XXX"); + is(sxe_strlcat(buffer + 1, "nothing", 0), sizeof("nothing") - 1, "Length of 'nothing' returned"); + is_eq(buffer, "XXX", "Buffer content is the same"); + + return exit_status(); +} diff --git a/lib-sxe-md5/GNUmakefile b/lib-sxe-md5/GNUmakefile new file mode 100644 index 0000000..f5c7ae0 --- /dev/null +++ b/lib-sxe-md5/GNUmakefile @@ -0,0 +1,3 @@ +LIBRARIES = sxe-md5 + +include ../dependencies.mak diff --git a/lib-sxe-md5/sxe-md5-from-hex.c b/lib-sxe-md5/sxe-md5-from-hex.c new file mode 100644 index 0000000..f74f05f --- /dev/null +++ b/lib-sxe-md5/sxe-md5-from-hex.c @@ -0,0 +1,53 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "sxe-md5.h" +#include "sxe-util.h" + +#ifdef WHEN_I_NEED_IT +SXE_RETURN +sxe_md5_from_hex(uint8_t md5[SXE_MD5_SIZE], const char *md5_in_hex) +{ + SXE_RETURN result; + + SXEE6("(md5=%p,md5_in_hex='%s'", md5, md5_in_hex); + result = sxe_hex_to_bytes((unsigned char *)md5, md5_in_hex, SXE_MD5_IN_HEX_LENGTH); + SXER6("return %s", sxe_return_to_string(result)); + return result; +} +#endif + +SXE_RETURN +sxe_md5_to_hex(const uint8_t md5[SXE_MD5_SIZE], char *md5_in_hex, unsigned md5_in_hex_length) +{ + SXE_RETURN result = SXE_RETURN_OK; + + SXEE6("(md5=%p,md5_in_hex='%p',md5_in_hex_length='%u'", md5, md5_in_hex, md5_in_hex_length); + SXEA1(md5_in_hex_length == (SXE_MD5_IN_HEX_LENGTH + 1), "Incorrect length of char * for md5_to_hex(): '%u'", md5_in_hex_length); + + sxe_hex_from_bytes(md5_in_hex, md5, SXE_MD5_SIZE); + md5_in_hex[SXE_MD5_IN_HEX_LENGTH] = '\0'; + + SXEL6("md5_in_hex: '%.*s'", SXE_MD5_IN_HEX_LENGTH, md5_in_hex); + + SXER6("return %s", sxe_return_to_string(result)); + return result; +} diff --git a/lib-sxe-md5/sxe-md5.c b/lib-sxe-md5/sxe-md5.c new file mode 100644 index 0000000..587b9cf --- /dev/null +++ b/lib-sxe-md5/sxe-md5.c @@ -0,0 +1,295 @@ +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD5 Message-Digest Algorithm (RFC 1321). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. 
+ * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * (This is a heavily cut-down "BSD license".) + * + * This differs from Colin Plumb's older public domain implementation in that + * no exactly 32-bit integer data type is required (any 32-bit or wider + * unsigned integer data type will do), there's no compile-time endianness + * configuration, and the function prototypes match OpenSSL's. No code from + * Colin Plumb's implementation has been reused; this comment merely compares + * the properties of the two independent implementations. + * + * The primary goals of this implementation are portability and ease of use. + * It is meant to be fast, but not as fast as possible. Some known + * optimizations are not included to reduce source code size and avoid + * compile-time configuration. + */ + +#include + +#include "sxe-md5.h" + +/* + * The basic MD5 functions. + * + * F and G are optimized compared to their RFC 1321 definitions for + * architectures that lack an AND-NOT instruction, just like in Colin Plumb's + * implementation. + */ +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y)))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define I(x, y, z) ((y) ^ ((x) | ~(z))) + +/* + * The MD5 transformation for all four rounds. + */ +#define STEP(f, a, b, c, d, x, t, s) \ + (a) += f((b), (c), (d)) + (x) + (t); \ + (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \ + (a) += (b); + +/* + * SET reads 4 input bytes in little-endian byte order and stores them + * in a properly aligned word in host byte order. 
+ * + * The check for little-endian architectures that tolerate unaligned + * memory accesses is just an optimization. Nothing will break if it + * doesn't work. + */ +#if defined(__i386__) || defined(__x86_64__) || defined(__vax__) +#define SET(n) \ + (*(const SXE_MD5_u32plus *)&ptr[(n) * 4]) +#define GET(n) \ + SET(n) +#else +#define SET(n) \ + (md5->block[(n)] = \ + (SXE_MD5_u32plus)ptr[(n) * 4] | \ + ((SXE_MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \ + ((SXE_MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \ + ((SXE_MD5_u32plus)ptr[(n) * 4 + 3] << 24)) +#define GET(n) \ + (md5->block[(n)]) +#endif + +/* + * This processes one or more 64-byte data blocks, but does NOT update + * the bit counters. There are no alignment requirements. + */ +static const void * +body(SXE_MD5 *md5, const void *data, unsigned long size) +{ + const unsigned char *ptr; + SXE_MD5_u32plus a, b, c, d; + SXE_MD5_u32plus saved_a, saved_b, saved_c, saved_d; + + ptr = data; + + a = md5->a; + b = md5->b; + c = md5->c; + d = md5->d; + + do { + saved_a = a; + saved_b = b; + saved_c = c; + saved_d = d; + + /* Round 1 */ + STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7) + STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12) + STEP(F, c, d, a, b, SET(2), 0x242070db, 17) + STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22) + STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7) + STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12) + STEP(F, c, d, a, b, SET(6), 0xa8304613, 17) + STEP(F, b, c, d, a, SET(7), 0xfd469501, 22) + STEP(F, a, b, c, d, SET(8), 0x698098d8, 7) + STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12) + STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17) + STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22) + STEP(F, a, b, c, d, SET(12), 0x6b901122, 7) + STEP(F, d, a, b, c, SET(13), 0xfd987193, 12) + STEP(F, c, d, a, b, SET(14), 0xa679438e, 17) + STEP(F, b, c, d, a, SET(15), 0x49b40821, 22) + + /* Round 2 */ + STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5) + STEP(G, d, a, b, c, GET(6), 0xc040b340, 9) + STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14) + STEP(G, b, c, 
d, a, GET(0), 0xe9b6c7aa, 20) + STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5) + STEP(G, d, a, b, c, GET(10), 0x02441453, 9) + STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14) + STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20) + STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5) + STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9) + STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14) + STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20) + STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5) + STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9) + STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14) + STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20) + + /* Round 3 */ + STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4) + STEP(H, d, a, b, c, GET(8), 0x8771f681, 11) + STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16) + STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23) + STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4) + STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11) + STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16) + STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23) + STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4) + STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11) + STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16) + STEP(H, b, c, d, a, GET(6), 0x04881d05, 23) + STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4) + STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11) + STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16) + STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23) + + /* Round 4 */ + STEP(I, a, b, c, d, GET(0), 0xf4292244, 6) + STEP(I, d, a, b, c, GET(7), 0x432aff97, 10) + STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15) + STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21) + STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6) + STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10) + STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15) + STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21) + STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6) + STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10) + STEP(I, c, d, a, b, GET(6), 0xa3014314, 15) + STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21) + STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6) + STEP(I, d, a, b, c, GET(11), 0xbd3af235, 
10) + STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15) + STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21) + + a += saved_a; + b += saved_b; + c += saved_c; + d += saved_d; + + ptr += 64; + } while (size -= 64); + + md5->a = a; + md5->b = b; + md5->c = c; + md5->d = d; + + return ptr; +} + +void +SXE_MD5_Init(SXE_MD5 *md5) +{ + md5->a = 0x67452301; + md5->b = 0xefcdab89; + md5->c = 0x98badcfe; + md5->d = 0x10325476; + + md5->lo = 0; + md5->hi = 0; +} + +void +SXE_MD5_Update(SXE_MD5 *md5, const void *data, unsigned long size) +{ + SXE_MD5_u32plus saved_lo; + unsigned long used, free_; + + saved_lo = md5->lo; + if ((md5->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) + md5->hi++; /* coverage exclusion: don't know how to hit this? */ + md5->hi += size >> 29; + + used = saved_lo & 0x3f; + + if (used) { + free_ = 64 - used; + + if (size < free_) { + memcpy(&md5->buffer[used], data, size); + return; + } + + memcpy(&md5->buffer[used], data, free_); + data = (const unsigned char *)data + free_; + size -= free_; + body(md5, md5->buffer, 64); + } + + if (size >= 64) { + data = body(md5, data, size & ~(unsigned long)0x3f); + size &= 0x3f; + } + + memcpy(md5->buffer, data, size); +} + +void +SXE_MD5_Final(uint8_t *result, SXE_MD5 *md5) +{ + unsigned long used, free_; + + used = md5->lo & 0x3f; + + md5->buffer[used++] = 0x80; + + free_ = 64 - used; + + if (free_ < 8) { + memset(&md5->buffer[used], 0, free_); + body(md5, md5->buffer, 64); + used = 0; + free_ = 64; + } + + memset(&md5->buffer[used], 0, free_ - 8); + + md5->lo <<= 3; + md5->buffer[56] = (unsigned char)md5->lo; + md5->buffer[57] = (unsigned char)(md5->lo >> 8); + md5->buffer[58] = (unsigned char)(md5->lo >> 16); + md5->buffer[59] = (unsigned char)(md5->lo >> 24); + md5->buffer[60] = (unsigned char)md5->hi; + md5->buffer[61] = (unsigned char)(md5->hi >> 8); + md5->buffer[62] = (unsigned char)(md5->hi >> 16); + md5->buffer[63] = (unsigned char)(md5->hi >> 24); + + body(md5, md5->buffer, 64); + + result[0] = (unsigned 
char)md5->a; + result[1] = (unsigned char)(md5->a >> 8); + result[2] = (unsigned char)(md5->a >> 16); + result[3] = (unsigned char)(md5->a >> 24); + result[4] = (unsigned char)md5->b; + result[5] = (unsigned char)(md5->b >> 8); + result[6] = (unsigned char)(md5->b >> 16); + result[7] = (unsigned char)(md5->b >> 24); + result[8] = (unsigned char)md5->c; + result[9] = (unsigned char)(md5->c >> 8); + result[10] = (unsigned char)(md5->c >> 16); + result[11] = (unsigned char)(md5->c >> 24); + result[12] = (unsigned char)md5->d; + result[13] = (unsigned char)(md5->d >> 8); + result[14] = (unsigned char)(md5->d >> 16); + result[15] = (unsigned char)(md5->d >> 24); + + memset(md5, 0, sizeof(*md5)); +} diff --git a/lib-sxe-md5/sxe-md5.h b/lib-sxe-md5/sxe-md5.h new file mode 100644 index 0000000..a1de255 --- /dev/null +++ b/lib-sxe-md5/sxe-md5.h @@ -0,0 +1,58 @@ +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD5 Message-Digest Algorithm (RFC 1321). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * See sxe-md5.c for more information. 
+ */ + +#ifndef __SXE_MD5_H__ +#define __SXE_MD5_H__ + +#include +#include + +#include "sxe-log.h" + +#define SXE_MD5_SIZE 16 +#define SXE_MD5_IN_HEX_LENGTH (2 * SXE_MD5_SIZE) + +typedef unsigned int SXE_MD5_u32plus; /* Any 32-bit or wider unsigned integer data type will do */ + +/* This is the MD5 context defined by Openwall. It must be at least as big as openssl's to be compatible + */ +typedef struct SXE_MD5 { + SXE_MD5_u32plus lo, hi; + SXE_MD5_u32plus a, b, c, d; + unsigned char buffer[64]; + SXE_MD5_u32plus block[16]; +} SXE_MD5; + + +void SXE_MD5_Init(SXE_MD5 *md5); +void SXE_MD5_Update(SXE_MD5 *md5, const void *data, unsigned long size); +void SXE_MD5_Final(uint8_t *result, SXE_MD5 *md5); + +#ifdef WHEN_I_NEED_IT +SXE_RETURN sxe_md5_from_hex(uint8_t md5[SXE_MD5_SIZE], const char *md5_in_hex); +#endif +SXE_RETURN sxe_md5_to_hex(const uint8_t md5[SXE_MD5_SIZE], char *md5_in_hex, unsigned md5_in_hex_length); + +#endif diff --git a/lib-sxe-md5/test/test-sxe-md5.c b/lib-sxe-md5/test/test-sxe-md5.c new file mode 100644 index 0000000..63c9e29 --- /dev/null +++ b/lib-sxe-md5/test/test-sxe-md5.c @@ -0,0 +1,96 @@ +#include +#include +#include + +#include "sxe-md5.h" + +/* + * Define patterns for testing + */ +#define TESTA "abc" +#define TESTB_1 "abcdbcdecdefdefgefghfghighij" +#define TESTB_2 "hijkijkljklmklmnlmnomnopnopq" +#define TESTB TESTB_1 TESTB_2 +#define TESTC "a" +#define TESTQ "The quick brown fox jumps over the lazy dog" +#define TESTL TESTQ TESTQ TESTQ TESTQ TESTQ TESTQ + +unsigned char testa_md5[] = {0x90, 0x01, 0x50, 0x98, 0x3c, 0xd2, 0x4f, 0xb0, 0xd6, 0x96, 0x3f, 0x7d, 0x28, 0xe1, 0x7f, 0x72}; +unsigned char testb_md5[] = {0x82, 0x15, 0xef, 0x07, 0x96, 0xa2, 0x0b, 0xca, 0xaa, 0xe1, 0x16, 0xd3, 0x87, 0x6c, 0x66, 0x4a}; +unsigned char testc_md5[] = {0x77, 0x07, 0xd6, 0xae, 0x4e, 0x02, 0x7c, 0x70, 0xee, 0xa2, 0xa9, 0x35, 0xc2, 0x29, 0x6f, 0x21}; +unsigned char testq_md5[] = {0x9e, 0x10, 0x7d, 0x9d, 0x37, 0x2b, 0xb6, 0x82, 0x6b, 0xd8, 0x1d, 0x35, 0x42, 
0xa4, 0x19, 0xd6}; +unsigned char testl_md5[] = {0x5b, 0x61, 0x37, 0x72, 0xd2, 0xe9, 0x07, 0x7d, 0x2d, 0x40, 0x9b, 0xb6, 0x62, 0x9f, 0xdb, 0x09}; + +static void +is_md5(void *expected, void *got, const char *str) +{ + if (memcmp(expected, got, SXE_MD5_SIZE) == 0) { + pass("%s", str); + return; + } + + diag("Expected MD5:"); + SXED1(expected, SXE_MD5_SIZE); + diag("Got MD5:"); + SXED1(got, SXE_MD5_SIZE); + fail("%s", str); +} + +int +main(void) +{ + char Message_Digest_Hex[SXE_MD5_IN_HEX_LENGTH + 1]; + uint8_t Message_Digest[SXE_MD5_SIZE]; + SXE_MD5 md5; + int i; + + plan_tests(6); + + /* + * Perform test A + */ + SXE_MD5_Init( &md5); + SXE_MD5_Update(&md5, (const unsigned char *)TESTA, strlen(TESTA)); + SXE_MD5_Final(Message_Digest, &md5); + is_md5(testa_md5, Message_Digest, "Test A: MD5 as expected"); + sxe_md5_to_hex(Message_Digest, Message_Digest_Hex, sizeof(Message_Digest_Hex)); + is_eq(Message_Digest_Hex, "900150983cd24fb0d6963f7d28e17f72", "Hex version of digest is correct"); + + /* + * Perform test B + */ + SXE_MD5_Init( &md5); + SXE_MD5_Update(&md5, (const unsigned char *)TESTB, strlen(TESTB)); + SXE_MD5_Final(Message_Digest, &md5); + is_md5(testb_md5, Message_Digest, "Test B: MD5 as expected"); + + /* + * Perform test C + */ + SXE_MD5_Init(&md5); + + for(i = 1; i <= 1000000; i++) { + SXE_MD5_Update(&md5, (const unsigned char *)TESTC, 1); + } + + SXE_MD5_Final(Message_Digest, &md5); + is_md5(testc_md5, Message_Digest, "Test C: MD5 as expected"); + + /* + * Perform quick brown fox test + */ + SXE_MD5_Init( &md5); + SXE_MD5_Update(&md5, (const unsigned char *)TESTQ, strlen(TESTQ)); + SXE_MD5_Final(Message_Digest, &md5); + is_md5(testq_md5, Message_Digest, "Quick brown fox test: MD5 as expected"); + + /* + * Perform long string test (>64) + */ + SXE_MD5_Init( &md5); + SXE_MD5_Update(&md5, (const unsigned char *)TESTL, strlen(TESTL)); + SXE_MD5_Final(Message_Digest, &md5); + is_md5(testl_md5, Message_Digest, "Long test: MD5 as expected"); + + return 
exit_status(); +} diff --git a/lib-sxe-mmap/GNUmakefile b/lib-sxe-mmap/GNUmakefile new file mode 100644 index 0000000..8c14160 --- /dev/null +++ b/lib-sxe-mmap/GNUmakefile @@ -0,0 +1,3 @@ +LIBRARIES = sxe-mmap + +include ../dependencies.mak diff --git a/lib-sxe-mmap/sxe-mmap.c b/lib-sxe-mmap/sxe-mmap.c new file mode 100644 index 0000000..70bf604 --- /dev/null +++ b/lib-sxe-mmap/sxe-mmap.c @@ -0,0 +1,116 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +/* This package is loosely based on code from http://homepages.inf.ed.ac.uk/s0450736/maxent.html + * The license in the source file is MIT + * See: http://www.google.com/codesearch/p?hl=en#YZclbNWHoEk/s0450736/software/maxent/maxent-20061005.tar.bz2|Xqc-gC9Batk/maxent-20061005/src/mmapfile.h + */ + +#include +#include + +#include "sxe-log.h" +#include "sxe-mmap.h" + +unsigned sxe_spinlock_count_max = 1000000; /* number of yields before spinlock fails */ + +#ifndef _WIN32 + +#include +#include +#include +#include +#include +/* POSIX: open an existing file read/write and map its whole length MAP_SHARED; every failure is asserted with SXEA1 */ +void +sxe_mmap_open(SXE_MMAP * memmap, const char * file) { + struct stat st; + + SXEE6("sxe_mmap_open(memmap=%p, file=%s)", memmap, file); + SXEA1((memmap->fd = open(file, O_RDWR, 0666)) >= 0, "Failed to open file %s: %s", file, strerror(errno)); + SXEA1(fstat(memmap->fd, &st) >= 0, "Cannot stat size of file %s", file); /* size the descriptor that gets mapped, not the path */ + memmap->size = st.st_size; + SXEA1((memmap->addr = mmap(NULL, memmap->size, PROT_READ | PROT_WRITE, MAP_SHARED, memmap->fd, 0)) != MAP_FAILED, + "Failed to mmap file %s: %s", file, strerror(errno)); + SXER6( "return // sxe_mmap_open()" ); +} +/* POSIX: unmap the region recorded in memmap (asserted) and close its descriptor */ +void +sxe_mmap_close(SXE_MMAP* memmap) { + SXEE6("sxe_mmap_close(memmap=%p)", memmap); + SXEA1(munmap(memmap->addr, memmap->size) >= 0, "Fail to munmap file: %s", strerror(errno)); + close(memmap->fd); /* NOTE(review): the result of close() is not checked */ + SXER6("return // sxe_mmap_close()"); +} + +#else /* _WIN32 is defined */ + +#include +#include +#include +#include +/* Windows: stat the file for its size, then CreateFile/CreateFileMapping/MapViewOfFile it read/write; every failure is asserted */ +void +sxe_mmap_open(SXE_MMAP * memmap, const char * file) +{ + DWORD access_mode = GENERIC_READ | GENERIC_WRITE; + DWORD map_mode = PAGE_READWRITE; + DWORD view_mode = FILE_MAP_READ | FILE_MAP_WRITE; + HANDLE fh; + HANDLE view; + struct stat st; + + SXEE6("sxe_mmap_open(memmap=%p, file=%s)", memmap, file); + SXEA1(0 == stat (file, &st), "failed to stat file: %s", file); + memmap->size = st.st_size; + + fh = CreateFile ( file, access_mode, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, NULL ); + SXEA1( fh !=
INVALID_HANDLE_VALUE, "fail to open file %s", file ); + + view = CreateFileMapping ( fh, NULL , map_mode, 0 /* len >> 32 */, 0 /* len & 0xFFFFFFFF */, /* low-order DWORD of size */ 0 ); + SXEA1( view != NULL, "fail to create file mapping for file %s", file ); + + memmap->addr = 0; /* NOTE(review): dead store, overwritten by the next statement */ + memmap->addr = MapViewOfFile ( view, view_mode, 0, 0, memmap->size ); + SXEA1( 0 != memmap->addr, "fail to map view of file for file %s", file ); + + memmap->win32_fh = fh ; + memmap->win32_view = view; + SXEL6("file mapped to address: %p", memmap->addr ); + SXER6("return // sxe_mmap_open()" ); +} +/* Windows: unmap the view and close the mapping and file handles saved by sxe_mmap_open(); each step is asserted */ +void +sxe_mmap_close(SXE_MMAP * memmap) +{ + SXEE6("sxe_mmap_close(memmap=%p)", memmap); + + SXEA1(memmap != NULL, "invalid parameter"); + SXEA1(memmap->addr != NULL, "invalid struct member"); + + SXEA1(UnmapViewOfFile(memmap->addr ), "fail to unmap view of file"); + SXEA1(CloseHandle (memmap->win32_view), "fail to close view handle" ); + SXEA1(CloseHandle (memmap->win32_fh ), "fail to close file handle" ); + + SXER6("return // sxe_mmap_close()" ); +} + +#endif /* _WIN32 */ diff --git a/lib-sxe-mmap/sxe-mmap.h b/lib-sxe-mmap/sxe-mmap.h new file mode 100644 index 0000000..13b7baa --- /dev/null +++ b/lib-sxe-mmap/sxe-mmap.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This package is loosely based on code from http://homepages.inf.ed.ac.uk/s0450736/maxent.html + * The license in the source file is MIT + * See: http://www.google.com/codesearch/p?hl=en#YZclbNWHoEk/s0450736/software/maxent/maxent-20061005.tar.bz2|Xqc-gC9Batk/maxent-20061005/src/mmapfile.h + */ + +#ifndef __SXE_MMAP__ +#define __SXE_MMAP__ + +#ifdef _WIN32 + #include + #include +#endif +/* State of one memory mapped file; filled in by sxe_mmap_open() and consumed by sxe_mmap_close() */ +typedef struct { + unsigned long size; /* length of the mapping in bytes, taken from the file's st_size */ + int fd; /* descriptor returned by open(); the Windows implementation does not set it */ + void * addr; /* base address of the mapping */ + int flags; /* NOTE(review): neither set nor read by sxe-mmap.c */ +#ifdef _WIN32 + HANDLE win32_fh; /* file handle returned by CreateFile() */ + HANDLE win32_view; /* mapping handle returned by CreateFileMapping() */ +#endif +} SXE_MMAP; +/* Base address of a mapping, as a pointer to volatile so accesses through it are not cached by the compiler */ +#define SXE_MMAP_ADDR(memmap) ((volatile void *)(memmap)->addr) + +#include "sxe-mmap-proto.h" + +#endif diff --git a/lib-sxe-mmap/sxe-spinlock.h b/lib-sxe-mmap/sxe-spinlock.h new file mode 100644 index 0000000..e868a14 --- /dev/null +++ b/lib-sxe-mmap/sxe-spinlock.h @@ -0,0 +1,158 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef __SXE_SPINLOCK__ +#define __SXE_SPINLOCK__ + +#include +#include "sxe-log.h" + +/* If using Windows + */ +#ifdef _WIN32 + +#include +#include + +/* + * LONG __cdecl InterlockedCompareExchange( + * __inout LONG volatile *Destination, + * __in LONG Exchange, + * __in LONG Comparand); + */ + +#define SXE_GETTID() GetCurrentThreadId() +#define SXE_YIELD() SwitchToThread() + +#else + +#include +#include +#include + +#ifdef __FreeBSD__ +#include +#endif + +/* The following inline function is based on ReactOS; the license in the source file is MIT + * See: http://www.google.com/codesearch/p?hl=en#S3vzerue4i0/trunk/reactos/include/crt/mingw32/intrin_x86.h + */ + +#define __INTRIN_INLINE extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) + +#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 +/* GCC stand-in for the Win32 call: if *Destination == Comperand store Exchange; returns the value *Destination held beforehand */ +static inline long +InterlockedCompareExchange(volatile long * const Destination, const long Exchange, const long Comperand) +{ + return __sync_val_compare_and_swap(Destination, Comperand, Exchange); +} +/* GCC stand-in for the Win32 call: atomically add Value to *Addend; returns the value *Addend held BEFORE the addition, as on Windows */ +static inline long +InterlockedExchangeAdd(long volatile * Addend, long Value) +{ + return __sync_fetch_and_add(Addend, Value); +} + +#ifdef __APPLE__ +#define SXE_GETTID() syscall(SYS_thread_selfid) +#elif defined(__FreeBSD__) +static inline long +sxe_gettid(void) +{ + long tid; + thr_self(&tid); + return tid; +} +#define SXE_GETTID() sxe_gettid() +#else +#define SXE_GETTID() syscall(SYS_gettid) +#endif + +#define SXE_YIELD() sched_yield() +
+#else + +#error "Time to upgrade your compiler - gcc --version must be > 4.1.0" + +#endif +#endif + +extern unsigned sxe_spinlock_count_max; /* number of yields before spinlock fails */ + +typedef enum SXE_SPINLOCK_STATUS { + SXE_SPINLOCK_STATUS_NOT_TAKEN, /* Unable to take lock (maximum spin count reached) */ + SXE_SPINLOCK_STATUS_TAKEN, /* Took the lock */ + SXE_SPINLOCK_STATUS_ALREADY_TAKEN /* This thread already has the lock - don't double unlock! */ +} SXE_SPINLOCK_STATUS; + +typedef struct SXE_SPINLOCK { + volatile long lock; /* 0 when free, otherwise the thread id of the holder */ +} SXE_SPINLOCK; +/* Put a spinlock into the free state (lock word 0) */ +static inline void +sxe_spinlock_construct(SXE_SPINLOCK * spinlock) +{ + spinlock->lock = 0; +} +/* Hand the lock to tid: the inner compare-exchange only reads the current lock word, the outer one swaps that value for tid. NOTE(review): nothing is stored if the word changes between the two operations */ +static inline void +sxe_spinlock_force(SXE_SPINLOCK * spinlock, long tid) +{ + InterlockedCompareExchange(&spinlock->lock, tid, InterlockedCompareExchange(&spinlock->lock, tid, tid)); +} +/* Try to change the lock word from 0 to our thread id, yielding after each failed attempt; gives up after sxe_spinlock_count_max failures or on finding the lock is already ours */ +static inline SXE_SPINLOCK_STATUS +sxe_spinlock_take(SXE_SPINLOCK * spinlock) +{ + unsigned count = 0; + long our_tid = SXE_GETTID(); + long old_tid; + + while ((count < sxe_spinlock_count_max) && ((old_tid = InterlockedCompareExchange(&spinlock->lock, our_tid, 0)) != 0)) { + if (old_tid == our_tid) { + SXEL5("sxe_spinlock_take: Spinlock %p already held by our tid %ld", &spinlock->lock, our_tid); + return SXE_SPINLOCK_STATUS_ALREADY_TAKEN; + } + + count++; + SXE_YIELD(); + } + + if (count >= sxe_spinlock_count_max) { /* loop ended on the attempt limit, not on a successful exchange */ + SXEL3("sxe_spinlock_take failed: reached sxe_spinlock_count_max (%u)", sxe_spinlock_count_max); + return SXE_SPINLOCK_STATUS_NOT_TAKEN; + } + + return SXE_SPINLOCK_STATUS_TAKEN; +} +/* Release the lock by swapping our thread id for 0; asserts if the lock word did not hold our thread id */ +static inline void +sxe_spinlock_give(SXE_SPINLOCK * spinlock) +{ + long our_tid = SXE_GETTID(); + long old_tid; + + SXEA1((old_tid = InterlockedCompareExchange(&spinlock->lock, 0, our_tid)) == our_tid, + "sxe_spinlock_give: Lock %p is held by thread %ld, not our tid %ld", &spinlock->lock, old_tid, our_tid); +} + +#endif diff --git a/lib-sxe-mmap/test/test-sxe-mmap.c b/lib-sxe-mmap/test/test-sxe-mmap.c new file mode 100644 index
0000000..66121a0 --- /dev/null +++ b/lib-sxe-mmap/test/test-sxe-mmap.c @@ -0,0 +1,244 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include /* for PATH_MAX on WIN32 */ +#include /* for PATH_MAX not on WIN32 */ +#include +#include + +#include "kit-process.h" +#include "kit-timestamp.h" +#include "sxe-log.h" +#include "sxe-mmap.h" +#include "sxe-spinlock.h" +#include "sxe-test-get-temp-file-name.h" +#include "sxe-util.h" +#include "tap.h" + +#define TEST_MMAP_INSTANCES 16 +#define TEST_FILE_SIZE (1024 * 1024 * 64) +#define TEST_WAIT KIT_TIMESTAMP_FROM_UNIX_TIME(5) + +#if SXE_COVERAGE +#define TEST_ITERATIONS 3000 +#else +#define TEST_ITERATIONS 5000 +//#define TEST_PING_PONGS 50000 // + 450000 /* uncomment this to play even more ping pong */ +#endif +/* With arguments: child mode (argv[1] = instance number, argv[2] = mapped file). Without: the master creates the file, spawns TEST_MMAP_INSTANCES children and checks the counters they bump under the shared spinlock */ +int +main(int argc, char ** argv) +{ + kit_timestamp_t start_time; + int fd; + unsigned instance = 0; /* e.g. master=0, slaves=1, 2, 3, etc */ + char *unique_memmap_path_and_file; /* e.g. /tmp/test-sxe-mmap-pid-1234.bin */ + char unique_memmap_path_and_file_master_buffer[PATH_MAX]; + unsigned unique_memmap_path_and_file_master_buffer_used; + SXE_MMAP memmap; + volatile unsigned *shared; + SXE_SPINLOCK *shared_spinlock; + intptr_t hell_spawn[TEST_MMAP_INSTANCES]; + unsigned i; + unsigned total; + unsigned count_hi = 0; +#ifdef TEST_PING_PONGS + kit_timestamp_t time_before; + kit_timestamp_t time_after; +#endif + + sxe_log_decrease_level(SXE_LOG_LEVEL_WARNING); /* A lot of stuff going on here, don't want logging to slow us down */ + + if (argc > 1) { + instance = atoi(argv[1]); + unique_memmap_path_and_file = argv[2] ; + + SXEL1("Instance %2u unique memmap path and file: %s", instance, unique_memmap_path_and_file); + sxe_mmap_open(&memmap, unique_memmap_path_and_file); + shared = SXE_MMAP_ADDR(&memmap); + SXEL1("Instance %2u about to set shared memory", instance); + shared[instance] = instance; + SXED1(SXE_CAST(char *, SXE_MMAP_ADDR(&memmap)), sizeof(long) * (TEST_MMAP_INSTANCES + 2)); + SXEL1("Instance %2u just set shared memory", instance); + SXEA1(shared[instance] == instance, "WTF!
Thought I wrote %u", instance); + + shared_spinlock = SXE_CAST(SXE_SPINLOCK *, shared + 1024); + + InterlockedExchangeAdd(SXE_CAST(long *, &shared[0]), 1); + start_time = kit_timestamp_get(); + + while (shared[0] != TEST_MMAP_INSTANCES) { + SXEA1(kit_timestamp_get() < TEST_WAIT + start_time, "Timeout after %lu seconds", TEST_WAIT); /* NOTE(review): TEST_WAIT is built with KIT_TIMESTAMP_FROM_UNIX_TIME, so this may not print seconds - confirm */ + usleep(10000); + } + + SXEL1("Instance %2u ready to rumble", instance); + start_time = kit_timestamp_get(); + + for (i = 0; i < TEST_ITERATIONS; i++) { + SXEA1(kit_timestamp_get() < start_time + TEST_WAIT, "Unexpected timeout i=%u... is the hardware too slow?", i); + SXEA1(sxe_spinlock_take(&shared_spinlock[0]) == SXE_SPINLOCK_STATUS_TAKEN, + "Instance %2u failed to take lock", instance); + shared_spinlock[1 ].lock++; + shared_spinlock[1 + instance].lock++; + sxe_spinlock_give(&shared_spinlock[0]); + } + + start_time = kit_timestamp_get(); + + while (shared[0] != 0xDEADBABE) { + SXEA1(kit_timestamp_get() < start_time + TEST_WAIT, "Unexpected timeout... is the hardware too slow?"); + usleep(10000); + } + +#if defined TEST_PING_PONGS + /* Instance 1 will stick around to play some ping pong.
+ */ + if (instance == 1) { + for (i = 0; i < TEST_PING_PONGS; ) { + unsigned flag; + + SXEA1(sxe_spinlock_take(&shared_spinlock[0]) != SXE_SPINLOCK_STATUS_NOT_TAKEN, "Child failed to take lock"); + flag = shared[1]; + + if (flag == 0) { + shared[1] = 1; + i++; + } + + sxe_spinlock_give(&shared_spinlock[0]); + kit_timestamp_get(); /* causes child to delay very slightly */ + } + } +#endif + + sxe_mmap_close(&memmap); + SXEL1("Instance %2u exiting // count_hi %u", instance, count_hi); + return 0; + } + + plan_tests(3); + + sxe_test_get_temp_file_name("test-sxe-mmap-pool", unique_memmap_path_and_file_master_buffer, sizeof(unique_memmap_path_and_file_master_buffer), &unique_memmap_path_and_file_master_buffer_used); + unique_memmap_path_and_file = &unique_memmap_path_and_file_master_buffer[0]; + SXEL1("Instance %2d unique memmap path and file: %s", instance, unique_memmap_path_and_file); + + /* TODO: make creating the file faster on Windows! */ + + SXEL1("Instance %2d creating memory mapped file contents", instance); + SXEA1((fd = open(unique_memmap_path_and_file, O_CREAT | O_TRUNC | O_WRONLY, 0666)) >= 0, "Failed to create file '%s': %s" , unique_memmap_path_and_file, strerror(errno)); + SXEA1(ftruncate(fd, TEST_FILE_SIZE) >= 0, "Failed to extend the file to %u bytes: %s", TEST_FILE_SIZE, strerror(errno)); + close(fd); + + SXEL1("Instance %2d memory mapping file", instance); + sxe_mmap_open(&memmap, unique_memmap_path_and_file); + shared = SXE_MMAP_ADDR(&memmap); + + shared_spinlock = SXE_CAST(SXE_SPINLOCK *, shared + 1024); + sxe_spinlock_construct(&shared_spinlock[0]); + sxe_spinlock_construct(&shared_spinlock[1]); + shared[0] = 0; + + SXEL1("Instance %2d spawning slaves", instance); + for (instance = 1; instance <= TEST_MMAP_INSTANCES; instance++) { + char buffer[12]; + + SXEL1("Launching %d of %d", instance, TEST_MMAP_INSTANCES); + snprintf(buffer, sizeof(buffer), "%u", instance); + hell_spawn[instance - 1] = kit_spawnl(P_NOWAIT, argv[0], argv[0], buffer,
unique_memmap_path_and_file, NULL); + SXEA1(hell_spawn[instance - 1] != -1, "Failed to spawn (%d of %d) '%s %s': %s", instance, TEST_MMAP_INSTANCES, argv[0], buffer, strerror(errno)); + } + + i = 0; + start_time = kit_timestamp_get(); + + while (i < TEST_MMAP_INSTANCES) { + SXEA1((TEST_WAIT + start_time ) > kit_timestamp_get(), "Unexpected timeout... is the hardware too slow?"); + usleep(10000); + + for (i = 0, instance = 1; instance <= TEST_MMAP_INSTANCES; instance++) { + i += shared[instance] == instance ? 1 : 0; + } + } + + is(i, TEST_MMAP_INSTANCES, "All %u children have set their instance numbers", TEST_MMAP_INSTANCES); + start_time = kit_timestamp_get(); + + while (shared_spinlock[1].lock < (TEST_MMAP_INSTANCES * TEST_ITERATIONS)) { + SXEA1((TEST_WAIT + start_time ) > kit_timestamp_get(), "Unexpected timeout... is the hardware too slow?"); + usleep(10000); + } + + total = 0; + + for (instance = 1; instance <= TEST_MMAP_INSTANCES; instance++) { + total += shared_spinlock[1 + instance].lock; + SXEL1("Instance %2u incremented %ld times", instance, shared_spinlock[1 + instance].lock); + } + + is(total , (TEST_MMAP_INSTANCES * TEST_ITERATIONS), "Total of counts is as expected"); + is(shared_spinlock[1].lock, (TEST_MMAP_INSTANCES * TEST_ITERATIONS), "Shared count is as expected"); + shared[0] = 0xDEADBABE; + +#if defined TEST_PING_PONGS + /* Play some ping pong; Instance 1 will go first.
+ */ + time_before = kit_timestamp_get(); + + for (i = 0; i < TEST_PING_PONGS; ) { + unsigned flag; + + SXEA1(sxe_spinlock_take(&shared_spinlock[0]) != SXE_SPINLOCK_STATUS_NOT_TAKEN, "Parent failed to take lock"); + flag = shared[1]; + + if (flag == 1) { + shared[1] = 0; + i++; + } + + sxe_spinlock_give(&shared_spinlock[0]); + } + + time_after = kit_timestamp_get(); + SXEL1("Just switched back and forth %u times in %lu seconds, or %lu times/second", TEST_PING_PONGS, + kit_timestamp_to_unix_time(time_after - time_before), + kit_timestamp_to_unix_time(TEST_PING_PONGS / (time_after - time_before))); +#endif + + start_time = kit_timestamp_get(); + + for (instance = 0; instance < TEST_MMAP_INSTANCES; instance++) { + SXEA1(TEST_WAIT + start_time > kit_timestamp_get(), "Unexpected timeout... is the hardware too slow?"); + SXEA1(kit_cwait(NULL, hell_spawn[instance], WAIT_CHILD) == hell_spawn[instance], + "Unexpected return from cwait for process %"PRIdPTR": %s", hell_spawn[instance], strerror(errno)); + } + + sxe_mmap_close(&memmap); + SXEL1("Instance %02d unlinking: %s", instance, unique_memmap_path_and_file); + unlink(unique_memmap_path_and_file); + SXEL1("Instance %02d exiting // master", instance); + return exit_status(); +} diff --git a/lib-sxe-pool/GNUmakefile b/lib-sxe-pool/GNUmakefile new file mode 100644 index 0000000..0377a59 --- /dev/null +++ b/lib-sxe-pool/GNUmakefile @@ -0,0 +1,6 @@ +LIBRARIES = sxe-pool +include ../dependencies.mak + +ifneq ($(OS),Windows_NT) + LINK_FLAGS += -lpthread +endif diff --git a/lib-sxe-pool/sxe-pool-private.h b/lib-sxe-pool/sxe-pool-private.h new file mode 100644 index 0000000..1d81e6f --- /dev/null +++ b/lib-sxe-pool/sxe-pool-private.h @@ -0,0 +1,111 @@ +/* Copyright (c) 2010 Sophos Group.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/** + * This file includes details of the pool implementation that are shared between its modules only. 
+ */ + +#include "sxe-pool.h" +#include "sxe-spinlock.h" +#include "sxe-util.h" + +#define SXE_POOL_ARRAY_TO_IMPL(array) ((SXE_POOL_IMPL *)(void *)(array) - 1) /* the SXE_POOL_IMPL header sits immediately before the user array */ +#define SXE_POOL_IMPL_TO_ARRAY(impl) ((void *)((SXE_POOL_IMPL *)(impl) + 1)) /* inverse of SXE_POOL_ARRAY_TO_IMPL */ +#define SXE_POOL_NODES(impl) SXE_PTR_FIX(impl, SXE_POOL_NODE *, (impl)->nodes) /* presumably rebases the stored value on impl - confirm against SXE_PTR_FIX */ +#define SXE_POOL_QUEUE(impl) SXE_PTR_FIX(impl, SXE_LIST *, (impl)->queue) /* same fix-up for the per-state queue heads */ + +typedef struct SXE_POOL_NODE { + SXE_LIST_NODE list_node; + union { + kit_timestamp_t time; /* used when the pool has SXE_POOL_OPTION_TIMED */ + uint64_t count; /* used otherwise */ + } last; +} SXE_POOL_NODE; + +typedef struct SXE_POOL_IMPL { + SXE_SPINLOCK spinlock; + char name[SXE_POOL_NAME_MAXIMUM_LENGTH + 1]; + unsigned options; + unsigned number; + size_t size; + unsigned states; + SXE_POOL_NODE *nodes; + SXE_LIST *queue; + SXE_POOL_EVENT_TIMEOUT event_timeout; + void *caller_info; + kit_timestamp_t *state_timeouts; + SXE_LIST_NODE timeout_node; + uint64_t next_count; + const char * (*state_to_string)(unsigned state); +} SXE_POOL_IMPL; +/* Recover the SXE_POOL_NODE that embeds the given list node */ +static inline SXE_POOL_NODE * +sxe_pool_node_from_list_node(SXE_LIST_NODE * list_node) +{ + return (SXE_POOL_NODE *)(void *)((char *)list_node - offsetof(SXE_POOL_NODE, list_node)); +} + +/* Locking primitives - danger Will Robinson! */ +/* Take the pool spinlock when the pool has SXE_POOL_OPTION_LOCKED, otherwise report SXE_POOL_LOCK_TAKEN without locking; any spinlock status other than TAKEN yields SXE_POOL_LOCK_NOT_TAKEN */ +static inline unsigned +sxe_pool_lock(SXE_POOL_IMPL * pool) +{ + unsigned result = SXE_POOL_LOCK_TAKEN; + + if (!(pool->options & SXE_POOL_OPTION_LOCKED)) { /* Not locked - take it and go! */ + return SXE_POOL_LOCK_TAKEN; + } + + SXEE6("sxe_pool_lock(pool->name=%s)", pool->name); + + if (sxe_spinlock_take(&pool->spinlock) != SXE_SPINLOCK_STATUS_TAKEN) { + result = SXE_POOL_LOCK_NOT_TAKEN; + } + +SXE_EARLY_OR_ERROR_OUT: /* NOTE(review): no goto in this function targets this label */ + SXER6("return %s", result == SXE_POOL_LOCK_NOT_TAKEN ? "SXE_POOL_LOCK_NOT_TAKEN" : "SXE_POOL_LOCK_TAKEN"); + return result; +} +/* Give the pool spinlock back; does nothing for pools without SXE_POOL_OPTION_LOCKED */ +static inline void +sxe_pool_unlock(SXE_POOL_IMPL * pool) +{ + if (!(pool->options & SXE_POOL_OPTION_LOCKED)) { /* Not locked - GTFO!
*/ + return; + } + + SXEE6("sxe_pool_unlock(pool->name=%s)", pool->name); + sxe_spinlock_give(&pool->spinlock); + SXER6("return"); +} +/* Diagnostic text for an index/lock result: the symbolic name of a special value, otherwise the number in decimal */ +static inline const char * +sxe_pool_return_to_string(unsigned result) +{ + static char string[12]; /* single static buffer: a numeric result is overwritten by the next numeric call */ + + switch (result) { + case SXE_POOL_NO_INDEX: return "SXE_POOL_NO_INDEX"; + case SXE_POOL_LOCK_TAKEN: return "SXE_POOL_LOCK_TAKEN"; + case SXE_POOL_LOCK_NOT_TAKEN: return "SXE_POOL_LOCK_NOT_TAKEN"; + default: snprintf(string, sizeof(string), "%u", result); return string; + } +} diff --git a/lib-sxe-pool/sxe-pool-walker.c b/lib-sxe-pool/sxe-pool-walker.c new file mode 100644 index 0000000..a5fab3b --- /dev/null +++ b/lib-sxe-pool/sxe-pool-walker.c @@ -0,0 +1,135 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE.
+ */ + +#include "sxe-pool-private.h" + +/** + * Construct a pool state walker (AKA iterator) + * + * @param walker Pointer to the walker + * @param array Pointer to the pool array + * @param state State to walk + * + * @exception If the pool is both locked and timed, it cannot be walked safely + */ +void +sxe_pool_walker_construct(SXE_POOL_WALKER * walker, void * array, unsigned state) +{ + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + + SXEE6("sxe_pool_walker_construct(walker=%p,pool=%s,state=%s)", walker, pool->name, (*pool->state_to_string)(state)); + SXEA1(!((pool->options & SXE_POOL_OPTION_LOCKED) && (pool->options & SXE_POOL_OPTION_TIMED)), + "sxe_pool_walker_construct: Can't walk thread safe timed pool %s safely", pool->name); + sxe_list_walker_construct(&walker->list_walker, &SXE_POOL_QUEUE(pool)[state]); + walker->pool = pool; + walker->state = state; + + if (pool->options & SXE_POOL_OPTION_TIMED) { + walker->last.time = 0.0; /* NOTE(review): floating-point literal; kit_timestamp_t is declared elsewhere - confirm plain 0 is what is meant */ + } + else { + walker->last.count = 0; + } + + SXER6("return"); +} + +/** + * Step to the next object in a pool state + * + * @param walker Pointer to the pool state walker + * + * @return Index of the next object or SXE_POOL_NO_INDEX if the end of the state queue has been reached. + * + * @note Thread safety is implemented by verifying that the last node stepped to is still in the same state queue. If it is not, + * the state queue is rewalked to find a node with a time or count greater than or equal to the time that the last stepped + * to node had when it was stepped to. + */ +unsigned +sxe_pool_walker_step(SXE_POOL_WALKER * walker) +{ + const SXE_POOL_NODE *node; + SXE_POOL_IMPL *pool = walker->pool; + unsigned result; + + SXEE6("sxe_pool_walker_step(walker=%p)", walker); + + if ((result = sxe_pool_lock(pool)) == SXE_POOL_LOCK_NOT_TAKEN) { + goto SXE_ERROR_OUT; /* result stays SXE_POOL_LOCK_NOT_TAKEN and is returned to the caller */ + } + + /* If not at the head of the state queue and the current object has been moved to another state.
+ */ + if (((node = sxe_list_walker_find(&walker->list_walker)) != NULL) + && (SXE_LIST_NODE_GET_ID(&node->list_node) != walker->state)) + /* TODO: Check for touching */ + { + SXEL6("sxe_pool_walker_step: node %td moved from state %s to state %s by another thread", node - SXE_POOL_NODES(pool), + (*pool->state_to_string)(walker->state), (*pool->state_to_string)(SXE_LIST_NODE_GET_ID(&node->list_node))); + + /* If there is a previous object and it has not been moved, get the new next one. + */ + if (((node = sxe_list_walker_back(&walker->list_walker)) != NULL) + && (SXE_LIST_NODE_GET_ID(&node->list_node) == walker->state)) + /* TODO: Check for touching */ + { + node = sxe_list_walker_step(&walker->list_walker); + } + else { + sxe_list_walker_construct(&walker->list_walker, &SXE_POOL_QUEUE(pool)[walker->state]); /* restart from the head of the state queue */ + + while ((node = sxe_list_walker_step(&walker->list_walker)) != NULL) { + if (pool->options & SXE_POOL_OPTION_TIMED) { + if (node->last.time >= walker->last.time) { /* Coverage Exclusion: TODO refactor SXE_POOL_TIME */ + break; /* Coverage Exclusion: TODO refactor SXE_POOL_TIME */ + } + } + else { + if (node->last.count >= walker->last.count) { + break; + } + } + } + } + } + else { + node = sxe_list_walker_step(&walker->list_walker); + } + + result = SXE_POOL_NO_INDEX; /* kept when no further node was found */ + + if (node != NULL) { + result = (unsigned)(node - SXE_POOL_NODES(pool)); /* the node's position in the node array is the object's index */ + + if (pool->options & SXE_POOL_OPTION_TIMED) { + walker->last.time = node->last.time; /* remembered so a later rewalk can resume from here */ + } + else { + walker->last.count = node->last.count; + } + } + + sxe_pool_unlock(pool); + +SXE_ERROR_OUT: + SXER6("return %s", sxe_pool_return_to_string(result)); + return result; +} diff --git a/lib-sxe-pool/sxe-pool.c b/lib-sxe-pool/sxe-pool.c new file mode 100644 index 0000000..2d7b9b6 --- /dev/null +++ b/lib-sxe-pool/sxe-pool.c @@ -0,0 +1,743 @@ +/* Copyright (c) 2010 Sophos Group.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include + +#include "kit-alloc.h" +#include "kit-mock.h" +#include "sxe-list.h" +#include "sxe-log.h" +#include "sxe-pool-private.h" +#include "sxe-spinlock.h" +#include "sxe-util.h" + +#define SXE_POOL_ON_INCORRECT_STATE_RETURN_FALSE 0 +#define SXE_POOL_ON_INCORRECT_STATE_ABORT 1 +#define SXE_POOL_ASSERT_ARRAY_INITIALIZED(array) SXEA6((array) != NULL, "%s(array=NULL): Uninitialized pool?", __func__) + +static SXE_LIST sxe_pool_timeout_list; +static unsigned sxe_pool_timeout_count = 0; + +/* Diagnostic function to convert state to string if none is supplied by the user + */ +static const char * +sxe_pool_state_to_string(unsigned state) +{ +#define MAX_NUMBER_AS_STRING_LENGTH 12 +#define MAX_STRINGS_IN_SINGLE_ASSERT 10 + static char string[MAX_STRINGS_IN_SINGLE_ASSERT][MAX_NUMBER_AS_STRING_LENGTH]; /* ring of buffers: a result stays intact for the next MAX_STRINGS_IN_SINGLE_ASSERT - 1 calls */ + static unsigned which = 0; + char * result; + + result = string[which]; + which = (which + 1) % (sizeof(string) / sizeof(string[which])); + snprintf(result, sizeof(string[which]), "%u", state); + return result; +} +/* Name given to the pool when it was constructed */ +const char * +sxe_pool_get_name(void * array) +{ + /* No diagnostics, thanks, since this is only used for diagnostics */ + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + return SXE_POOL_ARRAY_TO_IMPL(array)->name; +} +/* Number of objects currently queued in the given state */ +unsigned +sxe_pool_get_number_in_state(void * array, unsigned state) +{ + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + unsigned count; + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEE6("sxe_pool_get_number_in_state(pool=%s,state=%s)", pool->name, (*pool->state_to_string)(state)); + count = SXE_LIST_GET_LENGTH(&SXE_POOL_QUEUE(pool)[state]); + SXER6("return %u", count); + return count; +} +/* State of the object at index id; asserts that id is less than the number of objects in the pool */ +unsigned +sxe_pool_index_to_state(void * array, unsigned id) +{ + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + unsigned state; + + SXEE6("sxe_pool_index_to_state(name=%s,id=%u)", pool->name, id); + SXEA1(id < pool->number, "sxe_pool_index_to_state(pool=%s,id=%u): Index is too big for pool (max
index=%u)", + pool->name, id, pool->number); + state = SXE_LIST_NODE_GET_ID(&SXE_POOL_NODES(pool)[id].list_node); + SXER6("return %s", (*pool->state_to_string)(state)); + return state; +} + +/** + * Determine the size in bytes of a new pool of objects of size with states + * + * @return The size of the pool + */ +size_t +sxe_pool_size(unsigned number, size_t size, unsigned states) +{ + size_t pool_size; + + SXEE6("sxe_pool_size(number=%u,size=%"PRIuPTR",states=%u)", number, size, states); + pool_size = 1 * sizeof(SXE_POOL_IMPL) /* pool impl structure */ + + number * size /* user objects */ + + states * sizeof(SXE_LIST) /* state queue heads */ + + number * sizeof(SXE_POOL_NODE); /* internal nodes */ + + SXER6("return pool_size=%zu", pool_size); + return pool_size; +} + +/** + * Construct a new pool of objects of size with states + * + * @param options = SXE_POOL_OPTION_LOCKED for thread safety + * SXE_POOL_OPTION_TIMED to keep the time of last insertion for each node + * + * @return A pointer to the array of objects + * + * @note The base pointer must point at a region of memory big enough to hold the pool size (see sxe_pool_size()) + */ +void * +sxe_pool_construct(void * base, const char * name, unsigned number, size_t size, unsigned states, unsigned options) +{ + SXE_POOL_IMPL * pool; + unsigned i; + SXE_LOG_LEVEL log_level_saved; + kit_timestamp_t current_time = 0; /* Initialize to shut the compiler up */ + + SXEE6("sxe_pool_construct(base=%p,name=%s,number=%u,size=%"PRIuPTR",states=%u,options=%u)", + base, name, number, size, states, options); + SXEL6("Constructing pool: %16s: %10zu byte pool structure", name, sizeof(SXE_POOL_IMPL)); + SXEL6("Constructing pool: %16s: %10"PRIuPTR" bytes = %10u * %10"PRIuPTR" byte objects", name, size * number, number, size); + SXEL6("Constructing pool: %16s: %10zu bytes = %10u * %10zu byte state queue heads", + name, (size_t) states * sizeof(SXE_LIST), states, sizeof(SXE_LIST)); + SXEL6("Constructing pool: %16s: %10zu bytes =
%10u * %10zu byte internal nodes", + name, sizeof(SXE_POOL_NODE)* number, number, sizeof(SXE_POOL_NODE)); + + pool = (SXE_POOL_IMPL *)base; + pool->queue = (SXE_LIST *)(sizeof(SXE_POOL_IMPL) + number * size); + pool->nodes = (SXE_POOL_NODE *)(sizeof(SXE_POOL_IMPL) + number * size + states * sizeof(SXE_LIST)); + pool->number = number; + pool->size = size; + pool->states = states; + pool->options = options; + pool->state_to_string = &sxe_pool_state_to_string; // Default to just printing the number + pool->state_timeouts = NULL; // If set, this pointer will be freed by the delete + + if (options & SXE_POOL_OPTION_LOCKED) + sxe_spinlock_construct(&pool->spinlock); + + if (pool->name != name) /* May be re-constructed using our own name */ + strlcpy(pool->name, name, sizeof(pool->name)); + + pool->event_timeout = NULL; + +#ifdef DEBUG + memset(SXE_POOL_QUEUE(pool), 0xBE, sizeof(SXE_LIST) * states); /* poison exactly the state queue heads, which are SXE_LIST elements */ +#endif + + for (i = states; i-- > 0; ) { + SXE_LIST_CONSTRUCT(&SXE_POOL_QUEUE(pool)[i], i, SXE_POOL_NODE, list_node); + } + + SXEL6("Construct the free list"); + + log_level_saved = sxe_log_decrease_level(SXE_LOG_LEVEL_DEBUG); /* Shut up logging on every node */ + + if (options & SXE_POOL_OPTION_TIMED) { + current_time = kit_timestamp_get(); + } + else { + pool->next_count = 0; + } + + for (i = number; i-- > 0; ) { + if (options & SXE_POOL_OPTION_TIMED) { + SXE_POOL_NODES(pool)[i].last.time = current_time; + } + else { + SXE_POOL_NODES(pool)[i].last.count = ++pool->next_count; + } + + sxe_list_push(&SXE_POOL_QUEUE(pool)[0], &SXE_POOL_NODES(pool)[i].list_node); + } + + sxe_log_set_level(log_level_saved); + SXER6("return array=%p // pool=%p, pool->nodes=%p, pool->name=%s", pool + 1, pool, SXE_POOL_NODES(pool), pool->name); + return pool + 1; +} + +/** + * Get the a pointer to a pool from a base pointer; this can be used to get at the array in a memory mapped pool + * + * @return A pointer to the array of objects + */ +void * +sxe_pool_from_base(void * base) +{ + void *
array; + + SXEE6("sxe_pool_from_base(base=%p)", base); + array = SXE_POOL_IMPL_TO_ARRAY(base); + SXER6("return array=%p", array); + return array; +} + +/** + * Get the base pointer from a pool array pointer; this is used (for example) by sxe-hash to get at a hash array's base pointer + * + * @return A pointer to the base of the pool + */ +void * +sxe_pool_to_base(void * array) +{ + void * base; + + SXEE6("sxe_pool_to_base(array=%p)", array); + base = SXE_POOL_ARRAY_TO_IMPL(array); + SXER6("return base=%p", base); + return base; +} + +/** + * Allocate and construct a new pool of objects of size with states + * + * @param name Name of pool; pointer to '\0' terminated string + * @param number Number of elements in the pool + * @param size Size of each element in the pool + * @param states Number of states each element can be in + * @param options SXE_POOL_OPTION_UNLOCKED for speed or SXE_POOL_OPTION_LOCKED for thread safety, and SXE_POOL_OPTION_TIMED to + * support timed operations; bit mask, combined with | operator + * + * @return A pointer to the array of objects + * + * @exception Aborts on failure to allocate memory + */ +void * +sxe_pool_new(const char * name, unsigned number, size_t size, unsigned states, unsigned options) +{ + void * base; + void * array; + + SXEE6("sxe_pool_new(name=%s,number=%u,size=%"PRIuPTR",states=%u,options=%sSXE_POOL_OPTION_LOCKED|%sSXE_POOL_OPTION_TIMED)", + name, number, size, states, options & SXE_POOL_OPTION_LOCKED ? "" : "!", options & SXE_POOL_OPTION_TIMED ?
"" : "!"); + SXEA1((base = kit_malloc(sxe_pool_size(number, size, states))) != NULL, "Error allocating SXE pool %s", name); + + array = sxe_pool_construct(base, name, number, size, states, options); + +#if SXE_DEBUG + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + SXER6("return array=%p // pool=%p, pool->nodes=%p, pool->name=%s", array, pool, SXE_POOL_NODES(pool), pool->name); +#endif + + return array; +} + +/** + * @note Pools with timeouts are not currently relocatable or thread safe. + */ +void * +sxe_pool_new_with_timeouts( + const char *name, + unsigned number, + size_t size, + unsigned states, + const kit_timestamp_t *timeouts, + SXE_POOL_EVENT_TIMEOUT callback, + void *caller_info) +{ + char *array; + SXE_POOL_IMPL *pool; + unsigned i; + + SXEE6("sxe_pool_new_with_timeouts(name=%s,number=%u,size=%"PRIuPTR",states=%u,timeouts=%p,callback=%p,caller_info=%p)", + name, number, size, states, timeouts, callback, caller_info); + SXEA1(callback != NULL, "Internal: timeout callback must be a real address of a function"); + + /* If it feels like the first time...
+ */ + if (sxe_pool_timeout_count == 0) { + SXE_LIST_CONSTRUCT(&sxe_pool_timeout_list, 0, SXE_POOL_IMPL, timeout_node); + } + + array = sxe_pool_new(name, number, size, states, SXE_POOL_OPTION_TIMED); + pool = SXE_POOL_ARRAY_TO_IMPL(array); + pool->event_timeout = callback; + pool->caller_info = caller_info; + pool->state_timeouts = kit_malloc(states * sizeof(kit_timestamp_t)); + + SXEA1(pool->state_timeouts != NULL, "Error allocating SXE pool %s; state timeout array", name); + SXEL6("allocated %zu bytes to hold %u state timeouts", states * sizeof(*timeouts), states); + + for (i = 0; i < states; i++) { + SXEL6("state %u has timeout %lu", i, timeouts[i]); + pool->state_timeouts[i] = timeouts[i]; + } + + sxe_list_push(&sxe_pool_timeout_list, pool); + sxe_pool_timeout_count++; + + /* TODO: need sxe_pool_construct_with_timeouts() for pools with timeouts using spinlocks */ + + SXER6("return array=%p", array); + return array; +} + +void +sxe_pool_set_state_to_string(void * array, const char * (*state_to_string)(unsigned state)) +{ + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEE6("sxe_pool_set_state_to_string(pool=%s,state_to_string=%p)", pool->name, state_to_string); + pool->state_to_string = state_to_string; + SXER6("return"); +} + +void +sxe_pool_check_timeouts(void) +{ + SXE_LIST_WALKER walker; + SXE_POOL_IMPL *pool; + kit_timestamp_t time_now; + kit_timestamp_t timeout_for_this_state; + kit_timestamp_t time_oldest_for_this_state; + kit_timestamp_t time_oldest_for_this_state_last; + unsigned state; + unsigned index_oldest_for_this_state; + unsigned index_oldest_for_this_state_last; + + SXEE6("sxe_pool_check_timeouts()"); + sxe_list_walker_construct(&walker, &sxe_pool_timeout_list); + time_now = kit_timestamp_get(); + + while ((pool = (SXE_POOL_IMPL *)sxe_list_walker_step(&walker)) != NULL) { + for (state = 0; state < pool->states; state++) { + timeout_for_this_state = pool->state_timeouts[state]; + + if 
(timeout_for_this_state == 0) { + SXEL6("state %s timeout is infinite; ignoring", (*pool->state_to_string)(state)); + continue; + } + + index_oldest_for_this_state_last = SXE_POOL_NO_INDEX; + time_oldest_for_this_state_last = 0; + + for (;;) { + index_oldest_for_this_state = sxe_pool_get_oldest_element_index(SXE_POOL_IMPL_TO_ARRAY(pool), state); + + if (SXE_POOL_NO_INDEX == index_oldest_for_this_state) { + SXEL6("state %s timeout %" PRIu64 " has no elements", (*pool->state_to_string)(state), timeout_for_this_state); + break; + } + + time_oldest_for_this_state = sxe_pool_get_oldest_element_time(SXE_POOL_IMPL_TO_ARRAY(pool), state); + + SXEA1( (index_oldest_for_this_state_last != index_oldest_for_this_state) + || (time_oldest_for_this_state != time_oldest_for_this_state_last), + "Internal: callback failed to update state on pool element with timed out"); + + if ((time_now - time_oldest_for_this_state) < timeout_for_this_state) { + SXEL6("state %s timeout %" PRIu64 " has not been reached for oldest index %u", (*pool->state_to_string)(state), + timeout_for_this_state, index_oldest_for_this_state); + break; + } + + SXEL6("state %s timeout %" PRIu64 " has been reached for oldest index %u", (*pool->state_to_string)(state), + timeout_for_this_state, index_oldest_for_this_state); + (*pool->event_timeout)(SXE_POOL_IMPL_TO_ARRAY(pool), index_oldest_for_this_state, pool->caller_info); + index_oldest_for_this_state_last = index_oldest_for_this_state; + time_oldest_for_this_state_last = time_oldest_for_this_state; + } + } + } + + SXER6("return"); +} + +/** + * Internal lockless function to move a specific object from one state queue to the tail of another + */ +static bool +sxe_pool_set_indexed_element_state_unlocked(void * array, unsigned id, unsigned old_state, unsigned new_state, + unsigned on_incorrect_state) +{ + bool success = false; + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + SXE_POOL_NODE * node; + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + 
SXEE6("(pool=%s,id=%u,old_state=%s,new_state=%s,on_incorrect_state=%s)", + pool->name, id, (*pool->state_to_string)(old_state), (*pool->state_to_string)(new_state), + on_incorrect_state == SXE_POOL_ON_INCORRECT_STATE_ABORT ? "ABORT" : "RETURN_FALSE"); + + node = &SXE_POOL_NODES(pool)[id]; + + if (SXE_LIST_NODE_GET_ID(&node->list_node) != old_state) { + SXEA1(on_incorrect_state != SXE_POOL_ON_INCORRECT_STATE_ABORT, + "sxe_pool_set_indexed_element_state_unlocked(pool=%s,id=%"PRIdPTR",old_state=%s,new_state=%s): Object is in state %s", + pool->name, (intptr_t)(node - SXE_POOL_NODES(pool)), (*pool->state_to_string)(old_state), + (*pool->state_to_string)(new_state), (*pool->state_to_string)(SXE_LIST_NODE_GET_ID(&node->list_node))); + goto SXE_EARLY_OUT; + } + + sxe_list_remove(&SXE_POOL_QUEUE(pool)[old_state], node); + + if (pool->options & SXE_POOL_OPTION_TIMED) { + node->last.time = kit_timestamp_get(); + } + else { + node->last.count = ++pool->next_count; + } + + sxe_list_push(&SXE_POOL_QUEUE(pool)[new_state], &node->list_node); + success = true; + +SXE_EARLY_OUT: + SXER6("return %s", success ? 
"true // success" : "false // failure"); + return success; +} + +/** + * Move a specific object from one state queue to the tail of another + */ +unsigned +sxe_pool_set_indexed_element_state(void * array, unsigned id, unsigned old_state, unsigned new_state) +{ + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + unsigned result = SXE_POOL_LOCK_TAKEN; + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXE_UNUSED_PARAMETER(old_state); /* Used to verify sanity in debug build only */ + SXEE6("(pool=%s,id=%u,old_state=%s,new_state=%s)", + pool->name, id, (*pool->state_to_string)(old_state), (*pool->state_to_string)(new_state)); + SXEA6(id < pool->number, "sxe_pool_set_indexed_element_state(pool=%s,id=%u): Index is too big (number=%u)", + pool->name, id, pool->number); + SXEA6(old_state <= pool->states, "state %u is greater than maximum state %u for pool %s", old_state, pool->states, pool->name); + SXEA6(new_state <= pool->states, "state %u is greater than maximum state %u for pool %s", new_state, pool->states, pool->name); + + if ((result = sxe_pool_lock(pool)) == SXE_POOL_LOCK_NOT_TAKEN) { + goto SXE_ERROR_OUT; + } + + SXEA1(sxe_pool_set_indexed_element_state_unlocked(array, id, old_state, new_state, SXE_POOL_ON_INCORRECT_STATE_ABORT), + "sxe_pool_set_indexed_element_state_unlocked failed: internal fatal error"); + sxe_pool_unlock(pool); + +SXE_ERROR_OUT: + SXER6("return %s", sxe_pool_return_to_string(result)); + return result; +} + +/** + * Try to move a specific object from one state queue to the tail of another + * + * @param array Pointer to the pool array + * @param id Index of the element to move + * @param old_state State to move the element from + * @param new_state_inout Pointer to a state; on input, state to move the element from; on output, state the element is in + * + * @return id if the move occured (in this case, *new_state_inout will be unchanged); SXE_POOL_INCORRECT_STATE if the element + * was not in old_state (in this case, *new_state_inout will be 
set to the state the element was in); or + * SXE_POOL_LOCK_NOT_TAKEN if the pool lock could not be taken + */ +unsigned +sxe_pool_try_to_set_indexed_element_state(void * array, unsigned id, unsigned old_state, unsigned * new_state_inout) +{ + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + unsigned result = SXE_POOL_INCORRECT_STATE; + + SXE_UNUSED_PARAMETER(old_state); /* Used to verify sanity in debug build only */ + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEE6("sxe_pool_set_indexed_element_state(pool=%s,id=%u,old_state=%s,new_state=%s)", + pool->name, id, (*pool->state_to_string)(old_state), (*pool->state_to_string)(*new_state_inout)); + SXEA6(id < pool->number, "sxe_pool_set_indexed_element_state(pool=%s,id=%u): Index is too big (number=%u)", + pool->name, id, pool->number); + SXEA6(old_state <= pool->states, "state %u is greater than maximum state %u for pool %s", old_state, pool->states, pool->name); + SXEA6(*new_state_inout <= pool->states, "state %u is greater than maximum state %u for pool %s", *new_state_inout, pool->states, pool->name); + + if (sxe_pool_lock(pool) == SXE_POOL_LOCK_NOT_TAKEN) { + result = SXE_POOL_LOCK_NOT_TAKEN; /* Coverage exclusion: Add tests before using in multiprocess code */ + goto SXE_ERROR_OUT; /* Coverage exclusion: Add tests before using in multiprocess code */ + } + + if (sxe_pool_set_indexed_element_state_unlocked(array, id, old_state, *new_state_inout, SXE_POOL_ON_INCORRECT_STATE_RETURN_FALSE)) { + result = id; + } + else { + *new_state_inout = sxe_pool_index_to_state(array, id); + } + + sxe_pool_unlock(pool); + +SXE_ERROR_OUT: + SXER6((result == SXE_POOL_LOCK_NOT_TAKEN || result == SXE_POOL_INCORRECT_STATE) ? "return %s" : "return %p", + result == SXE_POOL_LOCK_NOT_TAKEN ? "LOCK_NOT_TAKEN" : + result == SXE_POOL_INCORRECT_STATE ? 
"INCORRECT_STATE" : + SXE_CAST(const char *, result)); + return result; +} + +/** + * Move an object from a the head of one state queue to the tail of another + * + * @return Object's index or SXE_POOL_NO_INDEX if there are no objects in the first state + */ +unsigned +sxe_pool_set_oldest_element_state(void * array, unsigned old_state, unsigned new_state) +{ + SXE_POOL_IMPL *pool = SXE_POOL_ARRAY_TO_IMPL(array); + const SXE_POOL_NODE *node; + unsigned result = SXE_POOL_LOCK_TAKEN; + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEE6("sxe_pool_set_oldest_element_state(pool=%s, old_state=%s, new_state=%s)", + pool->name, (*pool->state_to_string)(old_state), (*pool->state_to_string)(new_state)); + SXEA6(old_state <= pool->states, "old state %u is greater than maximum state %u for pool %s", old_state, pool->states, + pool->name); + SXEA6(new_state <= pool->states, "new state %u is greater than maximum state %u for pool %s", new_state, pool->states, + pool->name); + + if ((result = sxe_pool_lock(pool)) == SXE_POOL_LOCK_NOT_TAKEN) { + goto SXE_ERROR_OUT; + } + + if ((node = sxe_list_peek_head(&SXE_POOL_QUEUE(pool)[old_state])) == NULL) { + SXEL6("sxe_pool_set_oldest_element_state(pool=%s): No objects in state %s; returning SXE_POOL_NO_INDEX", + pool->name, (*pool->state_to_string)(old_state)); + result = SXE_POOL_NO_INDEX; + goto SXE_EARLY_OUT; + } + + result = (unsigned)(node - SXE_POOL_NODES(pool)); + SXEA1(sxe_pool_set_indexed_element_state_unlocked(array, result, old_state, new_state, SXE_POOL_ON_INCORRECT_STATE_ABORT), + "sxe_pool_set_indexed_element_state_unlocked failed: internal fatal error"); + +SXE_EARLY_OUT: + sxe_pool_unlock(pool); + +SXE_ERROR_OUT: + SXER6("return %s", sxe_pool_return_to_string(result)); + return result; +} + +/** + * Update an object's time of use and move it to the back its current state queue + */ +unsigned +sxe_pool_touch_indexed_element(void * array, unsigned id) +{ + SXE_POOL_IMPL *pool = SXE_POOL_ARRAY_TO_IMPL(array); + unsigned 
result = SXE_POOL_LOCK_TAKEN; + const SXE_POOL_NODE *node; + unsigned state; + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEE6("sxe_pool_touch_indexed_element(pool=%s,id=%u)", pool->name, id); + + if ((result = sxe_pool_lock(pool)) == SXE_POOL_LOCK_NOT_TAKEN) { + goto SXE_ERROR_OUT; + } + + node = &SXE_POOL_NODES(pool)[id]; + state = SXE_LIST_NODE_GET_ID(&node->list_node); + SXEA1(sxe_pool_set_indexed_element_state_unlocked(array, id, state, state, SXE_POOL_ON_INCORRECT_STATE_ABORT), + "sxe_pool_set_indexed_element_state_unlocked failed: internal fatal error"); + sxe_pool_unlock(pool); + +SXE_ERROR_OUT: + SXER6("return %s", sxe_pool_return_to_string(result)); + return result; +} + +/** + * Get the index of the oldest object in a given state (or SXE_POOL_NO_INDEX if none) + */ +unsigned +sxe_pool_get_oldest_element_index(void * array, unsigned state) +{ + SXE_POOL_IMPL *pool = SXE_POOL_ARRAY_TO_IMPL(array); + const SXE_POOL_NODE *node; + unsigned id = SXE_POOL_NO_INDEX; + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEE6("sxe_pool_get_oldest_element_index(pool=%s, state=%s)", pool->name, (*pool->state_to_string)(state)); + SXEA6(state <= pool->states, "state %u is greater than maximum state %u for pool %s", state, pool->states, pool->name); + + if (sxe_pool_lock(pool) == SXE_POOL_LOCK_NOT_TAKEN) { + goto SXE_ERROR_OUT; /* Coverage exclusion: Add tests before using in multiprocess code */ + } + + if ((node = sxe_list_peek_head(&SXE_POOL_QUEUE(pool)[state])) == NULL) { + SXEL6("No objects in state %s, returning SXE_POOL_NO_INDEX", (*pool->state_to_string)(state)); + goto SXE_EARLY_OUT; + } + + id = (unsigned)(node - SXE_POOL_NODES(pool)); + +SXE_EARLY_OUT: + sxe_pool_unlock(pool); + +SXE_ERROR_OUT: + SXER6("return %s", sxe_pool_return_to_string(id)); + return id; +} + +/* + * Get the time or count of the oldest object in a given state (or 0 if none) + * + * @param array = Pointer to the pool array + * @param state = State to check + */ +static kit_timestamp_t 
+sxe_pool_impl_get_oldest_element_time_or_count(SXE_POOL_IMPL * pool, unsigned state) +{ + const SXE_POOL_NODE *node; + kit_timestamp_t last_time = 0; + + SXEE6("sxe_pool_get_oldest_element_%s(pool=%s, state=%s)", pool->options & SXE_POOL_OPTION_TIMED ? "time" : "count", + pool->name, (*pool->state_to_string)(state)); + + if (sxe_pool_lock(pool) == SXE_POOL_LOCK_NOT_TAKEN) { + goto SXE_ERROR_OUT; /* Coverage exclusion: Add tests before using in multiprocess code */ + } + + if ((node = sxe_list_peek_head(&SXE_POOL_QUEUE(pool)[state])) == NULL) { + SXEL6("No objects in state %s", (*pool->state_to_string)(state)); + goto SXE_EARLY_OUT; + } + + last_time = node->last.time; + +SXE_EARLY_OUT: + sxe_pool_unlock(pool); + +SXE_ERROR_OUT: + SXER6("return %" PRIu64, last_time); + return last_time; +} + +/** + * Get the last use time of the oldest object in a given state (or 0 if none) + * + * @param array = Pointer to the pool array + * @param state = State to check + * + * @exception Aborts if the pool is not a timed pool + */ +kit_timestamp_t +sxe_pool_get_oldest_element_time(void * array, unsigned state) +{ + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEA1(pool->options & SXE_POOL_OPTION_TIMED, "%s: pool %s is a timed pool", __func__, pool->name); + return sxe_pool_impl_get_oldest_element_time_or_count(pool, state); +} + +/** + * Get the insertion counter of the oldest object in a given state (or 0 if none) + * + * @param array = Pointer to the pool array + * @param state = State to check + * + * @exception Aborts if the pool is a timed pool + */ +uint64_t +sxe_pool_get_oldest_element_count(void * array, unsigned state) +{ + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEA1(!(pool->options & SXE_POOL_OPTION_TIMED), "%s: pool %s is a timed pool", __func__, pool->name); + return sxe_pool_impl_get_oldest_element_time_or_count(pool, state); +} + +/** + * Get 
the time of a given object has been in it's current state, by index + * + * @param array = Pointer to the pool array + * @param state = State to check + * + * @exception Release mode assertion: the pool must be a timed pool + */ +kit_timestamp_t +sxe_pool_get_element_time_by_index(void * array, unsigned element) +{ + SXE_POOL_IMPL *pool = SXE_POOL_ARRAY_TO_IMPL(array); + const SXE_POOL_NODE *node; + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEE6("sxe_pool_get_element_time_by_index(pool=%s, index=%u)", pool->name, element); + SXEA1(pool->options & SXE_POOL_OPTION_TIMED, "sxe_pool_get_element_time_by_index: pool %s is not a timed pool", pool->name); + SXEA6(element < pool->number, "index %u is greater than maximum index %u for pool %s", element, pool->number, pool->name); + + node = &SXE_POOL_NODES(pool)[element]; + +SXE_EARLY_OR_ERROR_OUT: + SXER6("return %llu", (unsigned long long) node->last.time); + return node->last.time; +} + +void +sxe_pool_delete(void * array) +{ + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEE6("sxe_pool_delete(pool=%s)", pool->name); + + if (pool->event_timeout != NULL) { + SXEA1(sxe_list_remove(&sxe_pool_timeout_list, pool) == pool, "Remove always returns the object removed"); + } + + if (pool->state_timeouts) + kit_free(pool->state_timeouts); + + kit_free(pool); + SXER6("return"); +} + +/** + * Reset the lock on a pool + * + * @note Don't call this functions unless you really know what you're doing! 
+ */ +void +sxe_pool_override_locked(void * array) +{ + SXE_POOL_IMPL * pool = SXE_POOL_ARRAY_TO_IMPL(array); + + SXE_POOL_ASSERT_ARRAY_INITIALIZED(array); + SXEE6("sxe_pool_override_locked(pool=%s)", pool->name); + + if (pool->options & SXE_POOL_OPTION_LOCKED) { + sxe_spinlock_force(&pool->spinlock, 0); + } + + SXER6("return"); +} diff --git a/lib-sxe-pool/sxe-pool.h b/lib-sxe-pool/sxe-pool.h new file mode 100644 index 0000000..9a4720b --- /dev/null +++ b/lib-sxe-pool/sxe-pool.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef __SXE_POOL_H__ +#define __SXE_POOL_H__ + +#include "kit-timestamp.h" +#include "sxe-list.h" +#include "sxe-util.h" + +#define SXE_POOL_LOCK_TAKEN 0 /* Only used by sxe_pool_set_indexed_element_state() */ +#define SXE_POOL_NO_INDEX -1U /* All elements are in use or state is empty */ +#define SXE_POOL_INCORRECT_STATE -2U /* Element is not in the expected state */ +#define SXE_POOL_LOCK_NOT_TAKEN -3U /* We gave up trying to acquire lock */ +#define SXE_POOL_NAME_MAXIMUM_LENGTH 31 + +#define SXE_POOL_OPTION_UNLOCKED 0 +#define SXE_POOL_OPTION_LOCKED SXE_BIT_OPTION(0) +#define SXE_POOL_OPTION_TIMED SXE_BIT_OPTION(1) + +typedef void (*SXE_POOL_EVENT_TIMEOUT)( void * array, unsigned array_index, void * caller_info); + +typedef struct SXE_POOL_WALKER { + SXE_LIST_WALKER list_walker; + void * pool; + unsigned state; + union { + kit_timestamp_t time; + uint64_t count; + } last; +} SXE_POOL_WALKER; + +#include "lib-sxe-pool-proto.h" +#endif diff --git a/lib-sxe-pool/test/test-sxe-pool-mmap.c b/lib-sxe-pool/test/test-sxe-pool-mmap.c new file mode 100644 index 0000000..e99fb6f --- /dev/null +++ b/lib-sxe-pool/test/test-sxe-pool-mmap.c @@ -0,0 +1,161 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include /* for PATH_MAX on WIN32 */ +#include /* for PATH_MAX not on WIN32 */ +#include +#include +#include +#include + +#include "kit-process.h" /* For spawn() */ +#include "sxe-log.h" +#include "sxe-mmap.h" +#include "sxe-spinlock.h" +#include "sxe-pool.h" +#include "sxe-test-get-temp-file-name.h" + +#define TEST_WAIT KIT_TIMESTAMP_FROM_UNIX_TIME(4) +#define TEST_CLIENT_INSTANCES 32 + +enum TEST_STATE { + TEST_STATE_FREE, + TEST_STATE_CLIENT_TAKE, + TEST_STATE_CLIENT_DONE, + TEST_STATE_SERVER, + TEST_STATE_NUMBER_OF_STATES +}; + +int +main(int argc, char ** argv) +{ + int fd; + kit_timestamp_t start_time; + unsigned count = 0; /* e.g. master=0, slaves=1, 2, 3, etc */ + char *unique_memmap_path_and_file; /* e.g. 
/tmp/test-sxe-pool-mmap-pid-1234.bin */ + char unique_memmap_path_and_file_master_buffer[PATH_MAX]; + unsigned unique_memmap_path_and_file_master_buffer_used; + unsigned id; + unsigned *pool; + unsigned *shared; + size_t size; + SXE_MMAP memmap; + int child[TEST_CLIENT_INSTANCES]; + SXE_POOL_WALKER walker; + + putenv(SXE_CAST_NOCONST(char *, "SXE_LOG_LEVEL_LIBKIT_LIB_SXE_POOL=5")); /* Set to 5 to suppress sxe-pool debug logging since this is kind of a stress test */ + putenv(SXE_CAST_NOCONST(char *, "SXE_LOG_LEVEL_LIBKIT_LIB_SXE_LIST=5")); /* Set to 5 to suppress sxe-list debug logging since this is kind of a stress test */ + + if (argc > 1) { + count = atoi(argv[1]); + unique_memmap_path_and_file = argv[2] ; + + SXEL1("Instance %2u unique memmap path and file: %s", count, unique_memmap_path_and_file); + sxe_mmap_open(&memmap, unique_memmap_path_and_file); + shared = SXE_CAST(unsigned *, SXE_MMAP_ADDR(&memmap)); + pool = sxe_pool_from_base(shared); + SXEL6("Instance %2u mapped to shared pool // base=%p, pool=%p", count, shared, pool); + + do { + usleep(10000 * count); + id = sxe_pool_set_oldest_element_state(pool, TEST_STATE_FREE, TEST_STATE_CLIENT_TAKE); + SXEA1(id != SXE_POOL_LOCK_NOT_TAKEN, "Got SXE_POOL_LOCK_NOT_TAKEN"); + } while (id == SXE_POOL_NO_INDEX); + + SXEL6("Instance %2u got pool element %u", count, id); + pool[id] = count; + sxe_pool_set_indexed_element_state(pool, id, TEST_STATE_CLIENT_TAKE, TEST_STATE_CLIENT_DONE); + sxe_mmap_close(&memmap); + SXEL6("Instance %2u exiting", count); + return 0; + } + + plan_tests(6); + + sxe_test_get_temp_file_name("test-sxe-mmap-pool", unique_memmap_path_and_file_master_buffer, sizeof(unique_memmap_path_and_file_master_buffer), &unique_memmap_path_and_file_master_buffer_used); + unique_memmap_path_and_file = &unique_memmap_path_and_file_master_buffer[0]; + SXEL1("Instance %2d unique memmap path and file: %s", count, unique_memmap_path_and_file); + + ok((size = sxe_pool_size(TEST_CLIENT_INSTANCES/2, sizeof(*pool), 
TEST_STATE_NUMBER_OF_STATES)) >= TEST_CLIENT_INSTANCES * sizeof(*pool), + "Expect pool size %u to be at least the size of the array %u", (unsigned)size, (unsigned)(TEST_CLIENT_INSTANCES * sizeof(*pool))); + + SXEA1((fd = open(unique_memmap_path_and_file, O_CREAT | O_TRUNC | O_WRONLY, 0666)) >= 0, "Failed to create file '%s': %s", unique_memmap_path_and_file, strerror(errno)); + SXEA1(ftruncate(fd, size) >= 0, "Failed to extend the file to %zu bytes: %s", size, strerror(errno)); + close(fd); + sxe_mmap_open(&memmap, unique_memmap_path_and_file); + shared = SXE_CAST(unsigned *, SXE_MMAP_ADDR(&memmap)); + + pool = sxe_pool_construct(shared, "shared-pool", TEST_CLIENT_INSTANCES/2, sizeof(*pool), TEST_STATE_NUMBER_OF_STATES, + SXE_POOL_OPTION_LOCKED); + + for (count = 0; count < TEST_CLIENT_INSTANCES; count++) { + char buffer[12]; + + snprintf(buffer, sizeof(buffer), "%u", count + 1); + child[count] = kit_spawnl(P_NOWAIT, argv[0], argv[0], buffer, unique_memmap_path_and_file, NULL); + SXEA1(child[count] != -1, "Failed to spawn '%s %s': %s", argv[0], buffer, strerror(errno)); + } + + start_time = kit_timestamp_get(); + + for (count = 0; (count < TEST_CLIENT_INSTANCES); ) { + SXEA1((TEST_WAIT + start_time ) > kit_timestamp_get(), "Unexpected timeout (start_time=%lu, current=%lu)", + start_time, kit_timestamp_get()); + usleep(10000); + id = sxe_pool_set_oldest_element_state(pool, TEST_STATE_CLIENT_DONE, TEST_STATE_FREE); + + /* Assert here in the test. The actual service would take specific action here */ + SXEA1(id != SXE_POOL_LOCK_NOT_TAKEN, "Parent: Failed to acqure lock .. yield limit reached. 
id %u vs %u", id, + SXE_POOL_LOCK_NOT_TAKEN); + + if (id != SXE_POOL_NO_INDEX) { + SXEL6("Looks like instance %u got element %u", pool[id], id); + count++; + } + } + + ok(count == TEST_CLIENT_INSTANCES, "All clients got an element in the pool"); + start_time = kit_timestamp_get(); + + for (count = 0; (count < TEST_CLIENT_INSTANCES); count++) { + SXEA1((TEST_WAIT + start_time ) > kit_timestamp_get(), "Unexpected timeout (start_time=%lu, current=%lu)", + start_time, kit_timestamp_get()); + SXEA1(kit_cwait(NULL, child[count], WAIT_CHILD) == child[count], "Unexpected return from cwait for process 0x%08x: %s", + child[count], strerror(errno)); + } + + sxe_spinlock_take((SXE_SPINLOCK *)sxe_pool_to_base(pool)); + is(sxe_pool_set_indexed_element_state(pool, 0, 0, 1), SXE_POOL_LOCK_NOT_TAKEN, "Can't set state of an element if the pool is locked"); + is(sxe_pool_set_oldest_element_state( pool, 0, 1), SXE_POOL_LOCK_NOT_TAKEN, "Can't set state of oldest element if the pool is locked"); + is(sxe_pool_touch_indexed_element( pool, 0), SXE_POOL_LOCK_NOT_TAKEN, "Can't touch an element if the pool is locked"); + sxe_pool_walker_construct(&walker, pool, TEST_STATE_FREE); + is(sxe_pool_walker_step(&walker), SXE_POOL_LOCK_NOT_TAKEN, "Can't step a walker if the pool is locked"); + sxe_spinlock_give((SXE_SPINLOCK *)sxe_pool_to_base(pool)); + + sxe_pool_override_locked(pool); /* for coverage */ + sxe_mmap_close(&memmap); + SXEL1("Instance %02d unlinking: %s", count, unique_memmap_path_and_file); + unlink(unique_memmap_path_and_file); + SXEL1("Instance %02d exiting // master", count); + return exit_status(); +} diff --git a/lib-sxe-pool/test/test-sxe-pool-thrash.c b/lib-sxe-pool/test/test-sxe-pool-thrash.c new file mode 100644 index 0000000..f4c882b --- /dev/null +++ b/lib-sxe-pool/test/test-sxe-pool-thrash.c @@ -0,0 +1,187 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include
+#include
+
+#include "sxe-log.h"
+#include "sxe-pool-private.h"
+#include "sxe-spinlock.h"
+#include "sxe-thread.h"
+#include "tap.h"
+
+#define TEST_ELEMENT_NUMBER 4    /* Number of elements in the pool being thrashed */
+
+typedef unsigned TEST_ELEMENT_TYPE;
+
+typedef enum TEST_STATE {
+    TEST_STATE_UNUSED,
+    TEST_STATE_USED,
+    TEST_ELEMENT_NUMBER_OF_STATES /* This is not a state and must be last */
+} TEST_STATE;
+
+typedef struct TEST_STATS {
+    unsigned shoves;     /* Elements successfully moved out of this state by a mover thread        */
+    unsigned empties;    /* Times a mover thread found no element in this state (SXE_POOL_NO_INDEX) */
+    unsigned walks;      /* Complete walks of this state's list by a walker thread                 */
+    unsigned steps;      /* Individual elements visited by a walker thread                         */
+} TEST_STATS;
+
+TEST_ELEMENT_TYPE * test_array;
+TEST_STATS test_stats[TEST_ELEMENT_NUMBER_OF_STATES];    /* Indexed by TEST_STATE; updated by the threads without locking */
+/* Map a TEST_STATE value to its name; returns NULL for any value that is not a real state */
+static const char *
+test_state_to_string(unsigned state)
+{
+    switch (state) {
+    case TEST_STATE_UNUSED: return "UNUSED";
+    case TEST_STATE_USED: return "USED";
+    }
+
+    return NULL;
+}
+/* Thread body: endlessly move the oldest element from the given state to the other state, counting results in test_stats */
+static SXE_THREAD_RETURN SXE_STDCALL
+test_mover_thread(void * state_as_ptr)
+{
+    TEST_STATE from_state = (TEST_STATE)state_as_ptr;    /* The state is passed by value in the thread's pointer argument */
+    TEST_STATE to_state = from_state == TEST_STATE_UNUSED ? TEST_STATE_USED : TEST_STATE_UNUSED;
+
+    SXEE6("test_mover_thread(from_state=%s)", test_state_to_string(from_state));
+
+    for (;;) {
+        switch(sxe_pool_set_oldest_element_state(test_array, from_state, to_state)) {
+        case SXE_POOL_LOCK_NOT_TAKEN: SXEA1(SXE_POOL_LOCK_NOT_TAKEN, "Lock timed out"); break; /* NOTE(review): asserts on a constant; only aborts if SXE_POOL_LOCK_NOT_TAKEN is 0 - confirm intent */
+        case SXE_POOL_NO_INDEX: test_stats[from_state].empties++; SXE_YIELD(); break;
+        default: test_stats[from_state].shoves++; break;
+        }
+    }
+
+    SXER6("return 0");    /* Not reached: the loop above has no exit */
+    return 0;
+}
+/* Thread body: endlessly walk the elements in the given state, counting steps and completed walks in test_stats */
+static SXE_THREAD_RETURN SXE_STDCALL
+test_walker_thread(void * state_as_ptr)
+{
+    TEST_STATE state = (TEST_STATE)state_as_ptr;
+    SXE_POOL_WALKER walker;
+    unsigned node;
+
+    SXEE6("test_walker_thread(state=%s)", test_state_to_string(state));
+
+    for (;;) {
+        sxe_pool_walker_construct(&walker, test_array, state);
+
+        for (;;) {
+            switch(node = sxe_pool_walker_step(&walker)) {
+            case SXE_POOL_LOCK_NOT_TAKEN: SXEA1(SXE_POOL_LOCK_NOT_TAKEN, "Lock timed out"); break; /* NOTE(review): same constant assertion as in test_mover_thread() */
+            default: break;
+            }
+
+            if (node == SXE_POOL_NO_INDEX) {    /* End of this walk */
+                break;
+            }
+
+            test_stats[state].steps++;
+        }
+
+        test_stats[state].walks++;
+
+        if (sxe_pool_get_number_in_state(test_array, state) == 0) {    /* Nothing to walk: give the other threads a turn */
+            SXE_YIELD();
+        }
+    }
+
+    SXER6("return 0");    /* Not reached: the loop above has no exit */
+    return 0;
+}
+/* Check the pool's state strings single-threaded, then start two movers and two walkers and report their stats every second */
+int
+main(int argc, char * argv[])
+{
+    unsigned seconds_to_run = 1;
+    SXE_THREAD thread;
+    TEST_STATE state;
+
+    if (!getenv("SXE_LOG_LEVEL")) {    /* Only set a log level if the caller has not chosen one */
+        setenv("SXE_LOG_LEVEL", "3", 0);
+    }
+
+    if (argc > 1) {
+        if (strcmp(argv[1], "-i") != 0) {
+            fprintf(stderr, "usage: test-sxe-pool-thrash [-i] - thrash a pool with multiple threads\n"
+                            " -i = Run forever (default = run for one second)\n");
+            exit(1);
+        }
+
+        seconds_to_run = ~0U;    /* -i: count down from UINT_MAX seconds, i.e. effectively forever */
+    }
+
+    plan_tests(15);
+    test_array = sxe_pool_new("tidalpool", TEST_ELEMENT_NUMBER, sizeof(TEST_ELEMENT_TYPE), TEST_ELEMENT_NUMBER_OF_STATES,
+                              SXE_POOL_OPTION_LOCKED);
+
+    SXE_POOL_IMPL *pool = SXE_POOL_ARRAY_TO_IMPL(test_array);
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 0)), "0", "after sxe_pool_new(), thread state of pool[0] is \"0\"");
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 1)), "0", "after sxe_pool_new(), thread state of pool[1] is \"0\"");
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 2)), "0", "after sxe_pool_new(), thread state of pool[2] is \"0\"");
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 3)), "0", "after sxe_pool_new(), thread state of pool[3] is \"0\"");
+
+    is(sxe_pool_set_oldest_element_state(test_array, TEST_STATE_UNUSED, TEST_STATE_USED), 3, "Switched pool[3] to \"USED\"");
+
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 0)), "0", "after switching one pool, state of pool[0] is \"0\"");
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 1)), "0", "after switching one pool, state of pool[1] is \"0\"");
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 2)), "0", "after switching one pool, state of pool[2] is \"0\"");
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 3)), "1", "after switching one pool, state of pool[3] is \"1\"");
+
+    is(sxe_pool_set_oldest_element_state(test_array, TEST_STATE_UNUSED, TEST_STATE_USED), 2, "Switched pool[2] to \"USED\"");
+
+    sxe_pool_set_state_to_string(test_array, test_state_to_string);    /* From here on states are reported by name, not number */
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 0)), "UNUSED", "using test_state_to_string(), state of pool[0] is \"UNUSED\"");
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 1)), "UNUSED", "using test_state_to_string(), state of pool[1] is \"UNUSED\"");
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 2)), "USED", "using test_state_to_string(), state of pool[2] is \"USED\"");
+    is_eq(pool->state_to_string(sxe_pool_index_to_state(test_array, 3)), "USED", "using test_state_to_string(), state of pool[3] is \"USED\"");
+
+    diag("Starting the \"thrash\"");
+    SXEA1(sxe_thread_create(&thread, test_mover_thread, (void *)TEST_STATE_UNUSED, SXE_THREAD_OPTION_DEFAULTS) == SXE_RETURN_OK,
+          "Unable to create thread");
+    SXEA1(sxe_thread_create(&thread, test_walker_thread, (void *)TEST_STATE_UNUSED, SXE_THREAD_OPTION_DEFAULTS) == SXE_RETURN_OK,
+          "Unable to create thread");
+    SXEA1(sxe_thread_create(&thread, test_mover_thread, (void *)TEST_STATE_USED, SXE_THREAD_OPTION_DEFAULTS) == SXE_RETURN_OK,
+          "Unable to create thread");
+    SXEA1(sxe_thread_create(&thread, test_walker_thread, (void *)TEST_STATE_USED, SXE_THREAD_OPTION_DEFAULTS) == SXE_RETURN_OK,
+          "Unable to create thread");
+
+    while (seconds_to_run--) {
+        usleep(1000000); /* 1 second */
+
+        diag("\nState Shoves Empties Walks Steps");
+
+        for (state = TEST_STATE_UNUSED; state < TEST_ELEMENT_NUMBER_OF_STATES; state++) {
+            diag("%.5s %10u %10u %10u %10u", test_state_to_string(state), test_stats[state].shoves,
+                 test_stats[state].empties, test_stats[state].walks, test_stats[state].steps);
+        }
+    }
+
+    pass("Ran for one second without aborting");    /* The worker threads are never joined; they are still running when main() returns */
+    return exit_status();
+}
diff --git a/lib-sxe-pool/test/test-sxe-pool-walker.c 
b/lib-sxe-pool/test/test-sxe-pool-walker.c new file mode 100644 index 0000000..4f99596 --- /dev/null +++ b/lib-sxe-pool/test/test-sxe-pool-walker.c @@ -0,0 +1,59 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include "sxe-log.h"
+#include "sxe-pool.h"
+#include "tap.h"
+
+#define TEST_ELEMENT_NUMBER 5    /* Number of elements in the pool under test (indices 0 to 4) */
+
+typedef unsigned TEST_ELEMENT_TYPE;
+
+typedef enum TEST_STATE {
+    TEST_STATE_FREE,
+    TEST_STATE_USED,
+    TEST_ELEMENT_NUMBER_OF_STATES /* This is not a state and must be last */
+} TEST_STATE;
+/* Check that a walker over the free list keeps stepping correctly while elements are moved out of that list underneath it */
+int
+main(void)
+{
+    TEST_ELEMENT_TYPE * array;
+    SXE_POOL_WALKER walker;
+
+    plan_tests(9);
+    array = sxe_pool_new("looppool", TEST_ELEMENT_NUMBER, sizeof(TEST_ELEMENT_TYPE), TEST_ELEMENT_NUMBER_OF_STATES,
+                         SXE_POOL_OPTION_LOCKED);
+
+    sxe_pool_walker_construct(&walker, array, TEST_STATE_FREE);    /* Constructed before any element leaves the free list */
+    is(sxe_pool_get_oldest_element_count(array, TEST_STATE_FREE), 1, "Oldest element has insertion count 1");
+    is(sxe_pool_set_oldest_element_state(array, TEST_STATE_FREE, TEST_STATE_USED), 4, "Allocated the oldest element (4)");
+    is(sxe_pool_get_oldest_element_count(array, TEST_STATE_FREE), 2, "2nd oldest element has insertion count 2");
+    is(sxe_pool_walker_step(&walker), 3, "First step in free list is (3)");    /* Element 4 already left the list, so the walk starts at 3 */
+    is(sxe_pool_set_oldest_element_state(array, TEST_STATE_FREE, TEST_STATE_USED), 3, "Allocated the oldest element (3)");    /* Removes the element the walker is on */
+    is(sxe_pool_walker_step(&walker), 2, "Second step in free list is (2)");
+    is(sxe_pool_walker_step(&walker), 1, "Third step in free list is (1)");
+    is(sxe_pool_set_indexed_element_state(array, 1, TEST_STATE_FREE, TEST_STATE_USED),
+       SXE_POOL_LOCK_TAKEN, "Moved current element (1) to used list");
+    is(sxe_pool_walker_step(&walker), 0, "Fourth step in free list is (0)");
+    sxe_pool_delete(array);
+    return exit_status();
+}
diff --git a/lib-sxe-pool/test/test-sxe-pool.c b/lib-sxe-pool/test/test-sxe-pool.c
new file mode 100644
index 0000000..da28fb3
--- /dev/null
+++ b/lib-sxe-pool/test/test-sxe-pool.c
@@ -0,0 +1,347 @@
+/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "kit-alloc.h"
+#include "kit-mock.h"
+#include "sxe-log.h"
+#include "sxe-pool.h"
+#include "sxe-util.h"
+
+enum TEST_STATE {
+    TEST_STATE_FREE = 0,
+    TEST_STATE_USED,
+    TEST_STATE_ABUSED,
+    TEST_STATE_NUMBER_OF_STATES /* This is not a state; it MUST come last */
+};
+
+#define TEST_POOL_GET_NUMBER_FREE( pool) sxe_pool_get_number_in_state(pool, TEST_STATE_FREE)
+#define TEST_POOL_GET_NUMBER_USED( pool) sxe_pool_get_number_in_state(pool, TEST_STATE_USED)
+#define TEST_POOL_GET_NUMBER_ABUSED(pool) sxe_pool_get_number_in_state(pool, TEST_STATE_ABUSED)
+
+unsigned test_pool_1_timeout_call_count = 0;    /* Number of times test_pool_1_timeout() has been called */
+unsigned test_pool_2_timeout_call_count = 0;    /* Number of times test_pool_2_timeout() has been called */
+/* Timeout callback for pool_1_timeout: counts calls; moves the element ABUSED -> FREE on call 1 and USED -> FREE on call 2 */
+static void
+test_pool_1_timeout(void * array, unsigned array_index, void * caller_info)
+{
+    SXEE6("test_pool_1_timeout(array=%p,array_index=%u,caller_info=%p)", array, array_index, caller_info);
+    SXE_UNUSED_PARAMETER(caller_info);
+    test_pool_1_timeout_call_count ++;
+
+    if (1 == test_pool_1_timeout_call_count) {
+        sxe_pool_set_indexed_element_state(array, array_index, TEST_STATE_ABUSED, TEST_STATE_FREE);
+    }
+
+    if (2 == test_pool_1_timeout_call_count) {
+        sxe_pool_set_indexed_element_state(array, array_index, TEST_STATE_USED, TEST_STATE_FREE);
+    }
+
+    SXER6("return");
+}
+/* Timeout callback for pool_2_timeout: counts calls; moves USED -> FREE (call 1), ABUSED -> USED (call 2), USED -> FREE (call 3) */
+static void
+test_pool_2_timeout(void * array, unsigned array_index, void * caller_info)
+{
+    SXEE6("test_pool_2_timeout(array=%p,array_index=%u)", array, array_index);
+    SXE_UNUSED_PARAMETER(caller_info);
+    test_pool_2_timeout_call_count ++;
+
+    if (1 == test_pool_2_timeout_call_count) {
+        sxe_pool_set_indexed_element_state(array, array_index, TEST_STATE_USED, TEST_STATE_FREE);
+    }
+
+    if (2 == test_pool_2_timeout_call_count) {
+        sxe_pool_set_indexed_element_state(array, array_index, TEST_STATE_ABUSED, TEST_STATE_USED);
+    }
+
+    if (3 == test_pool_2_timeout_call_count) {
+        sxe_pool_set_indexed_element_state(array, array_index, TEST_STATE_USED, TEST_STATE_FREE);
+    }
+
+    SXER6("return");
+}
+/* Timeout callback for pool_3_timeout: moves the element from state 0 to state 1 and straight back again */
+static void
+test_pool_3_timeout(void * array, unsigned array_index, void * caller_info)
+{
+    SXEE6("test_pool_3_timeout(array=%p,array_index=%u)", array, array_index);
+    SXE_UNUSED_PARAMETER(caller_info);
+    sxe_pool_set_indexed_element_state(array, array_index, 0, 1);
+    sxe_pool_set_indexed_element_state(array, array_index, 1, 0);
+    SXER6("return");
+}
+
+static struct timeval test_mock_gettimeofday_timeval;    /* The time that test_mock_gettimeofday() reports; advanced by main() */
+/* Replacement for gettimeofday(): copies test_mock_gettimeofday_timeval into *tv, ignores tz and always returns 0 */
+static int
+test_mock_gettimeofday(struct timeval * __restrict tv,
+#ifdef __APPLE__
+                       void * __restrict tz
+#elif __GNUC__ >= 9
+                       void * __restrict tz
+#else
+                       struct timezone * __restrict tz
+#endif
+                       )
+{
+    /* Note: It's safe to use log functions here because they don't use mocked versions of gettimeofday() :-) */
+    SXEE6("%s(tv=%p, tz=%p)", __func__, tv, tz);
+    SXEA6(tv != NULL, "tv must never contain NULL");
+    SXE_UNUSED_PARAMETER(tz);
+    memcpy(tv, &test_mock_gettimeofday_timeval, sizeof(*tv));
+    SXER6("return // gettimeofday: %f", (double)tv->tv_sec + 1.e-6 * (double)tv->tv_usec);
+    return 0;
+}
+/* Exercise pool construction, state changes, touch ordering and timeout callbacks, then check that no memory was leaked */
+int
+main(void)
+{
+    unsigned *pool;
+    size_t size;
+    void *base[2];    /* Two copies of the same constructed pool; the main loop below runs once against each */
+    unsigned i;
+    SXE_POOL_WALKER walker;
+    unsigned j;
+    unsigned id;
+    unsigned *pool_1_timeout;
+    unsigned *pool_2_timeout;
+    unsigned *pool_3_timeout;
+    kit_timestamp_t pool_1_timeouts[] = {KIT_TIMESTAMP_FROM_UNIX_TIME(0), KIT_TIMESTAMP_FROM_UNIX_TIME(4), KIT_TIMESTAMP_FROM_UNIX_TIME(3)};
+    kit_timestamp_t pool_2_timeouts[] = {KIT_TIMESTAMP_FROM_UNIX_TIME(0), KIT_TIMESTAMP_FROM_UNIX_TIME(1), KIT_TIMESTAMP_FROM_UNIX_TIME(2)};
+    kit_timestamp_t pool_3_timeouts[] = {KIT_TIMESTAMP_FROM_UNIX_TIME(1), KIT_TIMESTAMP_FROM_UNIX_TIME(0)};
+    unsigned node_a;
+    unsigned node_b;
+    unsigned oldest;
+    time_t time_0;
+    kit_timestamp_t oldtime;
+    kit_timestamp_t curtime;
+    char timestamp[KIT_TIMESTAMP_STRING_SIZE];
+    unsigned state;
+    unsigned oldest_index;
+
+    time(&time_0);
+    plan_tests(122);
+    uint64_t start_allocations = kit_memory_allocations();    /* Baseline for the leak check at the end of main() */
+    // KIT_ALLOC_SET_LOG(1); // Turn off when done
+
+    /* Initialization causes expected state
+     */
+    ok((size = sxe_pool_size(4, sizeof(*pool), TEST_STATE_NUMBER_OF_STATES)) >= 4 * sizeof(*pool),
+       "Expect pool size %u to be at least the size of the array %u", (unsigned)size, 4 * (unsigned)sizeof(*pool));
+    SXEA1((base[0] = kit_malloc(size)) != NULL, "Couldn't allocate memory for 1st copy of pool");
+    pool = sxe_pool_construct(base[0], "cesspool", 4, sizeof(*pool), TEST_STATE_NUMBER_OF_STATES, SXE_POOL_OPTION_TIMED);
+    SXEA1((base[1] = kit_malloc(size)) != NULL, "Couldn't allocate memory for 2nd copy of pool");
+    memcpy(base[1], base[0], size);    /* Snapshot the freshly constructed pool so the same tests can be rerun on the copy */
+
+#if defined(_WIN64) || defined(_LP64)
+    ok((uint64_t)sxe_pool_size(1010000, 7672, 2) >= 7748720000ULL, "Size of a huge pool must be at least the array size");
+#else
+    ok(sxe_pool_size(101, 7672, 2) >= 774872, "Size of a small pool must be at least the array size");
+#endif
+
+    is_eq(sxe_pool_get_name(pool), "cesspool", "sxe_pool_get_name() works");
+
+    for (i = 0; i < 2; i++) {
+        if (i == 1) {
+            pool = sxe_pool_from_base(base[1]);
+            memset(base[0], 0xF0, size);    /* Trash the first copy so that any lingering use of it shows up */
+            is(base[1], sxe_pool_to_base(pool), "Pool %u array maps back to pool", i);
+
+        }
+        else {
+            is(pool, sxe_pool_from_base(base[i]), "Pool %u array is the expected one", i);
+        }
+
+        is(TEST_POOL_GET_NUMBER_FREE(pool), 4, "4 free objects in newly created pool");
+        sxe_pool_walker_construct(&walker, pool, TEST_STATE_FREE);
+
+        for (j = 0; (id = sxe_pool_walker_step(&walker)) != SXE_POOL_NO_INDEX; j++) {
+            SXEA1(id <= 3, "test_visit_count: id %u is > 3", id);
+        }
+
+        is(j, 4, "Visited 4 free objects in newly created pool");
+        is(TEST_POOL_GET_NUMBER_USED(pool), 0, "0 used objects in newly created pool");
+        is(TEST_POOL_GET_NUMBER_ABUSED(pool), 0, "0 abused objects in newly created pool");
+        ok(sxe_pool_get_oldest_element_index(pool, TEST_STATE_USED) == SXE_POOL_NO_INDEX, "Oldest object is SXE_POOL_NO_INDEX when pool is empty");
+        ok(sxe_pool_get_oldest_element_time( pool, TEST_STATE_USED) == 0, "Oldest time is 0 when pool is empty");
+
+        /*****
+         * Verify the state between each of the following operations
+         * - Use 2 nodes (0 and 2)
+         * - Touch 0 (oldest used)
+         * - Mark 0 as free
+         * - Touch 0 (a free node)
+         * - Mark 2 as free
+         * (Note, assertions in the code catch double free and double use)
+         */
+        sxe_pool_set_indexed_element_state(pool, 0, TEST_STATE_FREE, TEST_STATE_USED);
+        sxe_pool_set_indexed_element_state(pool, 2, TEST_STATE_FREE, TEST_STATE_USED);
+        is(sxe_pool_index_to_state(pool, 0), TEST_STATE_USED, "Index 0 is in use");
+        is(sxe_pool_index_to_state(pool, 2), TEST_STATE_USED, "Index 2 is in use");
+
+        oldtime = sxe_pool_get_oldest_element_time(pool, TEST_STATE_USED);
+        ok(oldtime, "Pool not empty so oldest time is %s, not 0", kit_timestamp_to_buf(oldtime, timestamp, sizeof(timestamp)));
+        oldest_index = sxe_pool_get_oldest_element_index(pool, TEST_STATE_USED);
+        is(oldtime, sxe_pool_get_element_time_by_index(pool, oldest_index), "Get time by index matches get time by oldest element");
+
+        is(TEST_POOL_GET_NUMBER_FREE(pool), 2 , "Pool state is now: 2 free");
+        is(TEST_POOL_GET_NUMBER_USED(pool), 2 , "Pool state is now: 2 used");
+
+        /* Wait for the next tick before touching your nodes :)
+         */
+        while ((curtime = kit_timestamp_get()) <= oldtime) {
+        }
+
+        sxe_pool_touch_indexed_element(pool, 0);
+        is(sxe_pool_get_oldest_element_index(pool, TEST_STATE_USED), 2, "After touching 0, element 2 should be oldest");
+        sxe_pool_touch_indexed_element(pool, 2);
+        is(sxe_pool_get_oldest_element_index(pool, TEST_STATE_USED), 0, "After touching 2, element 0 should be oldest");
+        ok(sxe_pool_get_oldest_element_time(pool, TEST_STATE_USED) > oldtime, "Touching element 0 updated its time to %s",
+           kit_timestamp_to_buf(sxe_pool_get_oldest_element_time(pool, TEST_STATE_USED), timestamp, sizeof(timestamp)));
+        is(TEST_POOL_GET_NUMBER_FREE(pool), 2 , "Pool state is now: 2 free");
+        is(TEST_POOL_GET_NUMBER_USED(pool), 2 , "Pool state is now: 2 used");
+
+        state = TEST_STATE_FREE;    /* In: the state to move the element to; out: its actual state if the move is refused */
+        is(sxe_pool_try_to_set_indexed_element_state(pool, 0, TEST_STATE_ABUSED, &state), SXE_POOL_INCORRECT_STATE,
+           "Element 0 is not in ABUSED state");
+        is(state, TEST_STATE_USED, "Element 0 is in the USED state");
+        sxe_pool_set_indexed_element_state(pool, 0, TEST_STATE_USED, TEST_STATE_FREE);
+        is(sxe_pool_index_to_state(pool, 0), TEST_STATE_FREE, "Index 0 has now been freed");
+        is(sxe_pool_index_to_state(pool, 2), TEST_STATE_USED, "Index 2 is still in use");
+        ok(sxe_pool_get_oldest_element_time(pool, TEST_STATE_USED) > 0.0, "Pool still not empty so oldest time is not 0");
+        is(TEST_POOL_GET_NUMBER_FREE(pool), 3 , "Pool state is now: 3 free");
+        is(TEST_POOL_GET_NUMBER_USED(pool), 1 , "Pool state is now: 1 used");
+
+        sxe_pool_touch_indexed_element(pool, 0);
+        is(sxe_pool_index_to_state(pool, 0), TEST_STATE_FREE, "Index 0 is still free");
+        is(sxe_pool_index_to_state(pool, 2), TEST_STATE_USED, "Index 2 is still in use");
+        ok(sxe_pool_get_oldest_element_time(pool, TEST_STATE_USED) > 0.0, "Pool not empty so oldest time not 0");
+        is(TEST_POOL_GET_NUMBER_FREE(pool), 3 , "Touch of a free node doesn't change pool state");
+        is(TEST_POOL_GET_NUMBER_USED(pool), 1 , "Touch of a free node doesn't change pool state");
+
+        sxe_pool_set_indexed_element_state(pool, 2, TEST_STATE_USED, TEST_STATE_FREE);
+        is(sxe_pool_index_to_state(pool, 0), TEST_STATE_FREE, "Index 0 is still free");
+        is(sxe_pool_index_to_state(pool, 2), TEST_STATE_FREE, "Index 2 is free as well");
+        ok(sxe_pool_get_oldest_element_time(pool, TEST_STATE_USED) == 0.0, "Pool is empty so oldest time is 0.0");
+        is(TEST_POOL_GET_NUMBER_FREE(pool), 4 , "Pool state is now: 4 free");
+        is(TEST_POOL_GET_NUMBER_USED(pool), 0 , "Pool state is now: 0 used");
+
+        /* test allocating two objects consecutively using sxe_pool_next_free()
+         */
+        node_a = sxe_pool_set_oldest_element_state(pool, TEST_STATE_FREE, TEST_STATE_USED);
+        is(sxe_pool_index_to_state(pool, node_a), TEST_STATE_USED, "The node A given really is in use");
+        node_b = sxe_pool_set_oldest_element_state(pool, TEST_STATE_FREE, TEST_STATE_USED);
+        is(sxe_pool_index_to_state(pool, node_b), TEST_STATE_USED, "The node B given really is in use");
+
+        /* track the state of the pool (check for oldest time and object, used and freed objects)
+         */
+        is((oldest = sxe_pool_get_oldest_element_index(pool, TEST_STATE_USED)), node_a, "The oldest is node A");
+        oldtime = sxe_pool_get_oldest_element_time(pool, TEST_STATE_USED);
+        ok(oldtime >= KIT_TIMESTAMP_FROM_UNIX_TIME(time_0), "Oldest time %s >= start time_t %ld",
+           kit_timestamp_to_buf(oldtime, timestamp, sizeof(timestamp)), time_0);    /* NOTE(review): %ld assumes time_t is a long - confirm or cast */
+        sxe_pool_touch_indexed_element(pool, node_a);
+        is((oldest = sxe_pool_get_oldest_element_index(pool, TEST_STATE_USED)), node_b, "The oldest is now node B");
+        is(TEST_POOL_GET_NUMBER_FREE(pool), 2, "Pool state is now: 2 free");
+        is(TEST_POOL_GET_NUMBER_USED(pool), 2, "Pool state is now: 2 used");
+
+        /* TODO: Better tests of the time values and how the oldest time changes
+         * in different scenarios (touch, mark_free, get_next_free, ...) */
+
+        state = TEST_STATE_FREE;
+        is(sxe_pool_try_to_set_indexed_element_state(pool, node_a, TEST_STATE_USED, &state), node_a,
+           "Try to set succeeded in setting element state");
+        is(state, TEST_STATE_FREE, "New state was not changed, as expected");
+        ok(sxe_pool_set_oldest_element_state(pool, TEST_STATE_FREE, TEST_STATE_USED) != node_a, "Doesn't immediately reallocate node A after freeing it");
+
+        /* allocate all objects in pool, then test expected behaviour
+         */
+        ok(sxe_pool_set_oldest_element_state(pool, TEST_STATE_FREE, TEST_STATE_ABUSED) != SXE_POOL_NO_INDEX, "Allocated a third object");
+        ok(sxe_pool_set_oldest_element_state(pool, TEST_STATE_FREE, TEST_STATE_ABUSED) != SXE_POOL_NO_INDEX, "Allocated a fourth object");
+        ok(sxe_pool_set_oldest_element_state(pool, TEST_STATE_FREE, TEST_STATE_ABUSED) == SXE_POOL_NO_INDEX, "Couldn't allocate a fifth object");
+
+        /* Make sure a third state works
+         */
+        is((oldest = sxe_pool_get_oldest_element_index(pool, TEST_STATE_ABUSED)), 2, "Object 2 is oldest in abused state");
+        is(sxe_pool_index_to_state(pool, oldest), TEST_STATE_ABUSED, "Object thinks it's abused");
+        sxe_pool_set_indexed_element_state(pool, oldest, TEST_STATE_ABUSED, TEST_STATE_FREE);
+        is(TEST_POOL_GET_NUMBER_FREE(pool), 1, "Pool has one free object");
+        is((oldest = sxe_pool_get_oldest_element_index(pool, TEST_STATE_ABUSED)), 3, "Object 3 is in abused state");
+        is((oldest = sxe_pool_get_oldest_element_index(pool, TEST_STATE_USED)), 1, "Object 1 is in used state");
+        is(sxe_pool_index_to_state(pool, oldest), TEST_STATE_USED, "Object thinks it's used");
+        sxe_pool_set_indexed_element_state(pool, oldest, TEST_STATE_USED, TEST_STATE_FREE);
+        is((oldest = sxe_pool_get_oldest_element_index(pool, TEST_STATE_USED)), 0, "Object 0 is in used state");
+    }
+
+    kit_free(base[0]);
+    kit_free(base[1]);
+
+    diag("Test sxe pools with timeouts");
+    {
+#       define TEST_TIMEOUT "Test timeout: "
+
+        SXEA1(gettimeofday(&test_mock_gettimeofday_timeval, NULL) == 0, "Failed to get time of day: %s", strerror(errno));
+        MOCK_SET_HOOK(gettimeofday, test_mock_gettimeofday); /* Hook gettimeofday to mock it */
+
+        pool_1_timeout = sxe_pool_new_with_timeouts("pool_1_timeout", 4, sizeof(*pool_1_timeout), TEST_STATE_NUMBER_OF_STATES,
+                                                    pool_1_timeouts, test_pool_1_timeout, NULL);
+        pool_2_timeout = sxe_pool_new_with_timeouts("pool_2_timeout", 4, sizeof(*pool_2_timeout), TEST_STATE_NUMBER_OF_STATES,
+                                                    pool_2_timeouts, test_pool_2_timeout, NULL);
+        test_mock_gettimeofday_timeval.tv_sec += 100;    /* Advance the mocked clock; nothing below sleeps */
+        sxe_pool_check_timeouts();
+        is(test_pool_1_timeout_call_count, 0, TEST_TIMEOUT "test_pool_1_timeout() not called; after 100 seconds: all elements still in TEST_STATE_FREE with infinite timeout");
+        is(test_pool_2_timeout_call_count, 0, TEST_TIMEOUT "test_pool_2_timeout() not called; after 100 seconds: all elements still in TEST_STATE_FREE with infinite timeout");
+        sxe_pool_set_oldest_element_state(pool_1_timeout, TEST_STATE_FREE, TEST_STATE_USED );
+        sxe_pool_set_oldest_element_state(pool_1_timeout, TEST_STATE_FREE, TEST_STATE_ABUSED);
+        sxe_pool_set_oldest_element_state(pool_2_timeout, TEST_STATE_FREE, TEST_STATE_USED );
+        sxe_pool_set_oldest_element_state(pool_2_timeout, TEST_STATE_FREE, TEST_STATE_ABUSED);
+        test_mock_gettimeofday_timeval.tv_sec += 1;
+        sxe_pool_check_timeouts();
+        is(test_pool_1_timeout_call_count, 0, TEST_TIMEOUT "test_pool_1_timeout() not called; after 1 second(s): elements with TEST_STATE_USED/TEST_STATE_ABUSED have rest time 3/2 seconds");
+        is(test_pool_2_timeout_call_count, 1, TEST_TIMEOUT "test_pool_2_timeout() called; after 1 second(s): elements with TEST_STATE_USED/TEST_STATE_ABUSED have rest time 0/1 seconds");
+        test_mock_gettimeofday_timeval.tv_sec += 2;
+        sxe_pool_check_timeouts();
+        is(test_pool_1_timeout_call_count, 1, TEST_TIMEOUT "test_pool_1_timeout() called; after 2 second(s): elements with TEST_STATE_USED/TEST_STATE_ABUSED have rest time 1/0 seconds");
+        is(test_pool_2_timeout_call_count, 2, TEST_TIMEOUT "test_pool_2_timeout() called; after 2 second(s): elements with ---------------/TEST_STATE_ABUSED have rest time -/-1 seconds");
+        test_mock_gettimeofday_timeval.tv_sec += 1;
+        sxe_pool_check_timeouts();
+        is(test_pool_1_timeout_call_count, 2, TEST_TIMEOUT "test_pool_1_timeout() called; after 1 second(s): elements with TEST_STATE_USED/----------------- have rest time 0/- seconds");
+        is(test_pool_2_timeout_call_count, 3, TEST_TIMEOUT "test_pool_2_timeout() called; after 1 second(s): elements with TEST_STATE_USED/----------------- have rest time 0/- seconds");
+
+        sxe_pool_delete(pool_1_timeout);
+        sxe_pool_delete(pool_2_timeout);
+
+        /* timeout with a pool of 1 objects */
+        pool_3_timeout = sxe_pool_new_with_timeouts("pool_3_timeout", 1, sizeof(*pool_3_timeout), 2, pool_3_timeouts,
+                                                    test_pool_3_timeout, NULL);
+        test_mock_gettimeofday_timeval.tv_sec += 2;
+        sxe_pool_check_timeouts();    /* No assertion follows: this only has to complete without aborting */
+        sxe_pool_delete(pool_3_timeout);
+    }
+
+    is(kit_memory_allocations(), start_allocations, "No memory was leaked");
+    return exit_status();
+}
diff --git a/lib-sxe-test/GNUmakefile b/lib-sxe-test/GNUmakefile
new file mode 100644
index 0000000..92c7872
--- /dev/null
+++ b/lib-sxe-test/GNUmakefile
@@ -0,0 +1,8 @@
+LIBRARIES = sxe-test
+
+include ../dependencies.mak
+
+# Special case: usually only ./test/*.c files include tap.h
+ifdef SXE_EMBEDDED_TAP
+    IFLAGS += $(CC_INC)$(COM.dir)/../libsxe/lib-tap/$(DST.dir)
+endif
diff --git a/lib-sxe-test/sxe-test-catch-abort.h b/lib-sxe-test/sxe-test-catch-abort.h
new file mode 100644
index 0000000..a181fde
--- /dev/null
+++ b/lib-sxe-test/sxe-test-catch-abort.h
@@ -0,0 +1,67 @@
+/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This file contains includable code and should only be used in test programs. + */ + +#include "sxe-log.h" + +#include + +#include /* for setjmp/longjmp - testing ASSERTs */ +#include /* for.. 
um, signal() */
+#include /* for __func__ on Windows */
+
+static jmp_buf abort_jmpbuf_env;    /* Set by test_expect_abort_in_function(); the SIGABRT handler jumps back to it */
+/* SIGABRT handler: jump back into test_expect_abort_in_function() instead of letting the process die */
+static __attribute__ ((unused)) void
+test_signal_handler_sigabrt(int signum)
+{
+    /* SIGNAL HANDLER -- DO NOT USE SXEExx/SXERxx macros */
+    if (signum == SIGABRT) {
+        longjmp(abort_jmpbuf_env, 1);
+        /* does not return */
+    }
+    return;
+}
+/* Call func(user_data) with SIGABRT trapped; return 1 if it aborted, 0 if it returned normally */
+static __attribute__ ((unused)) int
+test_expect_abort_in_function(void (*func)(void *), void * user_data)
+{
+    volatile int result = 1;    /* Presumably volatile so that its value is well defined after the longjmp() */
+    int savesigs = 0;           /* Not used by the setjmp() call below; only passed to SXE_UNUSED_PARAMETER() */
+    void * old_signal_handler;  /* NOTE(review): holds a function pointer in a void *; fine on POSIX, not strict ISO C */
+
+    SXEE62("%s(user_data=%p)", __func__, user_data);
+    SXE_UNUSED_PARAMETER(savesigs); /* sigsetjmp may not 'use' this, but its required by the API */
+    old_signal_handler = signal(SIGABRT, test_signal_handler_sigabrt);
+
+    if (setjmp(abort_jmpbuf_env) == 0) {    /* 0 = direct call; non-zero = arrived here via longjmp() from the handler */
+        func(user_data); /* not expected to return */
+
+        /* If we get here, the function did not abort, so the test failed */
+        result = 0;
+    }
+
+    signal(SIGABRT, old_signal_handler);    /* Put back whatever handler was installed before the call */
+    SXER61("return result=%d", result);
+    return result; /* 0 means failure */
+}
diff --git a/lib-sxe-test/sxe-test-doubletime.h b/lib-sxe-test/sxe-test-doubletime.h
new file mode 100644
index 0000000..32ba4e1
--- /dev/null
+++ b/lib-sxe-test/sxe-test-doubletime.h
@@ -0,0 +1,40 @@
+/* Copyright (c) 2021 Jim Belton
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* This file contains includable code and should only be used in test programs.
+ */
+
+#ifndef __SXE_TEST_DOUBLETIME__
+#define __SXE_TEST_DOUBLETIME__ 1
+
+#include
+#include
+/* Return the current time of day in seconds, with the microseconds as the fraction; aborts (or returns 0.0 if NDEBUG) on failure */
+static __attribute__ ((unused)) double
+test_doubletime(void)
+{
+    struct timeval now;
+    int            result = gettimeofday(&now, NULL);    /* Called outside assert() so that it still runs when NDEBUG is defined */
+    assert(result == 0);
+    return result == 0 ? now.tv_sec + now.tv_usec / 1000000.0 : 0.0;    /* Never read an unset 'now' in NDEBUG builds */
+}
+
+#endif
diff --git a/lib-sxe-test/sxe-test-get-temp-file-name.h b/lib-sxe-test/sxe-test-get-temp-file-name.h
new file mode 100644
index 0000000..ccbd19e
--- /dev/null
+++ b/lib-sxe-test/sxe-test-get-temp-file-name.h
@@ -0,0 +1,86 @@
+/* Copyright (c) 2010 Sophos Group.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* This file contains includable code and should only be used in test programs.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include /* for PATH_MAX on WIN32 */
+#include /* for PATH_MAX not on WIN32 */
+
+#include "sxe-log.h"
+
+/**
+ * Why go to the trouble of creating an especially unique file
+ * name in a special 'temp' folder?
+ * The reason is that the VirtualBox (vbox) host-guest file system
+ * has a bug and does not support memory mapped files with read
+ * *and* write access:
+ * http://www.virtualbox.org/ticket/819
+ **/
+/* Build "<temp dir><file_stem>-pid-<id>.bin" in the caller's PATH_MAX sized buffer; aborts if it does not fit */
+static __attribute__ ((unused)) void
+sxe_test_get_temp_file_name(
+    const char * file_stem , /* e.g. test-sxe-mmap-pool */
+    char * unique_path_and_file_buffer , /* e.g. outputs win32: C:\DOCUME~1\SIMONH~1.GRE\LOCALS~1\Temp\<file_stem>-pid-<id>.bin */
+    /* e.g. outputs linux: /tmp/<file_stem>-pid-<id>.bin */
+    unsigned unique_path_and_file_buffer_size, /* must be PATH_MAX (asserted below) */
+    unsigned * unique_path_and_file_buffer_used) /* out: length of the name written, excluding the terminating NUL */
+{
+    int used = 0 ;
+    int size = unique_path_and_file_buffer_size;
+    int done ;
+#ifdef _WIN32 /* Same macro as the GetTempPath() guard below, so both Windows branches are always selected together */
+    DWORD UniqueId = GetCurrentThreadId() ;
+#else
+    pid_t UniqueId = getpid() ;
+#endif
+
+    SXEE6("sxe_test_get_temp_file_name(file_stem=%p, unique_path_and_file_buffer=%p, unique_path_and_file_buffer_size=%u)", file_stem, unique_path_and_file_buffer, unique_path_and_file_buffer_size);
+
+    SXEA1(PATH_MAX == unique_path_and_file_buffer_size, "ERROR: unique_path_and_file_buffer_sizes should be PATH_MAX but is %u", unique_path_and_file_buffer_size);
+
+#ifdef _WIN32
+    done = GetTempPath(size, unique_path_and_file_buffer); /* e.g. C:\TEMP\ */
+    SXEA1(done != 0, "ERROR: GetTempPath() returned zero unexpectedly");
+#else
+    done = snprintf(&unique_path_and_file_buffer[used], size, "/tmp/");
+    SXEA1(done < size, "ERROR: snprintf() did not have enough space; given %d bytes and would have used %d bytes", size, done); /* done == size means the output was truncated */
+    SXEA1(done > 0 , "ERROR: snprintf() failed with result: %d", done);
+#endif
+    size -= done;
+    used += done;
+
+    done = snprintf(&unique_path_and_file_buffer[used], size, "%s-pid-%d.bin", file_stem, (int)UniqueId); /* Cast: neither pid_t nor DWORD is guaranteed to be an int */
+    SXEA1(done < size, "ERROR: snprintf() did not have enough space; given %d bytes and would have used %d bytes", size, done); /* snprintf()'s result excludes the NUL, so it must be < size */
+    SXEA1(done > 0 , "ERROR: snprintf() failed with result: %d", done);
+    size -= done;
+    used += done;
+
+    *unique_path_and_file_buffer_used = used;
+
+    SXER6("return // unique_path_and_file_buffer=%s", unique_path_and_file_buffer);
+}
diff --git a/lib-sxe-test/sxe-test-memory.h b/lib-sxe-test/sxe-test-memory.h
new file mode 100644
index 0000000..b7719ad
--- /dev/null
+++ b/lib-sxe-test/sxe-test-memory.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2021 Jim Belton
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#pragma GCC diagnostic ignored "-Waggregate-return"    // Shut gcc up about mallinfo returning a struct

static bool test_memory_initialized = false;    // Set once the timezone data has been preallocated

/**
 * Report the number of bytes currently in use on the glibc heap
 *
 * @return The uordblks field reported by mallinfo2() (glibc 2.35 or newer) or mallinfo() (older glibc)
 *
 * @note The first call invokes localtime() before measuring so that the memory glibc allocates for timezone data is not
 *       later mistaken for a leak by a test that compares two results.
 */
static __attribute__ ((unused)) size_t
test_memory(void)
{
    time_t time0 = 0;

    if (!test_memory_initialized) {
        localtime(&time0);    // Preallocate memory allocated for timezone
        test_memory_initialized = true;
    }

    /* Compare major and minor together: testing them independently would wrongly reject a future major version whose
     * minor number is below 35.
     */
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 35)
    struct mallinfo2 info = mallinfo2();
#else
    struct mallinfo info = mallinfo();
#endif
    return info.uordblks;
}
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This file contains includable code and should only be used in test programs. 
+ */ + +#ifndef __SXE_TEST_TAP_EV_H__ +#define __SXE_TEST_TAP_EV_H__ 1 + +#include +#include +#include + +#include "ev.h" +#include "sxe.h" +#include "sxe-log.h" +#include "sxe-time.h" +#include "sxe-util.h" +#include "tap.h" + +#ifdef gettimeofday +# define SXE_TEST_TAP_EV_mock_gettimeofday gettimeofday +# undef gettimeofday +#endif + +static void test_ev_loop_wait(ev_tstamp); +static void test_process_all_libev_events(void); + +static __attribute__ ((unused)) SXE_TIME +local_get_time(void) +{ + struct timeval tv; + + SXEA1(gettimeofday(&tv, NULL) >= 0, "gettimeofday failed: %s", strerror(errno)); + return SXE_TIME_FROM_TIMEVAL(&tv); +} + +static __attribute__ ((unused)) unsigned +test_tap_ev_length_nowait(void) +{ + test_process_all_libev_events(); + return tap_ev_length(); +} + +#define AT_TIMEOUT_THEN_TIMEOUT 0 +#define AT_TIMEOUT_THEN_ASSERT 1 + +static __attribute__ ((unused)) tap_ev +test_tap_ev_queue_shift_wait_or_what(tap_ev_queue queue, ev_tstamp seconds, int at_timeout) +{ + tap_ev event; + SXE_TIME deadline; + SXE_TIME current; + SXE_TIME wait_time; + + wait_time = SXE_TIME_FROM_DOUBLE_SECONDS(seconds); + deadline = local_get_time() + wait_time; + + for (;;) { + ev_loop(ev_default_loop(EVFLAG_AUTO), EVLOOP_NONBLOCK); /* run deferred events from before the loop or last loop pass */ + event = tap_ev_queue_shift(queue); + if (event != NULL) { + return event; + } + + if (sxe_get_deferred_count() == 0) { + test_ev_loop_wait(SXE_TIME_TO_DOUBLE_SECONDS(wait_time)); + event = tap_ev_queue_shift(queue); + if (event != NULL) { + return event; + } + } + + current = local_get_time(); + + if (current >= deadline) { + if (AT_TIMEOUT_THEN_TIMEOUT == at_timeout) { + return NULL; + } + SXEA1(current < deadline, "test_tap_ev_queue_shift_wait(): Timeout waiting for event"); + } + + wait_time = deadline - current; + } +} + +static __attribute__ ((unused)) tap_ev +test_tap_ev_queue_shift_wait_or_timeout(tap_ev_queue queue, ev_tstamp seconds) +{ + return 
test_tap_ev_queue_shift_wait_or_what(queue, seconds, AT_TIMEOUT_THEN_TIMEOUT); +} + +static __attribute__ ((unused)) tap_ev +test_tap_ev_queue_shift_wait_or_assert(tap_ev_queue queue, ev_tstamp seconds) +{ + return test_tap_ev_queue_shift_wait_or_what(queue, seconds, AT_TIMEOUT_THEN_ASSERT); +} + +static __attribute__ ((unused)) tap_ev +test_tap_ev_queue_shift_wait(tap_ev_queue queue, ev_tstamp seconds) +{ + return test_tap_ev_queue_shift_wait_or_what(queue, seconds, AT_TIMEOUT_THEN_ASSERT); +} + +static __attribute__ ((unused)) tap_ev +test_tap_ev_shift_wait(ev_tstamp seconds) /* _or_assert */ +{ + return test_tap_ev_queue_shift_wait(tap_ev_queue_get_default(), seconds); +} + +static __attribute__ ((unused)) tap_ev +test_tap_ev_shift_wait_or_timeout(ev_tstamp seconds) +{ + return test_tap_ev_queue_shift_wait_or_timeout(tap_ev_queue_get_default(), seconds); +} + +static __attribute__ ((unused)) const char * +test_tap_ev_identifier_wait(ev_tstamp seconds, tap_ev * ev_ptr) +{ + tap_ev event; + + return tap_ev_identifier(*(ev_ptr == NULL ? 
&event : ev_ptr) = test_tap_ev_shift_wait(seconds)); +} + +static __attribute__ ((unused)) const char * +test_tap_ev_queue_identifier_wait(tap_ev_queue queue, ev_tstamp seconds, tap_ev * ev_ptr) +{ + tap_ev event; + + if (ev_ptr == NULL) { + ev_ptr = &event; + } + + *ev_ptr = test_tap_ev_queue_shift_wait(queue, seconds); + + return tap_ev_identifier(*ev_ptr); +} + +/** + * Wait for (possibly fragmented) data on a SXE on a specific TAP queue + * + * @param queue TAP event queue + * @param seconds Seconds to wait for + * @param ev_ptr Pointer to a tap_ev event + * @param this Pointer to a SXE to check as the source of the read events or NULL to allow reads from multiple SXEs + * @param read_event_name Expected read event identifier (usually "read_event"); event must have "this", "buf" and "used" args + * @param buffer Pointer to a buffer to store the data + * @param expected_length Expect amount of data to be read + * @param who Textual description of SXE for diagnostics + */ +static __attribute__ ((unused)) void +test_ev_queue_wait_read(tap_ev_queue queue, ev_tstamp seconds, tap_ev * ev_ptr, void * this, const char * read_event_name, + char * buffer, unsigned expected_length, const char * who) +{ + unsigned used = 0; + unsigned i; + + for (i = 0; i < expected_length; i++) { + if (strcmp(test_tap_ev_queue_identifier_wait(queue, seconds, ev_ptr), read_event_name) != 0) { + fail("%s expected read event '%s', got event '%s' (already read %u fragments, %u bytes of %u expected)", who, + read_event_name, (const char *)tap_ev_identifier(*ev_ptr), i, used, expected_length); + goto SXE_ERROR_OUT; + } + + if (this != NULL && this != tap_ev_arg(*ev_ptr, "this")) { + fail("Expected a read event on SXE %p, got it on %p (%s)", this, tap_ev_arg(*ev_ptr, "this"), who); + goto SXE_ERROR_OUT; + } + + if (used + SXE_CAST(unsigned, tap_ev_arg(*ev_ptr, "used")) > expected_length) { + fail("Expected to read %u, read %u", expected_length, used + SXE_CAST(unsigned, tap_ev_arg(*ev_ptr, 
"used"))); + memcpy(&buffer[used], tap_ev_arg(*ev_ptr, "buf"), expected_length - used); + return; + } + + memcpy(&buffer[used], tap_ev_arg(*ev_ptr, "buf"), SXE_CAST(unsigned, tap_ev_arg(*ev_ptr, "used"))); + used += SXE_CAST(unsigned, tap_ev_arg(*ev_ptr, "used")); + tap_ev_free(*ev_ptr); + + if (used == expected_length) { + pass("Read %u bytes on %s", used, who); + return; + } + } + + SXEA1(used == expected_length, "test_ev_wait_read: Must have read %u bytes, but only read %u", expected_length, used); + +SXE_ERROR_OUT: + if (used == 0) { + strncpy(buffer, "NO DATA READ", expected_length); + } + else if (used < expected_length) { + buffer[used] = '\0'; + } +} + +/** + * Wait for (possibly fragmented) data on a SXE on the default TAP queue + * + * @param seconds Seconds to wait for + * @param ev_ptr Pointer to a tap_ev event + * @param this Pointer to a SXE to check as the source of the read events or NULL to allow reads from multiple SXEs + * @param read_event_name Expected read event identifier (usually "read_event"); event must have "this", "buf" and "used" args + * @param buffer Pointer to a buffer to store the data + * @param expected_length Expect amount of data to be read + * @param who Textual description of SXE for diagnostics + */ +static __attribute__ ((unused)) void +test_ev_wait_read(ev_tstamp seconds, tap_ev * ev_ptr, void * this, const char * read_event_name, char * buffer, + unsigned expected_length, const char * who) +{ + test_ev_queue_wait_read(tap_ev_queue_get_default(), seconds, ev_ptr, this, read_event_name, buffer, expected_length, who); +} + +#ifdef SXE_TEST_TAP_EV_mock_gettimeofday +# define gettimeofday SXE_TEST_TAP_EV_mock_gettimeofday +#endif + +#endif diff --git a/lib-sxe-test/sxe-test-waitpid-for-seconds.h b/lib-sxe-test/sxe-test-waitpid-for-seconds.h new file mode 100644 index 0000000..843eb5e --- /dev/null +++ b/lib-sxe-test/sxe-test-waitpid-for-seconds.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2010 Sophos Group. 
/**
 * Wait for a child process to change state, polling every hundredth of a second for up to a number of seconds
 *
 * @param pid     Process id to pass to waitpid
 * @param status  Where waitpid stores the child's status
 * @param seconds Maximum time to wait; zero or less polls exactly once
 *
 * @return The value returned by waitpid with WNOHANG: non-zero once the child has changed state, 0 if it had not by the
 *         time the wait expired
 *
 * @note A waitpid failure is reported with SXEA1. The elapsed time is estimated by counting sleeps, so it is approximate.
 */
static __attribute__ ((unused)) pid_t
waitpid_for_seconds(pid_t pid, int * status, double seconds)
{
    double seconds_waited = 0.0;
    pid_t  result;

    for (;;) {
        SXEA1((result = waitpid(pid, status, WNOHANG)) != (pid_t)-1, "Failed to wait for pid %d: %s", pid, strerror(errno));

        /* Testing the deadline after the poll guarantees at least one poll, plus a final poll after the last sleep, so a
         * child that exits during that last sleep is still reaped.
         */
        if (result != 0 || seconds_waited >= seconds) {
            break;
        }

        /* Sleep for one hundredth of a second
         */
        usleep(10000);
        seconds_waited += 0.01;
    }

    return result;
}
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This file is provided for backward compatibility. You should probably just include the sxe-test-*.h files your test needs. 
+ */ + +#ifndef __SXE_TEST_H__ +#define __SXE_TEST_H__ + +#include + +#include "tap.h" +#include "sxe-test-catch-abort.h" +#include "sxe-test-get-temp-file-name.h" +#include "sxe-test-leak.h" +#include "sxe-test-waitpid-for-seconds.h" + +#endif diff --git a/lib-sxe-thread/GNUmakefile b/lib-sxe-thread/GNUmakefile new file mode 100644 index 0000000..5d50283 --- /dev/null +++ b/lib-sxe-thread/GNUmakefile @@ -0,0 +1,6 @@ +LIBRARIES = sxe-thread +include ../dependencies.mak + +ifneq ($(OS),Windows_NT) + LINK_FLAGS += -lpthread +endif diff --git a/lib-sxe-thread/sxe-thread-memory.c b/lib-sxe-thread/sxe-thread-memory.c new file mode 100644 index 0000000..2e7e654 --- /dev/null +++ b/lib-sxe-thread/sxe-thread-memory.c @@ -0,0 +1,182 @@ +/* Copyright (c) 2023 Jim Belton + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "kit-alloc.h" +#include "sxe-thread.h" + +static struct sxe_thread_memory *trackers = NULL; // Head of the list of per thread memory trackers +static __thread pid_t tid = -1; // Set to the tid of the thread when it first allocates + +/** + * Allocate memory and tracker memory whose address is to be stored in a per thread pointer so that the main thread can free it + * + * @param size Number of bytes to allocate + * @param obj_free A free function to call on the allocated object before freeing, or NULL to call sxe_free + * @param tracker_out If not NULL, set to the address of an allocated tracking structure; need if memory is to be realloced + * + * @return Pointer to the memory allocated or NULL on failure to allocate + * + * @note The implementation of thread identity that is safe to use from another thread is very Linux specific + */ +void * +sxe_thread_malloc(size_t size, void (*obj_free)(void *), struct sxe_thread_memory **tracker_out) +{ + struct sxe_thread_memory *tracker = kit_malloc(sizeof(**tracker_out)); + + /* If tracker and memory were allocated + */ + if (tracker && (tracker->memory = kit_malloc(size))) { // SonarQube False Positive + if (tid < 0) { + tid = gettid(); + SXEL7(": first call from tid %d", tid); + } + + tracker->tid = tid; + tracker->free = obj_free; + tracker->next = trackers; + + /* If first tracker in list remains unchanged by another thread, atomically replace it with tracker. + */ + while (!__sync_bool_compare_and_swap(&trackers, tracker->next, tracker)) + tracker->next = trackers; /* COVERAGE EXCLUSION: this only gets hit in a race condition */ + + if (tracker_out) + *tracker_out = tracker; + } + + return tracker ? 
tracker->memory : NULL; +} + +/** + * Reallocate tracked per thread memory + * + * @param tracker A tracking structure previously returned from sxe_thread_malloc + * @param size Number of bytes to reallocate to + * + * @return Pointer to the reallocated memory or NULL on failure to reallocate + */ +void * +sxe_thread_realloc(struct sxe_thread_memory *tracker, size_t size) +{ + void *memory = kit_realloc(tracker->memory, size); + + if (memory) + tracker->memory = memory; + + return memory; +} + +/** + * Free per thread memory for any threads that are no longer alive + * + * @param One of SXE_THREAD_MEMORY_UNUSED or SXE_THREAD_MEMORY_ALL (to include memory from the calling thread) + * + * @return the number of tracked per thread memory allocations remaining unfreed + * + * @note This function should only be called from the main thread + */ +unsigned +sxe_thread_memory_free(unsigned what) +{ + struct stat status; + struct sxe_thread_memory *tracker; + struct sxe_thread_memory *next; + struct sxe_thread_memory *keepers = NULL; // List of tackers not freed + struct sxe_thread_memory *last = NULL; // Last tracker in keepers list + unsigned i, unfreed; + int ret; + bool reap; + char task_dir[64]; + + SXEE6("(what=%s)", what == SXE_THREAD_MEMORY_UNUSED ? 
"UNUSED" : "ALL"); + + for (i = 0; i <= 1000; i++) { // If trying to free all, try every millisecond for 1 second + unfreed = 0; + + /* Atomically acquire the list of trackers + */ + for (tracker = trackers; !__sync_bool_compare_and_swap(&trackers, tracker, NULL); ) + tracker = trackers; /* COVERAGE EXCLUSION: this only gets hit in a race condition */ + + for (; tracker; tracker = next) { // For each tracker in the list + next = tracker->next; + + if (tid == tracker->tid) // If it's the calling thread, only reap it's memory if freeing ALL + reap = what == SXE_THREAD_MEMORY_ALL; + else { // Otherwise, if the threads task file is not present + snprintf(task_dir, sizeof(task_dir), "/proc/%d/task/%d", getpid(), tracker->tid); + reap = (ret = stat(task_dir, &status)) < 0 && errno == ENOENT; + } + + /* If not attempting to free all per thread memory and memory was allocated by this thread, or the allocating + * thread is not dead, or an error occurred trying to verify the status of the thread, don't free the memory + */ + if (!reap) { + if (tid != tracker->tid) { + if (ret >= 0) + SXEL7(": thread %d is alive", tracker->tid); + else + SXEL2(": Can't stat %s; error: %s", task_dir, strerror(errno)); /* COVERAGE EXCLUSION: Shouldn't happen */ + } + + last = last ?: tracker; + tracker->next = keepers; + keepers = tracker; + unfreed++; + continue; + } + + if (tid != tracker->tid) + SXEL7(": thread %d is dead", tracker->tid); + + if (tracker->free) + (*tracker->free)(tracker->memory); + else + kit_free(tracker->memory); + + kit_free(tracker); + } + + if (keepers) { + tracker = trackers; + + /* Attempt to replace trackers list with keepers followed by trackers, retrying if another thread mucks with trackers + */ + for (last->next = tracker; !__sync_bool_compare_and_swap(&trackers, tracker, keepers); last->next = tracker) + tracker = trackers; /* COVERAGE EXCLUSION: this only gets hit in a race condition */ + } + + if (what != SXE_THREAD_MEMORY_ALL || unfreed == 0) + break; + + 
usleep(1000); /* COVERAGE EXCLUSION: This only happens in a race condition, so can't be reliably covered */ + } + + SXER6("return unfreed=%u", unfreed); + return unfreed; +} diff --git a/lib-sxe-thread/sxe-thread.c b/lib-sxe-thread/sxe-thread.c new file mode 100644 index 0000000..7e8418f --- /dev/null +++ b/lib-sxe-thread/sxe-thread.c @@ -0,0 +1,107 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include + +#include "sxe-thread.h" + +/** + * OS independant thread creation + */ +SXE_RETURN +sxe_thread_create(SXE_THREAD *thread, SXE_THREAD_RETURN (SXE_STDCALL * thread_main)(void *), void * user_data, unsigned options) +{ + SXE_RETURN result = SXE_RETURN_ERROR_INTERNAL; + int error; + + SXEE6("sxe_thread_create(thread=%p, thread_main=%p, user_data=%p, options=%u)", thread, thread_main, user_data, options); + SXEA1(options == SXE_THREAD_OPTION_DEFAULTS, "sxe_thread_create: options must be %u", SXE_THREAD_OPTION_DEFAULTS); + +#ifdef _WIN32 +# define WINDOWS_DEFAULT_SECURITY NULL +# define WINDOWS_DEFAULT_STACK_SIZE 0 + + if ((thread = CreateThread(WINDOWS_DEFAULT_SECURITY, WINDOWS_DEFAULT_STACK_SIZE, thread_main, user_data, 0, NULL)) != NULL) { + result = SXE_RETURN_OK; + goto SXE_EARLY_OUT; + } + + switch (error = sxe_socket_get_last_error()) { /* Coverage Exclusion - todo: win32 coverage */ + default: SXEL2("sxe_thread_create: Failed to create a thread: %s", sxe_socket_error_as_str(error)); break; + } + +#else /* POSIX */ +# define POSIX_DEFAULT_ATTRIBUTES NULL + + if ((error = pthread_create(thread, POSIX_DEFAULT_ATTRIBUTES, thread_main, user_data)) == 0) { + result = SXE_RETURN_OK; + goto SXE_EARLY_OUT; + } + + switch (error) { /* Coverage Exclusion - Failure case */ + case EAGAIN: SXEL2("sxe_thread_create: Not enough resources to create a thread"); break; /* Coverage Exclusion - Failure case */ + default: SXEL2("sxe_thread_create: Unexpected error creating a thread: %s", strerror(error)); break; /* Coverage Exclusion - Failure case */ + } +#endif + +SXE_EARLY_OUT: + SXER6("return %s", sxe_return_to_string(result)); + return result; +} + +/** + * Wait for a thread to terminate + * + * @param thread A thread created with sxe_thread_create + * @param retval_out Pointer to a location to store the thread's return value or NULL + * + * @return SXE_RETURN_OK, SXE_RETURN_NO_SUCH_PROCESS, SXE_RETURN_ERROR_INVALID, or 
SXE_RETURN_DEADLOCK_WOULD_OCCUR + * + * @note Currently, there is no WIN32 (visual C) implementation + */ +SXE_RETURN +sxe_thread_wait(SXE_THREAD thread, SXE_THREAD_RETURN *retval_out) +{ + SXE_THREAD_RETURN retval; + + return pthread_join(thread, retval_out ?: &retval); +} + +SXE_THREAD +sxe_thread_get_self(void) +{ + return pthread_self(); +} + +/** + * Yeild the processor to other threads + * + * @note Under Linux, pthread_yield can lead to deadlocks with the standard scheduler, so nanosleep must be used. Currently, + * there is no WIN32 (visual C) implementation. + */ +void +sxe_thread_yeild(void) +{ + struct timespec request = {0, 0}; + + nanosleep(&request, NULL); +} diff --git a/lib-sxe-thread/sxe-thread.h b/lib-sxe-thread/sxe-thread.h new file mode 100644 index 0000000..d7e7412 --- /dev/null +++ b/lib-sxe-thread/sxe-thread.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef __SXE_THREAD_H__ +#define __SXE_THREAD_H__ + +#include "sxe-log.h" + +#define SXE_THREAD_OPTION_DEFAULTS 0 + +#ifdef _WIN32 +# include +# define __thread __declspec( thread ) +# define SXE_STDCALL __stdcall + typedef HANDLE SXE_THREAD; + typedef DWORD SXE_THREAD_RETURN; + + +#else +# include +# define SXE_STDCALL + typedef pthread_t SXE_THREAD; + typedef void * SXE_THREAD_RETURN; +#endif + +#define SXE_THREAD_MEMORY_UNUSED 1 // Free thread memory of dead threads +#define SXE_THREAD_MEMORY_ALL 2 // Free thread memory of dead threads and the current thread + +struct sxe_thread_memory { + void *memory; // Allocated memory + void (*free)(void *); // Function to call to free memory or NULL to call sxe_free + struct sxe_thread_memory *next; // Pointer to next tracker or NULL + pid_t tid; // The tid of the thread that allocated the memory +}; + +#include "lib-sxe-thread-proto.h" + +#endif diff --git a/lib-sxe-thread/test/test-sxe-thread-memory.c b/lib-sxe-thread/test/test-sxe-thread-memory.c new file mode 100644 index 0000000..7aaafa0 --- /dev/null +++ b/lib-sxe-thread/test/test-sxe-thread-memory.c @@ -0,0 +1,99 @@ +/* Copyright (c) 2023 Jim Belton + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
static volatile bool   got_memory  = false;    // Set by the thread once it has allocated its per thread memory
static volatile bool   time_to_die = false;    // Set by main to tell the thread to return
static __thread void  *memory;                 // Each thread's own pointer to its tracked allocation

/* Thread body: allocate 8 bytes of tracked per thread memory, signal main, then yield until told to exit
 */
static SXE_THREAD_RETURN SXE_STDCALL
my_thread_main(void * unused)
{
    SXEE6("test_thread_main(unused=%p) // tid=%d", unused, gettid());
    SXE_UNUSED_PARAMETER(unused);

    memory     = sxe_thread_malloc(8, NULL, NULL);    // Use the default free and don't get back a tracker
    got_memory = true;
    SXEL6("allocated 8 bytes at %p", memory);

    while (!time_to_die)
        sxe_thread_yeild();

    SXER6("return NULL");
    return (SXE_THREAD_RETURN)0;
}

static void
my_free(void *mem)    // Wrap sxe_free memory to give the right prototype (sxe_free is a macro)
{
    kit_free(mem);
}

/* Wait for up to 100ms for the expected number of outstanding per thread allocations
 *
 * Calls sxe_thread_memory_free(what) repeatedly, 1 ms apart, until it returns the expected count or the retries run out,
 * then records the outcome as a single TAP test.
 */
static void
test_thread_memory_free(unsigned what, unsigned expected, const char *message)
{
    unsigned got, i;

    for (i = 0; (got = sxe_thread_memory_free(what)) != expected && i <= 100; i++)
        usleep(1000);    // Delay for 1 ms

    is(expected, got, "%s", message);
}

/* Exercise tracked per thread memory: allocate in main and in a second thread, then check that memory is only freed once
 * its thread has exited (or, for the calling thread, when SXE_THREAD_MEMORY_ALL is requested) and that nothing leaks.
 */
int
main(void)
{
    struct sxe_thread_memory *tracker;
    SXE_THREAD                thread;
    uint64_t                  start_allocations;
    SXE_RETURN                result;

    plan_tests(7);
    start_allocations = kit_memory_allocations();    // Baseline for the leak check at the end
    // KIT_ALLOC_SET_LOG(1);    // Turn off when done

    memory = sxe_thread_malloc(8, my_free, &tracker);    // Get some memory of our own
    memory = sxe_thread_realloc(tracker, 16);            // Reallocate it bigger
    is(tracker->memory, memory, "Still tracking the memory");

    is(result = sxe_thread_create(&thread, my_thread_main, memory, SXE_THREAD_OPTION_DEFAULTS), SXE_RETURN_OK,
       "Created a thread");

    while (!got_memory)    // Wait until the thread has made its allocation
        sxe_thread_yeild();

    /* Two allocations are outstanding: main's (the calling thread, not reaped for UNUSED) and the live thread's
     */
    is(sxe_thread_memory_free(SXE_THREAD_MEMORY_UNUSED), 2, "Freed no memory while thread was alive");
    time_to_die = true;
    is(sxe_thread_wait(thread, NULL), SXE_RETURN_OK, "Successfully waited for thread to complete");
    test_thread_memory_free(SXE_THREAD_MEMORY_UNUSED, 1, "Freed thread's memory once thread it was dead");
    is(sxe_thread_memory_free(SXE_THREAD_MEMORY_ALL), 0, "Freed my per thread memory");
    is(kit_memory_allocations(), start_allocations, "No memory was leaked");
    return exit_status();
}
#define TEST_YIELD_MAX 1000000    /* Maximum number of yields main makes while waiting for the thread's log line */

SXE_LOG_LEVEL     test_log_level;       /* Level returned by sxe_log_set_level in main; test_log_line prints lines at or above it */
SXE_SPINLOCK      ping;                 /* Taken by main before the thread is created; released to let the thread proceed */
SXE_SPINLOCK      pong;                 /* Taken by the thread first; released after it has logged                        */
volatile unsigned thread_indent = 0;    /* Offset of "thread:" within the last log line that contained it                 */
volatile unsigned main_indent   = 0;    /* Offset of "main:" within the last log line that contained it                   */

/* Log line hook: record the offset at which the "thread:" or "main:" tag appears in the line, then write the line to
 * stderr if its level is at or above test_log_level.
 */
static void
test_log_line(SXE_LOG_LEVEL level, const char * line)
{
    char * tag;

    if ((tag = strstr(line, "thread:")) != NULL) {
        thread_indent = tag - line;
    }
    else if ((tag = strstr(line, "main:"  )) != NULL) {
        main_indent = tag - line;
    }

    /* Stuff unwanted diagnostics
     */
    if (level >= test_log_level) {
        fputs(line, stderr);
        fflush(stderr);
    }
}

/* Thread body: take pong, then ping (held by main until the thread has been created), log a line tagged "thread:",
 * release pong and then sleep forever. The statements after the loop are never reached.
 */
static SXE_THREAD_RETURN SXE_STDCALL
test_thread_main(void * lock)
{
    SXEE6("test_thread_main(lock=%p)", lock);

    SXEA1(lock                     == &ping,                     "Ping lock not passed to the thread");
    SXEA1(sxe_spinlock_take(&pong) == SXE_SPINLOCK_STATUS_TAKEN, "Pong lock not taken by thread");
    SXEA1(sxe_spinlock_take(&ping) == SXE_SPINLOCK_STATUS_TAKEN, "Ping lock not taken by thread");
    SXEL1("thread: about to pong the main thread");
    sxe_spinlock_give(&pong);

    for (;;) {
        sleep(1);
    }

    SXER6("return NULL");
    return (SXE_THREAD_RETURN)0;
}

/* Create a thread, hand off with it through the ping and pong spinlocks, and check that the thread's log line is indented
 * further than main's.
 */
int
main(void)
{
    SXE_THREAD thread;
    SXE_RETURN result;
    unsigned   i;

    plan_tests(7);
    sxe_log_hook_line_out(test_log_line);
    test_log_level = sxe_log_set_level(SXE_LOG_LEVEL_TRACE);    /* Required to do indentation test */
    sxe_spinlock_construct(&ping);
    sxe_spinlock_construct(&pong);
    is(sxe_spinlock_take(&ping), SXE_SPINLOCK_STATUS_TAKEN, "Ping lock taken by main");

    is((result = sxe_thread_create(&thread, test_thread_main, &ping, SXE_THREAD_OPTION_DEFAULTS)), SXE_RETURN_OK,
       "Created SXE thread");
    sxe_spinlock_give(&ping);    /* Allow thread to proceed */

    /* Wait for thread_indent to be set.
     */
    for (i = 0; thread_indent == 0 && i < TEST_YIELD_MAX; i++) {
        SXE_YIELD();
    }

    ok(i < TEST_YIELD_MAX, "Thread log indent set to %u after %u yeilds", thread_indent, i);
    SXEL1("main: about to confirm the pong from the thread");
    is(sxe_spinlock_take(&pong), SXE_SPINLOCK_STATUS_TAKEN, "Pong lock taken by main");
    ok(main_indent   > 0,           "Main log indent set to %u", main_indent);
    ok(thread_indent > main_indent, "Thread indent is greater than main indent");
    ok(sxe_thread_get_self(),       "A thread can always get its self");
    return exit_status();
}
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#ifdef WIN32 +#include +#else +#include +#include +#endif + +#include "sxe-util.h" + +SXE_RETURN +sxe_set_file_limit(const unsigned limit) +{ + SXE_RETURN result = SXE_RETURN_ERROR_INTERNAL; + +#ifndef _WIN32 + struct rlimit rt; + rt.rlim_max = limit; + rt.rlim_cur = limit; +#endif + + if +#ifdef _WIN32 + (_setmaxstdio(limit) == -1) +#else + (setrlimit(RLIMIT_NOFILE, &rt) == -1) +#endif + { + SXEL7("Failed to set file limit to '%u'", limit); + goto SXE_ERROR_OUT; /* COVERAGE EXCLUSION - TODO: WIN32 COVERAGE */ + } + + result = SXE_RETURN_OK; + +SXE_ERROR_OUT: + return result; +} + diff --git a/lib-sxe-util/sxe-hex.c b/lib-sxe-util/sxe-hex.c new file mode 100644 index 0000000..2a66d0e --- /dev/null +++ b/lib-sxe-util/sxe-hex.c @@ -0,0 +1,140 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include "sxe-log.h" +#include "sxe-util.h" + +#define NA 0xff + +static unsigned char hex_character_to_nibble[] = { + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NA, NA, NA, NA, NA, NA, + NA, 10, 11, 12, 13, 14, 15, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, 10, 11, 12, 13, 14, 15, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA +}; + +static char hex_nibble_to_character[] = "0123456789abcdef"; + +SXE_RETURN +sxe_valid_hex_to_unsigned(const char * hex, unsigned hex_length_maximum, unsigned * value) +{ + SXE_RETURN result = SXE_RETURN_ERROR_INTERNAL; + unsigned val = 0; + unsigned i; + + SXEE7("(hex='%.*s', hex_length_maximum=%u)", hex_length_maximum, hex, hex_length_maximum); + + for (i = 0; (i < hex_length_maximum) && (hex[i] != '\0'); i++) { + if (hex_character_to_nibble[(unsigned char)(hex[i])] == NA) { + SXEL6("Encountered unexpected hex character 
'%c'", hex[i]); + goto SXE_EARLY_OUT; + } + + val = (val << 4) | hex_character_to_nibble[(unsigned char)(hex[i])]; + } + + *value = val; + result = SXE_RETURN_OK; + +SXE_EARLY_OUT: + SXER7("return %s", sxe_return_to_string(result)); + return result; +} + +unsigned +sxe_hex_to_unsigned(const char * hex, unsigned hex_length_maximum) +{ + unsigned result = 0; + unsigned i; + + SXEE7("(hex='%.*s', hex_length_maximum=%u)", hex_length_maximum, hex, hex_length_maximum); + + for (i = 0; (i < hex_length_maximum) && (hex[i] != '\0'); i++) { + if (hex_character_to_nibble[(unsigned char)(hex[i])] == NA) { + SXEL6("Encountered unexpected hex character '%c'", hex[i]); + result = SXE_UNSIGNED_MAXIMUM; + goto SXE_EARLY_OUT; + } + + result = (result << 4) | hex_character_to_nibble[(unsigned char)(hex[i])]; + } + +SXE_EARLY_OUT: + SXER7("return result=0x%x", result); + return result; +} + +SXE_RETURN +sxe_hex_to_bytes(unsigned char * bytes, const char * hex, unsigned hex_length) +{ + SXE_RETURN result = SXE_RETURN_ERROR_INTERNAL; + unsigned i; + char character; + unsigned char nibble_high; + unsigned char nibble_low; + + SXEA1((hex_length % 1) == 0, "sxe_hex_to_bytes: hex string length %u is odd", hex_length); + SXEE7("(bytes=%p,hex='%.*s',hex_length=%u)", bytes, hex_length, hex, hex_length); + + for (i = 0; i < hex_length; i++) { + if (((nibble_high = hex_character_to_nibble[(unsigned char)(character = hex[ i])]) == NA) + || ((nibble_low = hex_character_to_nibble[(unsigned char)(character = hex[++i])]) == NA)) + { + SXEL6(isprint(character) ? 
"%s%c'" : "%s\\x%02x'", "sxe_hex_to_bytes: Unexpected hex character '", character); + goto SXE_EARLY_OUT; + } + + bytes[(i - 1) / 2] = (unsigned char)((nibble_high << 4) | nibble_low); + } + + result = SXE_RETURN_OK; + +SXE_EARLY_OUT: + SXER7("return result=%s", sxe_return_to_string(result)); + return result; +} + +char * +sxe_hex_from_bytes(char * hex, const unsigned char * bytes, unsigned size) +{ + unsigned i; + + for (i = 0; i < size; i++) { + hex[2 * i + 0] = hex_nibble_to_character[bytes[i] >> 4]; + hex[2 * i + 1] = hex_nibble_to_character[bytes[i] & 0xF]; + } + + return hex; +} diff --git a/lib-sxe-util/sxe-mkpath.c b/lib-sxe-util/sxe-mkpath.c new file mode 100644 index 0000000..a39bd66 --- /dev/null +++ b/lib-sxe-util/sxe-mkpath.c @@ -0,0 +1,82 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +/* TODO: Rewrite to avoid using "system()" */ + +#include +#include +#include +#include +#include +#include + +#include "sxe-log.h" +#include "sxe-util.h" + +#ifdef _WIN32 +# define MKDIR_COMMAND "mkdir" +#else +# define MKDIR_COMMAND "mkdir -p" +#endif + +SXE_RETURN +sxe_mkpath(const char * path) +{ + SXE_RETURN result = SXE_RETURN_ERROR_INTERNAL; + char command[PATH_MAX + sizeof(MKDIR_COMMAND) + 1]; + struct stat filestat; +#ifdef _WIN32 + unsigned command_length; + unsigned i; +#endif + + SXEA1(snprintf(command, sizeof(command), MKDIR_COMMAND " %s", path) >= 0, "Failed to format 'mkdir' command"); + +#ifdef _WIN32 + /* Remove slash termination (tossers at MS don't implement stat correctly) + */ + command_length = strlen(command) - 1; + + if (command[command_length] == '/') { + command[command_length] = '\0'; /* Coverage Exclusion - todo: win32 coverage */ + } + + for (i = command_length - 1; i >= sizeof(MKDIR_COMMAND); i--) { + if (command[i] == '/') { + command[i] = '\\'; + } + } +#endif + + SXEL6("sxe_mkpath(): command '%s' '%s'", command, &command[sizeof(MKDIR_COMMAND)]); + + if (stat(&command[sizeof(MKDIR_COMMAND)], &filestat) >= 0) { + SXEL6("sxe_mkpath(): '%s' already exists", path); + result = SXE_RETURN_OK; /* COVERAGE EXCLUSION - harmless, and only happens on windows */ + goto SXE_EARLY_OUT; /* COVERAGE EXCLUSION - harmless, and only happens on windows */ + } + + if (system(command) == 0) { + result = SXE_RETURN_OK; + } +SXE_EARLY_OUT: + return result; +} diff --git a/lib-sxe-util/sxe-rot13.c b/lib-sxe-util/sxe-rot13.c new file mode 100644 index 0000000..3e63bfa --- /dev/null +++ b/lib-sxe-util/sxe-rot13.c @@ -0,0 +1,126 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include "sxe-log.h" +#include "sxe-util.h" + +unsigned char sxe_rot13_char[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, '-' , '.' 
, 0x2f, + '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' , 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 'N' , 'O' , 'P' , 'Q' , 'R' , 'S' , 'T' , 'U' , 'V' , 'W' , 'X' , 'Y' , 'Z' , 'A' , 'B' , + 'C' , 'D' , 'E' , 'F' , 'G' , 'H' , 'I' , 'J' , 'K' , 'L' , 'M' , 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 'n' , 'o' , 'p' , 'q' , 'r' , 's' , 't' , 'u' , 'v' , 'w' , 'x' , 'y' , 'z' , 'a' , 'b' , + 'c' , 'd' , 'e' , 'f' , 'g' , 'h' , 'i' , 'j' , 'k' , 'l' , 'm' , 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff +}; + +static unsigned char sxe_rot13_hex_to_nibble[256] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0 - 9 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0x0b, /* A - Z */ + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0x0b, /* a - z */ + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + + +/** + * Apply an in-place rot13 transformation on the provided text buffer. The transformation will stop on the first NUL character. 
+ * + * @param string - NUL terminated string which is to be rot13 trancoded + * @param length - length of 'string' + */ +char * +sxe_strn_rot13_in_place(char * string, unsigned length) +{ + unsigned i; + + SXEE6("sxe_strn_rot13(string='%.*s', length=%u)", length, string, length); + + for (i = 0 ; (i < length) && (string[i] != '\0'); i++) { + string[i] = SXE_ROT13_CHAR(string[i]); + } + + SXER6("return string='%.*s'", length, string); + return string; +} + +char * +sxe_strn_rot13(char * buffer, const char * string, unsigned length) +{ + unsigned i; + + for (i = 0; i < length; i++) { + if (string[i] == '\0') { + buffer[i] = '\0'; + break; + } + + buffer[i] = SXE_ROT13_CHAR(string[i]); + } + + return buffer; +} + +unsigned +sxe_rot13_hex_to_unsigned(const char * text, unsigned text_length) +{ + unsigned result = 0; + unsigned i; + + SXEE6("sxe_rot13_hex_to_unsigned(text='%.*s', test_length=%u)", text_length, text, text_length); + + for (i = 0; (i < text_length) && (text[i] != '\0'); i++) { + if (sxe_rot13_hex_to_nibble[(unsigned)(text[i])] == 0xff) { + SXEL6("Encountered unexpected rot13 hex character '%c'", text[i]); + result = SXE_UNSIGNED_MAXIMUM; + goto SXE_EARLY_OUT; + } + + result = (result << 4) | sxe_rot13_hex_to_nibble[(unsigned)text[i]]; + } + +SXE_EARLY_OUT: + SXER6("return result=0x%x", result); + return result; +} + diff --git a/lib-sxe-util/sxe-stat.c b/lib-sxe-util/sxe-stat.c new file mode 100644 index 0000000..0380fd3 --- /dev/null +++ b/lib-sxe-util/sxe-stat.c @@ -0,0 +1,51 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#include "sxe-util.h" + +SXE_STAT * +sxe_stat(SXE_STAT * status, const char * file) +{ + SXEE6("%s(status=%p,file=%s", __func__, status, file); + + if (stat(file, status) < 0) { + SXEL4("%s: warning: can't stat file '%s': %s", __func__, file, strerror(errno)); + status = NULL; + } + + SXER6("return status=%p // 0 on error", status); + return status; +} + +time_t +sxe_stat_get_time_modification(const SXE_STAT * status) +{ + return status != NULL ? status->st_mtime : 0; +} + +off_t +sxe_stat_get_file_size(const SXE_STAT * status) +{ + return status != NULL ? status->st_size : 0; +} diff --git a/lib-sxe-util/sxe-str-to-printable.c b/lib-sxe-util/sxe-str-to-printable.c new file mode 100644 index 0000000..ab25777 --- /dev/null +++ b/lib-sxe-util/sxe-str-to-printable.c @@ -0,0 +1,95 @@ +/* Copyright (c) 2010 Sophos Group. 
#define SXE_STR_PRINTABLE_SIMULTANEOUSLY 10      /* Number of buffers that are cycled through */
#define SXE_STR_PRINTABLE_LENGTH_MAXIMUM 1020    /* Maximum converted characters, excluding the "..." marker */

static unsigned sxe_str_next = 0;
static char     sxe_str_printable[SXE_STR_PRINTABLE_SIMULTANEOUSLY][SXE_STR_PRINTABLE_LENGTH_MAXIMUM + sizeof("...")];

/**
 * Convert a string into a printable string
 *
 * @param str = String to convert
 *
 * @return str if the string is already printable or is NULL, or up to the first 1020 characters of the string converted into a
 *         printable form, returned in the least recently used of a set of 10 buffers. Unprintable bytes are converted to
 *         \xNN escapes. If the converted form does not fit in 1020 characters, it is cut before the first character or
 *         escape that does not fit and "..." is appended.
 *
 * @note This function is not thread safe. Also, as it is expected to be used in logging, it does not itself log.
 */
const char *
sxe_str_to_printable(const char * str)
{
    char *        out;
    unsigned      from;
    unsigned      to;
    unsigned      need;
    unsigned char byte;

    if (str == NULL) {
        return NULL;
    }

    /* Skip the leading printable characters; the cast keeps bytes >= 0x80 valid arguments to isprint()
     */
    for (from = 0; isprint((unsigned char)str[from]); from++) {
    }

    if (str[from] == '\0') {
        return str;
    }

    /* Note: Currently not thread safe.
     */
    out          = sxe_str_printable[sxe_str_next];
    sxe_str_next = (sxe_str_next + 1) % SXE_STR_PRINTABLE_SIMULTANEOUSLY;

    /* Copy the printable prefix, clamped to the space available
     */
    to = from > SXE_STR_PRINTABLE_LENGTH_MAXIMUM ? SXE_STR_PRINTABLE_LENGTH_MAXIMUM : from;
    memcpy(out, str, to);

    for (from = to; (byte = (unsigned char)str[from]) != '\0'; from++) {
        need = isprint(byte) ? 1 : sizeof("\\xNN") - 1;

        /* Bound the OUTPUT index: an escape is four times the size of the byte it replaces
         */
        if (to + need > SXE_STR_PRINTABLE_LENGTH_MAXIMUM) {
            memcpy(&out[to], "...", sizeof("..."));
            return out;
        }

        if (need == 1) {
            out[to++] = (char)byte;
            continue;
        }

        snprintf(&out[to], need + 1, "\\x%02x", byte);
        to += need;
    }

    out[to] = '\0';
    return out;
}
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include "sxe-util.h" + +char * +sxe_strnchr(const char * buf, char c, unsigned n) +{ + const char * end; + + for (end = &buf[n - 1]; buf <= end; buf++) { + if (*buf == '\0') { + break; + } + + if (*buf == c) { + return SXE_CAST_NOCONST(char *, buf); + } + } + + return NULL; +} + +/* DEPRECATED: For backward compatibility only; just call strnstr + */ +char * +sxe_strnstr(const char *buf, const char *str, unsigned n) +{ + return strnstr(buf, str, n); +} + +char * +sxe_rstrnchr(const char * buf, char c, unsigned n) +{ + const char * end; + + for (end = &buf[n - 1]; buf <= end; end--) { + if (*end == c) { + return SXE_CAST_NOCONST(char *, end); + } + } + + return NULL; +} + +char * +sxe_strncspn(const char * buf, const char * reject, unsigned n) +{ + unsigned i; + unsigned j; + + for (i = 0; (i < n) && (buf[i] != '\0'); i++) { + for (j = 0; reject[j] != '\0'; j++) { + if (buf[i] == reject[j]) { + return SXE_CAST_NOCONST(char *, &buf[i]); + } + } + } + + return NULL; +} + +char * +sxe_rstrnstr(const char * haystack, const char * needle, unsigned haystack_len) +{ + size_t needle_len = strlen(needle); // SonarQube False Positive + const char *ptr; + + for (ptr = haystack + haystack_len - needle_len; ptr >= haystack; ptr--) + if (memcmp(ptr, needle, needle_len) == 0) { + return SXE_CAST_NOCONST(char *, ptr); + } + + return NULL; +} + +char * +sxe_strncasestr(const char * buf, const char * str, unsigned n) +{ + size_t length = strlen(str); // SonarQube False Positive + 
const char *end; + + if (n < length) + return NULL; + + for (end = &buf[n - length]; buf <= end; buf++) { + if (*buf == '\0') + break; + + if (strncasecmp(buf, str, length) == 0) + return SXE_CAST_NOCONST(char *, buf); + } + + return NULL; +} diff --git a/lib-sxe-util/sxe-uint64.c b/lib-sxe-util/sxe-uint64.c new file mode 100644 index 0000000..9c6c36b --- /dev/null +++ b/lib-sxe-util/sxe-uint64.c @@ -0,0 +1,52 @@ +/* Copyright (c) 2022 Jim Belton + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +#include "sxe-util.h" + +/** + * Base 2 logarithm of a uint64_t as an unsigned int + * + * @note This implementation is non-portable. 
See sxe-unsigned for a portable version for unsigned ints; a value of 0 yields 0, as sxe_unsigned_log2 does + */ +unsigned +sxe_uint64_log2(uint64_t value) +{ + return value ? 63 - __builtin_clzll(value) : 0; /* __builtin_clzll(0) is undefined */ +} + +uint64_t +sxe_uint64_align(uint64_t value, uint64_t multiple) +{ + uint64_t mod; + + if (multiple & (multiple - 1)) + return (mod = value % multiple) ? value - mod + multiple : value; + + return (mod = value & (multiple - 1)) ? value - mod + multiple : value; +} + +int +sxe_uint64_cmp(uint64_t left, uint64_t right) +{ + return left < right ? -1 : left == right ? 0 : 1; +} diff --git a/lib-sxe-util/sxe-unsigned.c b/lib-sxe-util/sxe-unsigned.c new file mode 100644 index 0000000..bcb79e2 --- /dev/null +++ b/lib-sxe-util/sxe-unsigned.c @@ -0,0 +1,72 @@ +/* Copyright 2010 Sophos Limited. All rights reserved. Sophos is a registered + * trademark of Sophos Limited. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "sxe-util.h" + +/* Log2 table for all byte values.
Each is stored in a nibble to reduce cache thrash. + */ +static unsigned sxe_log2_table[] = { + 0x22221100, 0x33333333, 0x44444444, 0x44444444, 0x55555555, 0x55555555, 0x55555555, 0x55555555, + 0x66666666, 0x66666666, 0x66666666, 0x66666666, 0x66666666, 0x66666666, 0x66666666, 0x66666666, + 0x77777777, 0x77777777, 0x77777777, 0x77777777, 0x77777777, 0x77777777, 0x77777777, 0x77777777, + 0x77777777, 0x77777777, 0x77777777, 0x77777777, 0x77777777, 0x77777777, 0x77777777, 0x77777777 +}; + +static unsigned sxe_mask_table[] = +{ + 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF, + 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF, + 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF, + 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, +}; + +unsigned +sxe_unsigned_log2(unsigned number) +{ + unsigned shift = 0; + + if (number > 0xFFFF) { + if (number > 0xFFFFFF) { + shift = 24; + } + else { + shift = 16; + } + } + else if (number > 0xFF) { + shift = 8; + } + else { + SXEL7("number %u, number >> 3 = %u, sxe_log2_table[number >> 3] = %08x", number, number >> 3, sxe_log2_table[number >> 3]); + SXEL7("number & 0x7 = %u, (number & 0x7) * 4 = %u, sxe_log2_table[number >> 3] >> ((number & 0x7) * 4) = %08x", number & 0x7, (number & 0x7) * 4, sxe_log2_table[number >> 3] >> ((number & 0x7) * 4)); + return (sxe_log2_table[number >> 3] >> ((number & 0x7) * 4)) & 0xF; + } + + number >>= shift; + return ((sxe_log2_table[number >> 3] >> ((number & 0x7) * 4)) & 0xF) + shift; +} + +unsigned +sxe_unsigned_mask(unsigned number) +{ + return sxe_mask_table[sxe_unsigned_log2(number)]; +} diff --git a/lib-sxe-util/sxe-util.h b/lib-sxe-util/sxe-util.h new file mode 100644 index 0000000..a0585c1 --- /dev/null +++ b/lib-sxe-util/sxe-util.h @@ -0,0 +1,64 @@ +/* Copyright (c) 2010 Sophos Group.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef __SXE_UTIL_H__ +#define __SXE_UTIL_H__ + +#include +#include +#include +#include +#include +#include +#include + +#include "sxe-log.h" + +#define SXE_LITERAL_LENGTH(literal) (sizeof(literal "") - 1) /* The empty string prevents the caller passing a non-literal */ +#define SXE_UNSIGNED_MAXIMUM (~0U) +#define SXE_BIT_OPTION(number) (1 << (number)) + +#define SXE_CAST(type, ptr) ((type)(uintptr_t)(ptr)) +#define SXE_CAST_NOCONST(type, ptr) SXE_CAST(type, ptr) + +/* Fun macros for relocatable data structures + */ +#define SXE_PTR_FIX(base, type, ptr_rel) ((type)(void *)((char *)(ptr_rel) + (size_t)(base))) +#define SXE_PTR_REL(base, type, ptr_fix) ((type)(void *)((char *)(ptr_fix) - (size_t)(base))) + +#define SXE_ROT13_CHAR(character) (sxe_rot13_char[(unsigned char)(character)]) + +typedef struct SXE_SHA1 { + uint32_t word[5]; +} SXE_SHA1; + +extern unsigned char sxe_rot13_char[]; + +#define SXE_BOOL_TO_STR(bool_val) ((bool_val) ? "true" : "false") + +/* Definitions for sxe-stat (weird formatting due to requirement that our structure tags are upper case) + */ +typedef struct \ + stat SXE_STAT; + +#include "lib-sxe-util-proto.h" + +#endif diff --git a/lib-sxe-util/test/test-sxe-file-limit.c b/lib-sxe-util/test/test-sxe-file-limit.c new file mode 100644 index 0000000..ea662fe --- /dev/null +++ b/lib-sxe-util/test/test-sxe-file-limit.c @@ -0,0 +1,45 @@ +/* Copyright (c) 2010 Sophos Group.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include + +#include "sxe-util.h" +#include "tap.h" + +int +main(void) +{ + plan_tests(2); + + is(sxe_set_file_limit(200), SXE_RETURN_OK, "Succesfully set the file limit"); + +#ifdef _WIN32 + is(1, 1, "A bad file limit on windows causes an abort."); +#else + is(sxe_set_file_limit(-1U), SXE_RETURN_ERROR_INTERNAL, "Succesfully failed to set the file limit"); +#endif + + return exit_status(); +} + + diff --git a/lib-sxe-util/test/test-sxe-hex.c b/lib-sxe-util/test/test-sxe-hex.c new file mode 100644 index 0000000..b1ce392 --- /dev/null +++ b/lib-sxe-util/test/test-sxe-hex.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +#include "sxe-util.h" +#include "sxe-log.h" +#include "tap.h" + +#define SHA1_HEX "2ce679528627da7780f8a4fec07cb34f902468a0" + +typedef struct TEST_SHA1_STRUCT { + unsigned word[5]; +} TEST_SHA1; + +static unsigned char sxe_sha1_expected_bytes[] = {0x2c, 0xe6, 0x79, 0x52, 0x86, 0x27, 0xda, 0x77, 0x80, 0xf8, + 0xa4, 0xfe, 0xc0, 0x7c, 0xb3, 0x4f, 0x90, 0x24, 0x68, 0xa0}; + +int +main(void) +{ + TEST_SHA1 sha1_expected; + TEST_SHA1 sha1_got; + char hex_buffer[sizeof(SHA1_HEX)]; + unsigned val = 0; + char invalid_hex[2] = {-50, -1}; + + plan_tests(17); + is(sxe_hex_to_unsigned("0", 2), 0, "'0':2 -> 0"); + is(sxe_hex_to_unsigned("face", 4), 0xface, "'face':4 -> 0xface"); + is(sxe_hex_to_unsigned("B00B", 2), 0xb0, "'B00B':2 -> 0xb0"); + is(sxe_hex_to_unsigned("XXXX", 4), SXE_UNSIGNED_MAXIMUM, "'XXXX':4 -> 0x%x (SXE_UNSIGNED_MAXIMUM)", + sxe_hex_to_unsigned("XXXX", 4)); + is(sxe_hex_to_unsigned(invalid_hex, 2), SXE_UNSIGNED_MAXIMUM, "invalid_hex -> SXE_UNSIGNED_MAXIMUM"); + + is(sxe_valid_hex_to_unsigned("0a", 2, &val), SXE_RETURN_OK, "0a is valid hex"); + is(val, 10, "0a hex is 10 decimal"); + is(sxe_valid_hex_to_unsigned("F45C6AC6", 8, &val), SXE_RETURN_OK, "F45C6AC6 is valid hex"); + is(val, 4099697350, "F45C6AC6 hex is 4099697350 decimal"); + is(sxe_valid_hex_to_unsigned("ZZ", 2, &val), SXE_RETURN_ERROR_INTERNAL, "ZZ is not valid hex"); + is(sxe_valid_hex_to_unsigned(invalid_hex, 2, &val), SXE_RETURN_ERROR_INTERNAL, "invalid_hex -> SXE_RETURN_ERROR_INTERNAL"); + + ok(sxe_hex_to_bytes((unsigned char *)&sha1_got, "goofy goober", 12) != SXE_RETURN_OK, "Conversion from hex 'goofy goober' to bytes failed"); + is(sxe_hex_to_bytes((unsigned char *)&sha1_got, SHA1_HEX, 40), SXE_RETURN_OK, "Conversion from hex '%s' to bytes succeeded", SHA1_HEX); + + memcpy(&sha1_expected, sxe_sha1_expected_bytes, sizeof(sha1_expected)); + + if (memcmp(&sha1_got, &sha1_expected, sizeof(TEST_SHA1)) == 0) { + pass( "bytes are as expected"); + } + else { + SXEL1("Expected:"); 
+ SXED1(&sha1_expected, sizeof(sha1_expected)); + SXEL1("Got:"); + SXED1(&sha1_got, sizeof(sha1_got)); + fail( "bytes are not as expected"); + } + + tap_test_case_name("sxe_hex_from_bytes"); + hex_buffer[sizeof(hex_buffer) - 1] = 0xBE; + is(sxe_hex_from_bytes(hex_buffer, sxe_sha1_expected_bytes, sizeof(sxe_sha1_expected_bytes)), hex_buffer, "Returns hex buffer"); + is_strncmp(hex_buffer, SHA1_HEX, SXE_LITERAL_LENGTH(SHA1_HEX), "SHA1 converted to hex as expected"); + is((unsigned char)hex_buffer[sizeof(hex_buffer) - 1], 0xBE, "Guard byte is intact"); + return exit_status(); +} diff --git a/lib-sxe-util/test/test-sxe-mkpath.c b/lib-sxe-util/test/test-sxe-mkpath.c new file mode 100644 index 0000000..8ccd181 --- /dev/null +++ b/lib-sxe-util/test/test-sxe-mkpath.c @@ -0,0 +1,44 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include + +#include "sxe-log.h" +#include "sxe-util.h" +#include "tap.h" + +#define TEST_DIR "test-directory" +#define TEST_PATH TEST_DIR "/test-subdirectory" +int +main(void) +{ + struct stat status; + + plan_tests(2); + + SXEA1((system("rm -rf " TEST_DIR) == 0) || (stat(TEST_DIR, &status) < 0), "Couldn't remove " TEST_DIR); + is(sxe_mkpath(TEST_PATH), SXE_RETURN_OK, "sxe_mkpath succeeded"); + ok(stat(TEST_PATH, &status) >= 0, "sxe_mkpath created " TEST_PATH); + return exit_status(); +} + + diff --git a/lib-sxe-util/test/test-sxe-rot13.c b/lib-sxe-util/test/test-sxe-rot13.c new file mode 100644 index 0000000..262e114 --- /dev/null +++ b/lib-sxe-util/test/test-sxe-rot13.c @@ -0,0 +1,64 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include "sxe-util.h" +#include "tap.h" + +int +main(void) +{ + char string[] = "0.AMNZ-amnz/"; + char buffer[] = "XXXXXXXXXXXXX"; + unsigned i; + + plan_tests(11); + is_eq(sxe_strn_rot13_in_place(string, strlen(string)), "0.NZAM-nzam/", "ROT13 of '0.AMNZ-amnz/' is '0.NZAM-nzam/'" ); + is_eq(sxe_strn_rot13_in_place(string, 6), "0.AMNZ-nzam/", "ROT13 of '0.NZAM-nzam/:6 is '0.AMNZ-nzam/'" ); + is_eq(sxe_strn_rot13(buffer, string, strlen(string)), "0.NZAM-amnz/X", "ROT13 of '0.AMNZ-nzam/':12 is '0.NZAM-amnz/X'"); + is_eq(sxe_strn_rot13(buffer, string, 32), "0.NZAM-amnz/", "ROT13 of '0.AMNZ-nzam/':32 is '0.NZAM-amnz/\\0'"); + is(sxe_strn_rot13_in_place(string, strlen(string)), string, "sxe_strn_rot13_in_place is in place" ); + is(sxe_strn_rot13(buffer, string, strlen(string)), buffer, "sxe_strn_rot13 is a copy" ); + is(sxe_rot13_hex_to_unsigned("0", 2), 0, "'0':2 -> 0"); + is(sxe_rot13_hex_to_unsigned("snpr", 4), 0xface, "'snpr':4 -> 0xface"); + is(sxe_rot13_hex_to_unsigned("O00O", 2), 0xb0, "'O00O':2 -> 0xb0"); + is(sxe_rot13_hex_to_unsigned("XXXX", 4), SXE_UNSIGNED_MAXIMUM, "'XXXX':4 -> 0x%x (SXE_UNSIGNED_MAXIMUM)", + sxe_rot13_hex_to_unsigned("XXXX", 4)); + + for (i = 0; i < 256; i++) { + if ((('a' <= i) && (i <= 'm')) || (('A' <= i) && (i <= 'M'))) { + if (SXE_ROT13_CHAR(i) != i + 13) { + break; + } + } + else if ((('n' <= i) && (i <= 'z')) || (('N' <= i) && (i <= 'Z'))) { + if (SXE_ROT13_CHAR(i) != i - 13) { + break; + } + } + else if (SXE_ROT13_CHAR(i) != i) { + break; + } + } + + is(i, 256, "All 256 characters are rot13 encoded correctly"); + return exit_status(); +} diff --git a/lib-sxe-util/test/test-sxe-stat.c b/lib-sxe-util/test/test-sxe-stat.c new file mode 100644 index 0000000..32265da --- /dev/null +++ b/lib-sxe-util/test/test-sxe-stat.c @@ -0,0 +1,63 @@ +/* Copyright (c) 2010 Sophos Group. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "sxe-log.h" +#include "sxe-util.h" +#include "tap.h" + +#define TEST_FILE "test-sxe-stat-file" + +int +main(void) +{ + SXE_STAT status; + FILE * file_ptr; + time_t current_time; + + plan_tests(7); + time(&current_time); + unlink(TEST_FILE); + + is(sxe_stat(&status, TEST_FILE), NULL, "After removal, correctly can't stat '" TEST_FILE "'"); + is(sxe_stat_get_time_modification(NULL), 0, "Modification time of NULL SXE_STAT is 0"); + SXEA11((file_ptr = fopen(TEST_FILE, "w")) != NULL, "Can't create '" TEST_FILE "': %s", strerror(errno)); + fclose(file_ptr); + is(sxe_stat(&status, TEST_FILE), &status, "After creation, able to stat '" TEST_FILE "'"); + ok(sxe_stat_get_time_modification(&status) >= current_time, "Modification time of file is %lu (program time %lu)", + sxe_stat_get_time_modification(&status), current_time); + is(sxe_stat_get_file_size(&status), 0, "It's an empty file, 0 bytes"); + SXEA11((file_ptr = fopen(TEST_FILE, "w")) != NULL, "Can't create '" TEST_FILE "': %s", strerror(errno)); + fwrite("foobar", 1, 6, file_ptr); + fclose(file_ptr); + is(sxe_stat(&status, TEST_FILE), &status, "After write, able to stat '" TEST_FILE "' again"); + is(sxe_stat_get_file_size(&status), 6, "It's not empty any more, file is 6 bytes now"); + + unlink(TEST_FILE); + return exit_status(); +} + + diff --git a/lib-sxe-util/test/test-sxe-str.c b/lib-sxe-util/test/test-sxe-str.c new file mode 100644 index 0000000..39b36f9 --- /dev/null +++ b/lib-sxe-util/test/test-sxe-str.c @@ -0,0 +1,71 @@ +/* Copyright (c) 2010 Sophos Group.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "sxe-util.h" +#include "tap.h" + +char foobar[] = "foobarino"; +char FOObar[] = "FOObar"; +char abc [] = "abcabcdabcdeabcdef"; + +int +main(void) +{ + const char * needle; + int length; + + plan_tests(23); + is( sxe_strnchr(foobar, 'b', 6), &foobar[3], "Found 'b' in 'foobarino':6"); + is( sxe_strnchr(foobar, 'b', 2), NULL, "Did not find 'b' in 'foobarino':2"); + is( sxe_strnchr(foobar, 'x', 10), NULL, "Did not find 'x' in 'foobarino':10"); + is( sxe_strnchr(foobar, 'o', 3), &foobar[1], "Found first 'o' in 'foobarino':3"); + is( sxe_rstrnchr(foobar, 'o', 3), &foobar[2], "Found last 'o' in 'foobarino':3"); + is( sxe_rstrnchr(foobar, 'x', 3), NULL, "Did not find 'x' in 'foobar':3"); + is( sxe_strnstr(foobar, "foo", 6), &foobar[0], "Found 'foo' in 'foobarino':6"); + is( sxe_strnstr(foobar, "bar", 6), &foobar[3], "Found 'bar' in 'foobarino':6"); + is( sxe_strnstr(foobar, "foo", 2), NULL, "Did not find 'foo' in 'foobarino':2"); + is( sxe_strnstr(foobar, "rino", 6), NULL, "Did not find 'rino' in 'foobarino':6"); + is( sxe_strnstr(foobar, "gorp", 13), NULL, "Did not find 'gorp' in 'foobarino':13"); + is(sxe_strncasestr(FOObar, "foo", 6), &FOObar[0], "Found 'foo' in 'FOObar':6"); + is(sxe_strncasestr(FOObar, "foo", 2), NULL, "Did not find 'foo' in 'FOObar':2"); + is(sxe_strncasestr(FOObar, "gorp", 10), NULL, "Did not find 'gorp' in 'FOObar':10"); + is( sxe_strncspn(foobar, "=& ", 10), NULL, "Did not find '=', '&' or ' ' in 'foobarino'"); + is( sxe_strncspn(foobar, "ni", 10), &foobar[6], "Found 'n' or 'i' in 'foobarino':10 at character 6"); + is( sxe_strncspn(foobar, "ni", 6), NULL, "Didn't find 'n' or 'i' in 'foobarino':6"); + + needle = "abc"; + length = 3; + is(sxe_rstrnstr(abc, needle, length), &abc[0], "Found the last occurrence of '%s' in '%s' :%d", needle, abc, length); + length = 7; + is(sxe_rstrnstr(abc, needle, length), &abc[3], "Found the last occurrence of '%s' in '%s' :%d", needle, abc, length); + length = 12; + is(sxe_rstrnstr(abc, needle, 
length), &abc[7], "Found the last occurrence of '%s' in '%s' :%d", needle, abc, length); + length = 18; + is(sxe_rstrnstr(abc, needle, length), &abc[12], "Found the last occurrence of '%s' in '%s' :%d", needle, abc, length); + needle = "foo"; + is(sxe_rstrnstr(abc, needle, length), NULL, "Didn't find '%s' in '%s' :%d", needle, abc, length); + needle = ""; + is(sxe_rstrnstr(abc, needle, length), &abc[length], "Found the empty string at the end of the haystack"); + + return exit_status(); +} + + diff --git a/lib-sxe-util/test/test-sxe-to-printable.c b/lib-sxe-util/test/test-sxe-to-printable.c new file mode 100644 index 0000000..3af7c60 --- /dev/null +++ b/lib-sxe-util/test/test-sxe-to-printable.c @@ -0,0 +1,55 @@ +/* Copyright (c) 2010 Sophos Group. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +#include "sxe-util.h" +#include "sxe-log.h" +#include "tap.h" + +static char hello_world[] = "hello, world"; +static char very_long_string[] = +"\n12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" +"012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" +"012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" +"012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" +"012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" +"012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" +"012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" +"012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" +"01234567890123456789012345678901234567890123456789012345678901234"; + +int +main(void) +{ + const char * str; + + plan_tests(6); + + is(sxe_str_to_printable(NULL), NULL, "NULL in, NULL out"); + is(sxe_str_to_printable(hello_world), hello_world, "'hello, world' is already printable"); + is_eq(sxe_str_to_printable("\n\t \x7F\x80"), "\\x0a\\x09 \\x7f\\x80", "Printable version of '\\n\\t \\x7F\\x80' is '\\x0a\\x09 \\x7f\\x80"); + ok((str = sxe_str_to_printable(very_long_string)) != very_long_string, "Very long string is not already printable"); + is(strlen(str), 1023, "Very long printable string truncated at expected length"); + is_eq(&str[1020], "...", "Truncated printable string ends with ..."); + + return exit_status(); +} diff --git a/lib-sxe-util/test/test-sxe-uint64.c b/lib-sxe-util/test/test-sxe-uint64.c new file mode 100644 
index 0000000..e63aaa4 --- /dev/null +++ b/lib-sxe-util/test/test-sxe-uint64.c @@ -0,0 +1,28 @@ +#include "sxe-util.h" +#include "tap.h" + +int +main(void) +{ + plan_tests(14); + + is(sxe_uint64_log2(1), 0, "sxe_uint64_log2(1) == 0"); + is(sxe_uint64_log2(2), 1, "sxe_uint64_log2(2) == 1"); + is(sxe_uint64_log2(3), 1, "sxe_uint64_log2(3) == 1"); + is(sxe_uint64_log2(4), 2, "sxe_uint64_log2(4) == 2"); + is(sxe_uint64_log2(128), 7, "sxe_uint64_log2(128) == 7"); + is(sxe_uint64_log2(256), 8, "sxe_uint64_log2(256) == 8"); + is(sxe_uint64_log2(0x10000), 16, "sxe_uint64_log2(0x10000) == 16"); + is(sxe_uint64_log2(0x1000000), 24, "sxe_uint64_log2(0x1000000) == 24"); + is(sxe_uint64_log2(0xFFFFFFFFFFFFFFFFULL), 63, "sxe_uint64_log2(0xFFFFFFFFFFFFFFFFULL) == 63"); + + is(sxe_uint64_align(7, 9), 9, "Align on an odd multiple"); + is(sxe_uint64_align(1, 4096), 4096, "Align on a page boundary"); + is(sxe_uint64_align(8192, 4096), 8192, "Already aligned on a page boundary"); + is(sxe_uint64_align(0xEFFFFFFFFFFFFFFFULL, 4096), 0xF000000000000000ULL, + "sxe_uint64_align(0xEFFFFFFFFFFFFFFFULL, 4096) == 0xF000000000000000ULL"); + + is(sxe_uint64_cmp(0, 1), -1, "0 cmp 1 == -1"); + + return exit_status(); +} diff --git a/lib-sxe-util/test/test-sxe-unsigned.c b/lib-sxe-util/test/test-sxe-unsigned.c new file mode 100644 index 0000000..e896b3d --- /dev/null +++ b/lib-sxe-util/test/test-sxe-unsigned.c @@ -0,0 +1,28 @@ +#include "sxe-util.h" +#include "tap.h" + +int +main(void) +{ + plan_tests(16); + + is(sxe_unsigned_log2(1), 0, "sxe_log2_unsigned(1) == 0"); + is(sxe_unsigned_log2(2), 1, "sxe_log2_unsigned(2) == 1"); + is(sxe_unsigned_log2(3), 1, "sxe_log2_unsigned(3) == 1"); + is(sxe_unsigned_log2(4), 2, "sxe_log2_unsigned(4) == 2"); + is(sxe_unsigned_log2(128), 7, "sxe_log2_unsigned(128) == 7"); + is(sxe_unsigned_log2(256), 8, "sxe_log2_unsigned(256) == 8"); + is(sxe_unsigned_log2(0x10000), 16, "sxe_log2_unsigned(0x10000) == 16"); + is(sxe_unsigned_log2(0x1000000), 24,
"sxe_log2_unsigned(0x1000000) == 24"); + + is(sxe_unsigned_mask(1), 1, "sxe_unsigned_mask(1) == 1"); + is(sxe_unsigned_mask(2), 3, "sxe_unsigned_mask(2) == 3"); + is(sxe_unsigned_mask(3), 3, "sxe_unsigned_mask(3) == 3"); + is(sxe_unsigned_mask(255), 255, "sxe_unsigned_mask(255) == 255"); + is(sxe_unsigned_mask(256), 511, "sxe_unsigned_mask(256) == 511"); + is(sxe_unsigned_mask(511), 511, "sxe_unsigned_mask(511) == 511"); + is(sxe_unsigned_mask(0xFFFF), 0xFFFF, "sxe_unsigned_mask(0xFFFF) == 0xFFFF"); + is(sxe_unsigned_mask(0x1000000), 0x1FFFFFF, "sxe_unsigned_mask(0x1000000) == 0x1FFFFFF"); + + return exit_status(); +} diff --git a/lib-tzcode/CONTRIBUTING b/lib-tzcode/CONTRIBUTING new file mode 100644 index 0000000..6d800e4 --- /dev/null +++ b/lib-tzcode/CONTRIBUTING @@ -0,0 +1,97 @@ +# Contributing to the tz code and data + +Please do not create issues or pull requests on GitHub, as the +proper procedure for proposing and distributing patches is via +email as described below. + +The time zone database is by no means authoritative: governments +change timekeeping rules erratically and sometimes with little +warning, the data entries do not cover all of civil time before +1970, and undoubtedly errors remain in the code and data. Feel +free to fill gaps or fix mistakes, and please email improvements +to tz@iana.org for use in the future. In your email, please give +reliable sources that reviewers can check. + +## Contributing technical changes + +To email small changes, please run a POSIX shell command like +'diff -u old/europe new/europe >myfix.patch', and attach +'myfix.patch' to the email. + +For more-elaborate or possibly controversial changes, +such as renaming, adding or removing zones, please read +"Theory and pragmatics of the tz code and data" +. +It is also good to browse the mailing list archives + for examples of patches that tend +to work well. Additions to data should contain commentary citing +reliable sources as justification.
Citations should use "https:" URLs +if available. + +For changes that fix sensitive security-related bugs, please see the +distribution's 'SECURITY' file. + +Please submit changes against either the latest release +<https://www.iana.org/time-zones> or the main branch of the development +repository. The latter is preferred. + +## Sample Git workflow for developing contributions + +If you use Git the following workflow may be helpful: + + * Copy the development repository. + + git clone https://github.com/eggert/tz.git + cd tz + + * Get current with the main branch. + + git checkout main + git pull + + * Switch to a new branch for the changes. Choose a different + branch name for each change set. + + git checkout -b mybranch + + * Sleuth by using 'git blame'. For example, when fixing data for + Africa/Sao_Tome, if the command 'git blame africa' outputs a line + '2951fa3b (Paul Eggert 2018-01-08 09:03:13 -0800 1068) Zone + Africa/Sao_Tome 0:26:56 - LMT 1884', commit 2951fa3b should + provide some justification for the 'Zone Africa/Sao_Tome' line. + + * Edit source files. Include commentary that justifies the + changes by citing reliable sources. + + * Debug the changes, e.g.: + + make check + make install + ./zdump -v America/Los_Angeles + + * For each separable change, commit it in the new branch, e.g.: + + git add northamerica + git commit + + See recent 'git log' output for the commit-message style. + + * Create patch files 0001-..., 0002-..., ... + + git format-patch main + + * After reviewing the patch files, send the patches to + <tz@iana.org> for others to review. + + git send-email main + + For an archived example of such an email, see + "[PROPOSED] Fix off-by-1 error for Jamaica and T&C before 1913" + <https://mm.icann.org/pipermail/tz/2018-February/026122.html>. + + * Start anew by getting current with the main branch again + (the second step above). + +----- + +This file is in the public domain.
diff --git a/lib-tzcode/LICENSE b/lib-tzcode/LICENSE new file mode 100644 index 0000000..8ba4399 --- /dev/null +++ b/lib-tzcode/LICENSE @@ -0,0 +1,5 @@ +Unless specified below, all files in the tz code and data (including +this LICENSE file) are in the public domain. + +If the files date.c, newstrftime.3, and strftime.c are present, they +contain material derived from BSD and use the BSD 3-clause license. diff --git a/lib-tzcode/Makefile b/lib-tzcode/Makefile new file mode 100644 index 0000000..2349adb --- /dev/null +++ b/lib-tzcode/Makefile @@ -0,0 +1,1282 @@ +# Make and install tzdb code and data. + +# This file is in the public domain, so clarified as of +# 2009-05-17 by Arthur David Olson. + +# Package name for the code distribution. +PACKAGE= tzcode + +# Version number for the distribution, overridden in the 'tarballs' rule below. +VERSION= unknown + +# Email address for bug reports. +BUGEMAIL= tz@iana.org + +# DATAFORM selects the data format. +# Available formats represent essentially the same data, albeit +# possibly with minor discrepancies that users are not likely to notice. +# To get new features and the best data right away, use: +# DATAFORM= vanguard +# To wait a while before using new features, to give downstream users +# time to upgrade zic (the default), use: +# DATAFORM= main +# To wait even longer for new features, use: +# DATAFORM= rearguard +# Rearguard users might also want "ZFLAGS = -b fat"; see below. +DATAFORM= main + +# Change the line below for your timezone (after finding the one you want in +# one of the $(TDATA) source files, or adding it to a source file). +# Alternatively, if you discover you've got the wrong timezone, you can just +# 'zic -l -' to remove it, or 'zic -l rightzone' to change it. +# Use the command +# make zonenames +# to get a list of the values you can use for LOCALTIME. 
+ +LOCALTIME= Factory + +# The POSIXRULES macro controls interpretation of POSIX-like TZ +# settings like TZ='EET-2EEST' that lack DST transition rules. +# If POSIXRULES is '-', no template is installed; this is the default. +# Any other value for POSIXRULES is obsolete and should not be relied on, as: +# * It does not work correctly in popular implementations such as GNU/Linux. +# * It does not work even in tzcode, except for historical timestamps +# that precede the last explicit transition in the POSIXRULES file. +# Hence it typically does not work for current and future timestamps. +# If, despite the above, you want a template for handling these settings, +# you can change the line below (after finding the timezone you want in the +# one of the $(TDATA) source files, or adding it to a source file). +# Alternatively, if you discover you've got the wrong timezone, you can just +# 'zic -p -' to remove it, or 'zic -p rightzone' to change it. +# Use the command +# make zonenames +# to get a list of the values you can use for POSIXRULES. + +POSIXRULES= - + +# Also see TZDEFRULESTRING below, which takes effect only +# if POSIXRULES is '-' or if the template file cannot be accessed. + + +# Installation locations. +# +# The defaults are suitable for Debian, except that if REDO is +# posix_right or right_posix then files that Debian puts under +# /usr/share/zoneinfo/posix and /usr/share/zoneinfo/right are instead +# put under /usr/share/zoneinfo-posix and /usr/share/zoneinfo-leaps, +# respectively. Problems with the Debian approach are discussed in +# the commentary for the right_posix rule (below). + +# Destination directory, which can be used for staging. +# 'make DESTDIR=/stage install' installs under /stage (e.g., to +# /stage/etc/localtime instead of to /etc/localtime). Files under +# /stage are not intended to work as-is, but can be copied by hand to +# the root directory later. 
If DESTDIR is empty, 'make install' does +# not stage, but installs directly into production locations. +DESTDIR = + +# Everything is installed into subdirectories of TOPDIR, and used there. +# TOPDIR should be empty (meaning the root directory), +# or a directory name that does not end in "/". +# TOPDIR should be empty or an absolute name unless you're just testing. +TOPDIR = + +# The default local timezone is taken from the file TZDEFAULT. +TZDEFAULT = $(TOPDIR)/etc/localtime + +# The subdirectory containing installed program and data files, and +# likewise for installed files that can be shared among architectures. +# These should be relative file names. +USRDIR = usr +USRSHAREDIR = $(USRDIR)/share + +# "Compiled" timezone information is placed in the "TZDIR" directory +# (and subdirectories). +# TZDIR_BASENAME should not contain "/" and should not be ".", ".." or empty. +TZDIR_BASENAME= zoneinfo +TZDIR = $(TOPDIR)/$(USRSHAREDIR)/$(TZDIR_BASENAME) + +# The "tzselect" and (if you do "make INSTALL") "date" commands go in: +BINDIR = $(TOPDIR)/$(USRDIR)/bin + +# The "zdump" command goes in: +ZDUMPDIR = $(BINDIR) + +# The "zic" command goes in: +ZICDIR = $(TOPDIR)/$(USRDIR)/sbin + +# Manual pages go in subdirectories of. . . +MANDIR = $(TOPDIR)/$(USRSHAREDIR)/man + +# Library functions are put in an archive in LIBDIR. +LIBDIR = $(TOPDIR)/$(USRDIR)/lib + + +# Types to try, as an alternative to time_t. +TIME_T_ALTERNATIVES = $(TIME_T_ALTERNATIVES_HEAD) $(TIME_T_ALTERNATIVES_TAIL) +TIME_T_ALTERNATIVES_HEAD = int_least64_t +TIME_T_ALTERNATIVES_TAIL = int_least32_t uint_least32_t uint_least64_t + +# What kind of TZif data files to generate. (TZif is the binary time +# zone data format that zic generates; see Internet RFC 8536.) +# If you want only POSIX time, with time values interpreted as +# seconds since the epoch (not counting leap seconds), use +# REDO= posix_only +# below. 
If you want only "right" time, with values interpreted +# as seconds since the epoch (counting leap seconds), use +# REDO= right_only +# below. If you want both sets of data available, with leap seconds not +# counted normally, use +# REDO= posix_right +# below. If you want both sets of data available, with leap seconds counted +# normally, use +# REDO= right_posix +# below. POSIX mandates that leap seconds not be counted; for compatibility +# with it, use "posix_only" or "posix_right". Use POSIX time on systems with +# leap smearing; this can work better than unsmeared "right" time with +# applications that are not leap second aware, and is closer to unsmeared +# "right" time than unsmeared POSIX time is (e.g., 0.5 vs 1.0 s max error). + +REDO= posix_right + +# Whether to put an "Expires" line in the leapseconds file. +# Use EXPIRES_LINE=1 to put the line in, 0 to omit it. +# The EXPIRES_LINE value matters only if REDO's value contains "right". +# If you change EXPIRES_LINE, remove the leapseconds file before running "make". +# zic's support for the Expires line was introduced in tzdb 2020a, +# and was modified in tzdb 2021b to generate version 4 TZif files. +# EXPIRES_LINE defaults to 0 for now so that the leapseconds file +# can be given to pre-2020a zic implementations and so that TZif files +# built by newer zic implementations can be read by pre-2021b libraries. 
+EXPIRES_LINE= 0 + +# To install data in text form that has all the information of the TZif data, +# (optionally incorporating leap second information), use +# TZDATA_TEXT= tzdata.zi leapseconds +# To install text data without leap second information (e.g., because +# REDO='posix_only'), use +# TZDATA_TEXT= tzdata.zi +# To avoid installing text data, use +# TZDATA_TEXT= + +TZDATA_TEXT= leapseconds tzdata.zi + +# For backward-compatibility links for old zone names, use +# BACKWARD= backward +# To omit these links, use +# BACKWARD= + +BACKWARD= backward + +# If you want out-of-scope and often-wrong data from the file 'backzone', +# but only for entries listed in the backward-compatibility file zone.tab, use +# PACKRATDATA= backzone +# PACKRATLIST= zone.tab +# If you want all the 'backzone' data, use +# PACKRATDATA= backzone +# PACKRATLIST= +# To omit this data, use +# PACKRATDATA= +# PACKRATLIST= + +PACKRATDATA= +PACKRATLIST= + +# The name of a locale using the UTF-8 encoding, used during self-tests. +# The tests are skipped if the name does not appear to work on this system. + +UTF8_LOCALE= en_US.utf8 + +# Non-default libraries needed to link. +# On some hosts, this should have -lintl unless CFLAGS has -DHAVE_GETTEXT=0. +LDLIBS= + +# Add the following to the end of the "CFLAGS=" line as needed to override +# defaults specified in the source code. "-DFOO" is equivalent to "-DFOO=1". +# -DDEPRECATE_TWO_DIGIT_YEARS for optional runtime warnings about strftime +# formats that generate only the last two digits of year numbers +# -DEPOCH_LOCAL if the 'time' function returns local time not UT +# -DEPOCH_OFFSET=N if the 'time' function returns a value N greater +# than what POSIX specifies, assuming local time is UT. +# For example, N is 252460800 on AmigaOS. 
+# -DHAVE_DECL_ASCTIME_R=0 if <time.h> does not declare asctime_r +# -DHAVE_DECL_ENVIRON if <unistd.h> declares 'environ' +# -DHAVE_DECL_TIMEGM=0 if <time.h> does not declare timegm +# -DHAVE_DIRECT_H if mkdir needs <direct.h> (MS-Windows) +# -DHAVE__GENERIC=0 if _Generic does not work* +# -DHAVE_GETRANDOM if getrandom works (e.g., GNU/Linux), +# -DHAVE_GETRANDOM=0 to avoid using getrandom +# -DHAVE_GETTEXT if gettext works (e.g., GNU/Linux, FreeBSD, Solaris), +# where LDLIBS also needs to contain -lintl on some hosts; +# -DHAVE_GETTEXT=0 to avoid using gettext +# -DHAVE_INCOMPATIBLE_CTIME_R if your system's time.h declares +# ctime_r and asctime_r incompatibly with the POSIX standard +# (Solaris when _POSIX_PTHREAD_SEMANTICS is not defined). +# -DHAVE_INTTYPES_H=0 if <inttypes.h> does not work*+ +# -DHAVE_LINK=0 if your system lacks a link function +# -DHAVE_LOCALTIME_R=0 if your system lacks a localtime_r function +# -DHAVE_LOCALTIME_RZ=0 if you do not want zdump to use localtime_rz +# localtime_rz can make zdump significantly faster, but is nonstandard. +# -DHAVE_MALLOC_ERRNO=0 if malloc etc. do not set errno on failure.
+# -DHAVE_POSIX_DECLS=0 if your system's include files do not declare +# functions like 'link' or variables like 'tzname' required by POSIX +# -DHAVE_SETENV=0 if your system lacks the setenv function +# -DHAVE_SNPRINTF=0 if your system lacks the snprintf function+ +# -DHAVE_STDCKDINT_H=0 if neither <stdckdint.h> nor substitutes like +# __builtin_add_overflow work* +# -DHAVE_STDINT_H=0 if <stdint.h> does not work*+ +# -DHAVE_STRFTIME_L if <time.h> declares locale_t and strftime_l +# -DHAVE_STRDUP=0 if your system lacks the strdup function +# -DHAVE_STRTOLL=0 if your system lacks the strtoll function+ +# -DHAVE_SYMLINK=0 if your system lacks the symlink function +# -DHAVE_SYS_STAT_H=0 if <sys/stat.h> does not work* +# -DHAVE_TZSET=0 if your system lacks a tzset function +# -DHAVE_UNISTD_H=0 if <unistd.h> does not work* +# -DHAVE_UTMPX_H=0 if <utmpx.h> does not work* +# -Dlocale_t=XXX if your system uses XXX instead of locale_t +# -DPORT_TO_C89 if tzcode should also run on C89 platforms+ +# -DRESERVE_STD_EXT_IDS if your platform reserves standard identifiers +# with external linkage, e.g., applications cannot define 'localtime'. +# -Dssize_t=long on hosts like MS-Windows that lack ssize_t +# -DSUPPORT_C89 if the tzcode library should support C89 callers+ +# -DSUPPRESS_TZDIR to not prepend TZDIR to file names; this has +# security implications and is not recommended for general use +# -DTHREAD_SAFE to make localtime.c thread-safe, as POSIX requires; +# not needed by the main-program tz code, which is single-threaded. +# Append other compiler flags as needed, e.g., -pthread on GNU/Linux. +# -Dtime_tz=\"T\" to use T as the time_t type, rather than the system time_t +# This is intended for internal use only; it mangles external names.
+# -DTZ_DOMAIN=\"foo\" to use "foo" for gettext domain name; default is "tz" +# -DTZ_DOMAINDIR=\"/path\" to use "/path" for gettext directory; +# the default is system-supplied, typically "/usr/lib/locale" +# -DTZDEFRULESTRING=\",date/time,date/time\" to default to the specified +# DST transitions for POSIX-style TZ strings lacking them, +# in the usual case where POSIXRULES is '-'. If not specified, +# TZDEFRULESTRING defaults to US rules for future DST transitions. +# This mishandles some past timestamps, as US DST rules have changed. +# It also mishandles settings like TZ='EET-2EEST' for eastern Europe, +# as Europe and US DST rules differ. +# -DTZNAME_MAXIMUM=N to limit time zone abbreviations to N bytes (default 255) +# -DUNINIT_TRAP if reading uninitialized storage can cause problems +# other than simply getting garbage data +# -DUSE_LTZ=0 to build zdump with the system time zone library +# Also set TZDOBJS=zdump.o and CHECK_TIME_T_ALTERNATIVES= below. +# -DZIC_BLOAT_DEFAULT=\"fat\" to default zic's -b option to "fat", and +# similarly for "slim". Fat TZif files work around incompatibilities +# and bugs in some TZif readers, notably older ones that +# ignore or otherwise mishandle 64-bit data in TZif files; +# however, fat TZif files may trigger bugs in newer TZif readers. +# Slim TZif files are more efficient, and are the default. +# -DZIC_MAX_ABBR_LEN_WO_WARN=3 +# (or some other number) to set the maximum time zone abbreviation length +# that zic will accept without a warning (the default is 6) +# $(GCC_DEBUG_FLAGS) if you are using recent GCC and want lots of checking +# +# * Options marked "*" can be omitted if your compiler is C23 compatible. +# * Options marked "+" are obsolescent and are planned to be removed +# once the code assumes C99 or later. +# +# Select instrumentation via "make GCC_INSTRUMENT='whatever'". 
+GCC_INSTRUMENT = \ + -fsanitize=undefined -fsanitize-address-use-after-scope \ + -fsanitize-undefined-trap-on-error -fstack-protector +# Omit -fanalyzer from GCC_DEBUG_FLAGS, as it makes GCC too slow. +GCC_DEBUG_FLAGS = -DGCC_LINT -g3 -O3 -fno-common \ + $(GCC_INSTRUMENT) \ + -Wall -Wextra \ + -Walloc-size-larger-than=100000 -Warray-bounds=2 \ + -Wbad-function-cast -Wbidi-chars=any,ucn -Wcast-align=strict -Wdate-time \ + -Wdeclaration-after-statement -Wdouble-promotion \ + -Wduplicated-branches -Wduplicated-cond \ + -Wformat=2 -Wformat-overflow=2 -Wformat-signedness -Wformat-truncation \ + -Wimplicit-fallthrough=5 -Winit-self -Wlogical-op \ + -Wmissing-declarations -Wmissing-prototypes -Wnested-externs \ + -Wnull-dereference \ + -Wold-style-definition -Woverlength-strings -Wpointer-arith \ + -Wshadow -Wshift-overflow=2 -Wstrict-overflow \ + -Wstrict-prototypes -Wstringop-overflow=4 \ + -Wstringop-truncation -Wsuggest-attribute=cold \ + -Wsuggest-attribute=const -Wsuggest-attribute=format \ + -Wsuggest-attribute=malloc \ + -Wsuggest-attribute=noreturn -Wsuggest-attribute=pure \ + -Wtrampolines -Wundef -Wuninitialized -Wunused-macros -Wuse-after-free=3 \ + -Wvariadic-macros -Wvla -Wwrite-strings \ + -Wno-address -Wno-format-nonliteral -Wno-sign-compare \ + -Wno-type-limits +# +# If your system has a "GMT offset" field in its "struct tm"s +# (or if you decide to add such a field in your system's "time.h" file), +# add the name to a define such as +# -DTM_GMTOFF=tm_gmtoff +# to the end of the "CFLAGS=" line. If not defined, the code attempts to +# guess TM_GMTOFF from other macros; define NO_TM_GMTOFF to suppress this. +# Similarly, if your system has a "zone abbreviation" field, define +# -DTM_ZONE=tm_zone +# and define NO_TM_ZONE to suppress any guessing. Although these two fields +# not required by POSIX, a future version of POSIX is planned to require them +# and they are widely available on GNU/Linux and BSD systems. 
+# +# The next batch of options control support for external variables +# exported by tzcode. In practice these variables are less useful +# than TM_GMTOFF and TM_ZONE. However, most of them are standardized. +# # +# # To omit or support the external variable "tzname", add one of: +# # -DHAVE_TZNAME=0 # do not support "tzname" +# # -DHAVE_TZNAME=1 # support "tzname", which is defined by system library +# # -DHAVE_TZNAME=2 # support and define "tzname" +# # to the "CFLAGS=" line. "tzname" is required by POSIX 1988 and later. +# # If not defined, the code attempts to guess HAVE_TZNAME from other macros. +# # Warning: unless time_tz is also defined, HAVE_TZNAME=1 can cause +# # crashes when combined with some platforms' standard libraries, +# # presumably due to memory allocation issues. +# # +# # To omit or support the external variables "timezone" and "daylight", add +# # -DUSG_COMPAT=0 # do not support +# # -DUSG_COMPAT=1 # support, and variables are defined by system library +# # -DUSG_COMPAT=2 # support and define variables +# # to the "CFLAGS=" line; "timezone" and "daylight" are inspired by +# # Unix Systems Group code and are required by POSIX 2008 (with XSI) and later. +# # If not defined, the code attempts to guess USG_COMPAT from other macros. +# # +# # To support the external variable "altzone", add +# # -DALTZONE=0 # do not support +# # -DALTZONE=1 # support "altzone", which is defined by system library +# # -DALTZONE=2 # support and define "altzone" +# # to the end of the "CFLAGS=" line; although "altzone" appeared in +# # System V Release 3.1 it has not been standardized. +# # If not defined, the code attempts to guess ALTZONE from other macros. +# +# If you want functions that were inspired by early versions of X3J11's work, +# add +# -DSTD_INSPIRED +# to the end of the "CFLAGS=" line. This arranges for the following +# functions to be added to the time conversion library. 
+# "offtime" is like "gmtime" except that it accepts a second (long) argument +# that gives an offset to add to the time_t when converting it. +# "timelocal" is equivalent to "mktime". +# "timeoff" is like "timegm" except that it accepts a second (long) argument +# that gives an offset to use when converting to a time_t. +# "posix2time" and "time2posix" are described in an included manual page. +# X3J11's work does not describe any of these functions. +# These functions may well disappear in future releases of the time +# conversion package. +# +# If you don't want functions that were inspired by NetBSD, add +# -DNETBSD_INSPIRED=0 +# to the end of the "CFLAGS=" line. Otherwise, the functions +# "localtime_rz", "mktime_z", "tzalloc", and "tzfree" are added to the +# time library, and if STD_INSPIRED is also defined to nonzero the functions +# "posix2time_z" and "time2posix_z" are added as well. +# The functions ending in "_z" (or "_rz") are like their unsuffixed +# (or suffixed-by-"_r") counterparts, except with an extra first +# argument of opaque type timezone_t that specifies the timezone. +# "tzalloc" allocates a timezone_t value, and "tzfree" frees it. +# +# If you want to allocate state structures in localtime, add +# -DALL_STATE +# to the end of the "CFLAGS=" line. Storage is obtained by calling malloc. +# +# NIST-PCTS:151-2, Version 1.4, (1993-12-03) is a test suite put +# out by the National Institute of Standards and Technology +# which claims to test C and Posix conformance. If you want to pass PCTS, add +# -DPCTS +# to the end of the "CFLAGS=" line. +# +# If you want strict compliance with XPG4 as of 1994-04-09, add +# -DXPG4_1994_04_09 +# to the end of the "CFLAGS=" line. This causes "strftime" to always return +# 53 as a week number (rather than 52 or 53) for January days before +# January's first Monday when a "%V" format is used and January 1 +# falls on a Friday, Saturday, or Sunday. + +CFLAGS= + +# Linker flags. 
Default to $(LFLAGS) for backwards compatibility +# to release 2012h and earlier. + +LDFLAGS= $(LFLAGS) + +# For leap seconds, this Makefile uses LEAPSECONDS='-L leapseconds' in +# submake command lines. The default is no leap seconds. + +LEAPSECONDS= + +# The zic command and its arguments. + +zic= ./zic +ZIC= $(zic) $(ZFLAGS) + +# To shrink the size of installed TZif files, +# append "-r @N" to omit data before N-seconds-after-the-Epoch. +# To grow the files and work around bugs in older applications, +# possibly at the expense of introducing bugs in newer ones, +# append "-b fat"; see ZIC_BLOAT_DEFAULT above. +# See the zic man page for more about -b and -r. +ZFLAGS= + +# How to use zic to install TZif files. + +ZIC_INSTALL= $(ZIC) -d '$(DESTDIR)$(TZDIR)' $(LEAPSECONDS) + +# The name of a Posix-compliant 'awk' on your system. +# mawk 1.3.3 and Solaris 10 /usr/bin/awk do not work. +# Also, it is better (though not essential) if 'awk' supports UTF-8, +# and unfortunately mawk and busybox awk do not support UTF-8. +# Try AWK=gawk or AWK=nawk if your awk has the abovementioned problems. +AWK= awk + +# The full path name of a Posix-compliant shell, preferably one that supports +# the Korn shell's 'select' statement as an extension. +# These days, Bash is the most popular. +# It should be OK to set this to /bin/sh, on platforms where /bin/sh +# lacks 'select' or doesn't completely conform to Posix, but /bin/bash +# is typically nicer if it works. +KSHELL= /bin/bash + +# Name of curl , used for HTML validation. +CURL= curl + +# Name of GNU Privacy Guard , used to sign distributions. +GPG= gpg + +# This expensive test requires USE_LTZ. +# To suppress it, define this macro to be empty. +CHECK_TIME_T_ALTERNATIVES = check_time_t_alternatives + +# SAFE_CHAR is a regular expression that matches a safe character. +# Some parts of this distribution are limited to safe characters; +# others can use any UTF-8 character. +# For now, the safe characters are a safe subset of ASCII. 
+# The caller must set the shell variable 'sharp' to the character '#', +# since Makefile macros cannot contain '#'. +# TAB_CHAR is a single tab character, in single quotes. +TAB_CHAR= ' ' +SAFE_CHARSET1= $(TAB_CHAR)' !\"'$$sharp'$$%&'\''()*+,./0123456789:;<=>?@' +SAFE_CHARSET2= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\^_`' +SAFE_CHARSET3= 'abcdefghijklmnopqrstuvwxyz{|}~' +SAFE_CHARSET= $(SAFE_CHARSET1)$(SAFE_CHARSET2)$(SAFE_CHARSET3) +SAFE_CHAR= '[]'$(SAFE_CHARSET)'-]' + +# These non-alphabetic, non-ASCII printable characters are Latin-1, +# and so are likely displayable even in editors like XEmacs 21 +# that have limited display capabilities. +UNUSUAL_OK_LATIN_1 = ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷ +# Non-ASCII non-letters that OK_CHAR allows, as these characters are +# useful in commentary. +UNUSUAL_OK_CHARSET= $(UNUSUAL_OK_LATIN_1) + +# Put this in a bracket expression to match spaces. +s = [:space:] + +# OK_CHAR matches any character allowed in the distributed files. +# This is the same as SAFE_CHAR, except that UNUSUAL_OK_CHARSET and +# multibyte letters are also allowed so that commentary can contain a +# few safe symbols and people's names and can quote non-English sources. +# Other non-letters are limited to ASCII renderings for the +# convenience of maintainers using XEmacs 21.5.34, which by default +# mishandles Unicode characters U+0100 and greater. +OK_CHAR= '[][:alpha:]$(UNUSUAL_OK_CHARSET)'$(SAFE_CHARSET)'-]' + +# SAFE_LINE matches a line of safe characters. +# SAFE_SHARP_LINE is similar, except any OK character can follow '#'; +# this is so that comments can contain non-ASCII characters. +# OK_LINE matches a line of OK characters. +SAFE_LINE= '^'$(SAFE_CHAR)'*$$' +SAFE_SHARP_LINE='^'$(SAFE_CHAR)'*('$$sharp$(OK_CHAR)'*)?$$' +OK_LINE= '^'$(OK_CHAR)'*$$' + +# Flags to give 'tar' when making a distribution. +# Try to use flags appropriate for GNU tar. 
+GNUTARFLAGS= --format=pax --pax-option='delete=atime,delete=ctime' \ + --numeric-owner --owner=0 --group=0 \ + --mode=go+u,go-w --sort=name +TARFLAGS= `if tar $(GNUTARFLAGS) --version >/dev/null 2>&1; \ + then echo $(GNUTARFLAGS); \ + else :; \ + fi` + +# Flags to give 'gzip' when making a distribution. +GZIPFLAGS= -9n + +# When comparing .tzs files, use GNU diff's -F'^TZ=' option if supported. +# This makes it easier to see which Zone has been affected. +DIFF_TZS= diff -u$$(! diff -u -F'^TZ=' - - <>/dev/null >&0 2>&1 \ + || echo ' -F^TZ=') + +############################################################################### + +#MAKE= make + +cc= cc +CC= $(cc) -DTZDIR='"$(TZDIR)"' + +AR= ar + +# ':' on typical hosts; 'ranlib' on the ancient hosts that still need ranlib. +RANLIB= : + +TZCOBJS= zic.o +TZDOBJS= zdump.o localtime.o asctime.o strftime.o +DATEOBJS= date.o localtime.o strftime.o asctime.o +LIBSRCS= localtime.c asctime.c difftime.c strftime.c +LIBOBJS= localtime.o asctime.o difftime.o strftime.o +HEADERS= tzfile.h private.h +NONLIBSRCS= zic.c zdump.c +NEWUCBSRCS= date.c +SOURCES= $(HEADERS) $(LIBSRCS) $(NONLIBSRCS) $(NEWUCBSRCS) \ + tzselect.ksh workman.sh +MANS= newctime.3 newstrftime.3 newtzset.3 time2posix.3 \ + tzfile.5 tzselect.8 zic.8 zdump.8 +MANTXTS= newctime.3.txt newstrftime.3.txt newtzset.3.txt \ + time2posix.3.txt \ + tzfile.5.txt tzselect.8.txt zic.8.txt zdump.8.txt \ + date.1.txt +COMMON= calendars CONTRIBUTING LICENSE Makefile \ + NEWS README SECURITY theory.html version +WEB_PAGES= tz-art.html tz-how-to.html tz-link.html +CHECK_WEB_PAGES=check_theory.html check_tz-art.html \ + check_tz-how-to.html check_tz-link.html +DOCS= $(MANS) date.1 $(MANTXTS) $(WEB_PAGES) +PRIMARY_YDATA= africa antarctica asia australasia \ + europe northamerica southamerica +YDATA= $(PRIMARY_YDATA) etcetera +NDATA= factory +TDATA_TO_CHECK= $(YDATA) $(NDATA) backward +TDATA= $(YDATA) $(NDATA) $(BACKWARD) +ZONETABLES= zone1970.tab zone.tab +TABDATA= iso3166.tab 
$(TZDATA_TEXT) $(ZONETABLES) +LEAP_DEPS= leapseconds.awk leap-seconds.list +TZDATA_ZI_DEPS= ziguard.awk zishrink.awk version $(TDATA) \ + $(PACKRATDATA) $(PACKRATLIST) +DSTDATA_ZI_DEPS= ziguard.awk $(TDATA) $(PACKRATDATA) $(PACKRATLIST) +DATA= $(TDATA_TO_CHECK) backzone iso3166.tab leap-seconds.list \ + leapseconds $(ZONETABLES) +AWK_SCRIPTS= checklinks.awk checktab.awk leapseconds.awk \ + ziguard.awk zishrink.awk +MISC= $(AWK_SCRIPTS) +TZS_YEAR= 2050 +TZS_CUTOFF_FLAG= -c $(TZS_YEAR) +TZS= to$(TZS_YEAR).tzs +TZS_NEW= to$(TZS_YEAR)new.tzs +TZS_DEPS= $(YDATA) asctime.c localtime.c \ + private.h tzfile.h zdump.c zic.c +TZDATA_DIST = $(COMMON) $(DATA) $(MISC) +# EIGHT_YARDS is just a yard short of the whole ENCHILADA. +EIGHT_YARDS = $(TZDATA_DIST) $(DOCS) $(SOURCES) tzdata.zi +ENCHILADA = $(EIGHT_YARDS) $(TZS) + +# Consult these files when deciding whether to rebuild the 'version' file. +# This list is not the same as the output of 'git ls-files', since +# .gitignore is not distributed. +VERSION_DEPS= \ + calendars CONTRIBUTING LICENSE Makefile NEWS README SECURITY \ + africa antarctica asctime.c asia australasia \ + backward backzone \ + checklinks.awk checktab.awk \ + date.1 date.c difftime.c \ + etcetera europe factory iso3166.tab \ + leap-seconds.list leapseconds.awk localtime.c \ + newctime.3 newstrftime.3 newtzset.3 northamerica \ + private.h southamerica strftime.c theory.html \ + time2posix.3 tz-art.html tz-how-to.html tz-link.html \ + tzfile.5 tzfile.h tzselect.8 tzselect.ksh \ + workman.sh zdump.8 zdump.c zic.8 zic.c \ + ziguard.awk zishrink.awk \ + zone.tab zone1970.tab + +# And for the benefit of csh users on systems that assume the user +# shell should be used to handle commands in Makefiles. . . 
+ +SHELL= /bin/sh + +# The following definitions and targets are added to build for libkit + +.PHONY: test + +CFLAGS += -DRESERVE_STD_EXT_IDS=1 -fPIC # Build with the tz_ prefix on global function names + +%.o: %.c # Must restore the default rule because they are disabled by mak + $(CC) $(CFLAGS) -c -g $< -o $@ + +release: libtz.a + @mkdir -p build-linux-64-release + @cp -p libtz.a build-linux-64-release/tzcode.a + +debug: libtz.a + @mkdir -p build-linux-64-debug + @cp -p libtz.a build-linux-64-debug/tzcode.a + +checked: libtz.a + @mkdir -p build-linux-64-checked + @cp -p libtz.a build-linux-64-checked/tzcode.a + +coverage: libtz.a + @mkdir -p build-linux-64-release-coverage + @cp -p libtz.a build-linux-64-release-coverage/tzcode.a + +test: libtz.a test/test-tzcode.c + @mkdir -p build-linux-64-release + $(CC) -I. test/test-tzcode.c libtz.a -ltap -o build-linux-64-release/test-tzcode + TZ=":America/Vancouver" build-linux-64-release/test-tzcode + +# End of additions to build for libkit + +all: tzselect zic zdump libtz.a $(TABDATA) \ + vanguard.zi main.zi rearguard.zi + +ALL: all date $(ENCHILADA) + +install: all $(DATA) $(REDO) $(MANS) + mkdir -p '$(DESTDIR)$(BINDIR)' \ + '$(DESTDIR)$(ZDUMPDIR)' '$(DESTDIR)$(ZICDIR)' \ + '$(DESTDIR)$(LIBDIR)' \ + '$(DESTDIR)$(MANDIR)/man3' '$(DESTDIR)$(MANDIR)/man5' \ + '$(DESTDIR)$(MANDIR)/man8' + $(ZIC_INSTALL) -l $(LOCALTIME) \ + `case '$(POSIXRULES)' in ?*) echo '-p';; esac \ + ` $(POSIXRULES) \ + -t '$(DESTDIR)$(TZDEFAULT)' + cp -f $(TABDATA) '$(DESTDIR)$(TZDIR)/.' + cp tzselect '$(DESTDIR)$(BINDIR)/.' + cp zdump '$(DESTDIR)$(ZDUMPDIR)/.' + cp zic '$(DESTDIR)$(ZICDIR)/.' + cp libtz.a '$(DESTDIR)$(LIBDIR)/.' + $(RANLIB) '$(DESTDIR)$(LIBDIR)/libtz.a' + cp -f newctime.3 newtzset.3 '$(DESTDIR)$(MANDIR)/man3/.' + cp -f tzfile.5 '$(DESTDIR)$(MANDIR)/man5/.' + cp -f tzselect.8 zdump.8 zic.8 '$(DESTDIR)$(MANDIR)/man8/.' 
+ +INSTALL: ALL install date.1 + mkdir -p '$(DESTDIR)$(BINDIR)' '$(DESTDIR)$(MANDIR)/man1' + cp date '$(DESTDIR)$(BINDIR)/.' + cp -f date.1 '$(DESTDIR)$(MANDIR)/man1/.' + +# Calculate version number from git, if available. +# Otherwise, use $(VERSION) unless it is "unknown" and there is already +# a 'version' file, in which case reuse the existing 'version' contents +# and append "-dirty" if the contents do not already end in "-dirty". +version: $(VERSION_DEPS) + { (type git) >/dev/null 2>&1 && \ + V=`git describe --match '[0-9][0-9][0-9][0-9][a-z]*' \ + --abbrev=7 --dirty` || \ + if test '$(VERSION)' = unknown && V=`cat $@`; then \ + case $$V in *-dirty);; *) V=$$V-dirty;; esac; \ + else \ + V='$(VERSION)'; \ + fi; } && \ + printf '%s\n' "$$V" >$@.out + mv $@.out $@ + +# These files can be tailored by setting BACKWARD, PACKRATDATA, PACKRATLIST. +vanguard.zi main.zi rearguard.zi: $(DSTDATA_ZI_DEPS) + $(AWK) \ + -v DATAFORM=`expr $@ : '\(.*\).zi'` \ + -v PACKRATDATA='$(PACKRATDATA)' \ + -v PACKRATLIST='$(PACKRATLIST)' \ + -f ziguard.awk \ + $(TDATA) $(PACKRATDATA) >$@.out + mv $@.out $@ +# This file has a version comment that attempts to capture any tailoring +# via BACKWARD, DATAFORM, PACKRATDATA, PACKRATLIST, and REDO. 
+tzdata.zi: $(DATAFORM).zi version zishrink.awk + version=`sed 1q version` && \ + LC_ALL=C $(AWK) \ + -v dataform='$(DATAFORM)' \ + -v deps='$(DSTDATA_ZI_DEPS) zishrink.awk' \ + -v redo='$(REDO)' \ + -v version="$$version" \ + -f zishrink.awk \ + $(DATAFORM).zi >$@.out + mv $@.out $@ + +version.h: version + VERSION=`cat version` && printf '%s\n' \ + 'static char const PKGVERSION[]="($(PACKAGE)) ";' \ + "static char const TZVERSION[]=\"$$VERSION\";" \ + 'static char const REPORT_BUGS_TO[]="$(BUGEMAIL)";' \ + >$@.out + mv $@.out $@ + +zdump: $(TZDOBJS) + $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $(TZDOBJS) $(LDLIBS) + +zic: $(TZCOBJS) + $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $(TZCOBJS) $(LDLIBS) + +leapseconds: $(LEAP_DEPS) + $(AWK) -v EXPIRES_LINE=$(EXPIRES_LINE) \ + -f leapseconds.awk leap-seconds.list >$@.out + mv $@.out $@ + +# Arguments to pass to submakes of install_data. +# They can be overridden by later submake arguments. +INSTALLARGS = \ + BACKWARD='$(BACKWARD)' \ + DESTDIR='$(DESTDIR)' \ + LEAPSECONDS='$(LEAPSECONDS)' \ + PACKRATDATA='$(PACKRATDATA)' \ + PACKRATLIST='$(PACKRATLIST)' \ + TZDEFAULT='$(TZDEFAULT)' \ + TZDIR='$(TZDIR)' \ + ZIC='$(ZIC)' + +INSTALL_DATA_DEPS = zic leapseconds tzdata.zi + +# 'make install_data' installs one set of TZif files. +install_data: $(INSTALL_DATA_DEPS) + $(ZIC_INSTALL) tzdata.zi + +posix_only: $(INSTALL_DATA_DEPS) + $(MAKE) $(INSTALLARGS) LEAPSECONDS= install_data + +right_only: $(INSTALL_DATA_DEPS) + $(MAKE) $(INSTALLARGS) LEAPSECONDS='-L leapseconds' \ + install_data + +# In earlier versions of this makefile, the other two directories were +# subdirectories of $(TZDIR). However, this led to configuration errors. +# For example, with posix_right under the earlier scheme, +# TZ='right/Australia/Adelaide' got you localtime with leap seconds, +# but gmtime without leap seconds, which led to problems with applications +# like sendmail that subtract gmtime from localtime. 
+# Therefore, the other two directories are now siblings of $(TZDIR).
+# You must replace all of $(TZDIR) to switch from not using leap seconds
+# to using them, or vice versa.
+# In both rules below, the sibling directory is first tried as a symlink
+# to $(TZDIR_BASENAME); only if 'ln -s' fails is it populated by a submake.
+right_posix: right_only
+	rm -fr '$(DESTDIR)$(TZDIR)-leaps'
+	ln -s '$(TZDIR_BASENAME)' '$(DESTDIR)$(TZDIR)-leaps' || \
+	  $(MAKE) $(INSTALLARGS) TZDIR='$(TZDIR)-leaps' right_only
+	$(MAKE) $(INSTALLARGS) TZDIR='$(TZDIR)-posix' posix_only
+
+posix_right: posix_only
+	rm -fr '$(DESTDIR)$(TZDIR)-posix'
+	ln -s '$(TZDIR_BASENAME)' '$(DESTDIR)$(TZDIR)-posix' || \
+	  $(MAKE) $(INSTALLARGS) TZDIR='$(TZDIR)-posix' posix_only
+	$(MAKE) $(INSTALLARGS) TZDIR='$(TZDIR)-leaps' right_only
+
+zones: $(REDO)
+
+# dummy.zd is not a real file; it is mentioned here only so that the
+# top-level 'make' does not have a syntax error.
+ZDS = dummy.zd
+# Rule used only by submakes invoked by the $(TZS_NEW) rule.
+# It is separate so that GNU 'make -j' can run instances in parallel.
+$(ZDS): zdump
+	./zdump -i $(TZS_CUTOFF_FLAG) '$(wd)/'$$(expr $@ : '\(.*\).zd') \
+	  >$@
+
+TZS_NEW_DEPS = tzdata.zi zdump zic
+$(TZS_NEW): $(TZS_NEW_DEPS)
+	rm -fr tzs$(TZS_YEAR).dir
+	mkdir tzs$(TZS_YEAR).dir
+	$(zic) -d tzs$(TZS_YEAR).dir tzdata.zi
+	$(AWK) '/^L/{print "Link\t" $$2 "\t" $$3}' \
+	  tzdata.zi | LC_ALL=C sort >$@.out
+	wd=`pwd` && \
+	x=`$(AWK) '/^Z/{print "tzs$(TZS_YEAR).dir/" $$2 ".zd"}' \
+	    tzdata.zi \
+	  | LC_ALL=C sort -t . -k 2,2` && \
+	set x $$x && \
+	shift && \
+	ZDS=$$* && \
+	$(MAKE) wd="$$wd" TZS_CUTOFF_FLAG="$(TZS_CUTOFF_FLAG)" \
+	  ZDS="$$ZDS" $$ZDS && \
+	sed 's,^TZ=".*\.dir/,TZ=",' $$ZDS >>$@.out
+	rm -fr tzs$(TZS_YEAR).dir
+	mv $@.out $@
+
+# If $(TZS) exists but 'make check_tzs' fails, a maintainer should inspect the
+# failed output and fix the inconsistency, perhaps by running 'make force_tzs'.
+$(TZS):
+	touch $@
+
+force_tzs: $(TZS_NEW)
+	cp $(TZS_NEW) $(TZS)
+
+libtz.a: $(LIBOBJS)
+	rm -f $@
+	$(AR) -rc $@ $(LIBOBJS)
+	$(RANLIB) $@
+
+date: $(DATEOBJS)
+	$(CC) -o $@ $(CFLAGS) $(LDFLAGS) $(DATEOBJS) $(LDLIBS)
+
+# Generate tzselect from tzselect.ksh, substituting the shell path, AWK,
+# package name, bug address, TZDIR and version.
+tzselect: tzselect.ksh version
+	VERSION=`cat version` && sed \
+	  -e 's|#!/bin/bash|#!$(KSHELL)|g' \
+	  -e 's|AWK=[^}]*|AWK='\''$(AWK)'\''|g' \
+	  -e 's|\(PKGVERSION\)=.*|\1='\''($(PACKAGE)) '\''|' \
+	  -e 's|\(REPORT_BUGS_TO\)=.*|\1=$(BUGEMAIL)|' \
+	  -e 's|TZDIR=[^}]*|TZDIR=$(TZDIR)|' \
+	  -e 's|\(TZVERSION\)=.*|\1='"$$VERSION"'|' \
+	  <$@.ksh >$@.out
+	chmod +x $@.out
+	mv $@.out $@
+
+check: check_back check_mild
+check_mild: check_character_set check_white_space check_links \
+	check_name_lengths check_slashed_abbrs check_sorted \
+	check_tables check_web check_ziguard check_zishrink check_tzs
+
+check_character_set: $(ENCHILADA)
+	test ! '$(UTF8_LOCALE)' || \
+	! printf 'A\304\200B\n' | \
+	  LC_ALL='$(UTF8_LOCALE)' grep -q '^A.B$$' >/dev/null 2>&1 || { \
+	  LC_ALL='$(UTF8_LOCALE)' && export LC_ALL && \
+	  sharp='#' && \
+	  ! grep -Env $(SAFE_LINE) $(MANS) date.1 $(MANTXTS) \
+	    $(MISC) $(SOURCES) $(WEB_PAGES) \
+	    CONTRIBUTING LICENSE README SECURITY \
+	    version tzdata.zi && \
+	  ! grep -Env $(SAFE_LINE)'|^UNUSUAL_OK_'$(OK_CHAR)'*$$' \
+	    Makefile && \
+	  ! grep -Env $(SAFE_SHARP_LINE) $(TDATA_TO_CHECK) backzone \
+	    leapseconds zone.tab && \
+	  ! grep -Env $(OK_LINE) $(ENCHILADA); \
+	}
+	touch $@
+
+check_white_space: $(ENCHILADA)
+	patfmt=' \t|[\f\r\v]' && pat=`printf "$$patfmt\\n"` && \
+	  ! grep -En "$$pat" \
+	    $$(ls $(ENCHILADA) | grep -Fvx leap-seconds.list)
+	! grep -n '[$s]$$' \
+	  $$(ls $(ENCHILADA) | grep -Fvx leap-seconds.list)
+	touch $@
+
+PRECEDES_FILE_NAME = ^(Zone|Link[$s]+[^$s]+)[$s]+
+FILE_NAME_COMPONENT_TOO_LONG = $(PRECEDES_FILE_NAME)[^$s]*[^/$s]{15}
+
+check_name_lengths: $(TDATA_TO_CHECK) backzone
+	! grep -En '$(FILE_NAME_COMPONENT_TOO_LONG)' \
+	  $(TDATA_TO_CHECK) backzone
+	touch $@
+
+PRECEDES_STDOFF = ^(Zone[$s]+[^$s]+)?[$s]+
+STDOFF = [-+]?[0-9:.]+
+RULELESS_SAVE = (-|$(STDOFF)[sd]?)
+RULELESS_SLASHED_ABBRS = \
+	$(PRECEDES_STDOFF)$(STDOFF)[$s]+$(RULELESS_SAVE)[$s]+[^$s]*/
+
+check_slashed_abbrs: $(TDATA_TO_CHECK)
+	! grep -En '$(RULELESS_SLASHED_ABBRS)' $(TDATA_TO_CHECK)
+	touch $@
+
+CHECK_CC_LIST = { n = split($$1,a,/,/); for (i=2; i<=n; i++) print a[1], a[i]; }
+
+check_sorted: backward backzone
+	$(AWK) '/^Link/ {printf "%.5d %s\n", g, $$3} !/./ {g++}' \
+	  backward | LC_ALL=C sort -cu
+	$(AWK) '/^Zone/ {print $$2}' backzone | LC_ALL=C sort -cu
+	touch $@
+
+check_back: checklinks.awk $(TDATA_TO_CHECK)
+	$(AWK) \
+	  -v DATAFORM=$(DATAFORM) \
+	  -v backcheck=backward \
+	  -f checklinks.awk $(TDATA_TO_CHECK)
+	touch $@
+
+check_links: checklinks.awk tzdata.zi
+	$(AWK) \
+	  -v DATAFORM=$(DATAFORM) \
+	  -f checklinks.awk tzdata.zi
+	touch $@
+
+check_tables: checktab.awk $(YDATA) backward $(ZONETABLES)
+	for tab in $(ZONETABLES); do \
+	  test "$$tab" = zone.tab && links='$(BACKWARD)' || links=''; \
+	  $(AWK) -f checktab.awk -v zone_table=$$tab $(YDATA) $$links \
+	    || exit; \
+	done
+	touch $@
+
+# Compare $(TZS) with a freshly built $(TZS_NEW); if $(TZS) is empty,
+# seed it from $(TZS_NEW) instead.
+check_tzs: $(TZS) $(TZS_NEW)
+	if test -s $(TZS); then \
+	  $(DIFF_TZS) $(TZS) $(TZS_NEW); \
+	else \
+	  cp $(TZS_NEW) $(TZS); \
+	fi
+	touch $@
+
+check_web: $(CHECK_WEB_PAGES)
+check_theory.html: theory.html
+check_tz-art.html: tz-art.html
+check_tz-how-to.html: tz-how-to.html
+check_tz-link.html: tz-link.html
+check_theory.html check_tz-art.html check_tz-how-to.html check_tz-link.html:
+	$(CURL) -sS --url https://validator.w3.org/nu/ -F out=gnu \
+	    -F file=@$$(expr $@ : 'check_\(.*\)') -o $@.out && \
+	  test ! -s $@.out || { cat $@.out; exit 1; }
+	mv $@.out $@
+
+check_ziguard: rearguard.zi vanguard.zi ziguard.awk
+	$(AWK) -v DATAFORM=rearguard -f ziguard.awk vanguard.zi | \
+	  diff -u rearguard.zi -
+	$(AWK) -v DATAFORM=vanguard -f ziguard.awk rearguard.zi | \
+	  diff -u vanguard.zi -
+	touch $@
+
+# Check that zishrink.awk does not alter the data, and that ziguard.awk
+# preserves main-format data.
+check_zishrink: check_zishrink_posix check_zishrink_right
+check_zishrink_posix check_zishrink_right: \
+	zic leapseconds $(PACKRATDATA) $(PACKRATLIST) \
+	$(TDATA) $(DATAFORM).zi tzdata.zi
+	rm -fr $@.dir $@-t.dir $@-shrunk.dir
+	mkdir $@.dir $@-t.dir $@-shrunk.dir
+	case $@ in \
+	  *_right) leap='-L leapseconds';; \
+	  *) leap=;; \
+	esac && \
+	  $(ZIC) $$leap -d $@.dir $(DATAFORM).zi && \
+	  $(ZIC) $$leap -d $@-shrunk.dir tzdata.zi && \
+	  case $(DATAFORM),$(PACKRATLIST) in \
+	    main,) \
+	      $(ZIC) $$leap -d $@-t.dir $(TDATA) && \
+	      $(AWK) '/^Rule/' $(TDATA) | \
+	        $(ZIC) $$leap -d $@-t.dir - $(PACKRATDATA) && \
+	      diff -r $@.dir $@-t.dir;; \
+	  esac
+	diff -r $@.dir $@-shrunk.dir
+	rm -fr $@.dir $@-t.dir $@-shrunk.dir
+	touch $@
+
+clean_misc:
+	rm -fr check_*.dir
+	rm -f *.o *.out $(TIME_T_ALTERNATIVES) \
+	  check_* core typecheck_* \
+	  date tzselect version.h zdump zic libtz.a
+clean: clean_misc
+	rm -fr *.dir tzdb-*/
+	rm -f *.zi $(TZS_NEW)
+	rm -rf build-linux-64-release build-linux-64-debug build-linux-64-release-coverage
+
+maintainer-clean: clean
+	@echo 'This command is intended for maintainers to use; it'
+	@echo 'deletes files that may need special tools to rebuild.'
+	rm -f leapseconds version $(MANTXTS) $(TZS) *.asc *.tar.*
+
+names:
+	@echo $(ENCHILADA)
+
+public: check check_public $(CHECK_TIME_T_ALTERNATIVES) \
+	tarballs signatures
+
+date.1.txt: date.1
+newctime.3.txt: newctime.3
+newstrftime.3.txt: newstrftime.3
+newtzset.3.txt: newtzset.3
+time2posix.3.txt: time2posix.3
+tzfile.5.txt: tzfile.5
+tzselect.8.txt: tzselect.8
+zdump.8.txt: zdump.8
+zic.8.txt: zic.8
+
+$(MANTXTS): workman.sh
+	LC_ALL=C sh workman.sh `expr $@ : '\(.*\)\.txt$$'` >$@.out
+	mv $@.out $@
+
+# Set file timestamps deterministically if possible,
+# so that tarballs containing the timestamps are reproducible.
+#
+# '$(SET_TIMESTAMP_N) N DEST A B C ...' sets the timestamp of the
+# file DEST to the maximum of the timestamps of the files A B C ...,
+# plus N if GNU ls and touch are available.
+SET_TIMESTAMP_N = sh -c '\
+  n=$$0 dest=$$1; shift; \
+  touch -cmr `ls -t "$$@" | sed 1q` "$$dest" && \
+  if test $$n != 0 && \
+     lsout=`ls -n --time-style="+%s" "$$dest" 2>/dev/null`; then \
+    set x $$lsout && \
+    touch -cmd @`expr $$7 + $$n` "$$dest"; \
+  else :; fi'
+# If DEST depends on A B C ... in this Makefile, callers should use
+# $(SET_TIMESTAMP_DEP) DEST A B C ..., for the benefit of any
+# downstream 'make' that considers equal timestamps to be out of date.
+# POSIX allows this 'make' behavior, and HP-UX 'make' does it.
+# If all that matters is that the timestamp be reproducible
+# and plausible, use $(SET_TIMESTAMP).
+SET_TIMESTAMP = $(SET_TIMESTAMP_N) 0
+SET_TIMESTAMP_DEP = $(SET_TIMESTAMP_N) 1
+
+# Set the timestamps to those of the git repository, if available,
+# and if the files have not changed since then.
+# This uses GNU 'ls --time-style=+%s', which outputs the seconds count,
+# and GNU 'touch -d@N FILE', where N is the number of seconds since 1970.
+# If git or GNU is absent, don't bother to sync with git timestamps.
+# Also, set the timestamp of each prebuilt file like 'leapseconds'
+# to be the maximum of the files it depends on.
+set-timestamps.out: $(EIGHT_YARDS)
+	rm -f $@
+	if (type git) >/dev/null 2>&1 && \
+	   files=`git ls-files $(EIGHT_YARDS)` && \
+	   touch -md @1 test.out; then \
+	  rm -f test.out && \
+	  for file in $$files; do \
+	    if git diff --quiet $$file; then \
+	      time=`git log -1 --format='tformat:%ct' $$file` && \
+	      touch -cmd @$$time $$file; \
+	    else \
+	      echo >&2 "$$file: warning: does not match repository"; \
+	    fi || exit; \
+	  done; \
+	fi
+	$(SET_TIMESTAMP_DEP) leapseconds $(LEAP_DEPS)
+	for file in `ls $(MANTXTS) | sed 's/\.txt$$//'`; do \
+	  $(SET_TIMESTAMP_DEP) $$file.txt $$file workman.sh || \
+	    exit; \
+	done
+	$(SET_TIMESTAMP_DEP) version $(VERSION_DEPS)
+	$(SET_TIMESTAMP_DEP) tzdata.zi $(TZDATA_ZI_DEPS)
+	touch $@
+set-tzs-timestamp.out: $(TZS)
+	$(SET_TIMESTAMP_DEP) $(TZS) $(TZS_DEPS)
+	touch $@
+
+# The zics below ensure that each data file can stand on its own.
+# We also do an all-files run to catch links to links.
+
+check_public: $(VERSION_DEPS)
+	rm -fr public.dir
+	mkdir public.dir
+	ln $(VERSION_DEPS) public.dir
+	cd public.dir && $(MAKE) CFLAGS='$(GCC_DEBUG_FLAGS)' ALL
+	for i in $(TDATA_TO_CHECK) public.dir/tzdata.zi \
+	    public.dir/vanguard.zi public.dir/main.zi \
+	    public.dir/rearguard.zi; \
+	do \
+	  public.dir/zic -v -d public.dir/zoneinfo $$i 2>&1 || exit; \
+	done
+	public.dir/zic -v -d public.dir/zoneinfo-all $(TDATA_TO_CHECK)
+	:
+	: Also check 'backzone' syntax.
+	rm public.dir/main.zi
+	cd public.dir && $(MAKE) PACKRATDATA=backzone main.zi
+	public.dir/zic -d public.dir/zoneinfo main.zi
+	rm public.dir/main.zi
+	cd public.dir && \
+	  $(MAKE) PACKRATDATA=backzone PACKRATLIST=zone.tab main.zi
+	public.dir/zic -d public.dir/zoneinfo main.zi
+	:
+	rm -fr public.dir
+	touch $@
+
+# Check that the code works under various alternative
+# implementations of time_t.
+# NOTE(review): the $(TIME_T_ALTERNATIVES) recipe below is damaged in this
+# copy. The backquote opened at "zones=" is never closed, the "then" has no
+# matching "if", and the closing "}" has no opening "{", so part of the
+# recipe between "print $$3 }'" and "/dev/null; then" is missing. The text
+# is kept exactly as found; restore the missing span from the pristine
+# tzcode Makefile before relying on this rule.
+check_time_t_alternatives: $(TIME_T_ALTERNATIVES)
+$(TIME_T_ALTERNATIVES_TAIL): $(TIME_T_ALTERNATIVES_HEAD)
+$(TIME_T_ALTERNATIVES): $(VERSION_DEPS)
+	rm -fr $@.dir
+	mkdir $@.dir
+	ln $(VERSION_DEPS) $@.dir
+	case $@ in \
+	  int*32_t) range=-2147483648,2147483648;; \
+	  u*) range=0,4294967296;; \
+	  *) range=-4294967296,4294967296;; \
+	esac && \
+	wd=`pwd` && \
+	zones=`$(AWK) '/^[^#]/ { print $$3 }' /dev/null; then \
+	    quiet_option='-q'; \
+	  else \
+	    quiet_option=''; \
+	  fi && \
+	    diff $$quiet_option -r $(TIME_T_ALTERNATIVES_HEAD).dir/etc \
+	      $@.dir/etc && \
+	    diff $$quiet_option -r \
+	      $(TIME_T_ALTERNATIVES_HEAD).dir/usr/share \
+	      $@.dir/usr/share; \
+	}
+	touch $@
+
+TRADITIONAL_ASC = \
+	tzcode$(VERSION).tar.gz.asc \
+	tzdata$(VERSION).tar.gz.asc
+REARGUARD_ASC = \
+	tzdata$(VERSION)-rearguard.tar.gz.asc
+ALL_ASC = $(TRADITIONAL_ASC) $(REARGUARD_ASC) \
+	tzdb-$(VERSION).tar.lz.asc
+
+tarballs rearguard_tarballs tailored_tarballs traditional_tarballs \
+signatures rearguard_signatures traditional_signatures: \
+	version set-timestamps.out rearguard.zi vanguard.zi
+	VERSION=`cat version` && \
+	$(MAKE) AWK='$(AWK)' VERSION="$$VERSION" $@_version
+
+# These *_version rules are intended for use if VERSION is set by some
+# other means. Ordinarily these rules are used only by the above
+# non-_version rules, which set VERSION on the 'make' command line.
+tarballs_version: traditional_tarballs_version rearguard_tarballs_version \
+	tzdb-$(VERSION).tar.lz
+rearguard_tarballs_version: \
+	tzdata$(VERSION)-rearguard.tar.gz
+traditional_tarballs_version: \
+	tzcode$(VERSION).tar.gz tzdata$(VERSION).tar.gz
+tailored_tarballs_version: \
+	tzdata$(VERSION)-tailored.tar.gz
+signatures_version: $(ALL_ASC)
+rearguard_signatures_version: $(REARGUARD_ASC)
+traditional_signatures_version: $(TRADITIONAL_ASC)
+
+tzcode$(VERSION).tar.gz: set-timestamps.out
+	LC_ALL=C && export LC_ALL && \
+	tar $(TARFLAGS) -cf - \
+	  $(COMMON) $(DOCS) $(SOURCES) | \
+	  gzip $(GZIPFLAGS) >$@.out
+	mv $@.out $@
+
+tzdata$(VERSION).tar.gz: set-timestamps.out
+	LC_ALL=C && export LC_ALL && \
+	tar $(TARFLAGS) -cf - $(TZDATA_DIST) | \
+	  gzip $(GZIPFLAGS) >$@.out
+	mv $@.out $@
+
+# Create empty files with a reproducible timestamp.
+CREATE_EMPTY = TZ=UTC0 touch -mt 202010122253.00
+
+# The obsolescent *rearguard* targets and related macros are present
+# for backwards compatibility with tz releases 2018e through 2022a.
+# They should go away eventually. To build rearguard tarballs you
+# can instead use 'make DATAFORM=rearguard tailored_tarballs'.
+#
+# The hard-linked 'version' is removed from $@.dir along with the data
+# files, so the sed step must read the top-level 'version' and write a
+# fresh $@.dir/version with "-rearguard" appended to its first line.
+tzdata$(VERSION)-rearguard.tar.gz: rearguard.zi set-timestamps.out
+	rm -fr $@.dir
+	mkdir $@.dir
+	ln $(TZDATA_DIST) $@.dir
+	cd $@.dir && rm -f $(TDATA) $(PACKRATDATA) version
+	for f in $(TDATA) $(PACKRATDATA); do \
+	  rearf=$@.dir/$$f; \
+	  $(AWK) -v DATAFORM=rearguard -f ziguard.awk $$f >$$rearf && \
+	  $(SET_TIMESTAMP_DEP) $$rearf ziguard.awk $$f || exit; \
+	done
+	sed '1s/$$/-rearguard/' <version >$@.dir/version
+	: The dummy pacificnew pacifies TZUpdater 2.3.1 and earlier.
+	$(CREATE_EMPTY) $@.dir/pacificnew
+	touch -cmr version $@.dir/version
+	LC_ALL=C && export LC_ALL && \
+	  (cd $@.dir && \
+	   tar $(TARFLAGS) -cf - \
+	     $(TZDATA_DIST) pacificnew | \
+	   gzip $(GZIPFLAGS)) >$@.out
+	mv $@.out $@
+
+# Create a tailored tarball suitable for TZUpdater and compatible tools.
+# For example, 'make DATAFORM=vanguard tailored_tarballs' makes a tarball
+# useful for testing whether TZUpdater supports vanguard form.
+# The generated tarball is not byte-for-byte equivalent to a hand-tailored
+# traditional tarball, as data entries are put into 'etcetera' even if they
+# came from some other source file. However, the effect should be the same
+# for ordinary use, which reads all the source files.
+#
+# The 'sed -n' step builds $@.dir/version from the "# version" and
+# "# ddeps" comment lines, joining them with "-".
+# NOTE(review): the input is taken to be tzdata.zi, which carries the
+# version comment; confirm against the pristine tzcode Makefile.
+tzdata$(VERSION)-tailored.tar.gz: set-timestamps.out
+	rm -fr $@.dir
+	mkdir $@.dir
+	: The dummy pacificnew pacifies TZUpdater 2.3.1 and earlier.
+	cd $@.dir && \
+	  $(CREATE_EMPTY) $(PRIMARY_YDATA) $(NDATA) backward \
+	  `test $(DATAFORM) = vanguard || echo pacificnew`
+	(grep '^#' tzdata.zi && echo && cat $(DATAFORM).zi) \
+	  >$@.dir/etcetera
+	touch -cmr tzdata.zi $@.dir/etcetera
+	sed -n \
+	  -e '/^# *version *\(.*\)/h' \
+	  -e '/^# *ddeps */H' \
+	  -e '$$!d' \
+	  -e 'g' \
+	  -e 's/^# *version *//' \
+	  -e 's/\n# *ddeps */-/' \
+	  -e 's/ /-/g' \
+	  -e 'p' \
+	  <tzdata.zi >$@.dir/version
+	touch -cmr version $@.dir/version
+	links= && \
+	  for file in $(TZDATA_DIST); do \
+	    test -f $@.dir/$$file || links="$$links $$file"; \
+	  done && \
+	  ln $$links $@.dir
+	LC_ALL=C && export LC_ALL && \
+	  (cd $@.dir && \
+	   tar $(TARFLAGS) -cf - * | gzip $(GZIPFLAGS)) >$@.out
+	mv $@.out $@
+
+tzdb-$(VERSION).tar.lz: set-timestamps.out set-tzs-timestamp.out
+	rm -fr tzdb-$(VERSION)
+	mkdir tzdb-$(VERSION)
+	ln $(ENCHILADA) tzdb-$(VERSION)
+	$(SET_TIMESTAMP) tzdb-$(VERSION) tzdb-$(VERSION)/*
+	LC_ALL=C && export LC_ALL && \
+	tar $(TARFLAGS) -cf - tzdb-$(VERSION) | lzip -9 >$@.out
+	mv $@.out $@
+
+tzcode$(VERSION).tar.gz.asc: tzcode$(VERSION).tar.gz
+tzdata$(VERSION).tar.gz.asc: tzdata$(VERSION).tar.gz
+tzdata$(VERSION)-rearguard.tar.gz.asc: tzdata$(VERSION)-rearguard.tar.gz
+tzdb-$(VERSION).tar.lz.asc: tzdb-$(VERSION).tar.lz
+$(ALL_ASC):
+	$(GPG) --armor --detach-sign $?
+
+TYPECHECK_CFLAGS = $(CFLAGS) -DTYPECHECK -D__time_t_defined -D_TIME_T
+# Each typecheck_* target rebuilds and installs the code in a scratch
+# directory with time_t redefined, then runs the resulting zdump once.
+typecheck: typecheck_long_long typecheck_unsigned
+typecheck_long_long typecheck_unsigned: $(VERSION_DEPS)
+	rm -fr $@.dir
+	mkdir $@.dir
+	ln $(VERSION_DEPS) $@.dir
+	cd $@.dir && \
+	  case $@ in \
+	    *_long_long) i="long long";; \
+	    *_unsigned ) i="unsigned" ;; \
+	  esac && \
+	  typecheck_cflags='' && \
+	  $(MAKE) \
+	    CFLAGS="$(TYPECHECK_CFLAGS) \"-Dtime_t=$$i\"" \
+	    TOPDIR="`pwd`" \
+	    install
+	$@.dir/zdump -i -c 1970,1971 Europe/Rome
+	touch $@
+
+zonenames: tzdata.zi
+	@$(AWK) '/^Z/ { print $$2 } /^L/ { print $$3 }' tzdata.zi
+
+asctime.o: private.h tzfile.h
+date.o: private.h
+difftime.o: private.h
+localtime.o: private.h tzfile.h
+strftime.o: private.h tzfile.h
+zdump.o: version.h
+zic.o: private.h tzfile.h version.h
+
+.PHONY: ALL INSTALL all
+.PHONY: check check_mild check_time_t_alternatives
+.PHONY: check_web check_zishrink
+.PHONY: clean clean_misc dummy.zd force_tzs
+.PHONY: install install_data maintainer-clean names
+.PHONY: posix_only posix_right public
+.PHONY: rearguard_signatures rearguard_signatures_version
+.PHONY: rearguard_tarballs rearguard_tarballs_version
+.PHONY: right_only right_posix signatures signatures_version
+.PHONY: tarballs tarballs_version
+.PHONY: traditional_signatures traditional_signatures_version
+.PHONY: traditional_tarballs traditional_tarballs_version
+.PHONY: tailored_tarballs tailored_tarballs_version
+.PHONY: typecheck
+.PHONY: zonenames zones
+.PHONY: $(ZDS)
diff --git a/lib-tzcode/NEWS b/lib-tzcode/NEWS
new file mode 100644
index 0000000..b54538a
--- /dev/null
+++ b/lib-tzcode/NEWS
@@ -0,0 +1,6013 @@
+News for the tz database
+
+Release 2023c - 2023-03-28 12:42:14 -0700
+
+  Changes to past and future timestamps
+
+    Model Lebanon's DST chaos by reverting data to tzdb 2023a.
+    (Thanks to Rany Hany for the heads-up.)
+ + +Release 2023b - 2023-03-23 19:50:38 -0700 + + Changes to future timestamps + + This year Lebanon springs forward April 20/21 not March 25/26. + (Thanks to Saadallah Itani.) [This was reverted in 2023c.] + + +Release 2023a - 2023-03-22 12:39:33 -0700 + + Briefly: + Egypt now uses DST again, from April through October. + This year Morocco springs forward April 23, not April 30. + Palestine delays the start of DST this year. + Much of Greenland still uses DST from 2024 on. + America/Yellowknife now links to America/Edmonton. + tzselect can now use current time to help infer timezone. + The code now defaults to C99 or later. + Fix use of C23 attributes. + + Changes to future timestamps + + Starting in 2023, Egypt will observe DST from April's last Friday + through October's last Thursday. (Thanks to Ahmad ElDardiry.) + Assume the transition times are 00:00 and 24:00, respectively. + + In 2023 Morocco's spring-forward transition after Ramadan + will occur April 23, not April 30. (Thanks to Milamber.) + Adjust predictions for future years accordingly. This affects + predictions for 2023, 2031, 2038, and later years. + + This year Palestine will delay its spring forward from + March 25 to April 29 due to Ramadan. (Thanks to Heba Hamad.) + Make guesses for future Ramadans too. + + Much of Greenland, represented by America/Nuuk, will continue to + observe DST using European Union rules. When combined with + Greenland's decision not to change the clocks in fall 2023, + America/Nuuk therefore changes from -03/-02 to -02/-01 effective + 2023-10-29 at 01:00 UTC. (Thanks to Thomas M. Steenholdt.) + This change from 2022g doesn't affect timestamps until 2024-03-30, + and doesn't affect tm_isdst until 2023-03-25. + + Changes to past timestamps + + America/Yellowknife has changed from a Zone to a backward + compatibility Link, as it no longer differs from America/Edmonton + since 1970. (Thanks to Almaz Mingaleev.) This affects some + pre-1948 timestamps. 
The old data are now in 'backzone'. + + Changes to past time zone abbreviations + + When observing Moscow time, Europe/Kirov and Europe/Volgograd now + use the abbreviations MSK/MSD instead of numeric abbreviations, + for consistency with other timezones observing Moscow time. + + Changes to code + + You can now tell tzselect local time, to simplify later choices. + Select the 'time' option in its first prompt. + + You can now compile with -DTZNAME_MAXIMUM=N to limit time zone + abbreviations to N bytes (default 255). The reference runtime + library now rejects POSIX-style TZ strings that contain longer + abbreviations, treating them as UTC. Previously the limit was + platform dependent and abbreviations were silently truncated to + 16 bytes even when the limit was greater than 16. + + The code by default is now designed for C99 or later. To build in + a C89 environment, compile with -DPORT_TO_C89. To support C89 + callers of the tzcode library, compile with -DSUPPORT_C89. The + two new macros are transitional aids planned to be removed in a + future version, when C99 or later will be required. + + The code now builds again on pre-C99 platforms, if you compile + with -DPORT_TO_C89. This fixes a bug introduced in 2022f. + + On C23-compatible platforms tzcode no longer uses syntax like + 'static [[noreturn]] void usage(void);'. Instead, it uses + '[[noreturn]] static void usage(void);' as strict C23 requires. + (Problem reported by Houge Langley.) + + The code's functions now constrain their arguments with the C + 'restrict' keyword consistently with their documentation. + This may allow future optimizations. + + zdump again builds standalone with ckdadd and without setenv, + fixing a bug introduced in 2022g. (Problem reported by panic.) + + leapseconds.awk can now process a leap seconds file that never + expires; this might be useful if leap seconds are discontinued. 
+ + Changes to commentary + + tz-link.html has a new section "Coordinating with governments and + distributors". (Thanks to Neil Fuller for some of the text.) + + To improve tzselect diagnostics, zone1970.tab's comments column is + now limited to countries that have multiple timezones. + + Note that leap seconds are planned to be discontinued by 2035. + + +Release 2022g - 2022-11-29 08:58:31 -0800 + + Briefly: + The northern edge of Chihuahua changes to US timekeeping. + Much of Greenland stops changing clocks after March 2023. + Fix some pre-1996 timestamps in northern Canada. + C89 is now deprecated; please use C99 or later. + Portability fixes for AIX, libintl, MS-Windows, musl, z/OS + In C code, use more C23 features if available. + C23 timegm now supported by default + Fixes for unlikely integer overflows + + Changes to future timestamps + + In the Mexican state of Chihuahua, the border strip near the US + will change to agree with nearby US locations on 2022-11-30. + The strip's western part, represented by Ciudad Juárez, switches + from -06 all year to -07/-06 with US DST rules, like El Paso, TX. + The eastern part, represented by Ojinaga, will observe US DST next + year, like Presidio, TX. (Thanks to Heitor David Pinto.) + A new Zone America/Ciudad_Juarez splits from America/Ojinaga. + + Much of Greenland, represented by America/Nuuk, stops observing + winter time after March 2023, so its daylight saving time becomes + standard time. (Thanks to Jonas Nyrup and Jürgen Appel.) + + Changes to past timestamps + + Changes for pre-1996 northern Canada (thanks to Chris Walton): + + Merge America/Iqaluit and America/Pangnirtung into the former, + with a backward compatibility link for the latter name. + There is no good evidence the two locations differ since 1970. + This change affects pre-1996 America/Pangnirtung timestamps. 
+ + Cambridge Bay, Inuvik, Iqaluit, Rankin Inlet, Resolute and + Yellowknife did not observe DST in 1965, and did observe DST + from 1972 through 1979. + + Whitehorse moved from -09 to -08 on 1966-02-27, not 1967-05-28. + + Colombia's 1993 fallback was 02-06 24:00, not 04-04 00:00. + (Thanks to Alois Treindl.) + + Singapore's 1981-12-31 change was at 16:00 UTC (23:30 local time), + not 24:00 local time. (Thanks to Geoff Clare via Robert Elz.) + + Changes to code + + Although tzcode still works with C89, bugs found in recent routine + maintenance indicate that bitrot has set in and that in practice + C89 is no longer used to build tzcode. As it is a maintenance + burden, support for C89 is planned to be removed soon. Instead, + please use compilers compatible with C99, C11, C17, or C23. + + timegm, which tzcode implemented in 1989, will finally be + standardized 34 years later as part of C23, so timegm is now + supported even if STD_INSPIRED is not defined. + + Fix bug in zdump's tzalloc emulation on hosts that lack tm_zone. + (Problem reported by Đoàn Trần Công Danh.) + + Fix bug in zic on hosts where malloc(0) yields NULL on success. + (Problem reported by Tim McBrayer for AIX 6.1.) + + Fix zic configuration to avoid linkage failures on some platforms. + (Problems reported by Gilmore Davidson and Igor Ivanov.) + + Work around MS-Windows nmake incompatibility with POSIX. + (Problem reported by Manuela Friedrich.) + + Port mktime and strftime to debugging platforms where accessing + uninitialized data has undefined behavior (strftime problem + reported by Robert Elz). + + Check more carefully for unlikely integer overflows, preferring + C23 <stdckdint.h> to overflow checking by hand, as the latter has + had obscure bugs. + + Changes to build procedure + + New Makefile rule check_mild that skips checking whether Link + lines are in the file 'backward'. (Inspired by a suggestion from + Stephen Colebourne.) 
+ + +Release 2022f - 2022-10-28 18:04:57 -0700 + + Briefly: + Mexico will no longer observe DST except near the US border. + Chihuahua moves to year-round -06 on 2022-10-30. + Fiji no longer observes DST. + Move links to 'backward'. + In vanguard form, GMT is now a Zone and Etc/GMT a link. + zic now supports links to links, and vanguard form uses this. + Simplify four Ontario zones. + Fix a Y2438 bug when reading TZif data. + Enable 64-bit time_t on 32-bit glibc platforms. + Omit large-file support when no longer needed. + In C code, use some C23 features if available. + Remove no-longer-needed workaround for Qt bug 53071. + + Changes to future timestamps + + Mexico will no longer observe DST after 2022, except for areas + near the US border that continue to observe US DST rules. + On 2022-10-30 at 02:00 the Mexican state of Chihuahua moves + from -07 (-06 with DST) to year-round -06, thus not changing + its clocks that day. The new law states that Chihuahua + near the US border no longer observes US DST. + (Thanks to gera for the heads-up about Chihuahua.) + + Fiji will not observe DST in 2022/3. (Thanks to Shalvin Narayan.) + For now, assume DST is suspended indefinitely. + + Changes to data + + Move links to 'backward' to ease and simplify link maintenance. + This affects generated data only if you use 'make BACKWARD='. + + GMT is now a Zone and Etc/GMT a link instead of vice versa, + as GMT is needed for leap second support whereas Etc/GMT is not. + However, this change exposes a bug in TZUpdater 2.3.2 so it is + present only in vanguard form for now. + + Vanguard form now uses links to links, as zic now supports this. + + Changes to past timestamps + + Simplify four Ontario zones, as most of the post-1970 differences + seem to have been imaginary. (Problem reported by Chris Walton.) 
+ Move America/Nipigon, America/Rainy_River, and America/Thunder_Bay + to 'backzone'; backward-compatibility links still work, albeit + with some different timestamps before November 2005. + + Changes to code + + zic now supports links to links regardless of input line order. + For example, if Australia/Sydney is a Zone, the lines + Link Australia/Canberra Australia/ACT + Link Australia/Sydney Australia/Canberra + now work correctly, even though the shell commands + ln Australia/Canberra Australia/ACT + ln Australia/Sydney Australia/Canberra + would fail because the first command attempts to use a link + Australia/Canberra that does not exist until after the second + command is executed. Previously, zic had unspecified behavior if + a Link line's target was another link, and zic often misbehaved if + a Link line's target was a later Link line. + + Fix line number in zic's diagnostic for a link to a link. + + Fix a bug that caused localtime to mishandle timestamps starting + in the year 2438 when reading data generated by 'zic -b fat' when + distant-future DST transitions occur at times given in standard + time or in UT, not the usual case of local time. This occurs when + the corresponding .zi Rule lines specify DST transitions with TO + columns of 'max' and AT columns that end in 's' or 'u'. The + number 2438 comes from the 32-bit limit in the year 2038, plus the + 400-year Gregorian cycle. (Problem reported by Bradley White.) + + On glibc 2.34 and later, which optionally supports 64-bit time_t + on platforms like x86 where time_t was traditionally 32 bits, + default time_t to 64 instead of 32 bits. This lets functions like + localtime support timestamps after the year 2038, and fixes + year-2038 problems in zic when accessing files dated after 2038. + To continue to limit time_t to 32 bits on these platforms, use + "make CFLAGS='-D_TIME_BITS=32'". 
+ + In C code, do not enable large-file support on platforms like AIX + and macOS that no longer need it now that tzcode does not use + off_t or related functions like 'stat'. Large-file support is + still enabled by default on GNU/Linux, as it is needed for 64-bit + time_t support. + + In C code, prefer C23 keywords to pre-C23 macros for alignof, + bool, false, and true. Also, use the following C23 features if + available: __has_include, unreachable. + + zic no longer works around Qt bug 53071, as the relevant Qt + releases have been out of support since 2019. This change affects + only fat TZif files, as thin files never had the workaround. + + zdump no longer modifies the environ vector when compiled on + platforms lacking tm_zone or when compiled with -DUSE_LTZ=0. + This avoids undefined behavior on POSIX platforms. + + +Release 2022e - 2022-10-11 11:13:02 -0700 + + Briefly: + Jordan and Syria switch from +02/+03 with DST to year-round +03. + + Changes to future timestamps + + Jordan and Syria are abandoning the DST regime and are changing to + permanent +03, so they will not fall back from +03 to +02 on + 2022-10-28. (Thanks to Steffen Thorsen and Issam Al-Zuwairi.) + + Changes to past timestamps + + On 1922-01-01 Tijuana adopted standard time at 00:00, not 01:00. + + Changes to past time zone abbreviations and DST flags + + The temporary advancement of clocks in central Mexico in summer + 1931 is now treated as daylight saving time, instead of as two + changes to standard time. + + +Release 2022d - 2022-09-23 12:02:57 -0700 + + Briefly: + Palestine transitions are now Saturdays at 02:00. + Simplify three Ukraine zones into one. + + Changes to future timestamps + + Palestine now springs forward and falls back at 02:00 on the + first Saturday on or after March 24 and October 24, respectively. + This means 2022 falls back 10-29 at 02:00, not 10-28 at 01:00. + (Thanks to Heba Hamad.) 
+ + Changes to past timestamps + + Simplify three Ukraine zones to one, since the post-1970 + differences seem to have been imaginary. Move Europe/Uzhgorod and + Europe/Zaporozhye to 'backzone'; backward-compatibility links + still work, albeit with different timestamps before October 1991. + + +Release 2022c - 2022-08-15 17:47:18 -0700 + + Briefly: + Work around awk bug in FreeBSD, macOS, etc. + Improve tzselect on intercontinental Zones. + + Changes to code + + Work around a bug in onetrueawk that broke commands like + 'make traditional_tarballs' on FreeBSD, macOS, etc. + (Problem reported by Deborah Goldsmith.) + + Add code to tzselect that uses experimental structured comments in + zone1970.tab to clarify whether Zones like Africa/Abidjan and + Europe/Istanbul cross continent or ocean boundaries. + (Inspired by a problem reported by Peter Krefting.) + + Fix bug with 'zic -d /a/b/c' when /a is unwritable but the + directory /a/b already exists. + + Remove zoneinfo2tdf.pl, as it was unused and triggered false + malware alarms on some email servers. + + +Release 2022b - 2022-08-10 15:38:32 -0700 + + Briefly: + Chile's DST is delayed by a week in September 2022. + Iran no longer observes DST after 2022. + Rename Europe/Kiev to Europe/Kyiv. + New zic -R option + Vanguard form now uses %z. + Finish moving duplicate-since-1970 zones to 'backzone'. + New build option PACKRATLIST + New tailored_tarballs target, replacing rearguard_tarballs + + Changes to future timestamps + + Chile's 2022 DST start is delayed from September 4 to September 11. + (Thanks to Juan Correa.) + + Iran plans to stop observing DST permanently, after it falls back + on 2022-09-21. (Thanks to Ali Mirjamali.) + + Changes to past timestamps + + Finish moving to 'backzone' the location-based zones whose + timestamps since 1970 are duplicates; adjust links accordingly. + This change ordinarily affects only pre-1970 timestamps, and with + the new PACKRATLIST option it does not affect any timestamps. 
+ In this round the affected zones are Antarctica/Vostok, + Asia/Brunei, Asia/Kuala_Lumpur, Atlantic/Reykjavik, + Europe/Amsterdam, Europe/Copenhagen, Europe/Luxembourg, + Europe/Monaco, Europe/Oslo, Europe/Stockholm, Indian/Christmas, + Indian/Cocos, Indian/Kerguelen, Indian/Mahe, Indian/Reunion, + Pacific/Chuuk, Pacific/Funafuti, Pacific/Majuro, Pacific/Pohnpei, + Pacific/Wake and Pacific/Wallis, and the affected links are + Arctic/Longyearbyen, Atlantic/Jan_Mayen, Iceland, Pacific/Ponape, + Pacific/Truk, and Pacific/Yap. + + From fall 1994 through fall 1995, Shanks wrote that Crimea's + DST transitions were at 02:00 standard time, not at 00:00. + (Thanks to Michael Deckers.) + + Iran adopted standard time in 1935, not 1946. In 1977 it observed + DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on + 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 + transition was on 05-27, not 03-21. + (Thanks to Roozbeh Pournader and Francis Santoni.) + + Chile's observance of -04 from 1946-08-29 through 1947-03-31 was + considered DST, not standard time. Santiago and environs had moved + their clocks back to rejoin the rest of mainland Chile; put this + change at the end of 1946-08-28. (Thanks to Michael Deckers.) + + Some old, small clock transitions have been removed, as people at + the time did not change their clocks. This affects Asia/Hong_Kong + in 1904, Asia/Ho_Chi_Minh in 1906, and Europe/Dublin in 1880. + + Changes to zone name + + Rename Europe/Kiev to Europe/Kyiv, as "Kyiv" is more common in + English now. Spelling of other names in Ukraine has not yet + demonstrably changed in common English practice so for now these + names retain old spellings, as in other countries (e.g., + Europe/Prague not "Praha", and Europe/Sofia not "Sofiya"). + + Changes to code + + zic has a new option '-R @N' to output explicit transitions < N. + (Need suggested by Almaz Mingaleev.) + + 'zic -r @N' no longer outputs bad data when N < first transition. 
+ (Problem introduced in 2021d and reported by Peter Krefting.) + + zic now checks its input for NUL bytes and unterminated lines, and + now supports input line lengths up to 2048 (not 512) bytes. + + gmtime and related code now use the abbreviation "UTC" not "GMT". + POSIX is being revised to require this. + + When tzset and related functions set vestigial static variables + like tzname, they now prefer specified timestamps to unspecified ones. + (Problem reported by Almaz Mingaleev.) + + zic no longer complains "can't determine time zone abbreviation to + use just after until time" when a transition to a new standard + time occurs simultaneously with the first DST fallback transition. + + Changes to build procedure + + Source data in vanguard form now uses the %z notation, introduced + in release 2015f. For example, for America/Sao_Paulo vanguard + form contains the zone continuation line "-3:00 Brazil %z", which + is simpler and more reliable than the line "-3:00 Brazil -03/-02" + used in main and rearguard forms. The plan is for the main form + to use %z eventually; in the meantime maintainers of zi parsers + are encouraged to test the parsers on vanguard.zi. + + The Makefile has a new PACKRATLIST option to select a subset of + 'backzone'. For example, 'make PACKRATDATA=backzone + PACKRATLIST=zone.tab' now generates TZif files identical to those + of the global-tz project. + + The Makefile has a new tailored_tarballs target for generating + special-purpose tarballs. It generalizes and replaces the + rearguard_tarballs target and related targets and macros, which + are now obsolescent. + + 'make install' now defaults LOCALTIME to Factory not GMT, + which means the default abbreviation is now "-00" not "GMT". + + Remove the posix_packrat target, marked obsolescent in 2016a. + + +Release 2022a - 2022-03-15 23:02:01 -0700 + + Briefly: + Palestine will spring forward on 2022-03-27, not -03-26. + zdump -v now outputs better failure indications. 
+ Bug fixes for code that reads corrupted TZif data. + + Changes to future timestamps + + Palestine will spring forward on 2022-03-27, not 2022-03-26. + (Thanks to Heba Hamad.) Predict future transitions for first + Sunday >= March 25. Additionally, predict fallbacks to be the first + Friday on or after October 23, not October's last Friday, to be more + consistent with recent practice. The first differing fallback + prediction is on 2025-10-24, not 2025-10-31. + + Changes to past timestamps + + From 1992 through spring 1996, Ukraine's DST transitions were at + 02:00 standard time, not at 01:00 UTC. (Thanks to Alois Treindl.) + + Chile's Santiago Mean Time and its LMT precursor have been adjusted + eastward by 1 second to align with past and present law. + + Changes to commentary + + Add several references for Chile's 1946/1947 transitions, some of + which only affected portions of the country. + + Changes to code + + Fix bug when mktime gets confused by truncated TZif files with + unspecified local time. (Problem reported by Almaz Mingaleev.) + + Fix bug when 32-bit time_t code reads malformed 64-bit TZif data. + (Problem reported by Christos Zoulas.) + + When reading a version 2 or later TZif file, the TZif reader now + validates the version 1 header and data block only enough to skip + over them, as recommended by RFC 8536 section 4. Also, the TZif + reader no longer mistakenly attempts to parse a version 1 TZif + file header as a TZ string. + + zdump -v now outputs "(localtime failed)" and "(gmtime failed)" + when local time and UT cannot be determined for a timestamp. + + Changes to build procedure + + Distribution tarballs now use standard POSIX.1-1988 ustar format + instead of GNU format. Although the formats are almost identical + for these tarballs, ustar headers' magic fields contain "ustar" + instead of "ustar ", and their version fields contain "00" instead + of " ".
The two formats are planned to diverge more significantly + for tzdb releases after 2242-03-16 12:56:31 UTC, when the ustar + format becomes obsolete and the tarballs switch to pax format, an + extension of ustar. For details about these formats, please see + "pax - portable archive interchange", IEEE Std 1003.1-2017, + <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13>. + + +Release 2021e - 2021-10-21 18:41:00 -0700 + + Changes to future timestamps + + Palestine will fall back 10-29 (not 10-30) at 01:00. + (Thanks to P Chan and Heba Hemad.) + + +Release 2021d - 2021-10-15 13:48:18 -0700 + + Briefly: + Fiji suspends DST for the 2021/2022 season. + 'zic -r' marks unspecified timestamps with "-00". + + Changes to future timestamps + + Fiji will suspend observance of DST for the 2021/2022 season. + Assume for now that it will return next year. (Thanks to Jashneel + Kumar and P Chan.) + + Changes to code + + 'zic -r' now uses "-00" time zone abbreviations for intervals + with UT offsets that are unspecified due to -r truncation. + This implements a change in draft Internet RFC 8536bis. + + +Release 2021c - 2021-10-01 14:21:49 -0700 + + Briefly: + Revert most 2021b changes to 'backward'. + Fix 'zic -b fat' bug in pre-1970 32-bit data. + Fix two Link line typos. + Distribute SECURITY file. + + This release is intended as a bugfix release, to fix compatibility + problems and typos reported since 2021b was released. + + Changes to Link directives + + Revert almost all of 2021b's changes to the 'backward' file, + by moving Link directives back to where they were in 2021a. + Although 'zic' doesn't care which source file contains a Link + directive, some downstream uses ran into trouble with the move. + (Problem reported by Stephen Colebourne for Joda-Time.) + + Fix typo that linked Atlantic/Jan_Mayen to the wrong location + (problem reported by Chris Walton). + + Fix 'backzone' typo that linked America/Virgin to the wrong + location (problem reported by Michael Deckers).
+ + Changes to code + + Fix a bug in 'zic -b fat' that caused old timestamps to be + mishandled in 32-bit-only readers (problem reported by Daniel + Fischer). + + Changes to documentation + + Distribute the SECURITY file (problem reported by Andreas Radke). + + +Release 2021b - 2021-09-24 16:23:00 -0700 + + Briefly: + Jordan now starts DST on February's last Thursday. + Samoa no longer observes DST. + Merge more location-based Zones whose timestamps agree since 1970. + Move some backward-compatibility links to 'backward'. + Rename Pacific/Enderbury to Pacific/Kanton. + Correct many pre-1993 transitions in Malawi, Portugal, etc. + zic now creates each output file or link atomically. + zic -L no longer omits the POSIX TZ string in its output. + zic fixes for truncation and leap second table expiration. + zic now follows POSIX for TZ strings using all-year DST. + Fix some localtime crashes and bugs in obscure cases. + zdump -v now outputs more-useful boundary cases. + tzfile.5 better matches a draft successor to RFC 8536. + A new file SECURITY. + + This release is prompted by recent announcements by Jordan and Samoa. + It incorporates many other changes that had accumulated since 2021a. + However, it omits most proposed changes that merged all Zones + agreeing since 1970, as concerns were raised about doing too many of + these changes at once. It does keep some of these changes in the + interest of making tzdb more equitable one step at a time; see + "Merge more location-based Zones" below. + + Changes to future timestamps + + Jordan now starts DST on February's last Thursday. + (Thanks to Steffen Thorsen.) + + Samoa no longer observes DST. (Thanks to Geoffrey D. Bennett.) + + Changes to zone name + + Rename Pacific/Enderbury to Pacific/Kanton. When we added + Enderbury in 1993, we did not know that it is uninhabited and that + Kanton (population two dozen) is the only inhabited location in + that timezone. The old name is now a backward-compatibility link.
+ + Changes to past timestamps + + Correct many pre-1993 transitions, fixing entries originally + derived from Shanks, Whitman, and Mundell. The fixes include: + - Barbados: standard time was introduced in 1911, not 1932; and + DST was observed in 1942-1944 + - Cook Islands: In 1899 they switched from east to west of GMT, + celebrating Christmas for two days. They (and Niue) switched + to standard time in 1952, not 1901. + - Guyana: corrected LMT for Georgetown; the introduction of + standard time in 1911, not 1915; and corrections to 1975 and + 1992 transitions + - Kanton: uninhabited before 1937-08-31 + - Niue: only observed -11:20 from 1952 through 1964, then went to + -11 instead of -11:30 + - Portugal: DST was observed in 1950 + - Tonga: corrected LMT; the introduction of standard time in 1945, + not 1901; and corrections to the transition from +12:20 to +13 + in 1961, not 1941 + Additional fixes to entries in the 'backzone' file include: + - Enderbury: inhabited only 1860/1885 and 1938-03-06/1942-02-09 + - The Gambia: 1933 and 1942 transitions + - Malawi: several 1911 through 1925 transitions + - Sierra Leone: several 1913 through 1941 transitions, and DST + was NOT observed in 1957 through 1962 + (Thanks to P Chan, Michael Deckers, Alexander Krivenyshev and + Alois Treindl.) + + Merge more location-based Zones whose timestamps agree since 1970, + as pre-1970 timestamps are out of scope. This is part of a + process that has been ongoing since 2013. This does not affect + post-1970 timestamps, and timezone historians who build with 'make + PACKRATDATA=backzone' should see no changes to pre-1970 timestamps. + When merging, keep the most-populous location's data, and move + data for other locations to 'backzone' with a backward + link in 'backward'. 
For example, move America/Creston data to + 'backzone' with a link in 'backward' from America/Phoenix because + the two timezones' timestamps agree since 1970; this change + affects some pre-1968 timestamps in America/Creston because + Creston and Phoenix disagreed before 1968. The affected Zones + are Africa/Accra, America/Atikokan, America/Blanc-Sablon, + America/Creston, America/Curacao, America/Nassau, + America/Port_of_Spain, Antarctica/DumontDUrville, and + Antarctica/Syowa. + + Changes to maintenance procedure + + The new file SECURITY covers how to report security-related bugs. + + Several backward-compatibility links have been moved to the + 'backward' file. These links, which range from Africa/Addis_Ababa + to Pacific/Saipan, are only for compatibility with now-obsolete + guidelines suggesting an entry for every ISO 3166 code. + The intercontinental convenience links Asia/Istanbul and + Europe/Nicosia have also been moved to 'backward'. + + Changes to code + + zic now creates each output file or link atomically, + possibly by creating a temporary file and then renaming it. + This avoids races where a TZ setting would temporarily stop + working while zic was installing a replacement file or link. + + zic -L no longer omits the POSIX TZ string in its output. + Starting with 2020a, zic -L truncated its output according to the + "Expires" directive or "#expires" comment in the leapseconds file. + The resulting TZif files omitted daylight saving transitions after + the leap second table expired, which led to far less accurate + predictions of times after the expiry. Although future timestamps + cannot be converted accurately in the presence of leap seconds, it + is more accurate to convert near-future timestamps with a few + seconds error than with an hour error, so zic -L no longer + truncates output in this way. 
+ + Instead, when zic -L is given the "Expires" directive, it now + outputs the expiration by appending a no-change entry to the leap + second table. Although this should work well with most TZif + readers, it does not conform to Internet RFC 8536 and some pickier + clients (including tzdb 2017c through 2021a) reject it, so + "Expires" directives are currently disabled by default. To enable + them, set the EXPIRES_LINE Makefile variable. If a TZif file uses + this new feature it is marked with a new TZif version number 4, + a format intended to be documented in a successor to RFC 8536. + + zic -L LEAPFILE -r @LO no longer generates an invalid TZif file + that omits leap second information for the range LO..B when LO + falls between two leap seconds A and B. Instead, it generates a + TZif version 4 file that represents the previously missing + information. + + The TZif reader now allows the leap second table to begin with a + correction other than -1 or +1, and to contain adjacent + transitions with equal corrections. This supports TZif version 4. + + The TZif reader now lets leap seconds occur less than 28 days + apart. This supports possible future TZif extensions. + + Fix bug that caused 'localtime' etc. to crash when TZ was + set to an all-year DST string like "EST5EDT4,0/0,J365/25" that does + not conform to POSIX but does conform to Internet RFC 8536. + + Fix another bug that caused 'localtime' etc. to crash when TZ was + set to a POSIX-conforming but unusual TZ string like + "EST5EDT4,0/0,J365/0", where almost all the year is DST. + + Fix yet another bug that caused 'localtime' etc. to mishandle slim + TZif files containing leap seconds after the last explicit + transition in the table, or when handling far-future timestamps + in slim TZif files lacking leap seconds. + + Fix localtime misbehavior involving positive leap seconds.
+ This change affects only behavior for "right" system time, + which contains leap seconds, and only if the UT offset is + not a multiple of 60 seconds when a positive leap second occurs. + (No such timezone exists in tzdb, luckily.) Without the fix, + the timestamp was ambiguous during a positive leap second. + With the fix, any seconds occurring after a positive leap second + and within the same localtime minute are counted through 60, not + through 59; their UT offset (tm_gmtoff) is the same as before. + Here is how the fix affects timestamps in a timezone with UT + offset +01:23:45 (5025 seconds) and with a positive leap second at + 1972-06-30 23:59:60 UTC (78796800): + + time_t without the fix with the fix + 78796800 1972-07-01 01:23:45 1972-07-01 01:23:45 (leap second) + 78796801 1972-07-01 01:23:45 1972-07-01 01:23:46 + ... + 78796815 1972-07-01 01:23:59 1972-07-01 01:23:60 + 78796816 1972-07-01 01:24:00 1972-07-01 01:24:00 + + Fix an unlikely bug that caused 'localtime' etc. to misbehave if + civil time changes a few seconds before time_t wraps around, when + leap seconds are enabled. + + Fix bug in zic -r; in some cases, the dummy time type after the + last time transition disagreed with the TZ string, contrary to + Internet RFC 8536 section 3.3. + + Fix a bug with 'zic -r @X' when X is a negative leap second that + has a nonnegative correction. Without the fix, the output file + was truncated so that X appeared to be a positive leap second. + Fix a similar, even less likely bug when truncating at a positive + leap second that has a nonpositive correction. + + zic -r now reports an error if given rolling leap seconds, as this + usage has never generally worked and is evidently unused. + + zic now generates a POSIX-conforming TZ string for TZif files + where all-year DST is predicted for the indefinite future.
+ For example, for all-year Eastern Daylight Time, zic now generates + "XXX3EDT4,0/0,J365/23" where it previously generated + "EST5EDT,0/0,J365/25" or "". (Thanks to Michael Deckers for + noting the possibility of POSIX conformance.) + + zic.c no longer requires sys/wait.h (thanks to spazmodius for + noting it wasn't needed). + + When reading slim TZif files, zdump no longer mishandles leap + seconds on the rare platforms where time_t counts leap seconds, + fixing a bug introduced in 2014g. + + zdump -v now outputs timestamps at boundaries of what localtime + and gmtime can represent, instead of the less useful timestamps + one day after the minimum and one day before the maximum. + (Thanks to Arthur David Olson for prototype code, and to Manuela + Friedrich for debugging help.) + + zdump's -c and -t options are now consistently inclusive for the + lower time bound and exclusive for the upper. Formerly they were + inconsistent. (Confusion noted by Martin Burnicki.) + + Changes to build procedure + + You can now compile with -DHAVE_MALLOC_ERRNO=0 to port to + non-POSIX hosts where malloc doesn't set errno. + (Problem reported by Jan Engelhardt.) + + Changes to documentation + + tzfile.5 better matches a draft successor to RFC 8536 + . + + +Release 2021a - 2021-01-24 10:54:57 -0800 + + Changes to future timestamps + + South Sudan changes from +03 to +02 on 2021-02-01 at 00:00. + (Thanks to Steffen Thorsen.) + + +Release 2020f - 2020-12-29 00:17:46 -0800 + + Change to build procedure + + 'make rearguard_tarballs' no longer generates a bad rearguard.zi, + fixing a 2020e bug. (Problem reported by Deborah Goldsmith.) + + +Release 2020e - 2020-12-22 15:14:34 -0800 + + Briefly: + Volgograd switches to Moscow time on 2020-12-27 at 02:00. + + Changes to future timestamps + + Volgograd changes time zone from +04 to +03 on 2020-12-27 at 02:00. + (Thanks to Alexander Krivenyshev and Stepan Golosunov.) 
+ + Changes to past timestamps + + Correct many pre-1986 transitions, fixing entries originally + derived from Shanks. The fixes include: + - Australia: several 1917 through 1971 transitions + - The Bahamas: several 1941 through 1945 transitions + - Bermuda: several 1917 through 1956 transitions + - Belize: several 1942 through 1968 transitions + - Ghana: several 1915 through 1956 transitions + - Israel and Palestine: several 1940 through 1985 transitions + - Kenya and adjacent: several 1908 through 1960 transitions + - Nigeria and adjacent: correcting LMT in Lagos, and several 1905 + through 1919 transitions + - Seychelles: the introduction of standard time in 1907, not 1906 + - Vanuatu: DST in 1973-1974, and a corrected 1984 transition + (Thanks to P Chan.) + + Because of the Australia change, Australia/Currie (King Island) is + no longer needed, as it is identical to Australia/Hobart for all + timestamps since 1970 and was therefore created by mistake. + Australia/Currie has been moved to the 'backward' file and its + corrected data moved to the 'backzone' file. + + Changes to past time zone abbreviations and DST flags + + To better match legislation in Turks and Caicos, the 2015 shift to + year-round observance of -04 is now modeled as AST throughout before + returning to Eastern Time with US DST in 2018, rather than as + maintaining EDT until 2015-11-01. (Thanks to P Chan.) + + Changes to documentation + + The zic man page now documents zic's coalescing of transitions + when a zone falls back just before DST springs forward. + + +Release 2020d - 2020-10-21 11:24:13 -0700 + + Briefly: + Palestine ends DST earlier than predicted, on 2020-10-24. + + Changes to past and future timestamps + + Palestine ends DST on 2020-10-24 at 01:00, instead of 2020-10-31 + as previously predicted (thanks to Sharef Mustafa.) Its + 2019-10-26 fall-back was at 00:00, not 01:00 (thanks to Steffen + Thorsen.) 
Its 2015-10-23 transition was at 01:00 not 00:00, and + its spring 2020 transition was on March 28 at 00:00, not March 27 + (thanks to Pierre Cashon.) This affects Asia/Gaza and + Asia/Hebron. Assume future spring and fall transitions will be on + the Saturday preceding the last Sunday of March and October, + respectively. + + +Release 2020c - 2020-10-16 11:15:53 -0700 + + Briefly: + Fiji starts DST later than usual, on 2020-12-20. + + Changes to future timestamps + + Fiji will start DST on 2020-12-20, instead of 2020-11-08 as + previously predicted. DST will still end on 2021-01-17. + (Thanks to Raymond Kumar and Alan Mintz.) Assume for now that + the later-than-usual start date is a one-time departure from the + recent pattern. + + Changes to build procedure + + Rearguard tarballs now contain an empty file pacificnew. + Some older downstream software expects this file to exist. + (Problem reported by Mike Cullinan.) + + +Release 2020b - 2020-10-06 18:35:04 -0700 + + Briefly: + Revised predictions for Morocco's changes starting in 2023. + Canada's Yukon changes to -07 on 2020-11-01, not 2020-03-08. + Macquarie Island has stayed in sync with Tasmania since 2011. + Casey, Antarctica is at +08 in winter and +11 in summer. + zic no longer supports -y, nor the TYPE field of Rules. + + Changes to future timestamps + + Morocco's spring-forward after Ramadan is now predicted to occur + no sooner than two days after Ramadan, instead of one day. + (Thanks to Milamber.) The first altered prediction is for 2023, + now predicted to spring-forward on April 30 instead of April 23. + + Changes to past and future timestamps + + Casey Station, Antarctica has been using +08 in winter and +11 in + summer since 2018. The most recent transition from +08 to +11 was + 2020-10-04 00:01. Also, Macquarie Island has been staying in + sync with Tasmania since 2011. (Thanks to Steffen Thorsen.) 
+ + Changes to past and future time zone abbreviations and DST flags + + Canada's Yukon, represented by America/Whitehorse and + America/Dawson, changes its time zone rules from -08/-07 to + permanent -07 on 2020-11-01, not on 2020-03-08 as 2020a had it. + This change affects only the time zone abbreviation (MST vs PDT) + and daylight saving flag for the period between the two dates. + (Thanks to Andrew G. Smith.) + + Changes to past timestamps + + Correct several transitions for Hungary for 1918/1983. + For example, the 1983-09-25 fall-back was at 01:00, not 03:00. + (Thanks to Géza Nyáry.) Also, the 1890 transition to standard + time was on 11-01, not 10-01 (thanks to Michael Deckers). + + The 1891 French transition was on March 16, not March 15. The + 1911-03-11 French transition was at midnight, not a minute later. + Monaco's transitions were on 1892-06-01 and 1911-03-29, not + 1891-03-15 and 1911-03-11. (Thanks to Michael Deckers.) + + Changes to code + + Support for zic's long-obsolete '-y YEARISTYPE' option has been + removed and, with it, so has support for the TYPE field in Rule + lines, which is now reserved for compatibility with earlier zic. + These features were previously deprecated in release 2015f. + (Thanks to Tim Parenti.) + + zic now defaults to '-b slim' instead of to '-b fat'. + + zic's new '-l -' and '-p -' options uninstall any existing + localtime and posixrules files, respectively. + + The undocumented and ineffective tzsetwall function has been + removed. + + Changes to build procedure + + The Makefile now defaults POSIXRULES to '-', so the posixrules + feature (obsolete as of 2019b) is no longer installed by default. + + Changes to documentation and commentary + + The long-obsolete files pacificnew, systemv, and yearistype.sh have + been removed from the distribution. (Thanks to Tim Parenti.) + + +Release 2020a - 2020-04-23 16:03:47 -0700 + + Briefly: + Morocco springs forward on 2020-05-31, not 2020-05-24. 
+ Canada's Yukon advanced to -07 year-round on 2020-03-08. + America/Nuuk renamed from America/Godthab. + zic now supports expiration dates for leap second lists. + + Changes to future timestamps + + Morocco's second spring-forward transition in 2020 will be May 31, + not May 24 as predicted earlier. (Thanks to Semlali Naoufal.) + Adjust future-year predictions to use the first Sunday after the + day after Ramadan, not the first Sunday after Ramadan. + + Canada's Yukon, represented by America/Whitehorse and + America/Dawson, advanced to -07 year-round, beginning with its + spring-forward transition on 2020-03-08, and will not fall back on + 2020-11-01. Although a government press release calls this + "permanent Pacific Daylight Saving Time", we prefer MST for + consistency with nearby Dawson Creek, Creston, and Fort Nelson. + (Thanks to Tim Parenti.) + + Changes to past timestamps + + Shanghai observed DST in 1919. (Thanks to Phake Nick.) + + Changes to timezone identifiers + + To reflect current usage in English better, America/Godthab has + been renamed to America/Nuuk. A backwards-compatibility link + remains for the old name. + + Changes to code + + localtime.c no longer mishandles timestamps after the last + transition in a TZif file with leap seconds and with daylight + saving time transitions projected into the indefinite future. + For example, with TZ='America/Los_Angeles' with leap seconds, + zdump formerly reported a DST transition on 2038-03-14 + from 01:59:32.999... to 02:59:33 instead of the correct transition + from 01:59:59.999... to 03:00:00. + + zic -L now supports an Expires line in the leapseconds file, and + truncates the TZif output accordingly. This propagates leap + second expiration information into the TZif file, and avoids the + abovementioned localtime.c bug as well as similar bugs present in + many client implementations. 
If no Expires line is present, zic + -L instead truncates the TZif output based on the #expires comment + present in leapseconds files distributed by tzdb 2018f and later; + however, this usage is obsolescent. For now, the distributed + leapseconds file has an Expires line that is commented out, so + that the file can be fed to older versions of zic which ignore the + commented-out line. Future tzdb distributions are planned to + contain a leapseconds file with an Expires line. + + The configuration macros HAVE_TZNAME and USG_COMPAT should now be + set to 1 if the system library supports the feature, and 2 if not. + As before, these macros are nonzero if tzcode should support the + feature, zero otherwise. + + The configuration macro ALTZONE now has the same values with the + same meaning as HAVE_TZNAME and USG_COMPAT. + + The code's defense against CRLF in leap-seconds.list is now + portable to POSIX awk. (Problem reported by Deborah Goldsmith.) + + Although the undocumented tzsetwall function is not changed in + this release, it is now deprecated in preparation for removal in + future releases. Due to POSIX requirements, tzsetwall has not + worked for some time. Any code that uses it should instead use + tzalloc(NULL) or, if portability trumps thread-safety, should + unset the TZ environment variable. + + Changes to commentary + + The Îles-de-la-Madeleine and the Listuguj reserve are noted as + following America/Halifax, and comments about Yukon's "south" and + "north" have been corrected to say "east" and "west". (Thanks to + Jeffery Nichols.) + + +Release 2019c - 2019-09-11 08:59:48 -0700 + + Briefly: + Fiji observes DST from 2019-11-10 to 2020-01-12. + Norfolk Island starts observing Australian-style DST. + + Changes to future timestamps + + Fiji's next DST transitions will be 2019-11-10 and 2020-01-12 + instead of 2019-11-03 and 2020-01-19. (Thanks to Raymond Kumar.) + Adjust future guesses accordingly. 
+ + Norfolk Island will observe Australian-style DST starting in + spring 2019. The first transition is on 2019-10-06. (Thanks to + Kyle Czech and Michael Deckers.) + + Changes to past timestamps + + Many corrections to time in Turkey from 1940 through 1985. + (Thanks to Oya Vulaş via Alois Treindl, and to Kıvanç Yazan.) + + The Norfolk Island 1975-03-02 transition was at 02:00 standard + time, not 02:00 DST. (Thanks to Michael Deckers.) + + South Korea observed DST from 1948 through 1951. Although this + info was supposed to appear in release 2014j, a typo inadvertently + suppressed the change. (Thanks to Alois Treindl.) + + Detroit observed DST in 1967 and 1968 following the US DST rules, + except that its 1967 DST began on June 14 at 00:01. (Thanks to + Alois Treindl for pointing out that the old data entries were + probably wrong.) + + Fix several errors in pre-1970 transitions in Perry County, IN. + (Thanks to Alois Treindl for pointing out the 1967/9 errors.) + + Edmonton did not observe DST in 1967 or 1969. In 1946 Vancouver + ended DST on 09-29 not 10-13, and Vienna ended DST on 10-07 not + 10-06. In 1945 Königsberg (now Kaliningrad) switched from +01/+02 + to +02/+03 on 04-10 not 01-01, and its +02/+03 is abbreviated + EET/EEST, not CET/CEST. (Thanks to Alois Treindl.) In 1946 + Königsberg switched to +03 on 04-07 not 01-01. + + In 1946 Louisville switched from CST to CDT on 04-28 at 00:01, not + 01-01 at 00:00. (Thanks to Alois Treindl and Michael Deckers.) + Also, it switched from CST to CDT on 1950-04-30, not 1947-04-27. + + The 1892-05-01 transition in Brussels was at 00:17:30, not at noon. + (Thanks to Michael Deckers.) + + Changes to past time zone abbreviations and DST flags + + Hong Kong Winter Time, observed from 1941-10-01 to 1941-12-25, + is now flagged as DST and is abbreviated HKWT not HKT. + + Changes to code + + leapseconds.awk now relies only on its input data, rather than + also relying on its comments. 
(Inspired by code from Dennis + Ferguson and Chris Woodbury.) + + The code now defends against CRLFs in leap-seconds.list. + (Thanks to Brian Inglis and Chris Woodbury.) + + Changes to documentation and commentary + + theory.html discusses leap seconds. (Thanks to Steve Summit.) + + Nashville's newspapers dueled about the time of day in the 1950s. + (Thanks to John Seigenthaler.) + + Liechtenstein observed Swiss DST in 1941/2. + (Thanks to Alois Treindl.) + + +Release 2019b - 2019-07-01 00:09:53 -0700 + + Briefly: + Brazil no longer observes DST. + 'zic -b slim' outputs smaller TZif files; please try it out. + Palestine's 2019 spring-forward transition was on 03-29, not 03-30. + + Changes to future timestamps + + Brazil has canceled DST and will stay on standard time indefinitely. + (Thanks to Steffen Thorsen, Marcus Diniz, and Daniel Soares de + Oliveira.) + + Predictions for Morocco now go through 2087 instead of 2037, to + work around a problem on newlib when using TZif files output by + zic 2019a or earlier. (Problem reported by David Gauchard.) + + Changes to past and future timestamps + + Palestine's 2019 spring transition was 03-29 at 00:00, not 03-30 + at 01:00. (Thanks to Sharef Mustafa and Even Scharning.) Guess + future transitions to be March's last Friday at 00:00. + + Changes to past timestamps + + Hong Kong's 1941-06-15 spring-forward transition was at 03:00, not + 03:30. Its 1945 transition from JST to HKT was on 11-18 at 02:00, + not 09-15 at 00:00. In 1946 its spring-forward transition was on + 04-21 at 00:00, not the previous day at 03:30. From 1946 through + 1952 its fall-back transitions occurred at 04:30, not at 03:30. + In 1947 its fall-back transition was on 11-30, not 12-30. + (Thanks to P Chan.) + + Changes to past time zone abbreviations + + Italy's 1866 transition to Rome Mean Time was on December 12, not + September 22. This affects only the time zone abbreviation for + Europe/Rome between those dates. 
(Thanks to Stephen Trainor and + Luigi Rosa.) + + Changes affecting metadata only + + Add info about the Crimea situation in zone1970.tab and zone.tab. + (Problem reported by Serhii Demediuk.) + + Changes to code + + zic's new -b option supports a way to control data bloat and to + test for year-2038 bugs in software that reads TZif files. + 'zic -b fat' and 'zic -b slim' generate larger and smaller output; + for example, changing from fat to slim shrinks the Europe/London + file from 3648 to 1599 bytes, saving about 56%. Fat and slim + files represent the same set of timestamps and use the same TZif + format as documented in tzfile(5) and in Internet RFC 8536. + Fat format attempts to work around bugs or incompatibilities in + older software, notably software that mishandles 64-bit TZif data + or uses obsolete TZ strings like "EET-2EEST" that lack DST rules. + Slim format is more efficient and does not work around 64-bit bugs + or obsolete TZ strings. Currently zic defaults to fat format + unless you compile with -DZIC_BLOAT_DEFAULT=\"slim\"; this + out-of-the-box default is intended to change in future releases + as the buggy software often mishandles timestamps anyway. + + zic no longer treats a set of rules ending in 2037 specially. + Previously, zic assumed that such a ruleset meant that future + timestamps could not be predicted, and therefore omitted a + POSIX-like TZ string in the TZif output. The old behavior is no + longer needed for current tzdata, and caused problems with newlib + when used with older tzdata (reported by David Gauchard). + + zic no longer generates some artifact transitions. For example, + Europe/London no longer has a no-op transition in January 1996. + + Changes to build procedure + + tzdata.zi now assumes zic 2017c or later. This shrinks tzdata.zi + by a percent or so. 
+ + Changes to documentation and commentary + + The Makefile now documents the POSIXRULES macro as being obsolete, + and similarly, zic's -p POSIXRULES option is now documented as + being obsolete. Although the POSIXRULES feature still exists and + works as before, in practice it is rarely used for its intended + purpose, and it does not work either in the default reference + implementation (for timestamps after 2037) or in common + implementations such as GNU/Linux (for contemporary timestamps). + Since POSIXRULES was designed primarily as a temporary transition + facility for System V platforms that died off decades ago, it is + being decommissioned rather than institutionalized. + + New info on Bonin Islands and Marcus (thanks to Wakaba and Phake Nick). + + +Release 2019a - 2019-03-25 22:01:33 -0700 + + Briefly: + Palestine "springs forward" on 2019-03-30 instead of 2019-03-23. + Metlakatla "fell back" to rejoin Alaska Time on 2019-01-20 at 02:00. + + Changes to past and future timestamps + + Palestine will not start DST until 2019-03-30, instead of 2019-03-23 as + previously predicted. Adjust our prediction by guessing that spring + transitions will be between 24 and 30 March, which matches recent practice + since 2016. (Thanks to Even Scharning and Tim Parenti.) + + Metlakatla ended its observance of Pacific standard time, + rejoining Alaska Time, on 2019-01-20 at 02:00. (Thanks to Ryan + Stanley and Tim Parenti.) + + Changes to past timestamps + + Israel observed DST in 1980 (08-02/09-13) and 1984 (05-05/08-25). + (Thanks to Alois Treindl and Isaac Starkman.) + + Changes to time zone abbreviations + + Etc/UCT is now a backward-compatibility link to Etc/UTC, instead + of being a separate zone that generates the abbreviation "UCT", + which nowadays is typically a typo. (Problem reported by Isiah + Meadows.) + + Changes to code + + zic now has an -r option to limit the time range of output data. 
+ For example, 'zic -r @1000000000' limits the output data to + timestamps starting 1000000000 seconds after the Epoch. + This helps shrink output size and can be useful for applications + not needing the full timestamp history, such as TZDIST truncation; + see Internet RFC 8536 section 5.1. (Inspired by a feature request + from Christopher Wong, helped along by bug reports from Wong and + from Tim Parenti.) + + Changes to documentation + + Mention Internet RFC 8536 (February 2019), which documents TZif. + + tz-link.html now cites tzdata-meta + <https://tzdata-meta.timtimeonline.com/>. + + +Release 2018i - 2018-12-30 11:05:43 -0800 + + Briefly: + São Tomé and Príncipe switches from +01 to +00 on 2019-01-01. + + Changes to future timestamps + + Due to a change in government, São Tomé and Príncipe switches back + from +01 to +00 on 2019-01-01 at 02:00. (Thanks to Vadim + Nasardinov and Michael Deckers.) + + +Release 2018h - 2018-12-23 17:59:32 -0800 + + Briefly: + Qyzylorda, Kazakhstan moved from +06 to +05 on 2018-12-21. + New zone Asia/Qostanay because Qostanay, Kazakhstan didn't move. + Metlakatla, Alaska observes PST this winter only. + Guess Morocco will continue to adjust clocks around Ramadan. + Add predictions for Iran from 2038 through 2090. + + Changes to future timestamps + + Guess that Morocco will continue to fall back just before and + spring forward just after Ramadan, the practice since 2012. + (Thanks to Maamar Abdelkader.) This means Morocco will observe + negative DST during Ramadan in main and vanguard formats, and in + rearguard format it stays in the +00 timezone and observes + ordinary DST in all months other than Ramadan. As before, extend + this guesswork to the year 2037. As a consequence, Morocco is + scheduled to observe three DST transitions in some Gregorian years + (e.g., 2033) due to the mismatch between the Gregorian and Islamic + calendars. + + The table of exact transitions for Iranian DST has been extended.
+ It formerly cut off before the year 2038 in a nod to 32-bit time_t. + It now cuts off before 2091 as there is doubt about how the Persian + calendar will treat 2091. This change predicts DST transitions in + 2038-9, 2042-3, and 2046-7 to occur one day later than previously + predicted. As before, post-cutoff transitions are approximated. + + Changes to past and future timestamps + + Qyzylorda (aka Kyzylorda) oblast in Kazakhstan moved from +06 to + +05 on 2018-12-21. This is a zone split as Qostanay (aka + Kostanay) did not switch, so create a zone Asia/Qostanay. + + Metlakatla moved from Alaska to Pacific standard time on 2018-11-04. + It did not change clocks that day and remains on -08 this winter. + (Thanks to Ryan Stanley.) It will revert to the usual Alaska + rules next spring, so this change affects only timestamps + from 2018-11-04 through 2019-03-10. + + Changes to past timestamps + + Kwajalein's 1993-08-20 transition from -12 to +12 was at 24:00, + not 00:00. I transcribed the time incorrectly from Shanks. + (Thanks to Phake Nick.) + + Nauru's 1979 transition was on 02-10 at 02:00, not 05-01 at 00:00. + (Thanks to Phake Nick.) + + Guam observed DST irregularly from 1959 through 1977. + (Thanks to Phake Nick.) + + Hong Kong observed DST in 1941 starting 06-15 (not 04-01), then on + 10-01 changed standard time to +08:30 (not +08). Its transition + back to +08 after WWII was on 1945-09-15, not the previous day. + Its 1904-10-30 change took effect at 01:00 +08 (not 00:00 LMT). + (Thanks to Phake Nick, Steve Allen, and Joseph Myers.) Also, + its 1952 fallback was on 11-02 (not 10-25). + + This release contains many changes to timestamps before 1946 due + to Japanese possession or occupation of Pacific/Chuuk, + Pacific/Guam, Pacific/Kosrae, Pacific/Kwajalein, Pacific/Majuro, + Pacific/Nauru, Pacific/Palau, and Pacific/Pohnpei. + (Thanks to Phake Nick.)
+ + Assume that the Spanish East Indies was like the Philippines and + observed American time until the end of 1844. This affects + Pacific/Chuuk, Pacific/Kosrae, Pacific/Palau, and Pacific/Pohnpei. + + Changes to past tm_isdst flags + + For the recent Morocco change, the tm_isdst flag should be 1 from + 2018-10-27 00:00 to 2018-10-28 03:00. (Thanks to Michael Deckers.) + Give a URL to the official decree. (Thanks to Matt Johnson.) + + +Release 2018g - 2018-10-26 22:22:45 -0700 + + Briefly: + Morocco switches to permanent +01 on 2018-10-28. + + Changes to future timestamps + + Morocco switches from +00/+01 to permanent +01 effective 2018-10-28, + so its clocks will not fall back as previously scheduled. + (Thanks to Mohamed Essedik Najd and Brian Inglis.) + + Changes to code + + When generating TZif files with leap seconds, zic no longer uses a + format that trips up older 32-bit clients, fixing a bug introduced + in 2018f. (Reported by Daniel Fischer.) Also, the zic workaround + for QTBUG-53071 now also works for TZif files with leap seconds. + + The translator to rearguard format now rewrites the line + "Rule Japan 1948 1951 - Sep Sat>=8 25:00 0 S" to + "Rule Japan 1948 1951 - Sep Sun>=9 1:00 0 S". + This caters to zic before 2007 and to Oracle TZUpdater 2.2.0 + and earlier. (Reported by Christos Zoulas.) + + Changes to past time zone abbreviations + + Change HDT to HWT/HPT for WWII-era abbreviations in Hawaii. + This reverts to 2011h, as the abbreviation change in 2011i was + likely inadvertent. + + Changes to documentation + + tzfile.5 has new sections on interoperability issues. + + +Release 2018f - 2018-10-18 00:14:18 -0700 + + Briefly: + Volgograd moves from +03 to +04 on 2018-10-28. + Fiji ends DST 2019-01-13, not 2019-01-20. + Most of Chile changes DST dates, effective 2019-04-06. + + Changes to future timestamps + + Volgograd moves from +03 to +04 on 2018-10-28 at 02:00. + (Thanks to Alexander Fetisov and Stepan Golosunov.) 
+ + Fiji ends DST 2019-01-13 instead of the 2019-01-20 previously + predicted. (Thanks to Raymond Kumar.) Adjust future predictions + accordingly. + + Most of Chile will end DST on the first Saturday in April at 24:00 mainland + time, and resume DST on the first Saturday in September at 24:00 mainland + time. The changes are effective from 2019-04-06, and do not affect the + Magallanes region modeled by America/Punta_Arenas. (Thanks to Juan Correa + and Tim Parenti.) Adjust future predictions accordingly. + + Changes to past timestamps + + The 2018-05-05 North Korea 30-minute time zone change took place + at 23:30 the previous day, not at 00:00 that day. + + China's 1988 spring-forward transition was on April 17, not + April 10. Its DST transitions in 1986/91 were at 02:00, not 00:00. + (Thanks to P Chan.) + + Fix several issues for Macau before 1992. Macau's pre-1904 LMT + was off by 10 s. Macau switched to +08 in 1904 not 1912, and + temporarily switched to +09/+10 during World War II. Macau + observed DST in 1942/79, not 1961/80, and there were several + errors for transition times and dates. (Thanks to P Chan.) + + The 1948-1951 fallback transitions in Japan were at 25:00 on + September's second Saturday, not at 24:00. (Thanks to Phake Nick.) + zic turns this into 01:00 on the day after September's second + Saturday, which is the best that POSIX or C platforms can do. + + Incorporate 1940-1949 Asia/Shanghai DST transitions from a 2014 + paper by Li Yu, replacing more-questionable data from Shanks. + + Changes to time zone abbreviations + + Use "PST" and "PDT" for Philippine time. (Thanks to Paul Goyette.) + + Changes to code + + zic now always generates TZif files where time type 0 is used for + timestamps before the first transition. This simplifies the + reading of TZif files and should not affect behavior of existing + TZif readers because the same set of time types is used; only + their internal indexes may have changed. 
This affects only the + legacy zones EST5EDT, CST6CDT, MST7MDT, PST8PDT, CET, MET, and + EET, which previously used nonzero types for these timestamps. + + Because of the type 0 change, zic no longer outputs a dummy + transition at time -2**59 (before the Big Bang), as clients should + no longer need this to handle historical timestamps correctly. + This reverts a change introduced in 2013d and shrinks most TZif + files by a few bytes. + + zic now supports negative time-of-day in Rule and Leap lines, e.g., + "Rule X min max - Apr lastSun -6:00 1:00 -" means the transition + occurs at 18:00 on the Saturday before the last Sunday in April. + This behavior was documented in 2018a but the code did not + entirely match the documentation. + + localtime.c no longer requires at least one time type in TZif + files that lack transitions or have a POSIX-style TZ string. This + future-proofs the code against possible future extensions to the + format that would allow TZif files with POSIX-style TZ strings and + without transitions or time types. + + A read-access subscript error in localtime.c has been fixed. + It could occur only in TZif files with timecnt == 0, something that + does not happen in practice now but could happen in future versions. + + localtime.c no longer ignores TZif POSIX-style TZ strings that + specify only standard time. Instead, these TZ strings now + override the default time type for timestamps after the last + transition (or for all timestamps if there are no transitions), + just as DST strings specifying DST have always done. + + leapseconds.awk now outputs "#updated" and "#expires" comments, + and supports leap seconds at the ends of months other than June + and December. (Inspired by suggestions from Chris Woodbury.) + + Changes to documentation + + New restrictions: A Rule name must start with a character that + is neither an ASCII digit nor "-" nor "+", and an unquoted name + should not use characters in the set "!$%&'()*,/:;<=>?@[\]^`{|}~". 
+ The latter restriction makes room for future extensions (a + possibility noted by Tom Lane). + + tzfile.5 now documents what time types apply before the first and + after the last transition, if any. + + Documentation now uses the spelling "timezone" for a TZ setting + that determines timestamp history, and "time zone" for a + geographic region currently sharing the same standard time. + + The name "TZif" is now used for the tz binary data format. + + tz-link.htm now mentions the A0 TimeZone Migration utilities. + (Thanks to Aldrin Martoq for the link.) + + Changes to build procedure + + New 'make' target 'rearguard_tarballs' to build the rearguard + tarball only. This is a convenience on platforms that lack lzip + if you want to build the rearguard tarball. (Problem reported by + Deborah Goldsmith.) + + tzdata.zi is now more stable from release to release. (Problem + noted by Tom Lane.) It is also a bit shorter. + + tzdata.zi now can contain comment lines documenting configuration + information, such as which data format was selected, which input + files were used, and how leap seconds are treated. (Problems + noted by Lester Caine and Brian Inglis.) If the Makefile defaults + are used these comment lines are absent, for backward + compatibility. A redistributor intending to alter its copy of the + files should also append "-LABEL" to the 'version' file's first + line, where "LABEL" identifies the redistributor's change. + + +Release 2018e - 2018-05-01 23:42:51 -0700 + + Briefly: + + North Korea switches back to +09 on 2018-05-05. + The main format uses negative DST again, for Ireland etc. + 'make tarballs' now also builds a rearguard tarball. + New 's' and 'd' suffixes in SAVE columns of Rule and Zone lines. + + Changes to past and future timestamps + + North Korea switches back from +0830 to +09 on 2018-05-05. + (Thanks to Kang Seonghoon, Arthur David Olson, Seo Sanghyeon, + and Tim Parenti.) 
+ + Bring back the negative-DST changes of 2018a, except be more + compatible with data parsers that do not support negative DST. + Also, this now affects historical timestamps in Namibia and the + former Czechoslovakia, not just Ireland. The main format now uses + negative DST to model timestamps in Europe/Dublin (from 1971 on), + Europe/Prague (1946/7), and Africa/Windhoek (1994/2017). This + does not affect UT offsets, only time zone abbreviations and the + tm_isdst flag. Also, this does not affect rearguard or vanguard + formats; effectively the main format now uses vanguard instead of + rearguard format. Data parsers that do not support negative DST + can still use data from the rearguard tarball described below. + + Changes to build procedure + + The command 'make tarballs' now also builds the tarball + tzdataVERSION-rearguard.tar.gz, which is like tzdataVERSION.tar.gz + except that it uses rearguard format intended for trailing-edge + data parsers. + + Changes to data format and to code + + The SAVE column of Rule and Zone lines can now have an 's' or 'd' + suffix, which specifies whether the adjusted time is standard time + or daylight saving time. If no suffix is given, daylight saving + time is used if and only if the SAVE column is nonzero; this is + the longstanding behavior. Although this new feature is not used + in tzdata, it could be used to specify the legal time in Namibia + 1994-2017, as opposed to the popular time (see below). + + Changes to past timestamps + + From 1994 through 2017 Namibia observed DST in winter, not summer. + That is, it used negative DST, as Ireland still does. This change + does not affect UTC offsets; it affects only the tm_isdst flag and + the abbreviation used during summer, which is now CAT, not WAST. + Although (as noted by Michael Deckers) summer and winter time were + both simply called "standard time" in Namibian law, in common + practice winter time was considered to be DST (as noted by Stephen + Colebourne). 
The full effect of this change is only in vanguard + and main format; in rearguard format, the tm_isdst flag is still + zero in winter and nonzero in summer. + + In 1946/7 Czechoslovakia also observed negative DST in winter. + The full effect of this change is only in vanguard and main + formats; in rearguard format, it is modeled as plain GMT without + daylight saving. Also, the dates of some 1944/5 DST transitions + in Czechoslovakia have been changed. + + +Release 2018d - 2018-03-22 07:05:46 -0700 + + Briefly: + + Palestine starts DST a week earlier in 2018. + Add support for vanguard and rearguard data consumers. + Add subsecond precision to source data format, though not to data. + + Changes to future timestamps + + In 2018, Palestine starts DST on March 24, not March 31. + Adjust future predictions accordingly. (Thanks to Sharef Mustafa.) + + Changes to past and future timestamps + + Casey Station in Antarctica changed from +11 to +08 on 2018-03-11 + at 04:00. (Thanks to Steffen Thorsen.) + + Changes to past timestamps + + Historical transitions for Uruguay, represented by + America/Montevideo, have been updated per official legal documents, + replacing previous data mainly originating from the inventions of + Shanks & Pottenger. This has resulted in adjustments ranging from + 30 to 90 minutes in either direction over at least two dozen + distinct periods ranging from one day to several years in length. + A mere handful of pre-1991 transitions are unaffected; data since + then has come from more reliable contemporaneous reporting. These + changes affect various timestamps in 1920-1923, 1936, 1939, + 1942-1943, 1959, 1966-1970, 1972, 1974-1980, and 1988-1990. + Additionally, Uruguay's pre-standard-time UT offset has been + adjusted westward by 7 seconds, from UT-03:44:44 to UT-03:44:51, to + match the location of the Observatory of the National Meteorological + Institute in Montevideo. + (Thanks to Jeremie Bonjour, Tim Parenti, and Michael Deckers.) 
+ + East Kiribati skipped New Year's Eve 1994, not New Year's Day 1995. + (Thanks to Kerry Shetline.) + + Fix the 1912-01-01 transition for Portugal and its colonies. + This transition was at 00:00 according to the new UT offset, not + according to the old one. Also assume that Cape Verde switched on + the same date as the rest, not in 1907. This affects + Africa/Bissau, Africa/Sao_Tome, Asia/Macau, Atlantic/Azores, + Atlantic/Cape_Verde, Atlantic/Madeira, and Europe/Lisbon. + (Thanks to Michael Deckers.) + + Fix an off-by-1 error for pre-1913 timestamps in Jamaica and in + Turks & Caicos. + + Changes to past time zone abbreviations + + MMT took effect in Uruguay from 1908-06-10, not 1898-06-28. There + is no clock change associated with the transition. + + Changes to build procedure + + The new DATAFORM macro in the Makefile lets the installer choose + among three source data formats. The idea is to lessen downstream + disruption when data formats are improved. + + * DATAFORM=vanguard installs from the latest, bleeding-edge + format. DATAFORM=main (the default) installs from the format + used in the 'africa' etc. files. DATAFORM=rearguard installs + from a trailing-edge format. Eventually, elements of today's + vanguard format should move to the main format, and similarly + the main format's features should eventually move to the + rearguard format. + + * In the current version, the main and rearguard formats are + identical and match that of 2018c, so this change does not + affect default behavior. The vanguard format currently contains + one feature not in the main format: negative SAVE values. This + improves support for Ireland, which uses Irish Standard Time + (IST, UTC+01) in summer and GMT (UTC) in winter. tzcode has + supported negative SAVE values for decades, and this feature + should move to the main format soon. However, it will not move + to the rearguard format for quite some time because some + downstream parsers do not support it. 
+ + * The build procedure constructs three files vanguard.zi, main.zi, + and rearguard.zi, one for each format. Although the files + represent essentially the same data, they may have minor + discrepancies that users are not likely to notice. The files + are intended for downstream data consumers and are not + installed. Zoneinfo parsers that do not support negative SAVE values + should start using rearguard.zi, so that they will be unaffected + when the negative-DST feature moves from vanguard to main. + Bleeding-edge Zoneinfo parsers that support the new features + already can use vanguard.zi; in this respect, current tzcode is + bleeding-edge. + + The Makefile should now be safe for parallelized builds, and 'make + -j to2050new.tzs' is now much faster on a multiprocessor host + with GNU Make. + + When built with -DSUPPRESS_TZDIR, the tzcode library no longer + prepends TZDIR/ to file names that do not begin with '/'. This is + not recommended for general use, due to its security implications. + (From a suggestion by Manuela Friedrich.) + + Changes to code + + zic now accepts subsecond precision in expressions like + 00:19:32.13, which is approximately the legal time of the + Netherlands from 1835 to 1937. However, because it is + questionable whether the few recorded uses of non-integer offsets + had subsecond precision in practice, there are no plans for tzdata + to use this feature. (Thanks to Steve Allen for pointing out + the limitations of historical data in this area.) + + The code is a bit more portable to MS-Windows. Installers can + compile with -DRESERVE_STD_EXT_IDS on MS-Windows platforms that + reserve identifiers like 'localtime'. (Thanks to Manuela + Friedrich.) + + Changes to documentation and commentary + + theory.html now outlines tzdb's extensions to POSIX's model for + civil time, and has a section "POSIX features no longer needed" + that lists POSIX API components that are now vestigial. + (From suggestions by Steve Summit.) 
It also better distinguishes + time zones from tz regions. (From a suggestion by Guy Harris.) + + Commentary is now more consistent about using the phrase "daylight + saving time", to match the C name tm_isdst. Daylight saving time + need not occur in summer, and need not have a positive offset from + standard time. + + Commentary about historical transitions in Uruguay has been expanded + with links to many relevant legal documents. + (Thanks to Tim Parenti.) + + Commentary now uses some non-ASCII characters with Unicode value + less than U+0100, as they can be useful and should work even with + older editors such as XEmacs. + + +Release 2018c - 2018-01-22 23:00:44 -0800 + + Briefly: + Revert Irish changes that relied on negative SAVE values. + + Changes to tm_isdst + + Revert the 2018a change to Europe/Dublin. As before, this change + does not affect UT offsets or abbreviations; it affects only + whether timestamps are considered to be standard time or + daylight-saving time, as expressed in the tm_isdst flag of C's + struct tm type. This reversion is intended to be a temporary + workaround for problems discovered with downstream uses of + releases 2018a and 2018b, which implemented Irish time by using + negative SAVE values in the Eire rules of the 'europe' file. + Although negative SAVE values have been part of tzcode for many + years and are supported by many platforms, they were not + documented before 2018a and ICU and OpenJDK do not currently + support them. A mechanism to export data to platforms lacking + support for negative DST is planned to be developed before the + change is reapplied. (Problems reported by Deborah Goldsmith and + Stephen Colebourne.) + + Changes to past timestamps + + Japanese DST transitions (1948-1951) were Sundays at 00:00, not + Saturdays or Sundays at 02:00. (Thanks to Takayuki Nikai.) + + Changes to build procedure + + The build procedure now works around mawk 1.3.3's lack of support + for character class expressions. 
(Problem reported by Ohyama.) + + +Release 2018b - 2018-01-17 23:24:48 -0800 + + Briefly: + Fix a packaging problem in tz2018a, which was missing 'pacificnew'. + + Changes to build procedure + + The distribution now contains the file 'pacificnew' again. + This file was inadvertently omitted in the 2018a distribution. + (Problem reported by Matias Fonzo.) + + +Release 2018a - 2018-01-12 22:29:21 -0800 + + Briefly: + São Tomé and Príncipe switched from +00 to +01. + Brazil's DST will now start on November's first Sunday. + Ireland's standard time is now in the summer, not the winter. + Use Debian-style installation locations, instead of 4.3BSD-style. + New zic option -t. + + Changes to past and future timestamps + + São Tomé and Príncipe switched from +00 to +01 on 2018-01-01 at + 01:00. (Thanks to Steffen Thorsen and Michael Deckers.) + + Changes to future timestamps + + Starting in 2018 southern Brazil will begin DST on November's + first Sunday instead of October's third Sunday. (Thanks to + Steffen Thorsen.) + + Changes to past timestamps + + A discrepancy of 4 s in timestamps before 1931 in South Sudan has + been corrected. The 'backzone' and 'zone.tab' files did not agree + with the 'africa' and 'zone1970.tab' files. (Problem reported by + Michael Deckers.) + + The abbreviation invented for Bolivia Summer Time (1931-2) is now + BST instead of BOST, to be more consistent with the convention + used for Latvian Summer Time (1918-9) and for British Summer Time. + + Changes to tm_isdst + + Change Europe/Dublin so that it observes Irish Standard Time (UT + +01) in summer and GMT (as negative daylight-saving) in winter, + instead of observing standard time (GMT) in winter and Irish + Summer Time (UT +01) in summer. This change does not affect UT + offsets or abbreviations; it affects only whether timestamps are + considered to be standard time or daylight-saving time, as + expressed in the tm_isdst flag of C's struct tm type. + (Discrepancy noted by Derick Rethans.) 
+ + Changes to build procedure + + The default installation locations have been changed to mostly + match Debian circa 2017, instead of being designed as an add-on to + 4.3BSD circa 1986. This affects the Makefile macros TOPDIR, + TZDIR, MANDIR, and LIBDIR. New Makefile macros TZDEFAULT, USRDIR, + USRSHAREDIR, BINDIR, ZDUMPDIR, and ZICDIR let installers tailor + locations more precisely. (This responds to suggestions from + Brian Inglis and from Steve Summit.) + + The default installation procedure no longer creates the + backward-compatibility link US/Pacific-New, which causes + confusion during user setup (e.g., see Debian bug 815200). + Use 'make BACKWARD="backward pacificnew"' to create the link + anyway, for now. Eventually we plan to remove the link entirely. + + tzdata.zi now contains a version-number comment. + (Suggested by Tom Lane.) + + The Makefile now quotes values like BACKWARD more carefully when + passing them to the shell. (Problem reported by Zefram.) + + Builders no longer need to specify -DHAVE_SNPRINTF on platforms + that have snprintf and use pre-C99 compilers. (Problem reported + by Jon Skeet.) + + Changes to code + + zic has a new option -t FILE that specifies the location of the + file that determines local time when TZ is unset. The default for + this location can be configured via the new TZDEFAULT makefile + macro, which defaults to /etc/localtime. + + Diagnostics and commentary now distinguish UT from UTC more + carefully; see theory.html for more information about UT vs UTC. + + zic has been ported to GCC 8's -Wstringop-truncation option. + (Problem reported by Martin Sebor.) + + Changes to documentation and commentary + + The zic man page now documents the longstanding behavior that + times and years can be out of the usual range, with negative times + counting backwards from midnight and with year 0 preceding year 1. + (Problem reported by Michael Deckers.) 
+ + The theory.html file now mentions the POSIX limit of six chars + per abbreviation, and lists alphabetic abbreviations used. + + The files tz-art.htm and tz-link.htm have been renamed to + tz-art.html and tz-link.html, respectively, for consistency with + other file names and to simplify web server configuration. + + +Release 2017c - 2017-10-20 14:49:34 -0700 + + Briefly: + Northern Cyprus switches from +03 to +02/+03 on 2017-10-29. + Fiji ends DST 2018-01-14, not 2018-01-21. + Namibia switches from +01/+02 to +02 on 2018-04-01. + Sudan switches from +03 to +02 on 2017-11-01. + Tonga likely switches from +13/+14 to +13 on 2017-11-05. + Turks & Caicos switches from -04 to -05/-04 on 2018-11-04. + A new file tzdata.zi now holds a small text copy of all data. + The zic input format has been regularized slightly. + + Changes to future timestamps + + Northern Cyprus has decided to resume EU rules starting + 2017-10-29, thus reinstituting winter time. + + Fiji ends DST 2018-01-14 instead of the 2018-01-21 previously + predicted. (Thanks to Dominic Fok.) Adjust future predictions + accordingly. + + Namibia will switch from +01 with DST to +02 all year on + 2017-09-03 at 02:00. This affects UT offsets starting 2018-04-01 + at 02:00. (Thanks to Steffen Thorsen.) + + Sudan will switch from +03 to +02 on 2017-11-01. (Thanks to Ahmed + Atyya and Yahia Abdalla.) South Sudan is not switching, so + Africa/Juba is no longer a link to Africa/Khartoum. + + Tonga has likely ended its experiment with DST, and will not + adjust its clocks on 2017-11-05. Although Tonga has not announced + whether it will continue to observe DST, the IATA is assuming that + it will not. (Thanks to David Wade.) + + Turks & Caicos will switch from -04 all year to -05 with US DST on + 2018-03-11 at 03:00. This affects UT offsets starting 2018-11-04 + at 02:00. (Thanks to Steffen Thorsen.) + + Changes to past timestamps + + Namibia switched from +02 to +01 on 1994-03-21, not 1994-04-03. 
+ (Thanks to Arthur David Olson.) + + Detroit did not observe DST in 1967. + + Use railway time for Asia/Kolkata before 1941, by switching to + Madras local time (UT +052110) in 1870, then to IST (UT +0530) in + 1906. Also, treat 1941-2's +0630 as DST, like 1942-5. + + Europe/Dublin's 1946 and 1947 fallback transitions occurred at + 02:00 standard time, not 02:00 DST. (Thanks to Michael Deckers.) + + Pacific/Apia and Pacific/Pago_Pago switched from Antipodean to + American time in 1892, not 1879. (Thanks to Michael Deckers.) + + Adjust the 1867 transition in Alaska to better reflect the + historical record, by changing it to occur on 1867-10-18 at 15:30 + Sitka time rather than at the start of 1867-10-17 local time. + Although strictly speaking this is accurate only for Sitka, + the rest of Alaska's blanks need to be filled in somehow. + + Fix off-by-one errors in UT offsets for Adak and Nome before 1867. + (Thanks to Michael Deckers.) + + Add 7 s to the UT offset in Asia/Yangon before 1920. + + Changes to zone names + + Remove Canada/East-Saskatchewan from the 'backward' file, as it + exceeded the 14-character limit and was an unused misnomer anyway. + + Changes to build procedure + + To support applications that prefer to read time zone data in text + form, two zic input files tzdata.zi and leapseconds are now + installed by default. The commands 'zic tzdata.zi' and 'zic -L + leapseconds tzdata.zi' can reproduce the tzdata binary files + without and with leap seconds, respectively. To prevent these two + new files from being installed, use 'make TZDATA_TEXT=', and to + suppress leap seconds from the tzdata text installation, use 'make + TZDATA_TEXT=tzdata.zi'. + + 'make BACKWARD=' now suppresses backward-compatibility names + like 'US/Pacific' that are defined in the 'backward' and + 'pacificnew' files. + + 'make check' now works on systems that lack a UTF-8 locale, + or that lack the nsgmls program. 
Set UTF8_LOCALE to configure + the name of a UTF-8 locale, if you have one. + + Y2K runtime checks are no longer enabled by default. Add + -DDEPRECATE_TWO_DIGIT_YEARS to CFLAGS to enable them, instead of + adding -DNO_RUN_TIME_WARNINGS_ABOUT_YEAR_2000_PROBLEMS_THANK_YOU + to disable them. (New name suggested by Brian Inglis.) + + The build procedure for zdump now works on AIX 7.1. + (Problem reported by Kees Dekker.) + + Changes to code + + zic and the reference runtime now reject multiple leap seconds + within 28 days of each other, or leap seconds before the Epoch. + As a result, support for double leap seconds, which was + obsolescent and undocumented, has been removed. Double leap + seconds were an error in the C89 standard; they have never existed + in civil timekeeping. (Thanks to Robert Elz and Bradley White for + noticing glitches in the code that uncovered this problem.) + + zic now warns about use of the obsolescent and undocumented -y + option, and about use of the obsolescent TYPE field of Rule lines. + + zic now allows unambiguous abbreviations like "Sa" and "Su" for + weekdays; formerly it rejected them due to a bug. Conversely, zic + no longer considers non-prefixes to be abbreviations; for example, + it no longer accepts "lF" as an abbreviation for "lastFriday". + Also, zic warns about the undocumented usage with a "last-" + prefix, e.g., "last-Fri". + + Similarly, zic now accepts the unambiguous abbreviation "L" for + "Link" in ordinary context and for "Leap" in leap-second context. + Conversely, zic no longer accepts non-prefixes such as "La" as + abbreviations for words like "Leap". + + zic no longer accepts leap second lines in ordinary input, or + ordinary lines in leap second input. Formerly, zic sometimes + warned about this undocumented usage and handled it incorrectly. + + The new macro HAVE_TZNAME governs whether the tzname external + variable is exported, instead of USG_COMPAT. 
USG_COMPAT now + governs only the external variables "timezone" and "daylight". + This change is needed because the three variables are not in the + same category: although POSIX requires tzname, it specifies the + other two variables as optional. Also, USG_COMPAT is now 1 or 0: + if not defined, the code attempts to guess it from other macros. + + localtime.c and difftime.c no longer require stdio.h, and .c files + other than zic.c no longer require sys/wait.h. + + zdump.c no longer assumes snprintf. (Reported by Jonathan Leffler.) + + Calculation of time_t extrema works around a bug in GCC 4.8.4. + (Reported by Stan Shebs and Joseph Myers.) + + zic.c no longer mistranslates formats of line numbers in non-English + locales. (Problem reported by Benno Schulenberg.) + + Several minor changes have been made to the code to make it a + bit easier to port to MS-Windows and Solaris. (Thanks to Kees + Dekker for reporting the problems.) + + Changes to documentation and commentary + + The two new files 'theory.html' and 'calendars' contain the + contents of the removed file 'Theory'. The goal is to document + tzdb theory more accessibly. + + The zic man page now documents abbreviation rules. + + tz-link.htm now covers how to apply tzdata changes to clients. + (Thanks to Jorge Fábregas for the AIX link.) It also mentions MySQL. + + The leap-seconds.list URL has been updated to something that is + more reliable for tzdb. (Thanks to Tim Parenti and Brian Inglis.) + +Release 2017b - 2017-03-17 07:30:38 -0700 + + Briefly: Haiti has resumed DST. + + Changes to past and future timestamps + + Haiti resumed observance of DST in 2017. (Thanks to Steffen Thorsen.) + + Changes to past timestamps + + Liberia changed from -004430 to +00 on 1972-01-07, not 1972-05-01. + + Use "MMT" to abbreviate Liberia's time zone before 1972, as "-004430" + is one byte over the POSIX limit. (Problem reported by Derick Rethans.)
+ + Changes to code + + The reference localtime implementation now falls back on the + current US daylight-saving transition rules rather than the + 1987-2006 rules. This fallback occurs only when (1) the TZ + environment variable has a value like "AST4ADT" that asks + for daylight saving time but does not specify the rules, (2) there + is no file by that name, and (3) the TZDEFRULES file cannot be + loaded. (Thanks to Tom Lane.) + + +Release 2017a - 2017-02-28 00:05:36 -0800 + + Briefly: Southern Chile moves from -04/-03 to -03, and Mongolia + discontinues DST. + + Changes to future timestamps + + Mongolia no longer observes DST. (Thanks to Ganbold Tsagaankhuu.) + + Chile's Region of Magallanes moves from -04/-03 to -03 year-round. + Its clocks diverge from America/Santiago starting 2017-05-13 at + 23:00, hiving off a new zone America/Punta_Arenas. Although the + Chilean government says this change expires in May 2019, for now + assume it's permanent. (Thanks to Juan Correa and Deborah + Goldsmith.) This also affects Antarctica/Palmer. + + Changes to past timestamps + + Fix many entries for historical timestamps for Europe/Madrid + before 1979, to agree with tables compiled by Pere Planesas of the + National Astronomical Observatory of Spain. As a side effect, + this changes some timestamps for Africa/Ceuta before 1929, which + are probably guesswork anyway. (Thanks to Steve Allen and + Pierpaolo Bernardi for the heads-ups, and to Michael Deckers for + correcting the 1901 transition.) + + Ecuador observed DST from 1992-11-28 to 1993-02-05. + (Thanks to Alois Treindl.) + + Asia/Atyrau and Asia/Oral were at +03 (not +04) before 1930-06-21. + (Thanks to Stepan Golosunov.) + + Changes to past and future time zone abbreviations + + Switch to numeric time zone abbreviations for South America, as + part of the ongoing project of removing invented abbreviations. + This avoids the need to invent an abbreviation for the new Chilean + zone.
Similarly, switch from invented to numeric time zone + abbreviations for Afghanistan, American Samoa, the Azores, + Bangladesh, Bhutan, the British Indian Ocean Territory, Brunei, + Cape Verde, Chatham Is, Christmas I, Cocos (Keeling) Is, Cook Is, + Dubai, East Timor, Eucla, Fiji, French Polynesia, Greenland, + Indochina, Iran, Iraq, Kiribati, Lord Howe, Macquarie, Malaysia, + the Maldives, Marshall Is, Mauritius, Micronesia, Mongolia, + Myanmar, Nauru, Nepal, New Caledonia, Niue, Norfolk I, Palau, + Papua New Guinea, the Philippines, Pitcairn, Qatar, Réunion, St + Pierre & Miquelon, Samoa, Saudi Arabia, Seychelles, Singapore, + Solomon Is, Tokelau, Tuvalu, Wake, Vanuatu, Wallis & Futuna, and + Xinjiang; for 20-minute daylight saving time in Ghana before 1943; + for half-hour daylight saving time in Belize before 1944 and in + the Dominican Republic before 1975; and for Canary Islands before + 1946, for Guinea-Bissau before 1975, for Iceland before 1969, for + Indian Summer Time before 1942, for Indonesia before around 1964, + for Kenya before 1960, for Liberia before 1973, for Madeira before + 1967, for Namibia before 1943, for the Netherlands in 1937-9, for + Pakistan before 1971, for Western Sahara before 1977, and for + Zaporozhye in 1880-1924. + + For Alaska time from 1900 through 1967, instead of "CAT" use the + abbreviation "AST", the abbreviation commonly used at the time + (Atlantic Standard Time had not been standardized yet). Use "AWT" + and "APT" instead of the invented abbreviations "CAWT" and "CAPT". + + Use "CST" and "CDT" instead of invented abbreviations for Macau + before 1999 and Taiwan before 1938, and use "JST" instead of the + invented abbreviation "JCST" for Japan and Korea before 1938. + + Change to database entry category + + Move the Pacific/Johnston link from 'australasia' to 'backward', + since Johnston is now uninhabited. 
+ + Changes to code + + zic no longer mishandles some transitions in January 2038 when it + attempts to work around Qt bug 53071. This fixes a bug affecting + Pacific/Tongatapu that was introduced in zic 2016e. localtime.c + now contains a workaround, useful when loading a file generated by + a buggy zic. (Problem and localtime.c fix reported by Bradley + White.) + + zdump -i now outputs non-hour numeric time zone abbreviations + without a colon, e.g., "+0530" rather than "+05:30". This agrees + with zic %z and with common practice, and simplifies auditing of + zdump output. + + zdump is now buildable again with -DUSE_LTZ=0. + (Problem reported by Joseph Myers.) + + zdump.c now always includes private.h, to avoid code duplication + with private.h. (Problem reported by Kees Dekker.) + + localtime.c no longer mishandles early or late timestamps + when TZ is set to a POSIX-style string that specifies DST. + (Problem reported by Kees Dekker.) + + date and strftime now cause %z to generate "-0000" instead of + "+0000" when the UT offset is zero and the time zone abbreviation + begins with "-". + + Changes to documentation and commentary + + The 'Theory' file now better documents choice of historical time + zone abbreviations. (Problems reported by Michael Deckers.) + + tz-link.htm now covers leap smearing, which is popular in clouds. + + +Release 2016j - 2016-11-22 23:17:13 -0800 + + Briefly: Saratov, Russia moves from +03 to +04 on 2016-12-04. + + Changes to future timestamps + + Saratov, Russia switches from +03 to +04 on 2016-12-04 at 02:00. + This hives off a new zone Europe/Saratov from Europe/Volgograd. + (Thanks to Yuri Konotopov and Stepan Golosunov.) + + Changes to past timestamps + + The new zone Asia/Atyrau for Atyraū Region, Kazakhstan, is like + Asia/Aqtau except it switched from +05/+06 to +04/+05 in spring + 1999, not fall 1994. (Thanks to Stepan Golosunov.) 
+ + Changes to past time zone abbreviations + + Asia/Gaza and Asia/Hebron now use "EEST", not "EET", to denote + summer time before 1948. The old use of "EET" was a typo. + + Changes to code + + zic no longer mishandles file systems that lack hard links, fixing + bugs introduced in 2016g. (Problems reported by Tom Lane.) + Also, when the destination already contains symbolic links, zic + should now work better on systems where the 'link' system call + does not follow symbolic links. + + Changes to documentation and commentary + + tz-link.htm now documents the relationship between release version + numbers and development-repository commit tags. (Suggested by + Paul Koning.) + + The 'Theory' file now documents UT. + + iso3166.tab now accents "Curaçao", and commentary now mentions + the names "Cabo Verde" and "Czechia". (Thanks to Jiří Boháč.) + + +Release 2016i - 2016-11-01 23:19:52 -0700 + + Briefly: Cyprus split into two time zones on 2016-10-30, and Tonga + reintroduces DST on 2016-11-06. + + Changes to future timestamps + + Pacific/Tongatapu begins DST on 2016-11-06 at 02:00, ending on + 2017-01-15 at 03:00. Assume future observances in Tonga will be + from the first Sunday in November through the third Sunday in + January, like Fiji. (Thanks to Pulu ʻAnau.) Switch to numeric + time zone abbreviations for this zone. + + Changes to past and future timestamps + + Northern Cyprus is now +03 year round, causing a split in Cyprus + time zones starting 2016-10-30 at 04:00. This creates a zone + Asia/Famagusta. (Thanks to Even Scharning and Matt Johnson.) + + Antarctica/Casey switched from +08 to +11 on 2016-10-22. + (Thanks to Steffen Thorsen.) + + Changes to past timestamps + + Several corrections were made for pre-1975 timestamps in Italy. + These affect Europe/Malta, Europe/Rome, Europe/San_Marino, and + Europe/Vatican. + + First, the 1893-11-01 00:00 transition in Italy used the new UT + offset (+01), not the old (+00:49:56). (Thanks to Michael + Deckers.) 
+ + Second, rules for daylight saving in Italy were changed to agree + with Italy's National Institute of Metrological Research (INRiM) + except for 1944, as follows (thanks to Pierpaolo Bernardi, Brian + Inglis, and Michael Deckers): + + The 1916-06-03 transition was at 24:00, not 00:00. + + The 1916-10-01, 1919-10-05, and 1920-09-19 transitions were at + 00:00, not 01:00. + + The 1917-09-30 and 1918-10-06 transitions were at 24:00, not + 01:00. + + The 1944-09-17 transition was at 03:00, not 01:00. This + particular change is taken from Italian law as INRiM's table, + (which says 02:00) appears to have a typo here. Also, keep the + 1944-04-03 transition for Europe/Rome, as Rome was controlled by + Germany then. + + The 1967-1970 and 1972-1974 fallback transitions were at 01:00, + not 00:00. + + Changes to code + + The code should now be buildable on AmigaOS merely by setting the + appropriate Makefile variables. (From a patch by Carsten Larsen.) + + +Release 2016h - 2016-10-19 23:17:57 -0700 + + Changes to future timestamps + + Asia/Gaza and Asia/Hebron end DST on 2016-10-29 at 01:00, not + 2016-10-21 at 00:00. (Thanks to Sharef Mustafa.) Predict that + future fall transitions will be on the last Saturday of October + at 01:00, which is consistent with predicted spring transitions + on the last Saturday of March. (Thanks to Tim Parenti.) + + Changes to past timestamps + + In Turkey, transitions in 1986-1990 were at 01:00 standard time + not at 02:00, and the spring 1994 transition was on March 20, not + March 27. (Thanks to Kıvanç Yazan.) + + Changes to past and future time zone abbreviations + + Asia/Colombo now uses numeric time zone abbreviations like "+0530" + instead of alphabetic ones like "IST" and "LKT". Various + English-language sources use "IST", "LKT" and "SLST", with no + working consensus. (Usage of "SLST" mentioned by Sadika + Sumanapala.) 
+ + Changes to code + + zic no longer mishandles relativizing file names when creating + symbolic links like /etc/localtime, when these symbolic links + are outside the usual directory hierarchy. This fixes a bug + introduced in 2016g. (Problem reported by Andreas Stieger.) + + Changes to build procedure + + New rules 'traditional_tarballs' and 'traditional_signatures' for + building just the traditional-format distribution. (Requested by + Deborah Goldsmith.) + + The file 'version' is now put into the tzdata tarball too. + (Requested by Howard Hinnant.) + + Changes to documentation and commentary + + The 'Theory' file now has a section on interface stability. + (Requested by Paul Koning.) It also mentions features like + tm_zone and localtime_rz that have long been supported by the + reference code. + + tz-link.htm has improved coverage of time zone boundaries suitable + for geolocation. (Thanks to heads-ups from Evan Siroky and Matt + Johnson.) + + The US commentary now mentions Allen and the "day of two noons". + + The Fiji commentary mentions the government's 2016-10-03 press + release. (Thanks to Raymond Kumar.) + + +Release 2016g - 2016-09-13 08:56:38 -0700 + + Changes to future timestamps + + Turkey switched from EET/EEST (+02/+03) to permanent +03, + effective 2016-09-07. (Thanks to Burak AYDIN.) Use "+03" rather + than an invented abbreviation for the new time. + + New leap second 2016-12-31 23:59:60 UTC as per IERS Bulletin C 52. + (Thanks to Tim Parenti.) + + Changes to past timestamps + + For America/Los_Angeles, spring-forward transition times have been + corrected from 02:00 to 02:01 in 1948, and from 02:00 to 01:00 in + 1950-1966. + + For zones using Soviet time on 1919-07-01, transitions to UT-based + time were at 00:00 UT, not at 02:00 local time. The affected + zones are Europe/Kirov, Europe/Moscow, Europe/Samara, and + Europe/Ulyanovsk. (Thanks to Alexander Belopolsky.) 
+ + Changes to past and future time zone abbreviations + + The Factory zone now uses the time zone abbreviation -00 instead + of a long English-language string, as -00 is now the normal way to + represent an undefined time zone. + + Several zones in Antarctica and the former Soviet Union, along + with zones intended for ships at sea that cannot use POSIX TZ + strings, now use numeric time zone abbreviations instead of + invented or obsolete alphanumeric abbreviations. The affected + zones are Antarctica/Casey, Antarctica/Davis, + Antarctica/DumontDUrville, Antarctica/Mawson, Antarctica/Rothera, + Antarctica/Syowa, Antarctica/Troll, Antarctica/Vostok, + Asia/Anadyr, Asia/Ashgabat, Asia/Baku, Asia/Bishkek, Asia/Chita, + Asia/Dushanbe, Asia/Irkutsk, Asia/Kamchatka, Asia/Khandyga, + Asia/Krasnoyarsk, Asia/Magadan, Asia/Omsk, Asia/Sakhalin, + Asia/Samarkand, Asia/Srednekolymsk, Asia/Tashkent, Asia/Tbilisi, + Asia/Ust-Nera, Asia/Vladivostok, Asia/Yakutsk, Asia/Yekaterinburg, + Asia/Yerevan, Etc/GMT-14, Etc/GMT-13, Etc/GMT-12, Etc/GMT-11, + Etc/GMT-10, Etc/GMT-9, Etc/GMT-8, Etc/GMT-7, Etc/GMT-6, Etc/GMT-5, + Etc/GMT-4, Etc/GMT-3, Etc/GMT-2, Etc/GMT-1, Etc/GMT+1, Etc/GMT+2, + Etc/GMT+3, Etc/GMT+4, Etc/GMT+5, Etc/GMT+6, Etc/GMT+7, Etc/GMT+8, + Etc/GMT+9, Etc/GMT+10, Etc/GMT+11, Etc/GMT+12, Europe/Kaliningrad, + Europe/Minsk, Europe/Samara, Europe/Volgograd, and + Indian/Kerguelen. For Europe/Moscow the invented abbreviation MSM + was replaced by +05, whereas MSK and MSD were kept as they are not + our invention and are widely used. + + Changes to zone names + + Rename Asia/Rangoon to Asia/Yangon, with a backward compatibility link. + (Thanks to David Massoud.) + + Changes to code + + zic no longer generates binary files containing POSIX TZ-like + strings that disagree with the local time type after the last + explicit transition in the data. This fixes a bug with + Africa/Casablanca and Africa/El_Aaiun in some year-2037 timestamps + on the reference platform. 
(Thanks to Alexander Belopolsky for + reporting the bug and suggesting a way forward.) + + If the installed localtime and/or posixrules files are symbolic + links, zic now keeps them symbolic links when updating them, for + compatibility with platforms like OpenSUSE where other programs + configure these files as symlinks. + + zic now avoids hard linking to symbolic links, avoids some + unnecessary mkdir and stat system calls, and uses shorter file + names internally. + + zdump has a new -i option to generate transitions in a + smaller but still human-readable format. This option is + experimental, and the output format may change in future versions. + (Thanks to Jon Skeet for suggesting that an option was needed, + and thanks to Tim Parenti and Chris Rovick for further comments.) + + Changes to build procedure + + An experimental distribution format is available, in addition + to the traditional format which will continue to be distributed. + The new format is a tarball tzdb-VERSION.tar.lz with signature + file tzdb-VERSION.tar.lz.asc. It unpacks to a top-level directory + tzdb-VERSION containing the code and data of the traditional + two-tarball format, along with extra data that may be useful. + (Thanks to Antonio Diaz Diaz, Oscar van Vlijmen, and many others + for comments about the experimental format.) + + The release version number is now more accurate in the usual case + where releases are built from a Git repository. For example, if + 23 commits and some working-file changes have been made since + release 2016g, the version number is now something like + '2016g-23-g50556e3-dirty' instead of the misleading '2016g'. + Tagged releases use the same version number format as before, + e.g., '2016g'. To support the more accurate version number, its + specification has moved from a line in the Makefile to a new + source file 'version'. 
+ + The experimental distribution contains a file to2050.tzs that + contains what should be the output of 'zdump -i -c 2050' on + primary zones. If this file is available, 'make check' now checks + that zdump generates this output. + + 'make check_web' now works on Fedora-like distributions. + + Changes to documentation and commentary + + tzfile.5 now documents the new restriction on POSIX TZ-like + strings that is now implemented by zic. + + Comments now cite URLs for some 1917-1921 Russian DST decrees. + (Thanks to Alexander Belopolsky.) + + tz-link.htm mentions JuliaTime (thanks to Curtis Vogt) and Time4J + (thanks to Meno Hochschild) and ThreeTen-Extra, and its + description of Java 8 has been brought up to date (thanks to + Stephen Colebourne). Its description of local time on Mars has + been updated to match current practice, and URLs have been updated + and some obsolete ones removed. + + +Release 2016f - 2016-07-05 16:26:51 +0200 + + Changes affecting future timestamps + + The Egyptian government changed its mind on short notice, and + Africa/Cairo will not introduce DST starting 2016-07-07 after all. + (Thanks to Mina Samuel.) + + Asia/Novosibirsk switches from +06 to +07 on 2016-07-24 at 02:00. + (Thanks to Stepan Golosunov.) + + Changes to past and future timestamps + + Asia/Novokuznetsk and Asia/Novosibirsk now use numeric time zone + abbreviations instead of invented ones. + + Changes affecting past timestamps + + Europe/Minsk's 1992-03-29 spring-forward transition was at 02:00 not 00:00. + (Thanks to Stepan Golosunov.) + + +Release 2016e - 2016-06-14 08:46:16 -0700 + + Changes affecting future timestamps + + Africa/Cairo observes DST in 2016 from July 7 to the end of October. + Guess October 27 and 24:00 transitions. (Thanks to Steffen Thorsen.) + For future years, guess April's last Thursday to October's last + Thursday except for Ramadan. 
+ + Changes affecting past timestamps + + Locations while uninhabited now use '-00', not 'zzz', as a + placeholder time zone abbreviation. This is inspired by Internet + RFC 3339 and is more consistent with numeric time zone + abbreviations already used elsewhere. The change affects several + arctic and antarctic locations, e.g., America/Cambridge_Bay before + 1920 and Antarctica/Troll before 2005. + + Asia/Baku's 1992-09-27 transition from +04 (DST) to +04 (non-DST) was + at 03:00, not 23:00 the previous day. (Thanks to Michael Deckers.) + + Changes to code + + zic now outputs a dummy transition at time 2**31 - 1 in zones + whose POSIX-style TZ strings contain a '<'. This mostly works + around Qt bug 53071. + (Thanks to Zhanibek Adilbekov for reporting the Qt bug.) + + Changes affecting documentation and commentary + + tz-link.htm says why governments should give plenty of notice for + time zone or DST changes, and refers to Matt Johnson's blog post. + + tz-link.htm mentions Tzdata for Elixir. (Thanks to Matt Johnson.) + + +Release 2016d - 2016-04-17 22:50:29 -0700 + + Changes affecting future timestamps + + America/Caracas switches from -0430 to -04 on 2016-05-01 at 02:30. + (Thanks to Alexander Krivenyshev for the heads-up.) + + Asia/Magadan switches from +10 to +11 on 2016-04-24 at 02:00. + (Thanks to Alexander Krivenyshev and Matt Johnson.) + + New zone Asia/Tomsk, split off from Asia/Novosibirsk. It covers + Tomsk Oblast, Russia, which switches from +06 to +07 on 2016-05-29 + at 02:00. (Thanks to Stepan Golosunov.) + + Changes affecting past timestamps + + New zone Europe/Kirov, split off from Europe/Volgograd. It covers + Kirov Oblast, Russia, which switched from +04/+05 to +03/+04 on + 1989-03-26 at 02:00, roughly a year after Europe/Volgograd made + the same change. (Thanks to Stepan Golosunov.)
+ + Russia and nearby locations had daylight-saving transitions on + 1992-03-29 at 02:00 and 1992-09-27 at 03:00, instead of on + 1992-03-28 at 23:00 and 1992-09-26 at 23:00. (Thanks to Stepan + Golosunov.) + + Many corrections to historical time in Kazakhstan from 1991 + through 2005. (Thanks to Stepan Golosunov.) Replace Kazakhstan's + invented time zone abbreviations with numeric abbreviations. + + Changes to commentary + + Mention Internet RFCs 7808 (TZDIST) and 7809 (CalDAV time zone references). + + +Release 2016c - 2016-03-23 00:51:27 -0700 + + Changes affecting future timestamps + + Azerbaijan no longer observes DST. (Thanks to Steffen Thorsen.) + + Chile reverts from permanent to seasonal DST. (Thanks to Juan + Correa for the heads-up, and to Tim Parenti for corrections.) + Guess that future transitions are August's and May's second + Saturdays at 24:00 mainland time. Also, call the period from + 2014-09-07 through 2016-05-14 daylight saving time instead of + standard time, as that seems more appropriate now. + + Changes affecting past timestamps + + Europe/Kaliningrad and Europe/Vilnius changed from +03/+04 to + +02/+03 on 1989-03-26, not 1991-03-31. Europe/Volgograd changed + from +04/+05 to +03/+04 on 1988-03-27, not 1989-03-26. + (Thanks to Stepan Golosunov.) + + Changes to commentary + + Several updates and URLs for historical and proposed Russian changes. + (Thanks to Stepan Golosunov, Matt Johnson, and Alexander Krivenyshev.) + + +Release 2016b - 2016-03-12 17:30:14 -0800 + + Compatibility note + + Starting with release 2016b, some data entries cause zic implementations + derived from tz releases 2005j through 2015e to issue warnings like + "time zone abbreviation differs from POSIX standard (+03)". + These warnings should not otherwise affect zic's output and can safely be + ignored on today's platforms, as the warnings refer to a restriction in + POSIX.1-1988 that was removed in POSIX.1-2001. 
One way to suppress the + warnings is to upgrade to zic derived from tz releases 2015f and later. + + Changes affecting future timestamps + + New zones Europe/Astrakhan and Europe/Ulyanovsk for Astrakhan and + Ulyanovsk Oblasts, Russia, both of which will switch from +03 to +04 on + 2016-03-27 at 02:00 local time. They need distinct zones since their + post-1970 histories disagree. New zone Asia/Barnaul for Altai Krai and + Altai Republic, Russia, which will switch from +06 to +07 on the same date + and local time. The Astrakhan change is already official; the others have + passed the first reading in the State Duma and are extremely likely. + Also, Asia/Sakhalin moves from +10 to +11 on 2016-03-27 at 02:00. + (Thanks to Alexander Krivenyshev for the heads-up, and to Matt Johnson + and Stepan Golosunov for followup.) + + As a trial of a new system that needs less information to be made up, + the new zones use numeric time zone abbreviations like "+04" + instead of invented abbreviations like "ASTT". + + Haiti will not observe DST in 2016. (Thanks to Jean Antoine via + Steffen Thorsen.) + + Palestine's spring-forward transition on 2016-03-26 is at 01:00, not 00:00. + (Thanks to Hannah Kreitem.) Guess future transitions will be March's last + Saturday at 01:00, not March's last Friday at 24:00. + + Changes affecting past timestamps + + Europe/Chisinau observed DST during 1990, and switched from +04 to + +03 at 1990-05-06 02:00, instead of switching from +03 to +02. + (Thanks to Stepan Golosunov.) + + 1991 abbreviations in Europe/Samara should be SAMT/SAMST, not + KUYT/KUYST. (Thanks to Stepan Golosunov.) + + Changes to code + + tzselect's diagnostics and checking, and checktab.awk's checking, + have been improved. (Thanks to J William Piggott.) + + tzcode now builds under MinGW. (Thanks to Ian Abbott and Esben Haabendal.) + + tzselect now tests Julian-date TZ settings more accurately. + (Thanks to J William Piggott.) 
+ + Changes to commentary + + Comments in zone tables have been improved. (Thanks to J William Piggott.) + + tzselect again limits its menu comments so that menus fit on a + 24×80 alphanumeric display. + + A new web page tz-how-to.html. (Thanks to Bill Seymour.) + + In the Theory file, the description of possible time zone abbreviations in + tzdata has been cleaned up, as the old description was unclear and + inconsistent. (Thanks to Alain Mouette for reporting the problem.) + + +Release 2016a - 2016-01-26 23:28:02 -0800 + + Changes affecting future timestamps + + America/Cayman will not observe daylight saving this year after all. + Revert our guess that it would. (Thanks to Matt Johnson.) + + Asia/Chita switches from +0800 to +0900 on 2016-03-27 at 02:00. + (Thanks to Alexander Krivenyshev.) + + Asia/Tehran now has DST predictions for the year 2038 and later, + to be March 21 00:00 to September 21 00:00. This is likely better + than predicting no DST, albeit off by a day every now and then. + + Changes affecting past and future timestamps + + America/Metlakatla switched from PST all year to AKST/AKDT on + 2015-11-01 at 02:00. (Thanks to Steffen Thorsen.) + + America/Santa_Isabel has been removed, and replaced with a + backward compatibility link to America/Tijuana. Its contents were + apparently based on a misreading of Mexican legislation. + + Changes affecting past timestamps + + Asia/Karachi's two transition times in 2002 were off by a minute. + (Thanks to Matt Johnson.) + + Changes affecting build procedure + + An installer can now combine leap seconds with use of the backzone file, + e.g., with 'make PACKRATDATA=backzone REDO=posix_right zones'. + The old 'make posix_packrat' rule is now marked as obsolescent. + (Thanks to Ian Abbott for an initial implementation.) + + Changes affecting documentation and commentary + + A new file LICENSE makes it easier to see that the code and data + are mostly public-domain. (Thanks to James Knight.) 
The three + non-public-domain files now use the current (3-clause) BSD license + instead of older versions of that license. + + tz-link.htm mentions the BDE library (thanks to Andrew Paprocki), + CCTZ (thanks to Tim Parenti), TimeJones.com, and has a new section + on editing tz source files (with a mention of Sublime zoneinfo, + thanks to Gilmore Davidson). + + The Theory and asia files now mention the 2015 book "The Global + Transformation of Time, 1870-1950", and cite a couple of reviews. + + The America/Chicago entry now documents the informal use of US + central time in Fort Pierre, South Dakota. (Thanks to Rick + McDermid, Matt Johnson, and Steve Jones.) + + +Release 2015g - 2015-10-01 00:39:51 -0700 + + Changes affecting future timestamps + + Turkey's 2015 fall-back transition is scheduled for Nov. 8, not Oct. 25. + (Thanks to Fatih.) + + Norfolk moves from +1130 to +1100 on 2015-10-04 at 02:00 local time. + (Thanks to Alexander Krivenyshev.) + + Fiji's 2016 fall-back transition is scheduled for January 17, not 24. + (Thanks to Ken Rylander.) + + Fort Nelson, British Columbia will not fall back on 2015-11-01. It has + effectively been on MST (-0700) since it advanced its clocks on 2015-03-08. + New zone America/Fort_Nelson. (Thanks to Matt Johnson.) + + Changes affecting past timestamps + + Norfolk observed DST from 1974-10-27 02:00 to 1975-03-02 02:00. + + Changes affecting code + + localtime no longer mishandles America/Anchorage after 2037. + (Thanks to Bradley White for reporting the bug.) + + On hosts with signed 32-bit time_t, localtime no longer mishandles + Pacific/Fiji after 2038-01-16 14:00 UTC. + + The localtime module allows the variables 'timezone', 'daylight', + and 'altzone' to be in common storage shared with other modules, + and declares them in case the system does not. + (Problems reported by Kees Dekker.) + + On platforms with tm_zone, strftime.c now assumes it is not NULL. + This simplifies the code and is consistent with zdump.c. 
+ (Problem reported by Christos Zoulas.) + + Changes affecting documentation + + The tzfile man page now documents that transition times denote the + starts (not the ends) of the corresponding time periods. + (Ambiguity reported by Bill Seymour.) + + +Release 2015f - 2015-08-10 18:06:56 -0700 + + Changes affecting future timestamps + + North Korea switches to +0830 on 2015-08-15. (Thanks to Steffen Thorsen.) + The abbreviation remains "KST". (Thanks to Robert Elz.) + + Uruguay no longer observes DST. (Thanks to Steffen Thorsen + and Pablo Camargo.) + + Changes affecting past and future timestamps + + Moldova starts and ends DST at 00:00 UTC, not at 01:00 UTC. + (Thanks to Roman Tudos.) + + Changes affecting data format and code + + zic's '-y YEARISTYPE' option is no longer documented. The TYPE + field of a Rule line should now be '-'; the old values 'even', + 'odd', 'uspres', 'nonpres', 'nonuspres' were already undocumented. + Although the implementation has not changed, these features do not + work in the default installation, they are not used in the data, + and they are now considered obsolescent. + + zic now checks that two rules don't take effect at the same time. + (Thanks to Jon Skeet and Arthur David Olson.) Constraints on + simultaneity are now documented. + + The two characters '%z' in a zone format now stand for the UT + offset, e.g., '-07' for seven hours behind UT and '+0530' for + five hours and thirty minutes ahead. This better supports time + zone abbreviations conforming to POSIX.1-2001 and later. + + Changes affecting installed data files + + Comments for America/Halifax and America/Glace_Bay have been improved. + (Thanks to Brian Inglis.) + + Data entries have been simplified for Atlantic/Canary, Europe/Simferopol, + Europe/Sofia, and Europe/Tallinn. This yields slightly smaller + installed data files for Europe/Simferopol and Europe/Tallinn. + It does not affect timestamps. (Thanks to Howard Hinnant.) 
+ + Changes affecting code + + zdump and zic no longer warn about valid time zone abbreviations + like '-05'. + + Some Visual Studio 2013 warnings have been suppressed. + (Thanks to Kees Dekker.) + + 'date' no longer sets the time of day and its -a, -d, -n and -t + options have been removed. Long obsolescent, the implementation + of these features had porting problems. Builders no longer need + to configure HAVE_ADJTIME, HAVE_SETTIMEOFDAY, or HAVE_UTMPX_H. + (Thanks to Kees Dekker for pointing out the problem.) + + Changes affecting documentation + + The Theory file mentions naming issues earlier, as these seem to be + poorly publicized (thanks to Gilmore Davidson for reporting the problem). + + tz-link.htm mentions Time Zone Database Parser (thanks to Howard Hinnant). + + Mention that Herbert Samuel introduced the term "Summer Time". + + +Release 2015e - 2015-06-13 10:56:02 -0700 + + Changes affecting future timestamps + + Morocco will suspend DST from 2015-06-14 03:00 through 2015-07-19 02:00, + not 06-13 and 07-18 as we had guessed. (Thanks to Milamber.) + + Assume Cayman Islands will observe DST starting next year, using US rules. + Although it isn't guaranteed, it is the most likely. + + Changes affecting data format + + The file 'iso3166.tab' now uses UTF-8, so that its entries can better + spell the names of Åland Islands, Côte d'Ivoire, and Réunion. + + Changes affecting code + + When displaying data, tzselect converts it to the current locale's + encoding if the iconv command works. (Problem reported by random832.) + + tzselect no longer mishandles Dominica, fixing a bug introduced + in Release 2014f. (Problem reported by Owen Leibman.) + + zic -l no longer fails when compiled with -DTZDEFAULT=\"/etc/localtime\". + This fixes a bug introduced in Release 2014f. + (Problem reported by Leonardo Chiquitto.) 
+ + +Release 2015d - 2015-04-24 08:09:46 -0700 + + Changes affecting future timestamps + + Egypt will not observe DST in 2015 and will consider canceling it + permanently. For now, assume no DST indefinitely. + (Thanks to Ahmed Nazmy and Tim Parenti.) + + Changes affecting past timestamps + + America/Whitehorse switched from UT -09 to -08 on 1967-05-28, not + 1966-07-01. Also, Yukon's time zone history is documented better. + (Thanks to Brian Inglis and Dennis Ferguson.) + + Change affecting past and future time zone abbreviations + + The abbreviations for Hawaii-Aleutian standard and daylight times + have been changed from HAST/HADT to HST/HDT, as per US Government + Printing Office style. This affects only America/Adak since 1983, + as America/Honolulu was already using the new style. + + Changes affecting code + + zic has some minor performance improvements. + + +Release 2015c - 2015-04-11 08:55:55 -0700 + + Changes affecting future timestamps + + Egypt's spring-forward transition is at 24:00 on April's last Thursday, + not 00:00 on April's last Friday. 2015's transition will therefore be on + Thursday, April 30 at 24:00, not Friday, April 24 at 00:00. Similar fixes + apply to 2026, 2037, 2043, etc. (Thanks to Steffen Thorsen.) + + Changes affecting past timestamps + + The following changes affect some pre-1991 Chile-related timestamps + in America/Santiago, Antarctica/Palmer, and Pacific/Easter. + + The 1910 transition was January 10, not January 1. + + The 1918 transition was September 10, not September 1. + + The UT -04 time observed from 1932 to 1942 is now considered to + be standard time, not year-round DST. + + Santiago observed DST (UT -03) from 1946-07-15 through + 1946-08-31, then reverted to standard time, then switched to -05 + on 1947-04-01. + + Assume transitions before 1968 were at 00:00, since we have no data + saying otherwise. + + The spring 1988 transition was 1988-10-09, not 1988-10-02. 
+ The fall 1990 transition was 1990-03-11, not 1990-03-18. + + Assume no UT offset change for Pacific/Easter on 1890-01-01, + and omit all transitions on Pacific/Easter from 1942 through 1946 + since we have no data suggesting that they existed. + + One more zone has been turned into a link, as it differed + from an existing zone only for older timestamps. As usual, + this change affects UT offsets in pre-1970 timestamps only. + The zone's old contents have been moved to the 'backzone' file. + The affected zone is America/Montreal. + + Changes affecting commentary + + Mention the TZUpdater tool. + + Mention "The Time Now". (Thanks to Brandon Ramsey.) + + +Release 2015b - 2015-03-19 23:28:11 -0700 + + Changes affecting future timestamps + + Mongolia will start observing DST again this year, from the last + Saturday in March at 02:00 to the last Saturday in September at 00:00. + (Thanks to Ganbold Tsagaankhuu.) + + Palestine will start DST on March 28, not March 27. Also, + correct the fall 2014 transition from September 26 to October 24. + Adjust future predictions accordingly. (Thanks to Steffen Thorsen.) + + Changes affecting past timestamps + + The 1982 zone shift in Pacific/Easter has been corrected, fixing a 2015a + regression. (Thanks to Stuart Bishop for reporting the problem.) + + Some more zones have been turned into links, when they differed + from existing zones only for older timestamps. As usual, + these changes affect UT offsets in pre-1970 timestamps only. + Their old contents have been moved to the 'backzone' file. + The affected zones are: America/Antigua, America/Cayman, + Pacific/Midway, and Pacific/Saipan. + + Changes affecting time zone abbreviations + + Correct the 1992-2010 DST abbreviation in Volgograd from "MSK" to "MSD". + (Thanks to Hank W.) + + Changes affecting code + + Fix integer overflow bug in reference 'mktime' implementation. + (Problem reported by Jörg Richter.) + + Allow -Dtime_tz=time_t compilations, and allow -Dtime_tz=... 
libraries + to be used in the same executable as standard-library time_t functions. + (Problems reported by Bradley White.) + + Changes affecting commentary + + Cite the recent Mexican decree changing Quintana Roo's time zone. + (Thanks to Carlos Raúl Perasso.) + + Likewise for the recent Chilean decree. (Thanks to Eduardo Romero Urra.) + + Update info about Mars time. + + +Release 2015a - 2015-01-29 22:35:20 -0800 + + Changes affecting future timestamps + + The Mexican state of Quintana Roo, represented by America/Cancun, + will shift from Central Time with DST to Eastern Time without DST + on 2015-02-01 at 02:00. (Thanks to Steffen Thorsen and Gwillim Law.) + + Chile will not change clocks in April or thereafter; its new standard time + will be its old daylight saving time. This affects America/Santiago, + Pacific/Easter, and Antarctica/Palmer. (Thanks to Juan Correa.) + + New leap second 2015-06-30 23:59:60 UTC as per IERS Bulletin C 49. + (Thanks to Tim Parenti.) + + Changes affecting past timestamps + + Iceland observed DST in 1919 and 1921, and its 1939 fallback + transition was Oct. 29, not Nov. 29. Remove incorrect data from + Shanks about time in Iceland between 1837 and 1908. + + Some more zones have been turned into links, when they differed + from existing zones only for older timestamps. As usual, + these changes affect UT offsets in pre-1970 timestamps only. + Their old contents have been moved to the 'backzone' file. + The affected zones are: Asia/Aden, Asia/Bahrain, Asia/Kuwait, + and Asia/Muscat. + + Changes affecting code + + tzalloc now scrubs time zone abbreviations compatibly with the way + that tzset always has, by replacing invalid bytes with '_' and by + shortening too-long abbreviations. + + tzselect ports to POSIX awk implementations, no longer mishandles + POSIX TZ settings when GNU awk is used, and reports POSIX TZ + settings to the user. (Thanks to Stefan Kuhn.) 
+ + Changes affecting build procedure + + 'make check' now checks for links to links in the data. + One such link (for Africa/Asmera) has been fixed. + (Thanks to Stephen Colebourne for pointing out the problem.) + + Changes affecting commentary + + The leapseconds file commentary now mentions the expiration date. + (Problem reported by Martin Burnicki.) + + Update Mexican Library of Congress URL. + + +Release 2014j - 2014-11-10 17:37:11 -0800 + + Changes affecting current and future timestamps + + Turks & Caicos' switch from US eastern time to UT -04 year-round + did not occur on 2014-11-02 at 02:00. It's currently scheduled + for 2015-11-01 at 02:00. (Thanks to Chris Walton.) + + Changes affecting past timestamps + + Many pre-1989 timestamps have been corrected for Asia/Seoul and + Asia/Pyongyang, based on sources for the Korean-language Wikipedia + entry for time in Korea. (Thanks to Sanghyuk Jung.) Also, no + longer guess that Pyongyang mimicked Seoul time after World War II, + as this is politically implausible. + + Some more zones have been turned into links, when they differed + from existing zones only for older timestamps. As usual, + these changes affect UT offsets in pre-1970 timestamps only. + Their old contents have been moved to the 'backzone' file. + The affected zones are: Africa/Addis_Ababa, Africa/Asmara, + Africa/Dar_es_Salaam, Africa/Djibouti, Africa/Kampala, + Africa/Mogadishu, Indian/Antananarivo, Indian/Comoro, and + Indian/Mayotte. + + Changes affecting commentary + + The commentary is less enthusiastic about Shanks as a source, + and is more careful to distinguish UT from UTC. + + +Release 2014i - 2014-10-21 22:04:57 -0700 + + Changes affecting future timestamps + + Pacific/Fiji will observe DST from 2014-11-02 02:00 to 2015-01-18 03:00. + (Thanks to Ken Rylander for the heads-up.) Guess that future + years will use a similar pattern. 
+ + A new Zone Pacific/Bougainville, for the part of Papua New Guinea + that plans to switch from UT +10 to +11 on 2014-12-28 at 02:00. + (Thanks to Kiley Walbom for the heads-up.) + + Changes affecting time zone abbreviations + + Since Belarus is not changing its clocks even though Moscow is, + the time zone abbreviation in Europe/Minsk is changing from FET + to its more traditional value MSK on 2014-10-26 at 01:00. + (Thanks to Alexander Bokovoy for the heads-up about Belarus.) + + The new abbreviation IDT stands for the pre-1976 use of UT +08 in + Indochina, to distinguish it better from ICT (+07). + + Changes affecting past timestamps + + Many timestamps have been corrected for Asia/Ho_Chi_Minh before 1976 + (thanks to Trần Ngọc Quân for an indirect pointer to Trần Tiến Bình's + authoritative book). Asia/Ho_Chi_Minh has been added to + zone1970.tab, to give tzselect users in Vietnam two choices, + since north and south Vietnam disagreed after our 1970 cutoff. + + Asia/Phnom_Penh and Asia/Vientiane have been turned into links, as + they differed from existing zones only for older timestamps. As + usual, these changes affect pre-1970 timestamps only. Their old + contents have been moved to the 'backzone' file. + + Changes affecting code + + The time-related library functions now set errno on failure, and + some crashes in the new tzalloc-related library functions have + been fixed. (Thanks to Christos Zoulas for reporting most of + these problems and for suggesting fixes.) + + If USG_COMPAT is defined and the requested timestamp is standard time, + the tz library's localtime and mktime functions now set the extern + variable timezone to a value appropriate for that timestamp; and + similarly for ALTZONE, daylight saving time, and the altzone variable. + This change is a companion to the tzname change in 2014h, and is + designed to make timezone and altzone more compatible with tzname. 
+ + The tz library's functions now set errno to EOVERFLOW if they fail + because the result cannot be represented. ctime and ctime_r now + return NULL and set errno when a timestamp is out of range, rather + than having undefined behavior. + + Some bugs associated with the new 2014g functions have been fixed. + This includes a bug that largely incapacitated the new functions + time2posix_z and posix2time_z. (Thanks to Christos Zoulas.) + It also includes some uses of uninitialized variables after tzalloc. + The new code uses the standard type 'ssize_t', which the Makefile + now gives porting advice about. + + Changes affecting commentary + + Updated URLs for NRC Canada (thanks to Matt Johnson and Brian Inglis). + + +Release 2014h - 2014-09-25 18:59:03 -0700 + + Changes affecting past timestamps + + America/Jamaica's 1974 spring-forward transition was Jan. 6, not Apr. 28. + + Shanks says Asia/Novokuznetsk switched from LMT (not "NMT") on 1924-05-01, + not 1920-01-06. The old entry was based on a misinterpretation of Shanks. + + Some more zones have been turned into links, when they differed + from existing zones only for older timestamps. As usual, + these changes affect UT offsets in pre-1970 timestamps only. + Their old contents have been moved to the 'backzone' file. + The affected zones are: Africa/Blantyre, Africa/Bujumbura, + Africa/Gaborone, Africa/Harare, Africa/Kigali, Africa/Lubumbashi, + Africa/Lusaka, Africa/Maseru, and Africa/Mbabane. + + Changes affecting code + + zdump -V and -v now output gmtoff= values on all platforms, + not merely on platforms defining TM_GMTOFF. + + The tz library's localtime and mktime functions now set tzname to a value + appropriate for the requested timestamp, and zdump now uses this + on platforms not defining TM_ZONE, fixing a 2014g regression. + (Thanks to Tim Parenti for reporting the problem.) + + The tz library no longer sets tzname if localtime or mktime fails. 
+ + zdump -c no longer mishandles transitions near year boundaries. + (Thanks to Tim Parenti for reporting the problem.) + + An access to uninitialized data has been fixed. + (Thanks to Jörg Richter for reporting the problem.) + + When THREAD_SAFE is defined, the code ports to the C11 memory model. + A memory leak has been fixed if ALL_STATE and THREAD_SAFE are defined + and two threads race to initialize data used by gmtime-like functions. + (Thanks to Andy Heninger for reporting the problems.) + + Changes affecting build procedure + + 'make check' now checks better for properly sorted data. + + Changes affecting documentation and commentary + + zdump's gmtoff=N output is now documented, and its isdst=D output + is now documented to possibly output D values other than 0 or 1. + + zdump -c's treatment of years is now documented to use the + Gregorian calendar and Universal Time without leap seconds, + and its behavior at cutoff boundaries is now documented better. + (Thanks to Arthur David Olson and Tim Parenti for reporting the problems.) + + Programs are now documented to use the proleptic Gregorian calendar. + (Thanks to Alan Barrett for the suggestion.) + + Fractional-second GMT offsets have been documented for civil time + in 19th-century Chennai, Jakarta, and New York. + + +Release 2014g - 2014-08-28 12:31:23 -0700 + + Changes affecting future timestamps + + Turks & Caicos is switching from US eastern time to UT -04 + year-round, modeled as a switch on 2014-11-02 at 02:00. + [As noted in 2014j, this switch was later delayed.] + + Changes affecting past timestamps + + Time in Russia or the USSR before 1926 or so has been corrected by + a few seconds in the following zones: Asia/Irkutsk, + Asia/Krasnoyarsk, Asia/Omsk, Asia/Samarkand, Asia/Tbilisi, + Asia/Vladivostok, Asia/Yakutsk, Europe/Riga, Europe/Samara. For + Asia/Yekaterinburg the correction is a few minutes. (Thanks to + Vladimir Karpinsky.) 
+ + The Portuguese decree of 1911-05-26 took effect on 1912-01-01. + This affects 1911 timestamps in Africa/Bissau, Africa/Luanda, + Atlantic/Azores, and Atlantic/Madeira. Also, Lisbon's pre-1912 + GMT offset was -0:36:45 (rounded from -0:36:44.68), not -0:36:32. + (Thanks to Stephen Colebourne for pointing to the decree.) + + Asia/Dhaka ended DST on 2009-12-31 at 24:00, not 23:59. + + A new file 'backzone' contains data which may appeal to + connoisseurs of old timestamps, although it is out of scope for + the tz database, is often poorly sourced, and contains some data + that is known to be incorrect. The new file is not recommended + for ordinary use and its entries are not installed by default. + (Thanks to Lester Caine for the high-quality Jersey, Guernsey, and + Isle of Man entries.) + + Some more zones have been turned into links, when they differed + from existing zones only for older timestamps. As usual, + these changes affect UT offsets in pre-1970 timestamps only. + Their old contents have been moved to the 'backzone' file. + The affected zones are: Africa/Bangui, Africa/Brazzaville, + Africa/Douala, Africa/Kinshasa, Africa/Libreville, Africa/Luanda, + Africa/Malabo, Africa/Niamey, and Africa/Porto-Novo. + + Changes affecting code + + Unless NETBSD_INSPIRED is defined to 0, the tz library now + supplies functions for creating and using objects that represent + timezones. The new functions are tzalloc, tzfree, localtime_rz, + mktime_z, and (if STD_INSPIRED is also defined) posix2time_z and + time2posix_z. They are intended for performance: for example, + localtime_rz (unlike localtime_r) is trivially thread-safe without + locking. (Thanks to Christos Zoulas for proposing NetBSD-inspired + functions, and to Alan Barrett and Jonathan Lennox for helping to + debug the change.) + + zdump now builds with the tz library unless USE_LTZ is defined to 0. + This lets zdump use tz features even if the system library lacks them.
+ To build zdump with the system library, use 'make CFLAGS=-DUSE_LTZ=0 + TZDOBJS=zdump.o CHECK_TIME_T_ALTERNATIVES='. + + zdump now uses localtime_rz if available, as it's significantly faster, + and it can help zdump better diagnose invalid timezone names. + Define HAVE_LOCALTIME_RZ to 0 to suppress this. HAVE_LOCALTIME_RZ + defaults to 1 if NETBSD_INSPIRED && USE_LTZ. When localtime_rz is + not available, zdump now uses localtime_r and tzset if available, + as this is a bit cleaner and faster than plain localtime. Compile + with -DHAVE_LOCALTIME_R=0 and/or -DHAVE_TZSET=0 if your system + lacks these two functions. + + If THREAD_SAFE is defined to 1, the tz library is now thread-safe. + Although not needed for tz's own applications, which are single-threaded, + this supports POSIX better if the tz library is used in multithreaded apps. + + Some crashes have been fixed when zdump or the tz library is given + invalid or outlandish input. + + The tz library no longer mishandles leap seconds on platforms with + unsigned time_t in timezones that lack ordinary transitions after 1970. + + The tz code now attempts to infer TM_GMTOFF and TM_ZONE if not + already defined, to make it easier to configure on common platforms. + Define NO_TM_GMTOFF and NO_TM_ZONE to suppress this. + + Unless the new macro UNINIT_TRAP is defined to 1, the tz code now + assumes that reading uninitialized memory yields garbage values + but does not cause other problems such as traps. + + If TM_GMTOFF is defined and UNINIT_TRAP is 0, mktime is now + more likely to guess right for ambiguous timestamps near + transitions where tm_isdst does not change. + + If HAVE_STRFTIME_L is defined to 1, the tz library now defines + strftime_l for compatibility with recent versions of POSIX. + Only the C locale is supported, though. HAVE_STRFTIME_L defaults + to 1 on recent POSIX versions, and to 0 otherwise. + + tzselect -c now uses a hybrid distance measure that works better + in Africa. 
(Thanks to Alan Barrett for noting the problem.) + + The C source code now ports to NetBSD when GCC_DEBUG_FLAGS is used, + or when time_tz is defined. + + When HAVE_UTMPX_H is set the 'date' command now builds on systems + whose <utmpx.h> file does not define WTMPX_FILE, and when setting + the date it updates the wtmpx file if _PATH_WTMPX is defined. + This affects GNU/Linux and similar systems. + + For easier maintenance later, some C code has been simplified, + some lint has been removed, and the code has been tweaked so that + plain 'make' is more likely to work. + + The C type 'bool' is now used for boolean values, instead of 'int'. + + The long-obsolete LOCALE_HOME code has been removed. + + The long-obsolete 'gtime' function has been removed. + + Changes affecting build procedure + + 'zdump' no longer links in ialloc.o, as it's not needed. + + 'make check_time_t_alternatives' no longer assumes GNU diff. + + Changes affecting distribution tarballs + + The files checktab.awk and zoneinfo2tdf.pl are now distributed in + the tzdata tarball instead of the tzcode tarball, since they help + maintain the data. The NEWS and Theory files are now also + distributed in the tzdata tarball, as they're relevant for data. + (Thanks to Alan Barrett for pointing this out.) Also, the + leapseconds.awk file is no longer distributed in the tzcode + tarball, since it belongs in the tzdata tarball (where 2014f + inadvertently also distributed it). + + Changes affecting documentation and commentary + + A new file CONTRIBUTING is distributed. (Thanks to Tim Parenti for + suggesting a CONTRIBUTING file, and to Tony Finch and Walter Harms + for debugging it.) + + The man pages have been updated to use function prototypes, + to document thread-safe variants like localtime_r, and to document + the NetBSD-inspired functions tzalloc, tzfree, localtime_rz, and + mktime_z. + + The fields in Link lines have been renamed to be more descriptive + and more like the parameters of 'ln'.
LINK-FROM has become TARGET, + and LINK-TO has become LINK-NAME. + + tz-link.htm mentions the IETF's tzdist working group; Windows + Runtime etc. (thanks to Matt Johnson); and HP-UX's tztab. + + Some broken URLs have been fixed in the commentary. (Thanks to + Lester Caine.) + + Commentary about Philippines DST has been updated, and commentary + on pre-1970 time in India has been added. + + +Release 2014f - 2014-08-05 17:42:36 -0700 + + Changes affecting future timestamps + + Russia will subtract an hour from most of its time zones on 2014-10-26 + at 02:00 local time. (Thanks to Alexander Krivenyshev.) + There are a few exceptions: Magadan Oblast (Asia/Magadan) and Zabaykalsky + Krai are subtracting two hours; conversely, Chukotka Autonomous Okrug + (Asia/Anadyr), Kamchatka Krai (Asia/Kamchatka), Kemerovo Oblast + (Asia/Novokuznetsk), and the Samara Oblast and the Udmurt Republic + (Europe/Samara) are not changing their clocks. The changed zones are + Europe/Kaliningrad, Europe/Moscow, Europe/Simferopol, Europe/Volgograd, + Asia/Yekaterinburg, Asia/Omsk, Asia/Novosibirsk, Asia/Krasnoyarsk, + Asia/Irkutsk, Asia/Yakutsk, Asia/Vladivostok, Asia/Khandyga, + Asia/Sakhalin, and Asia/Ust-Nera; Asia/Magadan will have two hours + subtracted; and Asia/Novokuznetsk's time zone abbreviation is affected, + but not its UTC offset. Two zones are added: Asia/Chita (split + from Asia/Yakutsk, and also with two hours subtracted) and + Asia/Srednekolymsk (split from Asia/Magadan, but with only one hour + subtracted). (Thanks to Tim Parenti for much of the above.) + + Changes affecting time zone abbreviations + + Australian eastern time zone abbreviations are now AEST/AEDT not EST, + and similarly for the other Australian zones. That is, for eastern + standard and daylight saving time the abbreviations are AEST and AEDT + instead of the former EST for both; similarly, ACST/ACDT, ACWST/ACWDT, + and AWST/AWDT are now used instead of the former CST, CWST, and WST. 
+ This change does not affect UT offsets, only time zone abbreviations. + (Thanks to Rich Tibbett and many others.) + + Asia/Novokuznetsk shifts from NOVT to KRAT (remaining on UT +07) + effective 2014-10-26 at 02:00 local time. + + The time zone abbreviation for Xinjiang Time (observed in Ürümqi) + has been changed from URUT to XJT. (Thanks to Luther Ma.) + + Prefer MSK/MSD for Moscow time in Russia, even in other cities. + Similarly, prefer EET/EEST for eastern European time in Russia. + + Change time zone abbreviations in (western) Samoa to use "ST" and + "DT" suffixes, as this is more likely to match common practice. + Prefix "W" to (western) Samoa time when its standard-time offset + disagrees with that of American Samoa. + + America/Metlakatla now uses PST, not MeST, to abbreviate its time zone. + + Time zone abbreviations have been updated for Japan's two time + zones used 1896-1937. JWST now stands for Western Standard + Time, and JCST for Central Standard Time (formerly this was CJT). + These abbreviations are now used for time in Korea, Taiwan, + and Sakhalin while controlled by Japan. + + Changes affecting past timestamps + + China's five zones have been simplified to two, since the post-1970 + differences in the other three seem to have been imaginary. The + zones Asia/Harbin, Asia/Chongqing, and Asia/Kashgar have been + removed; backwards-compatibility links still work, albeit with + different behaviors for timestamps before May 1980. Asia/Urumqi's + 1980 transition to UT +08 has been removed, so that it is now at + +06 and not +08. (Thanks to Luther Ma and to Alois Treindl; + Treindl sent helpful translations of two papers by Guo Qingsheng.) + + Some zones have been turned into links, when they differed from existing + zones only for older UT offsets where data entries were likely invented. + These changes affect UT offsets in pre-1970 timestamps only. This is + similar to the change in release 2013e, except this time for western + Africa. 
The affected zones are: Africa/Bamako, Africa/Banjul, + Africa/Conakry, Africa/Dakar, Africa/Freetown, Africa/Lome, + Africa/Nouakchott, Africa/Ouagadougou, Africa/Sao_Tome, and + Atlantic/St_Helena. This also affects the backwards-compatibility + link Africa/Timbuktu. (Thanks to Alan Barrett, Stephen Colebourne, + Tim Parenti, and David Patte for reporting problems in earlier + versions of this change.) + + Asia/Shanghai's pre-standard-time UT offset has been changed from + 8:05:57 to 8:05:43, the location of Xujiahui Observatory. Its + transition to standard time has been changed from 1928 to 1901. + + Asia/Taipei switched to JWST on 1896-01-01, then to JST on 1937-10-01, + then to CST on 1945-09-21 at 01:00, and did not observe DST in 1945. + In 1946 it observed DST from 05-15 through 09-30; in 1947 + from 04-15 through 10-31; and in 1979 from 07-01 through 09-30. + (Thanks to Yu-Cheng Chuang.) + + Asia/Riyadh's transition to standard time is now 1947-03-14, not 1950. + + Europe/Helsinki's 1942 fall-back transition was 10-04 at 01:00, not + 10-03 at 00:00. (Thanks to Konstantin Hyppönen.) + + Pacific/Pago_Pago has been changed from UT -11:30 to -11 for the + period from 1911 to 1950. + + Pacific/Chatham has been changed to New Zealand standard time plus + 45 minutes for the period before 1957, reflecting a 1956 remark in + the New Zealand parliament. + + Europe/Budapest has several pre-1946 corrections: in 1918 the transition + out of DST was on 09-16, not 09-29; in 1919 it was on 11-24, not 09-15; in + 1945 it was on 11-01, not 11-03; in 1941 the transition to DST was 04-08 + not 04-06 at 02:00; and there was no DST in 1920. + + Africa/Accra is now assumed to have observed DST from 1920 through 1935. + + Time in Russia before 1927 or so has been corrected by a few seconds in + the following zones: Europe/Moscow, Asia/Irkutsk, Asia/Tbilisi, + Asia/Tashkent, Asia/Vladivostok, Asia/Yekaterinburg, Europe/Helsinki, and + Europe/Riga. 
Also, Moscow's location has been changed to its Kilometer 0 + point. (Thanks to Vladimir Karpinsky for the Moscow changes.) + + Changes affecting data format + + A new file 'zone1970.tab' supersedes 'zone.tab' in the installed data. + The new file's extended format allows multiple country codes per zone. + The older file is still installed but is deprecated; its format is + not changing and it will still be distributed for a while, but new + applications should use the new file. + + The new file format simplifies maintenance of obscure locations. + To test this, it adds coverage for the Crozet Islands and the + Scattered Islands. (Thanks to Tobias Conradi and Antoine Leca.) + + The file 'iso3166.tab' is planned to switch from ASCII to UTF-8. + It is still ASCII now, but commentary about the switch has been added. + The new file 'zone1970.tab' already uses UTF-8. + + Changes affecting code + + 'localtime', 'mktime', etc. now use much less stack space if ALL_STATE + is defined. (Thanks to Elliott Hughes for reporting the problem.) + + 'zic' no longer mishandles input when ignoring case in locales that + are not compatible with English, e.g., unibyte Turkish locales when + compiled with HAVE_GETTEXT. + + Error diagnostics of 'zic' and 'yearistype' have been reworded so that + they no longer use ASCII '-' as if it were a dash. + + 'zic' now rejects output file names that contain '.' or '..' components. + (Thanks to Tim Parenti for reporting the problem.) + + 'zic -v' now warns about output file names that do not follow + POSIX rules, or that contain a digit or '.'. (Thanks to Arthur + David Olson for starting the ball rolling on this.) + + Some lint has been removed when using GCC_DEBUG_FLAGS with GCC 4.9.0. + + Changes affecting build procedure + + 'zic' no longer links in localtime.o and asctime.o, as they're not needed. + (Thanks to John Cochran.) 
+ + Changes affecting documentation and commentary + + The 'Theory' file documents legacy names, the longstanding + exceptions to the POSIX-inspired file name rules. + + The 'zic' documentation clarifies the role of time types when + interpreting dates. (Thanks to Arthur David Olson.) + + Documentation and commentary now prefer UTF-8 to US-ASCII, + allowing the use of proper accents in foreign words and names. + Code and data have not changed because of this. (Thanks to + Garrett Wollman, Ian Abbott, and Guy Harris for helping to debug + this.) + + Non-HTML documentation and commentary now use plain-text URLs instead of + HTML insertions, and are more consistent about bracketing URLs when they + are not already surrounded by white space. (Thanks to suggestions by + Steffen Nurpmeso.) + + There is new commentary about Xujiahui Observatory, the five time-zone + project in China from 1918 to 1949, timekeeping in Japanese-occupied + Shanghai, and Tibet Time in the 1950s. The sharp-eyed can spot the + warlord Jin Shuren in the data. + + Commentary about the coverage of each Russian zone has been standardized. + (Thanks to Tim Parenti.) + + There is new commentary about contemporary timekeeping in Ethiopia. + + Obsolete comments about a 2007 proposal for DST in Kuwait have been removed. + + There is new commentary about time in Poland in 1919. + + Proper credit has been given to DST inventor George Vernon Hudson. + + Commentary about time in Metlakatla, AK and Resolute, NU has been + improved, with a new source for the former. + + In zone.tab, Pacific/Easter no longer mentions Salas y Gómez, as it + is uninhabited. + + Commentary about permanent Antarctic bases has been updated. + + Several typos have been corrected. (Thanks to Tim Parenti for + contributing some of these fixes.) + + tz-link.htm now mentions the JavaScript libraries Moment Timezone, + TimezoneJS.Date, Walltime-js, and Timezone. (Thanks to a heads-up + from Matt Johnson.)
Also, it mentions the Go 'latlong' package. + (Thanks to a heads-up from Dirkjan Ochtman.) + + The files usno1988, usno1989, usno1989a, usno1995, usno1997, and usno1998 + have been removed. These obsolescent US Naval Observatory entries were no + longer helpful for maintenance. (Thanks to Tim Parenti for the suggestion.) + + +Release 2014e - 2014-06-12 21:53:52 -0700 + + Changes affecting near-future timestamps + + Egypt's 2014 Ramadan-based transitions are June 26 and July 31 at 24:00. + (Thanks to Imed Chihi.) Guess that from 2015 on Egypt will temporarily + switch to standard time at 24:00 the last Thursday before Ramadan, and + back to DST at 00:00 the first Friday after Ramadan. + + Similarly, Morocco's are June 28 at 03:00 and August 2 at 02:00. (Thanks + to Milamber Space Network.) Guess that from 2015 on Morocco will + temporarily switch to standard time at 03:00 the last Saturday before + Ramadan, and back to DST at 02:00 the first Saturday after Ramadan. + + Changes affecting past timestamps + + The abbreviation "MSM" (Moscow Midsummer Time) is now used instead of + "MSD" for Moscow's double daylight time in summer 1921. Also, a typo + "VLASST" has been repaired to be "VLAST" for Vladivostok summer time + in 1991. (Thanks to Hank W. for reporting the problems.) + + Changes affecting commentary + + tz-link.htm now cites RFC 7265 for jCal, mentions PTP and the + draft CalDAV extension, updates URLs for TSP, TZInfo, IATA, and + removes stale pointers to World Time Explorer and WORLDTIME. + + +Release 2014d - 2014-05-27 21:34:40 -0700 + + Changes affecting code + + zic no longer generates files containing timestamps before the Big Bang. + This works around GNOME glib bug 878 + + (Thanks to Leonardo Chiquitto for reporting the bug, and to + Arthur David Olson and James Cloos for suggesting improvements to the fix.) + + Changes affecting documentation + + tz-link.htm now mentions GNOME. 
+ + +Release 2014c - 2014-05-13 07:44:13 -0700 + + Changes affecting near-future timestamps + + Egypt observes DST starting 2014-05-15 at 24:00. + (Thanks to Ahmad El-Dardiry and Gunther Vermier.) + Details have not been announced, except that DST will not be observed + during Ramadan. Guess that DST will stop during the same Ramadan dates as + Morocco, and that Egypt's future spring and fall transitions will be the + same as 2010 when it last observed DST, namely April's last Friday at + 00:00 to September's last Thursday at 23:00 standard time. Also, guess + that Ramadan transitions will be at 00:00 standard time. + + Changes affecting code + + zic now generates transitions for minimum time values, eliminating guesswork + when handling low-valued timestamps. (Thanks to Arthur David Olson.) + + Port to Cygwin sans glibc. (Thanks to Arthur David Olson.) + + Changes affecting commentary and documentation + + Remove now-confusing comment about Jordan. (Thanks to Oleksii Nochovnyi.) + + +Release 2014b - 2014-03-24 21:28:50 -0700 + + Changes affecting near-future timestamps + + Crimea switches to Moscow time on 2014-03-30 at 02:00 local time. + (Thanks to Alexander Krivenyshev.) Move its zone.tab entry from UA to RU. + + New entry for Troll station, Antarctica. (Thanks to Paul-Inge Flakstad and + Bengt-Inge Larsson.) This is currently an approximation; a better version + will require the zic and localtime fixes mentioned below, and the plan is + to wait for a while until at least the zic fixes propagate. + + Changes affecting code + + 'zic' and 'localtime' no longer reject locations needing four transitions + per year for the foreseeable future. (Thanks to Andrew Main (Zefram).) + Also, 'zic' avoids some unlikely failures due to integer overflow. + + Changes affecting build procedure + + 'make check' now detects Rule lines defined but never used. + The NZAQ rules, an instance of this problem, have been removed. 
+ + Changes affecting commentary and documentation + + Fix Tuesday/Thursday typo in description of time in Israel. + (Thanks to Bert Katz via Pavel Kharitonov and Mike Frysinger.) + + Microsoft Windows 8.1 doesn't support tz database names. (Thanks + to Donald MacQueen.) Instead, the Microsoft Windows Store app + library supports them. + + Add comments about Johnston Island time in the 1960s. + (Thanks to Lyle McElhaney.) + + Morocco's 2014 DST start will be as predicted. + (Thanks to Sebastien Willemijns.) + + +Release 2014a - 2014-03-07 23:30:29 -0800 + + Changes affecting near-future timestamps + + Turkey begins DST on 2014-03-31, not 03-30. (Thanks to Faruk Pasin for + the heads-up, and to Tim Parenti for simplifying the update.) + + Changes affecting past timestamps + + Fiji ended DST on 2014-01-19 at 02:00, not the previously scheduled 03:00. + (Thanks to Steffen Thorsen.) + + Ukraine switched from Moscow to Eastern European time on 1990-07-01 + (not 1992-01-01), and observed DST during the entire next winter. + (Thanks to Vladimir in Moscow via Alois Treindl.) + + In 1988 Israel observed DST from 04-10 to 09-04, not 04-09 to 09-03. + (Thanks to Avigdor Finkelstein.) + + Changes affecting code + + An uninitialized-storage bug in 'localtime' has been fixed. + (Thanks to Logan Chien.) + + Changes affecting the build procedure + + The settings for 'make check_web' now default to Ubuntu 13.10. + + Changes affecting commentary and documentation + + The boundary of the US Pacific time zone is given more accurately. + (Thanks to Alan Mintz.) + + Chile's 2014 DST will be as predicted. (Thanks to José Miguel Garrido.) + + Paraguay's 2014 DST will be as predicted. (Thanks to Carlos Raúl Perasso.) + + Better descriptions of countries with same time zone history as + Trinidad and Tobago since 1970. (Thanks to Alan Barrett for suggestion.) + + Several changes affect tz-link.htm, the main web page.
+ + Mention Time.is (thanks to Even Scharning) and WX-now (thanks to + David Braverman). + + Mention xCal (Internet RFC 6321) and jCal. + + Microsoft has some support for tz database names. + + CLDR data formats include both XML and JSON. + + Mention Maggiolo's map of solar vs standard time. + (Thanks to Arthur David Olson.) + + Mention TZ4Net. (Thanks to Matt Johnson.) + + Mention the timezone-olson Haskell package. + + Mention zeitverschiebung.net. (Thanks to Martin Jäger.) + + Remove moribund links to daylight-savings-time.info and to + Simple Timer + Clocks. + + Update two links. (Thanks to Oscar van Vlijmen.) + + Fix some formatting glitches, e.g., remove random newlines from + abbr elements' title attributes. + + +Release 2013i - 2013-12-17 07:25:23 -0800 + + Changes affecting near-future timestamps: + + Jordan switches back to standard time at 00:00 on December 20, 2013. + The 2006-2011 transition schedule is planned to resume in 2014. + (Thanks to Steffen Thorsen.) + + Changes affecting past timestamps: + + In 2004, Cuba began DST on March 28, not April 4. + (Thanks to Steffen Thorsen.) + + Changes affecting code + + The compile-time flag NOSOLAR has been removed, as nowadays the + benefit of slightly shrinking runtime table size is outweighed by the + cost of disallowing potential future updates that exceed old limits. + + Changes affecting documentation and commentary + + The files solar87, solar88, and solar89 are no longer distributed. + They were a negative experiment - that is, a demonstration that + tz data can represent solar time only with some difficulty and error. + Their presence in the distribution caused confusion, as Riyadh + civil time was generally not solar time in those years. + + tz-link.htm now mentions Noda Time. (Thanks to Matt Johnson.) + + +Release 2013h - 2013-10-25 15:32:32 -0700 + + Changes affecting current and future timestamps: + + Libya has switched its UT offset back to +02 without DST, instead + of +01 with DST. 
(Thanks to Even Scharning.) + + Western Sahara (Africa/El_Aaiun) uses Morocco's DST rules. + (Thanks to Gwillim Law.) + + Changes affecting future timestamps: + + Acre and (we guess) western Amazonas will switch from UT -04 to -05 + on 2013-11-10. This affects America/Rio_Branco and America/Eirunepe. + (Thanks to Steffen Thorsen.) + + Add entries for DST transitions in Morocco in the year 2038. + This avoids some year-2038 glitches introduced in 2013g. + (Thanks to Yoshito Umaoka for reporting the problem.) + + Changes affecting API + + The 'tzselect' command no longer requires the 'select' command, + and should now work with /bin/sh on more platforms. It also works + around a bug in BusyBox awk before version 1.21.0. (Thanks to + Patrick 'P. J.' McDermott and Alan Barrett.) + + Changes affecting code + + Fix localtime overflow bugs with 32-bit unsigned time_t. + + zdump no longer assumes sscanf returns maximal values on overflow. + + Changes affecting the build procedure + + The builder can specify which programs to use, if any, instead of + 'ar' and 'ranlib', and libtz.a is now built locally before being + installed. (Thanks to Michael Forney.) + + A dependency typo in the 'zdump' rule has been fixed. + (Thanks to Andrew Paprocki.) + + The Makefile has been simplified by assuming that 'mkdir -p' and 'cp -f' + work as specified by POSIX.2-1992 or later; this is portable nowadays. + + 'make clean' no longer removes 'leapseconds', since it's + host-independent and is part of the distribution. + + The unused makefile macros TZCSRCS, TZDSRCS, DATESRCS have been removed. + + Changes affecting documentation and commentary + + tz-link.htm now mentions TC TIMEZONE's draft time zone service protocol + (thanks to Mike Douglass) and TimezoneJS.Date (thanks to Jim Fehrle). + + Update URLs in tz-link page. Add URLs for Microsoft Windows, since + 8.1 introduces tz support. Remove URLs for Tru64 and UnixWare (no + longer maintained) and for old advisories. SOFA now does C. 
+ +Release 2013g - 2013-09-30 21:08:26 -0700 + + Changes affecting current and near-future timestamps + + Morocco now observes DST from the last Sunday in March to the last + Sunday in October, not April to September respectively. (Thanks + to Steffen Thorsen.) + + Changes affecting 'zic' + + 'zic' now runs on platforms that lack both hard links and symlinks. + (Thanks to Theo Veenker for reporting the problem, for MinGW.) + Also, fix some bugs on platforms that lack hard links but have symlinks. + + 'zic -v' again warns that Asia/Tehran has no POSIX environment variable + to predict the far future, fixing a bug introduced in 2013e. + + Changes affecting the build procedure + + The 'leapseconds' file is again put into the tzdata tarball. + Also, 'leapseconds.awk', so tzdata is self-contained. (Thanks to + Matt Burgess and Ian Abbott.) The timestamps of these and other + dependent files in tarballs are adjusted more consistently. + + Changes affecting documentation and commentary + + The README file is now part of the data tarball as well as the code. + It now states that files are public domain unless otherwise specified. + (Thanks to Andrew Main (Zefram) for asking for clarifications.) + Its details about the 1989 release moved to a place of honor near + the end of NEWS. + + +Release 2013f - 2013-09-24 23:37:36 -0700 + + Changes affecting near-future timestamps + + Tocantins will very likely not observe DST starting this spring. + (Thanks to Steffen Thorsen.) + + Jordan will likely stay at UT +03 indefinitely, and will not fall + back this fall. + + Palestine will fall back at 00:00, not 01:00. (Thanks to Steffen Thorsen.) + + Changes affecting API + + The types of the global variables 'timezone' and 'altzone' (if present) + have been changed back to 'long'. This is required for 'timezone' + by POSIX, and for 'altzone' by common practice, e.g., Solaris 11. 
+ These variables were originally 'long' in the tz code, but were + mistakenly changed to 'time_t' in 1987; nobody reported the + incompatibility until now. The difference matters on x32, where + 'long' is 32 bits and 'time_t' is 64. (Thanks to Elliott Hughes.) + + Changes affecting the build procedure + + Avoid long strings in leapseconds.awk to work around a mawk bug. + (Thanks to Cyril Baurand.) + + Changes affecting documentation and commentary + + New file 'NEWS' that contains release notes like this one. + + Paraguay's law does not specify DST transition time; 00:00 is customary. + (Thanks to Waldemar Villamayor-Venialbo.) + + Minor capitalization fixes. + + Changes affecting version-control only + + The experimental GitHub repository now contains annotated and + signed tags for recent releases, e.g., '2013e' for Release 2013e. + Releases are tagged starting with 2012e; earlier releases were + done differently, and tags would either not have a simple name or + not exactly match what was released. + + 'make set-timestamps' is now simpler and a bit more portable. + + +Release 2013e - 2013-09-19 23:50:04 -0700 + + Changes affecting near-future timestamps + + This year Fiji will start DST on October 27, not October 20. + (Thanks to David Wheeler for the heads-up.) For now, guess that + Fiji will continue to spring forward the Sunday before the fourth + Monday in October. + + Changes affecting current and future time zone abbreviations + + Use WIB/WITA/WIT rather than WIT/CIT/EIT for alphabetic Indonesian + time zone abbreviations since 1932. (Thanks to George Ziegler, + Priyadi Iman Nurcahyo, Zakaria, Jason Grimes, Martin Pitt, and + Benny Lin.) This affects Asia/Dili, Asia/Jakarta, Asia/Jayapura, + Asia/Makassar, and Asia/Pontianak. + + Use ART (UT -03, standard time), rather than WARST (also -03, but + daylight saving time) for San Luis, Argentina since 2009. 
+ + Changes affecting Godthåb timestamps after 2037 if version mismatch + + Allow POSIX-like TZ strings where the transition time's hour can + range from -167 through 167, instead of the POSIX-required 0 + through 24. E.g., TZ='FJT-12FJST,M10.3.1/146,M1.3.4/75' for the + new Fiji rules. This is a more compact way to represent + far-future timestamps for America/Godthab, America/Santiago, + Antarctica/Palmer, Asia/Gaza, Asia/Hebron, Asia/Jerusalem, + Pacific/Easter, and Pacific/Fiji. Other zones are unaffected by + this change. (Derived from a suggestion by Arthur David Olson.) + + Allow POSIX-like TZ strings where daylight saving time is in + effect all year. E.g., TZ='WART4WARST,J1/0,J365/25' for Western + Argentina Summer Time all year. This supports a more compact way + to represent the 2013d data for America/Argentina/San_Luis. + Because of the change for San Luis noted above this change does not + affect the current data. (Thanks to Andrew Main (Zefram) for + suggestions that improved this change.) + + Where these two TZ changes take effect, there is a minor extension + to the tz file format in that it allows new values for the + embedded TZ-format string, and the tz file format version number + has therefore been increased from 2 to 3 as a precaution. + Version-2-based client code should continue to work as before for + all timestamps before 2038. Existing version-2-based client code + (tzcode, GNU/Linux, Solaris) has been tested on version-3-format + files, and typically works in practice even for timestamps after + 2037; the only known exception is America/Godthab. + + Changes affecting timestamps before 1970 + + Pacific/Johnston is now a link to Pacific/Honolulu. This corrects + some errors before 1947. + + Some zones have been turned into links, when they differ from existing + zones only in older data entries that were likely invented or that + differ only in LMT or transitions from LMT. These changes affect + only timestamps before 1943. 
The affected zones are: + Africa/Juba, America/Anguilla, America/Aruba, America/Dominica, + America/Grenada, America/Guadeloupe, America/Marigot, + America/Montserrat, America/St_Barthelemy, America/St_Kitts, + America/St_Lucia, America/St_Thomas, America/St_Vincent, + America/Tortola, and Europe/Vaduz. (Thanks to Alois Treindl for + confirming that the old Europe/Vaduz zone was wrong and the new + link is better for WWII-era times.) + + Change Kingston Mean Time from -5:07:12 to -5:07:11. This affects + America/Cayman, America/Jamaica and America/Grand_Turk timestamps + from 1890 to 1912. + + Change the UT offset of Bern Mean Time from 0:29:44 to 0:29:46. + This affects Europe/Zurich timestamps from 1853 to 1894. (Thanks + to Alois Treindl.) + + Change the date of the circa-1850 Zurich transition from 1849-09-12 + to 1853-07-16, overriding Shanks with data from Messerli about + postal and telegraph time in Switzerland. + + Changes affecting time zone abbreviations before 1970 + + For Asia/Jakarta, use BMT (not JMT) for mean time from 1923 to 1932, + as Jakarta was called Batavia back then. + + Changes affecting API + + The 'zic' command now outputs a dummy transition when far-future + data can't be summarized using a TZ string, and uses a 402-year + window rather than a 400-year window. For the current data, this + affects only the Asia/Tehran file. It does not affect any of the + timestamps that this file represents, so zdump outputs the same + information as before. (Thanks to Andrew Main (Zefram).) + + The 'date' command has a new '-r' option, which lets you specify + the integer time to display, a la FreeBSD. + + The 'tzselect' command has two new options '-c' and '-n', which let you + select a zone based on latitude and longitude. + + The 'zic' command's '-v' option now warns about constructs that + require the new version-3 binary file format. (Thanks to Arthur + David Olson for the suggestion.) + + Support for floating-point time_t has been removed.
+ It was always dicey, and POSIX no longer requires it. + (Thanks to Eric Blake for suggesting to the POSIX committee to + remove it, and thanks to Alan Barrett, Clive D.W. Feather, Andy + Heninger, Arthur David Olson, and Alois Treindl, for reporting + bugs and elucidating some of the corners of the old floating-point + implementation.) + + The signatures of 'offtime', 'timeoff', and 'gtime' have been + changed back to the old practice of using 'long' to represent UT + offsets. This had been inadvertently and mistakenly changed to + 'int_fast32_t'. (Thanks to Christos Zoulas.) + + The code avoids undefined behavior on integer overflow in some + more places, including gmtime, localtime, mktime and zdump. + + Changes affecting the zdump utility + + zdump now outputs "UT" when referring to Universal Time, not "UTC". + "UTC" does not make sense for timestamps that predate the introduction + of UTC, whereas "UT", a more generic term, does. (Thanks to Steve Allen + for clarifying UT vs UTC.) + + Data changes affecting behavior of tzselect and similar programs + + Country code BQ is now called the more common name "Caribbean Netherlands" + rather than the more official "Bonaire, St Eustatius & Saba". + + Remove from zone.tab the names America/Montreal, America/Shiprock, + and Antarctica/South_Pole, as they are equivalent to existing + same-country-code zones for post-1970 timestamps. The data entries for + these names are unchanged, so the names continue to work as before. + + Changes affecting code internals + + zic -c now runs way faster on 64-bit hosts when given large numbers. + + zic now uses vfprintf to avoid allocating and freeing some memory. + + tzselect now computes the list of continents from the data, + rather than have it hard-coded. + + Minor changes pacify GCC 4.7.3 and GCC 4.8.1. 
+ + Changes affecting the build procedure + + The 'leapseconds' file is now generated automatically from a + new file 'leap-seconds.list', which is a copy of + + A new source file 'leapseconds.awk' implements this. + The goal is simplification of the future maintenance of 'leapseconds'. + + When building the 'posix' or 'right' subdirectories, if the + subdirectory would be a copy of the default subdirectory, it is + now made a symbolic link if that is supported. This saves about + 2 MB of file system space. + + The links America/Shiprock and Antarctica/South_Pole have been + moved to the 'backward' file. This affects only nondefault builds + that omit 'backward'. + + Changes affecting version-control only + + .gitignore now ignores 'date'. + + Changes affecting documentation and commentary + + Changes to the 'tzfile' man page + + It now mentions that the binary file format may be extended in + future versions by appending data. + + It now refers to the 'zdump' and 'zic' man pages. + + Changes to the 'zic' man page + + It lists conditions that elicit a warning with '-v'. + + It says that the behavior is unspecified when duplicate names + are given, or if the source of one link is the target of another. + + Its examples are updated to match the latest data. + + The definition of white space has been clarified slightly. + (Thanks to Michael Deckers.) + + Changes to the 'Theory' file + + There is a new section about the accuracy of the tz database, + describing the many ways that errors can creep in, and + explaining why so many of the pre-1970 timestamps are wrong or + misleading (thanks to Steve Allen, Lester Caine, and Garrett + Wollman for discussions that contributed to this). + + The 'Theory' file describes LMT better (this follows a + suggestion by Guy Harris). + + It refers to the 2013 edition of POSIX rather than the 2004 edition. 
+ + It's mentioned that excluding 'backward' should not affect the + other data, and it suggests at least one zone.tab name per + inhabited country (thanks to Stephen Colebourne). + + Some longstanding restrictions on names are documented, e.g., + 'America/New_York' precludes 'America/New_York/Bronx'. + + It gives more reasons for the 1970 cutoff. + + It now mentions which time_t variants are supported, such as + signed integer time_t. (Thanks to Paul Goyette for reporting + typos in an experimental version of this change.) + + (Thanks to Philip Newton for correcting typos in these changes.) + + Documentation and commentary are more careful to distinguish UT in + general from UTC in particular. (Thanks to Steve Allen.) + + Add a better source for the Zurich 1894 transition. + (Thanks to Pierre-Yves Berger.) + + Update shapefile citations in tz-link.htm. (Thanks to Guy Harris.) + + +Release 2013d - 2013-07-05 07:38:01 -0700 + + Changes affecting future timestamps: + + Morocco's midsummer transitions this year are July 7 and August 10, + not July 9 and August 8. (Thanks to Andrew Paprocki.) + + Israel now falls back on the last Sunday of October. + (Thanks to Ephraim Silverberg.) + + Changes affecting past timestamps: + + Specify Jerusalem's location more precisely; this changes the pre-1880 + times by 2 s. + + Changes affecting metadata only: + + Fix typos in the entries for country codes BQ and SX. + + Changes affecting code: + + Rework the code to fix a bug with handling Australia/Macquarie on + 32-bit hosts (thanks to Arthur David Olson). + + Port to platforms like NetBSD, where time_t can be wider than long. + + Add support for testing time_t types other than the system's. + Run 'make check_time_t_alternatives' to try this out. + Currently, the tests fail for unsigned time_t; + this should get fixed at some point. + + Changes affecting documentation and commentary: + + Deemphasize the significance of national borders. + + Update the zdump man page.
+ + Remove obsolete NOID comment (thanks to Denis Excoffier). + + Update several URLs and comments in the web pages. + + Spelling fixes (thanks to Kevin Lyda and Jonathan Leffler). + + Update URL for CLDR Zone->Tzid table (thanks to Yoshito Umaoka). + + +Release 2013c - 2013-04-19 16:17:40 -0700 + + Changes affecting current and future timestamps: + + Palestine observed DST starting March 29, 2013. (Thanks to + Steffen Thorsen.) From 2013 on, Gaza and Hebron both observe DST, + with the predicted rules being the last Thursday in March at 24:00 + to the first Friday on or after September 21 at 01:00. + + Assume that the recent change to Paraguay's DST rules is permanent, + by moving the end of DST to the 4th Sunday in March every year. + (Thanks to Carlos Raúl Perasso.) + + Changes affecting past timestamps: + + Fix some historical data for Palestine to agree with that of + timeanddate.com, as follows: + + The spring 2008 change in Gaza and Hebron was on 00:00 Mar 28, not + 00:00 Apr 1. + + The fall 2009 change in Gaza and Hebron on Sep 4 was at 01:00, not + 02:00. + + The spring 2010 change in Hebron was 00:00 Mar 26, not 00:01 Mar 27. + + The spring 2011 change in Gaza was 00:01 Apr 1, not 12:01 Apr 2. + + The spring 2011 change in Hebron on Apr 1 was at 00:01, not 12:01. + + The fall 2011 change in Hebron on Sep 30 was at 00:00, not 03:00. + + Fix times of habitation for Macquarie to agree with the Tasmania + Parks & Wildlife Service history, which indicates that permanent + habitation was 1899-1919 and 1948 on. + + Changes affecting metadata only: + + Macquarie Island is politically part of Australia, not Antarctica. + (Thanks to Tobias Conradi.) + + Sort Macquarie more consistently with other parts of Australia. + (Thanks to Tim Parenti.) + + +Release 2013b - 2013-03-10 22:33:40 -0700 + + Changes affecting current and future timestamps: + + Haiti uses US daylight-saving rules this year, and presumably future years. + This changes timestamps starting today.
(Thanks to Steffen Thorsen.) + + Paraguay will end DST on March 24 this year. + (Thanks to Steffen Thorsen.) For now, assume it's just this year. + + Morocco does not observe DST during Ramadan; + try to predict Ramadan in Morocco as best we can. + (Thanks to Erik Homoet for the heads-up.) + + Changes affecting commentary: + + Update URLs in tz-link page. Add URLs for webOS, BB10, iOS. + Update URL for Solaris. Mention Internet RFC 6557. + Update Internet RFCs 2445->5545, 2822->5322. + Switch from FTP to HTTP for Internet RFCs. + + +Release 2013a - 2013-02-27 09:20:35 -0800 + + Change affecting binary data format: + + The zone offset at the end of version-2-format zone files is now + allowed to be 24:00, as per POSIX.1-2008. (Thanks to Arthur David Olson.) + + Changes affecting current and future timestamps: + + Chile's 2013 rules, and we guess rules for 2014 and later, will be + the same as 2012, namely Apr Sun>=23 03:00 UTC to Sep Sun>=2 04:00 UTC. + (Thanks to Steffen Thorsen and Robert Elz.) + + New Zones Asia/Khandyga, Asia/Ust-Nera, Europe/Busingen. + (Thanks to Tobias Conradi and Arthur David Olson.) + + Many changes affect historical timestamps before 1940. + These were deduced from: Milne J. Civil time. Geogr J. 1899 + Feb;13(2):173-94 . + + Changes affecting the code: + + Fix zic bug that mishandled Egypt's 2010 changes (this also affected + the data). (Thanks to Arthur David Olson.) + + Fix localtime bug when time_t is unsigned and data files were generated + by a signed time_t system. (Thanks to Doug Bailey for reporting and + to Arthur David Olson for fixing.) + + Allow the email address for bug reports to be set by the packager. + The default is tz@iana.org, as before. (Thanks to Joseph S. Myers.) + + Update HTML checking to be compatible with Ubuntu 12.10. + + Check that files are a safe subset of ASCII. At some point we may + relax this requirement to a safe subset of UTF-8. 
Without the + check, some non-UTF-8 encodings were leaking into the distribution. + + Commentary changes: + + Restore a comment about copyright notices that was inadvertently deleted. + (Thanks to Arthur David Olson.) + + Improve the commentary about which districts observe what times + in Russia. (Thanks to Oscar van Vlijmen and Arthur David Olson.) + + Add web page links to tz.js. + + Add "Run by the Monkeys" to tz-art. (Thanks to Arthur David Olson.) + + +Release 2012j - 2012-11-12 18:34:49 -0800 + + Libya moved to CET this weekend, but with DST planned next year. + (Thanks to Even Scharning, Steffen Thorsen, and Tim Parenti.) + + Signatures now have the extension .asc, not .sign, as that's more + standard. (Thanks to Phil Pennock.) + + The output of 'zdump --version', and of 'zic --version', now + uses a format that is more typical for --version. + (Thanks to Joseph S. Myers.) + + The output of 'tzselect --help', 'zdump --help', and 'zic --help' + now uses tz@iana.org rather than the old elsie address. + + zic -v now complains about abbreviations that are less than 3 + or more than 6 characters, as per POSIX. Formerly, it checked + for abbreviations that were more than 3. + + 'make public' no longer puts its temporary directory under /tmp, + and uses the just-built zic rather than the system zic. + + Various fixes to documentation and commentary. + + +Release 2012i - 2012-11-03 12:57:09 -0700 + + Cuba switches from DST tomorrow at 01:00. (Thanks to Steffen Thorsen.) + + Linker flags can now be specified via LDFLAGS. + AWK now defaults to 'awk', not 'nawk'. + The shell in tzselect now defaults to /bin/bash, but this can + be overridden by specifying KSHELL. + The main web page now mentions the unofficial GitHub repository. + (Thanks to Mike Frysinger.) + + Tarball signatures can now be built by running 'make signatures'. + There are also new makefile rules 'tarballs', 'check_public', and + separate makefile rules for each tarball and signature file.
+ A few makefile rules are now more portable to strict POSIX. + + The main web page now lists the canonical IANA URL. + + +Release 2012h - 2012-10-26 22:49:10 -0700 + + Bahia no longer has DST. (Thanks to Kelley Cook.) + + Tocantins has DST. (Thanks to Rodrigo Severo.) + + Israel has new DST rules next year. (Thanks to Ephraim Silverberg.) + + Jordan stays on DST this winter. (Thanks to Steffen Thorsen.) + + Web page updates. + + More C modernization, except that at Arthur David Olson's suggestion + the instances of 'register' were kept. + + +Release 2012g - 2012-10-17 20:59:45 -0700 + + Samoa fall 2012 and later. (Thanks to Nicholas Pereira and Robert Elz.) + + Palestine fall 2012. (Thanks to Steffen Thorsen.) + + Assume C89. + + To attack the version-number problem, this release ships the file + 'Makefile' (which contains the release number) in both the tzcode and + the tzdata tarballs. The two Makefiles are identical, and should be + identical in any matching pair of tarballs, so it shouldn't matter + which order you extract the tarballs. Perhaps we can come up with a + better version-number scheme at some point; this scheme does have the + virtue of not adding more files. + + +Release 2012f - 2012-09-12 23:17:03 -0700 + + * australasia (Pacific/Fiji): Fiji DST is October 21 through January + 20 this year. (Thanks to Steffen Thorsen.) + + +Release 2012e - 2012-08-02 20:44:55 -0700 + + * australasia (Pacific/Fakaofo): Tokelau is UT +13, not +14. + (Thanks to Steffen Thorsen.) + + * Use a single version number for both code and data. + + * .gitignore: New file. + + * Remove trailing white space. + + +Release code2012c-data2012d - 2012-07-19 16:35:33 -0700 + + Changes for Morocco's timestamps, which take effect in a couple of + hours, along with infrastructure changes to accommodate how the tz + code and data are released on IANA. 
+ + +Release data2012c - 2012-03-27 12:17:25 -0400 + + africa + Summer time changes for Morocco (to start late April 2012) + + asia + Changes for 2012 for Gaza & the West Bank (Hebron) and Syria + + northamerica + Haiti following US/Canada rules for 2012 (and we're assuming, + for now anyway, for the future). + + +Release 2012b - 2012-03-02 12:29:15 +0700 + + There is just one change to tzcode2012b (compared with 2012a): + the Makefile that was accidentally included with 2012a has been + replaced with the version that should have been there, which is + identical with the previous version (from tzcode2011i). + + There are just two changes in tzdata2012b compared with 2012a. + + Most significantly, summer time in Cuba has been delayed 3 weeks + (now starts April 1 rather than March 11). Since Mar 11 (the old start + date, as listed in 2012a) is just a little over a week away, this + change is urgent. + + Less importantly, an excess tab in one of the changes in zone.tab + in 2012a has been removed. + + +Release 2012a - 2012-03-01 18:28:10 +0700 + + The changes in tzcode2012a (compared to the previous version, 2011i) + are entirely to the README and tz-art.htm and tz-link.htm files, if + none of those concern you, you can ignore the code update. The changes + reflect the changed addresses for the mailing list and the code and + data distribution points & methods (and a link to DateTime::TimeZone::Tzfile + has been added to tz-link.htm). + + In tzdata2012a (compared to the previous release, which was 2011n) + the major changes are: + Chile 2011/2012 and 2012/2013 summer time date adjustments. + Falkland Islands onto permanent summer time (we're assuming for the + foreseeable future, though 2012 is all we're fairly certain of.) + Armenia has abolished Summer Time. + Tokelau jumped the International Date Line back last December + (just the same as their near neighbour, Samoa). 
+ America/Creston is a new zone for a small area of British Columbia + There will be a leapsecond 2012-06-30 23:59:60 UTC. + + Other minor changes are: + Corrections to 1918 Canadian summer time end dates. + Updated URL for UK time zone history (in comments) + A few typos in Le Corre's list of free French place names (comments) + + +Release data2011n - 2011-10-30 14:57:54 +0700 + + There are three changes of note - most urgently, Cuba (America/Havana) + has extended summer time by two weeks, now to end on Nov 13, rather than + the (already past) Oct 30. Second, the Pridnestrovian Moldavian Republic + (Europe/Tiraspol) decided not to split from the rest of Moldova after + all, and consequently that zone has been removed (again) and reinstated + in the "backward" file as a link to Europe/Chisinau. And third, the + end date for Fiji's summer time this summer was moved forward from the + earlier planned Feb 26, to Jan 22. + + Apart from that, Moldova (MD) returns to a single entry in zone.tab + (and the incorrect syntax that was in the 2011m version of that file + is so fixed - it would have been fixed in a different way had this + change not happened - that's the "missing" sccs version id). + + +Release data2011m - 2011-10-24 21:42:16 +0700 + + In particular, the typos in comments in the data (2011-11-17 should have + been 2011-10-17 as Alan Barrett noted, and spelling of Tiraspol that + Tim Parenti noted) have been fixed, and the change for Ukraine has been + made in all 4 Ukrainian zones, rather than just Kiev (again, thanks to + Tim Parenti, and also Denys Gavrysh) + + In addition, I added Europe/Tiraspol to zone.tab. + + This time, all the files have new version numbers... (including the files + otherwise unchanged in 2011m that were changed in 2011l but didn't get new + version numbers there...) 
+ + +Release data2011l - 2011-10-10 11:15:43 +0700 + + There are just 2 changes that cause different generated tzdata files from + zic, to Asia/Hebron and Pacific/Fiji - the possible change for Bahia, Brazil + is included, but commented out. Compared with the diff I sent out last week, + this version also includes attributions for the sources for the changes + (in much the same format as ado used, but the html tags have not been + checked, verified, or used in any way at all, so if there are errors there, + please let me know.) + + +Release data2011k - 2011-09-20 17:54:03 -0400 + + [not summarized] + + +Release data2011j - 2011-09-12 09:22:49 -0400 + + (contemporary changes for Samoa; past changes for Kenya, Uganda, and + Tanzania); there are also two spelling corrections to comments in + the australasia file (with thanks to Christos Zoulas). + + +Release 2011i - 2011-08-29 05:56:32 -0400 + + [not summarized] + + +Release data2011h - 2011-06-15 18:41:48 -0400 + + Russia and Curaçao changes + + +Release 2011g - 2011-04-25 09:07:22 -0400 + + update the rules for Egypt to reflect its abandonment of DST this year + + +Release 2011f - 2011-04-06 17:14:53 -0400 + + [not summarized] + + +Release 2011e - 2011-03-31 16:04:38 -0400 + + Morocco, Chile, and tz-link changes + + +Release 2011d - 2011-03-14 09:18:01 -0400 + + changes that impact present-day timestamps in Cuba, Samoa, and Turkey + + +Release 2011c - 2011-03-07 09:30:09 -0500 + + These do affect current timestamps in Chile and Annette Island, Canada. 
+ + +Release 2011b - 2011-02-07 08:44:50 -0500 + + [not summarized] + + +Release 2011a - 2011-01-24 10:30:16 -0500 + + [not summarized] + + +Release data2010o - 2010-11-01 09:18:23 -0400 + + change to the end of DST in Fiji in 2011 + + +Release 2010n - 2010-10-25 08:19:17 -0400 + + [not summarized] + + +Release 2010m - 2010-09-27 09:24:48 -0400 + + Hong Kong, Vostok, and zic.c changes + + +Release 2010l - 2010-08-16 06:57:25 -0400 + + [not summarized] + + +Release 2010k - 2010-07-26 10:42:27 -0400 + + [not summarized] + + +Release 2010j - 2010-05-10 09:07:48 -0400 + + changes for Bahía de Banderas and for version naming + + +Release data2010i - 2010-04-16 18:50:45 -0400 + + the end of DST in Morocco on 2010-08-08 + + +Release data2010h - 2010-04-05 09:58:56 -0400 + + [not summarized] + + +Release data2010g - 2010-03-24 11:14:53 -0400 + + [not summarized] + + +Release 2010f - 2010-03-22 09:45:46 -0400 + + [not summarized] + + +Release data2010e - 2010-03-08 14:24:27 -0500 + + corrects the Dhaka bug found by Danvin Ruangchan + + +Release data2010d - 2010-03-06 07:26:01 -0500 + + [not summarized] + + +Release 2010c - 2010-03-01 09:20:58 -0500 + + changes including KRE's suggestion for earlier initialization of + "goahead" and "goback" structure elements + + +Release code2010a - 2010-02-16 10:40:04 -0500 + + [not summarized] + + +Release data2010b - 2010-01-20 12:37:01 -0500 + + Mexico changes + + +Release data2010a - 2010-01-18 08:30:04 -0500 + + changes to Dhaka + + +Release data2009u - 2009-12-26 08:32:28 -0500 + + changes to DST in Bangladesh + + +Release 2009t - 2009-12-21 13:24:27 -0500 + + [not summarized] + + +Release data2009s - 2009-11-14 10:26:32 -0500 + + (cosmetic) Antarctica change and the DST-in-Fiji-in-2009-and-2010 change + + +Release 2009r - 2009-11-09 10:10:31 -0500 + + "antarctica" and "tz-link.htm" changes + + +Release 2009q - 2009-11-02 09:12:40 -0500 + + with two corrections as reported by Eric Muller and Philip Newton + + +Release data2009p - 
2009-10-23 15:05:27 -0400 + + Argentina (including San Luis) changes (with the correction from + Mariano Absatz) + + +Release data2009o - 2009-10-14 16:49:38 -0400 + + Samoa (commentary only), Pakistan, and Bangladesh changes + + +Release data2009n - 2009-09-22 15:13:38 -0400 + + added commentary for Argentina and a change to the end of DST in + 2009 in Pakistan + + +Release data2009m - 2009-09-03 10:23:43 -0400 + + Samoa and Palestine changes + + +Release data2009l - 2009-08-14 09:13:07 -0400 + + Samoa (comments only) and Egypt + + +Release 2009k - 2009-07-20 09:46:08 -0400 + + [not summarized] + + +Release data2009j - 2009-06-15 06:43:59 -0400 + + Bangladesh change (with a short turnaround since the DST change is + impending) + + +Release 2009i - 2009-06-08 09:21:22 -0400 + + updating for DST in Bangladesh this year + + +Release 2009h - 2009-05-26 09:19:14 -0400 + + [not summarized] + + +Release data2009g - 2009-04-20 16:34:07 -0400 + + Cairo + + +Release data2009f - 2009-04-10 11:00:52 -0400 + + correct DST in Pakistan + + +Release 2009e - 2009-04-06 09:08:11 -0400 + + [not summarized] + + +Release 2009d - 2009-03-23 09:38:12 -0400 + + Morocco, Tunisia, Argentina, and American Astronomical Society changes + + +Release data2009c - 2009-03-16 09:47:51 -0400 + + change to the start of Cuban DST + + +Release 2009b - 2009-02-09 11:15:22 -0500 + + [not summarized] + + +Release 2009a - 2009-01-21 10:09:39 -0500 + + [not summarized] + + +Release data2008i - 2008-10-21 12:10:25 -0400 + + southamerica and zone.tab files, with Argentina DST rule changes and + United States zone reordering and recommenting + + +Release 2008h - 2008-10-13 07:33:56 -0400 + + [not summarized] + + +Release 2008g - 2008-10-06 09:03:18 -0400 + + Fix a broken HTML anchor and update Brazil's DST transitions; + there's also a slight reordering of information in tz-art.htm. 
+ + +Release data2008f - 2008-09-09 22:33:26 -0400 + + [not summarized] + + +Release 2008e - 2008-07-28 14:11:17 -0400 + + changes by Arthur David Olson and Jesper Nørgaard Welen + + +Release data2008d - 2008-07-07 09:51:38 -0400 + + changes by Arthur David Olson, Paul Eggert, and Rodrigo Severo + + +Release data2008c - 2008-05-19 17:48:03 -0400 + + Pakistan, Morocco, and Mongolia + + +Release data2008b - 2008-03-24 08:30:59 -0400 + + including renaming Asia/Calcutta to Asia/Kolkata, with a backward + link provided + + +Release 2008a - 2008-03-08 05:42:16 -0500 + + [not summarized] + + +Release 2007k - 2007-12-31 10:25:22 -0500 + + most importantly, changes to the "southamerica" file based on + Argentina's readoption of daylight saving time + + +Release 2007j - 2007-12-03 09:51:01 -0500 + + 1. eliminate the "P" (parameter) macro; + + 2. the "noncontroversial" changes circulated on the time zone + mailing list (less the changes to "logwtmp.c"); + + 3. eliminate "too many transition" errors when "min" is used in time + zone rules; + + 4. changes by Paul Eggert (including updated information for Venezuela). + + +Release data2007i - 2007-10-30 10:28:11 -0400 + + changes for Cuba and Syria + + +Release 2007h - 2007-10-01 10:05:51 -0400 + + changes by Paul Eggert, as well as an updated link to the ICU + project in tz-link.htm + + +Release 2007g - 2007-08-20 10:47:59 -0400 + + changes by Paul Eggert + + The "leapseconds" file has been updated to incorporate the most + recent International Earth Rotation and Reference Systems Service + (IERS) bulletin. + + There's an addition to tz-art.htm regarding the television show "Medium". 
+ + +Release 2007f - 2007-05-07 10:46:46 -0400 + + changes by Paul Eggert (including Haiti, Turks and Caicos, and New + Zealand) + + changes to zic.c to allow hour values greater than 24 (along with + Paul's improved time value overflow checking) + + +Release 2007e - 2007-04-02 10:11:52 -0400 + + Syria and Honduras changes by Paul Eggert + + zic.c variable renaming changes by Arthur David Olson + + +Release 2007d - 2007-03-20 08:48:30 -0400 + + changes by Paul Eggert + + the elimination of white space at the ends of lines + + +Release 2007c - 2007-02-26 09:09:37 -0500 + + changes by Paul Eggert + + +Release 2007b - 2007-02-12 09:34:20 -0500 + + Paul Eggert's proposed change to the quotation handling logic in zic.c. + + changes to the commentary in "leapseconds" reflecting the IERS + announcement that there is to be no positive leap second at the end + of June 2007. + + +Release 2007a - 2007-01-08 12:28:29 -0500 + + changes by Paul Eggert + + Derick Rethans's Asmara change + + Oscar van Vlijmen's Easter Island local mean time change + + symbolic link changes + + +Release 2006p - 2006-11-27 08:54:27 -0500 + + changes by Paul Eggert + + +Release 2006o - 2006-11-06 09:18:07 -0500 + + changes by Paul Eggert + + +Release 2006n - 2006-10-10 11:32:06 -0400 + + changes by Paul Eggert + + +Release 2006m - 2006-10-02 15:32:35 -0400 + + changes for Uruguay, Palestine, and Egypt by Paul Eggert + + (minimalist) changes to zic.8 to clarify "until" information + + +Release data2006l - 2006-09-18 12:58:11 -0400 + + Paul's best-effort work on this coming weekend's Egypt time change + + +Release 2006k - 2006-08-28 12:19:09 -0400 + + changes by Paul Eggert + + +Release 2006j - 2006-08-21 09:56:32 -0400 + + changes by Paul Eggert + + +Release code2006i - 2006-08-07 12:30:55 -0400 + + localtime.c fixes + + Ken Pizzini's conversion script + + +Release code2006h - 2006-07-24 09:19:37 -0400 + + adds public domain notices to four files + + includes a fix for transition times being off by a 
second + + adds a new recording to the "arts" file (information courtesy Colin Bowern) + + +Release 2006g - 2006-05-08 17:18:09 -0400 + + northamerica changes by Paul Eggert + + +Release 2006f - 2006-05-01 11:46:00 -0400 + + a missing version number problem is fixed (with thanks to Bradley + White for catching the problem) + + +Release 2006d - 2006-04-17 14:33:43 -0400 + + changes by Paul Eggert + + added new items to tz-arts.htm that were found by Paul + + +Release 2006c - 2006-04-03 10:09:32 -0400 + + two sets of data changes by Paul Eggert + + a fencepost error fix in zic.c + + changes to zic.c and the "europe" file to minimize differences + between output produced by the old 32-bit zic and the new 64-bit + version + + +Release 2006b - 2006-02-20 10:08:18 -0500 + [tz32code2006b + tz64code2006b + tzdata2006b] + + 64-bit code + + All SCCS IDs were bumped to "8.1" for this release. + + +Release 2006a - 2006-01-30 08:59:31 -0500 + + changes by Paul Eggert (in particular, Indiana time zone moves) + + an addition to the zic manual page to describe how special-case + transitions are handled + + +Release 2005r - 2005-12-27 09:27:13 -0500 + + Canadian changes by Paul Eggert + + They also add "
" directives to time zone data files and reflect
+  changes to warning message logic in "zdump.c" (but with calls to
+  "gettext" kept unbundled at the suggestion of Ken Pizzini).
+
+
+Release 2005q - 2005-12-13 09:17:09 -0500
+
+  Nothing earth-shaking here:
+	1.  Electronic mail addresses have been removed.
+	2.  Casts of the return value of exit have been removed.
+	3.  Casts of the argument of is.* macros have been added.
+	4.  Indentation in one section of zic.c has been fixed.
+	5.  References to dead URLs in the data files have been dealt with.
+
+
+Release 2005p - 2005-12-05 10:30:53 -0500
+
+  "systemv", "tz-link.htm", and "zdump.c" changes
+  (less the casts of arguments to the is* macros)
+
+
+Release 2005o - 2005-11-28 10:55:26 -0500
+
+  Georgia, Cuba, Nicaragua, and Jordan changes by Paul Eggert
+
+  zdump.c lint fixes by Arthur David Olson
+
+
+Release 2005n - 2005-10-03 09:44:09 -0400
+
+  changes by Paul Eggert (both the Uruguay changes and the Kyrgyzstan
+  et al. changes)
+
+
+Release 2005m - 2005-08-29 12:15:40 -0400
+
+  changes by Paul Eggert (with a small tweak to the tz-art change)
+
+  a declaration of an unused variable has been removed from zdump.c
+
+
+Release 2005l - 2005-08-22 12:06:39 -0400
+
+  changes by Paul Eggert
+
+  overflow/underflow checks by Arthur David Olson, minus changes to
+  the "Theory" file about the pending addition of 64-bit data (I grow
+  less confident of the changes being accepted with each passing day,
+  and the changes no longer increase the data files nine-fold--there's
+  less than a doubling in size by my local Sun's reckoning)
+
+
+Release 2005k - 2005-07-14 14:14:24 -0400
+
+  The "leapseconds" file has been edited to reflect the recently
+  announced leap second at the end of 2005.
+
+  I've also deleted electronic mail addresses from the files as an
+  anti-spam measure.
+
+
+Release 2005j - 2005-06-13 14:34:13 -0400
+
+  These reflect changes to limit the length of time zone abbreviations
+  and the characters used in those abbreviations.
+
+  There are also changes to handle POSIX-style "quoted" timezone
+  environment variables.
+
+  The changes were circulated on the time zone mailing list; the only
+  change since then was the removal of a couple of
+  minimum-abbreviation-length checks.
+
+
+Release data2005i - 2005-04-21 15:04:16 -0400
+
+  changes (most importantly to Nicaragua and Haiti) by Paul Eggert
+
+
+Release 2005h - 2005-04-04 11:24:47 -0400
+
+  changes by Paul Eggert
+
+  minor changes to Makefile and zdump.c to produce more useful output
+  when doing a "make typecheck"
+
+
+Release 2005g - 2005-03-14 10:11:21 -0500
+
+  changes by Paul Eggert (a change to current DST rules in Uruguay and
+  an update to a link to time zone software)
+
+
+Release 2005f - 2005-03-01 08:45:32 -0500
+
+  data and documentation changes by Paul Eggert
+
+
+Release 2005e - 2005-02-10 15:59:44 -0500
+
+  [not summarized]
+
+
+Release code2005d - 2005-01-31 09:21:47 -0500
+
+  make zic complain about links to links if the -v flag is used
+
+  have "make public" do more code checking
+
+  add an include to "localtime.c" for the benefit of gcc systems
+
+
+Release 2005c - 2005-01-17 18:36:29 -0500
+
+  get better results when mktime runs on a system where time_t is double
+
+  changes to the data files (most importantly to Paraguay)
+
+
+Release 2005b - 2005-01-10 09:19:54 -0500
+
+  Get localtime and gmtime working on systems with exotic time_t types.
+
+  Update the leap second commentary in the "leapseconds" file.
+
+
+Release 2005a - 2005-01-01 13:13:44 -0500
+
+  [not summarized]
+
+
+Release code2004i - 2004-12-14 13:42:58 -0500
+
+  Deal with systems where time_t is unsigned.
+
+
+Release code2004h - 2004-12-07 11:40:18 -0500
+
+  64-bit-time_t changes
+
+
+Release 2004g - 2004-11-02 09:06:01 -0500
+
+  update to Cuba (taking effect this weekend)
+
+  other changes by Paul Eggert
+
+  correction of the spelling of Oslo
+
+  changed versions of difftime.c and private.h
+
+
+Release code2004f - 2004-10-21 10:25:22 -0400
+
+  Cope with wide-ranging tm_year values.
+
+
+Release 2004e - 2004-10-11 14:47:21 -0400
+
+  Brazil/Argentina/Israel changes by Paul Eggert
+
+  changes to tz-link.htm by Paul
+
+  one small fix to Makefile
+
+
+Release 2004d - 2004-09-22 08:27:29 -0400
+
+  Avoid overflow problems when TM_YEAR_BASE is added to an integer.
+
+
+Release 2004c - 2004-08-11 12:06:26 -0400
+
+  asctime-related changes
+
+  (variants of) some of the documentation changes suggested by Paul Eggert
+
+
+Release 2004b - 2004-07-19 14:33:35 -0400
+
+  data changes by Paul Eggert - most importantly, updates for Argentina
+
+
+Release 2004a - 2004-05-27 12:00:47 -0400
+
+  changes by Paul Eggert
+
+  Handle DST transitions that occur at the end of a month in some
+  years but at the start of the following month in other years.
+
+  Add a copy of the correspondence that's the basis for claims about
+  DST in the Navajo Nation.
+
+
+Release 2003e - 2003-12-15 09:36:47 -0500
+
+  changes by Arthur David Olson (primarily code changes)
+
+  changes by Paul Eggert (primarily data changes)
+
+  minor changes to "Makefile" and "northamerica" (in the latter case,
+  optimization of the "Toronto" rules)
+
+
+Release 2003d - 2003-10-06 09:34:44 -0400
+
+  changes by Paul Eggert
+
+
+Release 2003c - 2003-09-16 10:47:05 -0400
+
+  Fix bad returns in zic.c's inleap function.
+  Thanks to Bradley White for catching the problem!
+
+
+Release 2003b - 2003-09-16 07:13:44 -0400
+
+  Add a "--version" option (and documentation) to the zic and zdump commands.
+
+  changes to overflow/underflow checking in zic
+
+  a localtime typo fix.
+
+  Update the leapseconds and tz-art.htm files.
+
+
+Release 2003a - 2003-03-24 09:30:54 -0500
+
+  changes by Paul Eggert
+
+  a few additions and modifications to the tz-art.htm file
+
+
+Release 2002d - 2002-10-15 13:12:42 -0400
+
+  changes by Paul Eggert, less the "Britain (UK)" change in iso3166.tab
+
+  There's also a new time zone quote in "tz-art.htm".
+
+
+Release 2002c - 2002-04-04 11:55:20 -0500
+
+  changes by Paul Eggert
+
+  Change zic.c to avoid creating symlinks to files that don't exist.
+
+
+Release 2002b - 2002-01-28 12:56:03 -0500
+
+  [These change notes are for Release 2002a, which was corrupted.
+  2002b was a corrected version of 2002a.]
+
+  changes by Paul Eggert
+
+  Update the "leapseconds" file to note that there'll be no leap
+  second at the end of June, 2002.
+
+  Change "zic.c" to deal with a problem in handling the "Asia/Bishkek" zone.
+
+  Change to "difftime.c" to avoid sizeof problems.
+
+
+Release 2001d - 2001-10-09 13:31:32 -0400
+
+  changes by Paul Eggert
+
+
+Release 2001c - 2001-06-05 13:59:55 -0400
+
+  changes by Paul Eggert and Andrew Brown
+
+
+Release 2001b - 2001-04-05 16:44:38 -0400
+
+  changes by Paul Eggert (modulo jnorgard's typo fix)
+
+  tz-art.htm has been HTMLified.
+
+
+Release 2001a - 2001-03-13 12:57:44 -0500
+
+  changes by Paul Eggert
+
+  An addition to the "leapseconds" file: comments with the text of the
+  latest IERS leap second notice.
+
+  Trailing white space has been removed from data file lines, and
+  repeated spaces in "Rule Jordan" lines in the "asia" file have been
+  converted to tabs.
+
+
+Release 2000h - 2000-12-14 15:33:38 -0500
+
+  changes by Paul Eggert
+
+  one typo fix in the "art" file
+
+  With providence, this is the last update of the millennium.
+
+
+Release 2000g - 2000-10-10 11:35:22 -0400
+
+  changes by Paul Eggert
+
+  correction of John Mackin's name submitted by Robert Elz
+
+  Garry Shandling's Daylight Saving Time joke (!?!) from the recent
+  Emmy Awards broadcast.
+
+
+Release 2000f - 2000-08-10 09:31:58 -0400
+
+  changes by Paul Eggert
+
+  Added information in "tz-art.htm" on a Seinfeld reference to DST.
+
+  Error checking and messages in the "yearistype" script have been
+  improved.
+
+
+Release 2000e - 2000-07-31 09:27:54 -0400
+
+  data changes by Paul Eggert
+
+  a change to the default value of the defined constant HAVE_STRERROR
+
+  the addition of a Dave Barry quote on DST to the tz-arts file
+
+
+Release 2000d - 2000-04-20 15:43:04 -0400
+
+  changes to the documentation and code of strftime for C99 conformance
+
+  a bug fix for date.c
+
+  These are based on (though modified from) changes by Paul Eggert.
+
+
+Release 2000c - 2000-03-04 10:31:43 -0500
+
+  changes by Paul Eggert
+
+
+Release 2000b - 2000-02-21 12:16:29 -0500
+
+  changes by Paul Eggert and Joseph Myers
+
+  modest tweaks to the tz-art.htm and tz-link.htm files
+
+
+Release 2000a - 2000-01-18 09:21:26 -0500
+
+  changes by Paul Eggert
+
+  The two hypertext documents have also been renamed.
+
+
+Release code1999i-data1999j - 1999-11-15 18:43:22 -0500
+
+  Paul Eggert's changes
+
+  additions to the "zic" manual page and the "Arts.htm" file
+
+
+Release code1999h-data1999i - 1999-11-08 14:55:21 -0500
+
+  [not summarized]
+
+
+Release data1999h - 1999-10-07 03:50:29 -0400
+
+  changes by Paul Eggert to "europe" (most importantly, fixing
+  Lithuania and Estonia)
+
+
+Release 1999g - 1999-09-28 11:06:18 -0400
+
+  data changes by Paul Eggert (most importantly, the change for
+  Lebanon that buys correctness for this coming Sunday)
+
+  The "code" file contains changes to "Makefile" and "checktab.awk" to
+  allow better checking of time zone files before they are published.
+
+
+Release 1999f - 1999-09-23 09:48:14 -0400
+
+  changes by Arthur David Olson and Paul Eggert
+
+
+Release 1999e - 1999-08-17 15:20:54 -0400
+
+  changes circulated by Paul Eggert, although the change to handling
+  of DST-specifying timezone names has been commented out for now
+  (search for "XXX" in "localtime.c" for details).  These files also
+  do not make any changes to the start of DST in Brazil.
+
+  In addition to Paul's changes, there are updates to "Arts.htm" and
+  cleanups of URLs.
+
+
+Release 1999d - 1999-03-30 11:31:07 -0500
+
+  changes by Paul Eggert
+
+  The Makefile's "make public" rule has also been changed to do a test
+  compile of each individual time zone data file (which should help
+  avoid problems such as the one we had with Nicosia).
+
+
+Release 1999c - 1999-03-25 09:47:47 -0500
+
+  changes by Paul Eggert, most importantly the change for Chile.
+
+
+Release 1999b - 1999-02-01 17:51:44 -0500
+
+  changes by Paul Eggert
+
+  code changes (suggested by Mani Varadarajan, mani at be.com) for
+  correct handling of symbolic links when building using a relative directory
+
+  code changes to generate correct messages for failed links
+
+  updates to the URLs in Arts.htm
+
+
+Release 1999a - 1999-01-19 16:20:29 -0500
+
+  error message internationalizations and corrections in zic.c and
+  zdump.c (as suggested by Vladimir Michl, vladimir.michl at upol.cz,
+  to whom thanks!)
+
+
+Release code1998h-data1998i - 1998-10-01 09:56:10 -0400
+
+  changes for Brazil, Chile, and Germany
+
+  support for use of "24:00" in the input files for the time zone compiler
+
+
+Release code1998g-data1998h - 1998-09-24 10:50:28 -0400
+
+  changes by Paul Eggert
+
+  correction to a define in the "private.h" file
+
+
+Release data1998g - 1998-08-11 03:28:35 -0000
+  [tzdata1998g.tar.gz is missing!]
+
+  Lithuanian change provided by mgedmin at pub.osf.it
+
+  Move creation of the GMT link with Etc/GMT to "etcetera" (from
+  "backward") to ensure that the GMT file is created even where folks
+  don't want the "backward" links (as suggested by Paul Eggert).
+
+
+Release data1998f - 1998-07-20 13:50:00 -0000
+  [tzdata1998f.tar.gz is missing!]
+
+  Update the "leapseconds" file to include the newly announced
+  insertion at the end of 1998.
+
+
+Release code1998f - 1998-06-01 10:18:31 -0400
+
+  addition to localtime.c by Guy Harris
+
+
+Release 1998e - 1998-05-28 09:56:26 -0400
+
+  The Makefile is changed to produce zoneinfo-posix rather than
+  zoneinfo/posix, and to produce zoneinfo-leaps rather than
+  zoneinfo/right.
+
+  data changes by Paul Eggert
+
+  changes from Guy Harris to provide asctime_r and ctime_r
+
+  A usno1998 file (substantially identical to usno1997) has been added.
+
+
+Release 1998d - 1998-05-14 11:58:34 -0400
+
+  changes to comments (in particular, elimination of references to CIA maps).
+  "Arts.htm", "WWW.htm", "asia", and "australasia" are the only places
+  where changes occur.
+
+
+Release 1998c - 1998-02-28 12:32:26 -0500
+
+  changes by Paul Eggert (save the "French correction," on which I'll
+  wait for the dust to settle)
+
+  symlink changes
+
+  changes and additions to Arts.htm
+
+
+Release 1998b - 1998-01-17 14:31:51 -0500
+
+  URL cleanups and additions
+
+
+Release 1998a - 1998-01-13 12:37:35 -0500
+
+  changes by Paul Eggert
+
+
+Release code1997i-data1997k - 1997-12-29 09:53:41 -0500
+
+  changes by Paul Eggert, with minor modifications from Arthur David
+  Olson to make the files more browser friendly
+
+
+Release code1997h-data1997j - 1997-12-18 17:47:35 -0500
+
+  minor changes to put "TZif" at the start of each timezone information file
+
+  a rule has also been added to the Makefile so you can
+	make zones
+  to just recompile the zone information files (rather than doing a
+  full "make install" with its other effects).
+
+
+Release data1997i - 1997-10-07 08:45:38 -0400
+
+  changes to Africa by Paul Eggert
+
+
+Release code1997g-data1997h - 1997-09-04 16:56:54 -0400
+
+  corrections for Uruguay (and other locations)
+
+  Arthur David Olson's simple-minded fix allowing mktime to both
+  correctly handle leap seconds and correctly handle tm_sec values
+  upon which arithmetic has been performed.
+
+
+Release code1997f-data1997g - 1997-07-19 13:15:02 -0400
+
+  Paul Eggert's updates
+
+  a small change to a function prototype;
+
+  "Music" has been renamed "Arts.htm", HTMLified, and augmented to
+  include information on Around the World in Eighty Days.
+
+
+Release code1997e-data1997f - 1997-05-03 18:52:34 -0400
+
+  fixes to zic's error handling
+
+  changes inspired by the item circulated on Slovenia
+
+  The description of Web resources has been HTMLified for browsing
+  convenience.
+
+  A new piece of tz-related music has been added to the "Music" file.
+
+
+Release code1997d-data1997e - 1997-03-29 12:48:52 -0500
+
+  Paul Eggert's latest suggestions
+
+
+Release code1997c-data1997d - 1997-03-07 20:37:54 -0500
+
+  changes to "zic.c" to correct performance of the "-s" option
+
+  a new file "usno1997"
+
+
+Release data1997c - 1997-03-04 09:58:18 -0500
+
+  changes in Israel
+
+
+Release 1997b - 1997-02-27 18:34:19 -0500
+
+  The data file incorporates the 1997 leap second.
+
+  The code file incorporates Arthur David Olson's take on the
+  zic/multiprocessor/directory-creation situation.
+
+
+Release 1997a - 1997-01-21 09:11:10 -0500
+
+  Paul Eggert's Antarctica (and other) changes
+
+  Arthur David Olson finessed the "getopt" issue by checking against
+  both -1 and EOF (regardless of POSIX, SunOS 4.1.1's manual says -1
+  is returned while SunOS 5.5's manual says EOF is returned).
+
+
+Release code1996o-data1996n - 1996-12-27 21:42:05 -0500
+
+  Paul Eggert's latest changes
+
+
+Release code1996n - 1996-12-16 09:42:02 -0500
+
+  link snapping fix from Bruce Evans (via Garrett Wollman)
+
+
+Release data1996m - 1996-11-24 02:37:34 -0000
+  [tzdata1996m.tar.gz is missing!]
+
+  Paul Eggert's batch of changes
+
+
+Release code1996m-data1996l - 1996-11-05 14:00:12 -0500
+
+  No functional changes here; the files have simply been changed to
+  make more use of ISO style dates in comments. The names of the above
+  files now include the year in full.
+
+
+Release code96l - 1996-09-08 17:12:20 -0400
+
+  tzcode96k was missing a couple of pieces.
+
+
+Release 96k - 1996-09-08 16:06:22 -0400
+
+  the latest round of changes from Paul Eggert
+
+  the recent Year 2000 material
+
+
+Release code96j - 1996-07-30 13:18:53 -0400
+
+  Set sp->typecnt as suggested by Timothy Patrick Murphy.
+
+
+Release code96i - 1996-07-27 20:11:35 -0400
+
+  Paul's suggested patch for strftime %V week numbers
+
+
+Release data96i - 1996-07-01 18:13:04 -0400
+
+  "northamerica" and "europe" changes by Paul Eggert
+
+
+Release code96h - 1996-06-05 08:02:21 -0400
+
+  fix for handling transitions specified in Universal Time
+
+  Some "public domain" notices have also been added.
+
+
+Release code96g - 1996-05-16 14:00:26 -0400
+
+  fix for the simultaneous-DST-and-zone-change challenge
+
+
+Release data96h - 1996-05-09 17:40:51 -0400
+
+  changes by Paul Eggert
+
+
+Release code96f-data96g - 1996-05-03 03:09:59 -0000
+  [tzcode96f.tar.gz + tzdata96g.tar.gz are both missing!]
+
+  The changes get us some of the way to fixing the problems noted in Paul
+  Eggert's letter yesterday (in addition to a few others).  The approach
+  has been to make zic a bit smarter about figuring out what time zone
+  abbreviations apply just after the time specified in the "UNTIL" part
+  of a zone line.  Putting the smarts in zic means avoiding having
+  transition times show up in both "Zone" lines and "Rule" lines, which
+  in turn avoids multiple transition time entries in time zone files.
+  (This also makes the zic input files such as "europe" a bit shorter and
+  should ease maintenance.)
+
+
+Release data96f - 1996-04-19 19:20:03 -0000
+  [tzdata96f.tar.gz is missing!]
+
+  The only changes are to the "northamerica" file; the time zone
+  abbreviation for Denver is corrected to MST (and MDT), and the
+  comments for Mexico have been updated.
+
+
+Release data96e - 1996-03-19 17:37:26 -0500
+
+  Proposals by Paul Eggert, in particular the Portugal change that
+  comes into play at the end of this month.
+
+
+Release data96d - 1996-03-18 20:49:39 -0500
+
+  [not summarized]
+
+
+Release code96e - 1996-02-29 15:43:27 -0000
+  [tzcode96e.tar.gz is missing!]
+
+  internationalization changes and the fix to the documentation for strftime
+
+
+Release code96d-data96c - 1996-02-12 11:05:27 -0500
+
+  The "code" file simply updates Bob Kridle's electronic address.
+
+  The "data" file updates rules for Mexico.
+
+
+Release data96b - 1996-01-27 15:44:42 -0500
+
+  Kiribati change
+
+
+Release code96c - 1996-01-16 16:58:15 -0500
+
+  leap-year streamlining and binary-search changes
+
+  fix to newctime.3
+
+
+Release code96b - 1996-01-10 20:42:39 -0500
+
+  fixes and enhancements from Paul Eggert, including code that
+  emulates the behavior of recent versions of the SunOS "date"
+  command.
+
+
+Release 96a - 1996-01-06 09:08:24 -0500
+
+  Israel updates
+
+  fixes to strftime.c for correct ISO 8601 week number generation,
+  plus support for two new formats ('G' and 'g') to give ISO 8601 year
+  numbers (which are not necessarily the same as calendar year numbers)
+
+
+Release code95i-data95m - 1995-12-21 12:46:47 -0500
+
+  The latest revisions from Paul Eggert are included, the usno1995
+  file has been updated, and a new file ("WWW") covering useful URLs
+  has been added.
+
+
+Release code95h-data95l - 1995-12-19 18:10:12 -0500
+
+  A simplification of a macro definition, a change to data for Sudan,
+  and (for last minute shoppers) notes in the "Music" file on the CD
+  "Old Man Time".
+
+
+Release code95g-data95k - 1995-10-30 10:32:47 -0500
+
+  (slightly reformatted) 8-bit-clean proposed patch
+
+  minor patch: US/Eastern -> America/New_York
+
+  snapshot of the USNO's latest data ("usno1995")
+
+  some other minor cleanups
+
+
+Release code95f-data95j - 1995-10-28 21:01:34 -0000
+  [tzcode95f.tar.gz + tzdata95j.tar.gz are both missing!]
+
+  European cleanups
+
+  support for 64-bit time_t's
+
+  optimization in localtime.c
+
+
+Release code95e - 1995-10-13 13:23:57 -0400
+
+  the mktime change to scan from future to past when trying to find time zone
+  offsets
+
+
+Release data95i - 1995-09-26 10:43:26 -0400
+
+  For Canada/Central, guess that the Sun customer's "one week too
+  early" was just an approximation, and the true error is one month
+  too early.  This is consistent with the rest of Canada.
+
+
+Release data95h - 1995-09-21 11:26:48 -0400
+
+  latest changes from Paul Eggert
+
+
+Release code95d - 1995-09-14 11:14:45 -0400
+
+  the addition of a "Music" file, which documents four recorded
+  versions of the tune "Save That Time".
+
+
+Release data95g - 1995-09-01 17:21:36 -0400
+
+  "yearistype" correction
+
+
+Release data95f - 1995-08-28 20:46:56 -0400
+
+  Paul Eggert's change to the australasia file
+
+
+Release data95e - 1995-07-08 18:02:34 -0400
+
+  The only change is a leap second at the end of this year.
+  Thanks to Bradley White for forwarding news on the leap second.
+
+
+Release data95d - 1995-07-03 13:26:22 -0400
+
+  Paul Eggert's changes
+
+
+Release data95c - 1995-07-02 19:19:28 -0400
+
+  changes to "asia", "backward", "europe", and "southamerica"
+  (read: northamericacentrics need not apply)
+
+
+Release code95c - 1995-03-13 14:00:46 -0500
+
+  one-line fix for sign extension problems in detzcode
+
+
+Release 95b - 1995-03-04 11:22:38 -0500
+
+  Minor changes in both:
+
+  The "code" file contains a workaround for the lack of "unistd.h" in
+  Microsoft C++ version 7.
+
+  The "data" file contains a fixed "Link" for America/Shiprock.
+
+
+Release 94h - 1994-12-10 12:51:14 -0500
+
+  The files:
+
+  *	incorporate the changes to "zdump" and "date" to make changes to
+	the "TZ" environment variable permanent;
+
+  *	incorporate the table changes by Paul Eggert;
+
+  *	include (and document) support for universal time specifications in
+	data files - but do not (yet) include use of this feature in the
+	data files.
+
+  Think of this as "TZ Classic" - the software has been set up not to break if
+  universal time shows up in its input, and data entries have been
+  left as is so as not to break existing implementations.
+
+
+Release data94f - 1994-08-20 12:56:09 -0400
+
+  (with thanks!) the latest data updates from Paul Eggert
+
+
+Release data94e - 1994-06-04 13:13:53 -0400
+
+  [not summarized]
+
+
+Release code94g - 1994-05-05 12:14:07 -0400
+
+  fix missing "optind.c" and a reference to it in the Makefile
+
+
+Release code94f - 1994-05-05 13:00:33 -0000
+  [tzcode94f.tar.gz is missing!]
+
+  changes to avoid overflow in difftime, as well as changes to cope
+  with the 52/53 challenge in strftime
+
+
+Release code94e - 1994-03-30 23:32:59 -0500
+
+  change for the benefit of PCTS
+
+
+Release 94d - 1994-02-24 15:42:25 -0500
+
+  Avoid clashes with POSIX semantics for zones such as GMT+4.
+
+  Some other very minor housekeeping is also present.
+
+
+Release code94c - 1994-02-10 08:52:40 -0500
+
+  Fix bug where mkdirs was broken unless you compile with
+  -fwritable-strings (which is generally losing to do).
+
+
+Release 94b - 1994-02-07 10:04:33 -0500
+
+  work by Paul Eggert who notes:
+
+  I found another book of time zone histories by E W Whitman; it's not
+  as extensive as Shanks but has a few goodies of its own.  I used it
+  to update the tables.  I also fixed some more as a result of
+  correspondence with Adam David and Peter Ilieve, and move some stray
+  links from 'europe' to 'backward'.  I corrected some scanning errors
+  in usno1989.
+
+  As far as the code goes, I fixed zic to allow years in the range
+  INT_MIN to INT_MAX; this fixed a few boundary conditions around 1900.
+  And I cleaned up the zic documentation a little bit.
+
+
+Release data94a - 1994-02-03 08:58:54 -0500
+
+  It simply incorporates the recently announced leap second into the
+  "leapseconds" file.
+
+
+Release 93g - 1993-11-22 17:28:27 -0500
+
+  Paul Eggert has provided a good deal of historic information (based
+  on Shanks), and there are some code changes to deal with the buglets
+  that crawled out in dealing with the new information.
+
+
+Release 93f - 1993-10-15 12:27:46 -0400
+
+  Paul Eggert's changes
+
+
+Release 93e - 1993-09-05 21:21:44 -0400
+
+  This has updated data for Israel, England, and Kwajalein.  There's
+  also an update to "zdump" to cope with Kwajalein's 24-hour jump.
+  Thanks to Paul Eggert and Peter Ilieve for the changes.
+
+
+Release 93d - 1993-06-17 23:34:17 -0400
+
+  new fix and new data on Israel
+
+
+Release 93c - 1993-06-06 19:31:55 -0400
+
+  [not summarized]
+
+
+Release 93b - 1993-02-02 14:53:58 -0500
+
+  updated "leapseconds" file
+
+
+Release 93 - 1993-01-08 07:01:06 -0500
+
+  At kre's suggestion, the package has been split in two - a code piece
+  (which also includes documentation) that's only of use to folks who
+  want to recompile things and a data piece useful to anyone who can
+  run "zic".
+
+  The new version has a few changes to the data files, a few
+  portability changes, and an off-by-one fix (with thanks to
+  Tom Karzes at deshaw.com for providing a description and a
+  solution).
+
+
+Release 92c - 1992-11-21 17:35:36 -0000
+  [tz92c.tar.Z is missing!]
+
+  The fallout from the latest round of DST transitions.
+
+  There are changes for Portugal, Saskatchewan, and "Pacific-New";
+  there's also a change to "zic.c" that makes it portable to more systems.
+
+
+Release 92 - 1992-04-25 18:17:03 -0000
+  [tz92.tar.Z is missing!]
+
+  By popular demand (well, at any rate, following a request by kre at munnari)
+
+
+The 1989 update of the time zone package featured:
+
+  *	POSIXization (including interpretation of POSIX-style TZ environment
+	variables, provided by Guy Harris),
+  *	ANSIfication (including versions of "mktime" and "difftime"),
+  *	SVIDulation (an "altzone" variable)
+  *	MACHination (the "gtime" function)
+  *	corrections to some time zone data (including corrections to the rules
+	for Great Britain and New Zealand)
+  *	reference data from the United States Naval Observatory for folks who
+	want to do additional time zones
+  *	and the 1989 data for Saudi Arabia.
+
+  (Since this code will be treated as "part of the implementation" in some
+  places and as "part of the application" in others, there's no good way to
+  name functions, such as timegm, that are not part of the proposed ANSI C
+  standard; such functions have kept their old, underscore-free names in this
+  update.)
+
+  And the "dysize" function has disappeared; it was present to allow
+  compilation of the "date" command on old BSD systems, and a version of "date"
+  is now provided in the package.  The "date" command is not created when you
+  "make all" since it may lack options provided by the version distributed with
+  your operating system, or may not interact with the system in the same way
+  the native version does.
+
+  Since POSIX frowns on correct leap second handling, the default behavior of
+  the "zic" command (in the absence of a "-L" option) has been changed to omit
+  leap second information from its output files.
+
+
+-----
+Notes
+
+This file contains copies of the part of each release announcement
+that talks about the changes in that release.  The text has been
+adapted and reformatted for the purposes of this file.
+
+Traditionally a release R consists of a pair of tarball files,
+tzcodeR.tar.gz and tzdataR.tar.gz.  However, some releases (e.g.,
+code2010a, data2012c) consist of just one or the other tarball, and a
+few (e.g., code2012c-data2012d) have tarballs with mixed version
+numbers.  Recent releases also come in an experimental format
+consisting of a single tarball tzdb-R.tar.lz with extra data.
+
+Release timestamps are taken from the release's commit (for newer,
+Git-based releases), from the newest file in the tarball (for older
+releases, where this info is available) or from the email announcing
+the release (if all else fails; these are marked with a time zone
+abbreviation of -0000 and an "is missing!" comment).
+
+Earlier versions of the code and data were not announced on the tz
+list and are not summarized here.
+
+This file is in the public domain.
+
+Local Variables:
+coding: utf-8
+End:
diff --git a/lib-tzcode/README b/lib-tzcode/README
new file mode 100644
index 0000000..280de36
--- /dev/null
+++ b/lib-tzcode/README
@@ -0,0 +1,62 @@
+This library was downloaded from https://www.iana.org/time-zones from the link
+tzcode2023c.tar.gz (286.0kb).
+
+Modifications are as follows:
+ * Makefile has additional definitions and targets in a single commented section
+ * Overridable tz_alloc and tz_free function pointers are added to localtime.c
+ * tzcode.h is added to provide a minimal public interface
+ * test/test-tzcode.c is added to test the library
+
+============================== Original ======================================
+README for the tz distribution
+
+"Where do I set the hands of the clock?" -- Les Tremayne as The King
+"Oh that--you can set them any place you want." -- Frank Baxter as The Scientist
+					(from the Bell System film "About Time")
+
+The Time Zone Database (called tz, tzdb or zoneinfo) contains code and
+data that represent the history of local time for many representative
+locations around the globe.  It is updated periodically to reflect
+changes made by political bodies to time zone boundaries, UTC offsets,
+and daylight-saving rules.
+
+See <https://www.iana.org/time-zones/repository/tz-link.html> or the
+file tz-link.html for how to acquire the code and data.  Once acquired,
+read the comments in the file 'Makefile' and make any changes needed
+to make things right for your system, especially if you are using some
+platform other than GNU/Linux.  Then run the following commands,
+substituting your desired installation directory for "$HOME/tzdir":
+
+	make TOPDIR=$HOME/tzdir install
+	$HOME/tzdir/usr/bin/zdump -v America/Los_Angeles
+
+See the file tz-how-to.html for examples of how to read the data files.
+
+This database of historical local time information has several goals:
+
+ * Provide a compendium of data about the history of civil time that
+   is useful even if not 100% accurate.
+
+ * Give an idea of the variety of local time rules that have existed
+   in the past and thus may be expected in the future.
+
+ * Test the generality of the local time rule description system.
+
+The information in the time zone data files is by no means authoritative;
+fixes and enhancements are welcome.  Please see the file CONTRIBUTING
+for details.
+
+Thanks to these Time Zone Caballeros who've made major contributions to the
+time conversion package: Keith Bostic; Bob Devine; Paul Eggert; Robert Elz;
+Guy Harris; Mark Horton; John Mackin; and Bradley White.  Thanks also to
+Michael Bloom, Art Neilson, Stephen Prince, John Sovereign, and Frank Wales
+for testing work, and to Gwillim Law for checking local mean time data.
+Thanks in particular to Arthur David Olson, the project's founder and first
+maintainer, to whom the time zone community owes the greatest debt of all.
+None of them are responsible for remaining errors.
+
+-----
+
+This file is in the public domain, so clarified as of 2009-05-17 by
+Arthur David Olson.  The other files in this distribution are either
+public domain or BSD licensed; see the file LICENSE for details.
diff --git a/lib-tzcode/SECURITY b/lib-tzcode/SECURITY
new file mode 100644
index 0000000..40128bc
--- /dev/null
+++ b/lib-tzcode/SECURITY
@@ -0,0 +1,15 @@
+Please report any sensitive security-related bugs via email to the
+tzdb designated coordinators, currently Paul Eggert
+<eggert@cs.ucla.edu> and Tim Parenti <tim@timtimeonline.com>.
+Put "tzdb security" at the start of your email's subject line.
+We prefer communications to be in English.
+
+You should receive a response within a week. If not, please follow up
+via email to make sure we received your original message.
+
+If we confirm the bug, we plan to notify affected third-party services
+or software that we know about, prepare an advisory, commit fixes to
+the main development branch as quickly as is practical, and finally
+publish the advisory on tz@iana.org.  As with all tzdb contributions,
+we give credit to security contributors unless they wish to remain
+anonymous.
diff --git a/lib-tzcode/asctime.c b/lib-tzcode/asctime.c
new file mode 100644
index 0000000..a40661f
--- /dev/null
+++ b/lib-tzcode/asctime.c
@@ -0,0 +1,131 @@
+/* asctime and asctime_r a la POSIX and ISO C, except pad years before 1000.  */
+
+/*
+** This file is in the public domain, so clarified as of
+** 1996-06-05 by Arthur David Olson.
+*/
+
+/*
+** Avoid the temptation to punt entirely to strftime;
+** the output of strftime is supposed to be locale specific
+** whereas the output of asctime is supposed to be constant.
+*/
+
+/*LINTLIBRARY*/
+
+#include "private.h"
+#include <stdio.h>	/* sprintf */
+
+/*
+** All years associated with 32-bit time_t values are exactly four digits long;
+** some years associated with 64-bit time_t values are not.
+** Vintage programs are coded for years that are always four digits long
+** and may assume that the newline always lands in the same place.
+** For years that are less than four digits, we pad the output with
+** leading zeroes to get the newline in the traditional place.
+** The -4 ensures that we get four characters of output even if
+** we call a strftime variant that produces fewer characters for some years.
+** The ISO C and POSIX standards prohibit padding the year,
+** but many implementations pad anyway; most likely the standards are buggy.
+*/
+static char const ASCTIME_FMT[] = "%s %s%3d %.2d:%.2d:%.2d %-4s\n";
+/*
+** For years that are more than four digits we put extra spaces before the year
+** so that code trying to overwrite the newline won't end up overwriting
+** a digit within a year and truncating the year (operating on the assumption
+** that no output is better than wrong output).
+*/
+static char const ASCTIME_FMT_B[] = "%s %s%3d %.2d:%.2d:%.2d     %s\n";
+
+enum { STD_ASCTIME_BUF_SIZE = 26 };	/* strlen("Www Mmm dd hh:mm:ss yyyy\n") + 1 for the NUL */
+/*
+** Big enough for something such as
+** ??? ???-2147483648 -2147483648:-2147483648:-2147483648     -2147483648\n
+** (two three-character abbreviations, five strings denoting integers,
+** seven explicit spaces, two explicit colons, a newline,
+** and a trailing NUL byte).
+** The values above are for systems where an int is 32 bits and are provided
+** as an example; the size expression below is a bound for the system at
+** hand.
+*/
+static char buf_asctime[2*3 + 5*INT_STRLEN_MAXIMUM(int) + 7 + 2 + 1 + 1];
+
+/* A similar buffer for ctime.
+   C89 requires that they be the same buffer.
+   This requirement was removed in C99, so support it only if requested,
+   as support is more likely to lead to bugs in badly written programs.  */
+#if SUPPORT_C89
+# define buf_ctime buf_asctime
+#else
+static char buf_ctime[sizeof buf_asctime];
+#endif
+
+char *	/* Format *timeptr as "Www Mmm dd hh:mm:ss yyyy\n" into buf; returns buf, or NULL on overflow.  */
+asctime_r(struct tm const *restrict timeptr, char *restrict buf)
+{
+	static const char	wday_name[][4] = {
+		"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+	};
+	static const char	mon_name[][4] = {
+		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+	};
+	register const char *	wn;
+	register const char *	mn;
+	char			year[INT_STRLEN_MAXIMUM(int) + 2];
+	char result[sizeof buf_asctime];	/* staging area, same size as the static buffers */
+
+	if (timeptr == NULL) {	/* no input: set EINVAL but still return a placeholder line */
+		errno = EINVAL;
+		return strcpy(buf, "??? ??? ?? ??:??:?? ????\n");
+	}
+	if (timeptr->tm_wday < 0 || timeptr->tm_wday >= DAYSPERWEEK)
+		wn = "???";	/* out-of-range weekday */
+	else	wn = wday_name[timeptr->tm_wday];
+	if (timeptr->tm_mon < 0 || timeptr->tm_mon >= MONSPERYEAR)
+		mn = "???";	/* out-of-range month */
+	else	mn = mon_name[timeptr->tm_mon];
+	/*
+	** Use strftime's %Y to generate the year, to avoid overflow problems
+	** when computing timeptr->tm_year + TM_YEAR_BASE.
+	** Assume that strftime is unaffected by other out-of-range members
+	** (e.g., timeptr->tm_mday) when processing "%Y".
+	*/
+	strftime(year, sizeof year, "%Y", timeptr);
+	/*
+	** We avoid using snprintf since it's not available on all systems.
+	*/
+	sprintf(result,	/* years longer than four characters use the extra-space format */
+		((strlen(year) <= 4) ? ASCTIME_FMT : ASCTIME_FMT_B),
+		wn, mn,
+		timeptr->tm_mday, timeptr->tm_hour,
+		timeptr->tm_min, timeptr->tm_sec,
+		year);
+	if (strlen(result) < STD_ASCTIME_BUF_SIZE	/* fits in a standard-size caller buffer, */
+	    || buf == buf_ctime || buf == buf_asctime)	/* or buf is a static buffer as large as result */
+		return strcpy(buf, result);
+	else {
+		errno = EOVERFLOW;	/* text needs more than STD_ASCTIME_BUF_SIZE bytes */
+		return NULL;
+	}
+}
+
+char *	/* Like asctime_r, but formats into the file-static buf_asctime, overwritten by each call.  */
+asctime(register const struct tm *timeptr)
+{
+	return asctime_r(timeptr, buf_asctime);
+}
+
+char *	/* Convert *timep with localtime_r, then format the result into buf via asctime_r.  */
+ctime_r(const time_t *timep, char *buf)
+{
+  struct tm mytm;
+  struct tm *tmp = localtime_r(timep, &mytm);
+  return tmp ? asctime_r(tmp, buf) : NULL;	/* NULL when localtime_r returned NULL */
+}
+
+char *	/* Like ctime_r, but formats into buf_ctime (which is buf_asctime when SUPPORT_C89).  */
+ctime(const time_t *timep)
+{
+  return ctime_r(timep, buf_ctime);
+}
diff --git a/lib-tzcode/calendars b/lib-tzcode/calendars
new file mode 100644
index 0000000..f4ed9e4
--- /dev/null
+++ b/lib-tzcode/calendars
@@ -0,0 +1,173 @@
+----- Calendrical issues -----
+
+As mentioned in Theory.html, although calendrical issues are out of
+scope for tzdb, they indicate the sort of problems that we would run
+into if we extended tzdb further into the past.  The following
+information and sources go beyond Theory.html's brief discussion.
+They sometimes disagree.
+
+
+France
+
+Gregorian calendar adopted 1582-12-20.
+French Revolutionary calendar used 1793-11-24 through 1805-12-31,
+and (in Paris only) 1871-05-06 through 1871-05-23.
+
+
+Russia
+
+From Chris Carrier (1996-12-02):
+On 1929-10-01 the Soviet Union instituted an "Eternal Calendar"
+with 30-day months plus 5 holidays, with a 5-day week.
+On 1931-12-01 it changed to a 6-day week; in 1934 it reverted to the
+Gregorian calendar while retaining the 6-day week; on 1940-06-27 it
+reverted to the 7-day week.  With the 6-day week the usual days
+off were the 6th, 12th, 18th, 24th and 30th of the month.
+(Source: Eviatar Zerubavel, _The Seven Day Circle_)
+
+
+Mark Brader reported a similar story in "The Book of Calendars", edited
+by Frank Parise (1982, Facts on File, ISBN 0-8719-6467-8), page 377.  But:
+
+From: Petteri Sulonen (via Usenet)
+Date: 14 Jan 1999 00:00:00 GMT
+...
+
+If your source is correct, how come documents between 1929 and 1940 were
+still dated using the conventional, Gregorian calendar?
+
+I can post a scan of a document dated December 1, 1934, signed by
+Yenukidze, the secretary, on behalf of Kalinin, the President of the
+Executive Committee of the Supreme Soviet, if you like.
+
+
+
+Sweden (and Finland)
+
+From: Mark Brader
+Subject: Re: Gregorian reform - a part of locale?
+
+Date: 1996-07-06
+
+In 1700, Denmark made the transition from Julian to Gregorian.  Sweden
+decided to *start* a transition in 1700 as well, but rather than have one of
+those unsightly calendar gaps :-), they simply decreed that the next leap
+year after 1696 would be in 1744 - putting the whole country on a calendar
+different from both Julian and Gregorian for a period of 40 years.
+
+However, in 1704 something went wrong and the plan was not carried through;
+they did, after all, have a leap year that year.  And one in 1708.  In 1712
+they gave it up and went back to Julian, putting 30 days in February that
+year!...
+
+Then in 1753, Sweden made the transition to Gregorian in the usual manner,
+getting there only 13 years behind the original schedule.
+
+(A previous posting of this story was challenged, and Swedish readers
+produced the following references to support it: "Tideräkning och historia"
+by Natanael Beckman (1924) and "Tid, en bok om tideräkning och
+kalenderväsen" by Lars-Olof Lodén (1968).
+
+
+Grotefend's data
+
+From: "Michael Palmer" [with two obvious typos fixed]
+Subject: Re: Gregorian Calendar (was Re: Another FHC related question
+Newsgroups: soc.genealogy.german
+Date: Tue, 9 Feb 1999 02:32:48 -800
+...
+
+The following is a(n incomplete) listing, arranged chronologically, of
+European states, with the date they converted from the Julian to the
+Gregorian calendar:
+
+04/15 Oct 1582 - Italy (with exceptions), Spain, Portugal, Poland (Roman
+                 Catholics and Danzig only)
+09/20 Dec 1582 - France, Lorraine
+
+21 Dec 1582/
+   01 Jan 1583 - Holland, Brabant, Flanders, Hennegau
+10/21 Feb 1583 - bishopric of Liege (Lüttich)
+13/24 Feb 1583 - bishopric of Augsburg
+04/15 Oct 1583 - electorate of Trier
+05/16 Oct 1583 - Bavaria, bishoprics of Freising, Eichstedt, Regensburg,
+                 Salzburg, Brixen
+13/24 Oct 1583 - Austrian Oberelsaß and Breisgau
+20/31 Oct 1583 - bishopric of Basel
+02/13 Nov 1583 - duchy of Jülich-Berg
+02/13 Nov 1583 - electorate and city of Köln
+04/15 Nov 1583 - bishopric of Würzburg
+11/22 Nov 1583 - electorate of Mainz
+16/27 Nov 1583 - bishopric of Strassburg and the margraviate of Baden
+17/28 Nov 1583 - bishopric of Münster and duchy of Cleve
+14/25 Dec 1583 - Steiermark
+
+06/17 Jan 1584 - Austria and Bohemia
+11/22 Jan 1584 - Lucerne, Uri, Schwyz, Zug, Freiburg, Solothurn
+12/23 Jan 1584 - Silesia and the Lausitz
+22 Jan/
+   02 Feb 1584 - Hungary (legally on 21 Oct 1587)
+      Jun 1584 - Unterwalden
+01/12 Jul 1584 - duchy of Westfalen
+
+16/27 Jun 1585 - bishopric of Paderborn
+
+14/25 Dec 1590 - Transylvania
+
+22 Aug/
+   02 Sep 1612 - duchy of Prussia
+
+13/24 Dec 1614 - Pfalz-Neuburg
+
+          1617 - duchy of Kurland (reverted to the Julian calendar in
+                 1796)
+
+          1624 - bishopric of Osnabrück
+
+          1630 - bishopric of Minden
+
+15/26 Mar 1631 - bishopric of Hildesheim
+
+          1655 - Kanton Wallis
+
+05/16 Feb 1682 - city of Strassburg
+
+18 Feb/
+   01 Mar 1700 - Protestant Germany (including Swedish possessions in
+                 Germany), Denmark, Norway
+30 Jun/
+   12 Jul 1700 - Gelderland, Zutphen
+10 Nov/
+   12 Dec 1700 - Utrecht, Overijssel
+
+31 Dec 1700/
+   12 Jan 1701 - Friesland, Groningen, Zürich, Bern, Basel, Geneva,
+                 Thurgau, and Schaffhausen
+
+          1724 - Glarus, Appenzell, and the city of St. Gallen
+
+01 Jan 1750    - Pisa and Florence
+
+02/14 Sep 1752 - Great Britain
+
+17 Feb/
+   01 Mar 1753 - Sweden
+
+1760-1812      - Graubünden
+
+The Russian empire (including Finland and the Baltic states) did not
+convert to the Gregorian calendar until the Soviet revolution of 1917.
+
+Source: H. Grotefend, _Taschenbuch der Zeitrechnung des deutschen
+Mittelalters und der Neuzeit_, herausgegeben von Dr. O. Grotefend
+(Hannover: Hahnsche Buchhandlung, 1941), pp. 26-28.
+
+-----
+
+This file is in the public domain, so clarified as of 2009-05-17 by
+Arthur David Olson.
+
+-----
+Local Variables:
+coding: utf-8
+End:
diff --git a/lib-tzcode/date.1 b/lib-tzcode/date.1
new file mode 100644
index 0000000..e810721
--- /dev/null
+++ b/lib-tzcode/date.1
@@ -0,0 +1,168 @@
+.\" This file is in the public domain, so clarified as of
+.\" 2009-05-17 by Arthur David Olson.
+.TH date 1 "" "Time Zone Database"
+.SH NAME
+date \- show and set date and time
+.SH SYNOPSIS
+.if n .nh
+.if n .na
+.ie \n(.g .ds - \f(CR-\fP
+.el .ds - \-
+.B date
+[
+.B \*-u
+] [
+.B \*-c
+] [
+.B \*-r
+.I seconds
+] [
+.BI + format
+] [
+\fR[\fIyyyy\fR]\fImmddhhmm\fR[\fIyy\fR][\fB.\fIss\fR]
+]
+.SH DESCRIPTION
+.ie '\(lq'' .ds lq \&"\"
+.el .ds lq \(lq\"
+.ie '\(rq'' .ds rq \&"\"
+.el .ds rq \(rq\"
+.de q
+\\$3\*(lq\\$1\*(rq\\$2
+..
+The
+.B date
+command
+without arguments writes the date and time to the standard output in
+the form
+.ce 1
+Wed Mar  8 14:54:40 EST 1989
+.br
+with
+.B EST
+replaced by the local time zone's abbreviation
+(or by the abbreviation for the time zone specified in the
+.B TZ
+environment variable if set).
+The exact output format depends on the locale.
+.PP
+If a command-line argument starts with a plus sign (\c
+.q "\fB+\fP" ),
+the rest of the argument is used as a
+.I format
+that controls what appears in the output.
+In the format, when a percent sign (\c
+.q "\fB%\fP" )
+appears,
+it and the character after it are not output,
+but rather identify part of the date or time
+to be output in a particular way
+(or identify a special character to output):
+.nf
+.sp
+.if t .in +.5i
+.if n .in +2
+.ta \w'%M\0\0'u +\w'Wed Mar  8 14:54:40 EST 1989\0\0'u
+	Sample output	Explanation
+%a	Wed	Abbreviated weekday name*
+%A	Wednesday	Full weekday name*
+%b	Mar	Abbreviated month name*
+%B	March	Full month name*
+%c	Wed Mar 08 14:54:40 1989	Date and time*
+%C	19	Century
+%d	08	Day of month (always two digits)
+%D	03/08/89	Month/day/year (eight characters)
+%e	 8	Day of month (leading zero blanked)
+%h	Mar	Abbreviated month name*
+%H	14	24-hour-clock hour (two digits)
+%I	02	12-hour-clock hour (two digits)
+%j	067	Julian day number (three digits)
+%k	14	24-hour-clock hour (leading zero blanked)
+%l	 2	12-hour-clock hour (leading zero blanked)
+%m	03	Month number (two digits)
+%M	54	Minute (two digits)
+%n	\\n	newline character
+%p	PM	AM/PM designation
+%r	02:54:40 PM	Hour:minute:second AM/PM designation
+%R	14:54	Hour:minute
+%S	40	Second (two digits)
+%t	\\t	tab character
+%T	14:54:40	Hour:minute:second
+%U	10	Sunday-based week number (two digits)
+%w	3	Day number (one digit, Sunday is 0)
+%W	10	Monday-based week number (two digits)
+%x	03/08/89	Date*
+%X	14:54:40	Time*
+%y	89	Last two digits of year
+%Y	1989	Year in full
+%z	-0500	Numeric time zone
+%Z	EST	Time zone abbreviation
+%+	Wed Mar  8 14:54:40 EST 1989	Default output format*
+.if t .in -.5i
+.if n .in -2
+* The exact output depends on the locale.
+.sp
+.fi
+If a character other than one of those shown above appears after
+a percent sign in the format,
+that following character is output.
+All other characters in the format are copied unchanged to the output;
+a newline character is always added at the end of the output.
+.PP
+In Sunday-based week numbering,
+the first Sunday of the year begins week 1;
+days preceding it are part of
+.q "week 0" .
+In Monday-based week numbering,
+the first Monday of the year begins week 1.
+.PP
+To set the date, use a command line argument with one of the following forms:
+.nf
+.if t .in +.5i
+.if n .in +2
+.ta \w'198903081454\0'u
+1454	24-hour-clock hours (first two digits) and minutes
+081454	Month day (first two digits), hours, and minutes
+03081454	Month (two digits, January is 01), month day, hours, minutes
+8903081454	Year, month, month day, hours, minutes
+0308145489	Month, month day, hours, minutes, year
+	(on System V-compatible systems)
+030814541989	Month, month day, hours, minutes, four-digit year
+198903081454	Four-digit year, month, month day, hours, minutes
+.if t .in -.5i
+.if n .in -2
+.fi
+If the century, year, month, or month day is not given,
+the current value is used.
+Any of the above forms may be followed by a period and two digits that give
+the seconds part of the new time; if no seconds are given, zero is assumed.
+.PP
+These options are available:
+.TP
+.BR \*-u " or " \*-c
+Use Universal Time when setting and showing the date and time.
+.TP
+.BI "\*-r " seconds
+Output the date that corresponds to
+.I seconds
+past the epoch of 1970-01-01 00:00:00 UTC, where
+.I seconds
+should be an integer, either decimal, octal (leading 0), or
+hexadecimal (leading 0x), preceded by an optional sign.
+.SH FILES
+.ta \w'/usr/share/zoneinfo/posixrules\0\0'u
+/etc/localtime	local timezone file
+.br
+/usr/lib/locale/\f2L\fP/LC_TIME	description of time locale \f2L\fP
+.br
+/usr/share/zoneinfo	timezone information directory
+.br
+/usr/share/zoneinfo/posixrules	default DST rules (obsolete,
+	and can cause bugs if present)
+.br
+/usr/share/zoneinfo/GMT	for UTC leap seconds
+.sp
+If
+.B /usr/share/zoneinfo/GMT
+is absent,
+UTC leap seconds are loaded from
+.BR /usr/share/zoneinfo/posixrules .
diff --git a/lib-tzcode/date.1.txt b/lib-tzcode/date.1.txt
new file mode 100644
index 0000000..f7b2837
--- /dev/null
+++ b/lib-tzcode/date.1.txt
@@ -0,0 +1,108 @@
+date(1)                     General Commands Manual                    date(1)
+
+NAME
+       date - show and set date and time
+
+SYNOPSIS
+       date [ -u ] [ -c ] [ -r seconds ] [ +format ] [ [yyyy]mmddhhmm[yy][.ss]
+       ]
+
+DESCRIPTION
+       The date command without arguments writes the date and time to the
+       standard output in the form
+                            Wed Mar  8 14:54:40 EST 1989
+       with EST replaced by the local time zone's abbreviation (or by the
+       abbreviation for the time zone specified in the TZ environment variable
+       if set).  The exact output format depends on the locale.
+
+       If a command-line argument starts with a plus sign ("+"), the rest of
+       the argument is used as a format that controls what appears in the
+       output.  In the format, when a percent sign ("%") appears, it and the
+       character after it are not output, but rather identify part of the date
+       or time to be output in a particular way (or identify a special
+       character to output):
+
+             Sample output                 Explanation
+         %a  Wed                           Abbreviated weekday name*
+         %A  Wednesday                     Full weekday name*
+         %b  Mar                           Abbreviated month name*
+         %B  March                         Full month name*
+         %c  Wed Mar 08 14:54:40 1989      Date and time*
+         %C  19                            Century
+         %d  08                            Day of month (always two digits)
+         %D  03/08/89                      Month/day/year (eight characters)
+         %e   8                            Day of month (leading zero blanked)
+         %h  Mar                           Abbreviated month name*
+         %H  14                            24-hour-clock hour (two digits)
+         %I  02                            12-hour-clock hour (two digits)
+         %j  067                           Julian day number (three digits)
+         %k  14                            24-hour-clock hour (leading zero blanked)
+         %l   2                            12-hour-clock hour (leading zero blanked)
+         %m  03                            Month number (two digits)
+         %M  54                            Minute (two digits)
+         %n  \n                            newline character
+         %p  PM                            AM/PM designation
+         %r  02:54:40 PM                   Hour:minute:second AM/PM designation
+         %R  14:54                         Hour:minute
+         %S  40                            Second (two digits)
+         %t  \t                            tab character
+         %T  14:54:40                      Hour:minute:second
+         %U  10                            Sunday-based week number (two digits)
+         %w  3                             Day number (one digit, Sunday is 0)
+         %W  10                            Monday-based week number (two digits)
+         %x  03/08/89                      Date*
+         %X  14:54:40                      Time*
+         %y  89                            Last two digits of year
+         %Y  1989                          Year in full
+         %z  -0500                         Numeric time zone
+         %Z  EST                           Time zone abbreviation
+         %+  Wed Mar  8 14:54:40 EST 1989  Default output format*
+       * The exact output depends on the locale.
+
+       If a character other than one of those shown above appears after a
+       percent sign in the format, that following character is output.  All
+       other characters in the format are copied unchanged to the output; a
+       newline character is always added at the end of the output.
+
+       In Sunday-based week numbering, the first Sunday of the year begins
+       week 1; days preceding it are part of "week 0".  In Monday-based week
+       numbering, the first Monday of the year begins week 1.
+
+       To set the date, use a command line argument with one of the following
+       forms:
+         1454         24-hour-clock hours (first two digits) and minutes
+         081454       Month day (first two digits), hours, and minutes
+         03081454     Month (two digits, January is 01), month day, hours, minutes
+         8903081454   Year, month, month day, hours, minutes
+         0308145489   Month, month day, hours, minutes, year
+                      (on System V-compatible systems)
+         030814541989 Month, month day, hours, minutes, four-digit year
+         198903081454 Four-digit year, month, month day, hours, minutes
+       If the century, year, month, or month day is not given, the current
+       value is used.  Any of the above forms may be followed by a period and
+       two digits that give the seconds part of the new time; if no seconds
+       are given, zero is assumed.
+
+       These options are available:
+
+       -u or -c
+              Use Universal Time when setting and showing the date and time.
+
+       -r seconds
+              Output the date that corresponds to seconds past the epoch of
+              1970-01-01 00:00:00 UTC, where seconds should be an integer,
+              either decimal, octal (leading 0), or hexadecimal (leading 0x),
+              preceded by an optional sign.
+
+FILES
+       /etc/localtime                  local timezone file
+       /usr/lib/locale/L/LC_TIME       description of time locale L
+       /usr/share/zoneinfo             timezone information directory
+       /usr/share/zoneinfo/posixrules  default DST rules (obsolete,
+                                       and can cause bugs if present)
+       /usr/share/zoneinfo/GMT         for UTC leap seconds
+
+       If /usr/share/zoneinfo/GMT is absent, UTC leap seconds are loaded from
+       /usr/share/zoneinfo/posixrules.
+
+Time Zone Database                                                     date(1)
diff --git a/lib-tzcode/date.c b/lib-tzcode/date.c
new file mode 100644
index 0000000..b62f04d
--- /dev/null
+++ b/lib-tzcode/date.c
@@ -0,0 +1,217 @@
+/* Display or set the current time and date.  */
+
+/* Copyright 1985, 1987, 1988 The Regents of the University of California.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+   2. Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+   3. Neither the name of the University nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
+   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+   OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+   OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+   SUCH DAMAGE.  */
+
+#include "private.h"
+#include 
+#include 
+
+#if !HAVE_POSIX_DECLS
+extern char *		optarg;
+extern int		optind;
+#endif
+
+static int		retval = EXIT_SUCCESS;
+
+static void		display(const char *, time_t);
+static void		dogmt(void);
+static void		errensure(void);
+static void		timeout(FILE *, const char *, const struct tm *);
+ATTRIBUTE_NORETURN static void usage(void);
+
+/* Parse options and the optional +FORMAT operand, then print the time.  */
+int
+main(const int argc, char *argv[])
+{
+	register const char *	format = "+%+";	/* operands must start with '+' */
+	register int		ch;
+	register bool		rflag = false;	/* true once -r has been seen */
+	time_t			t;
+	intmax_t		secs;
+	char *			endarg;
+
+#ifdef LC_ALL
+	setlocale(LC_ALL, "");
+#endif /* defined(LC_ALL) */
+#if HAVE_GETTEXT
+# ifdef TZ_DOMAINDIR
+	bindtextdomain(TZ_DOMAIN, TZ_DOMAINDIR);
+# endif /* defined(TZ_DOMAINDIR) */
+	textdomain(TZ_DOMAIN);
+#endif /* HAVE_GETTEXT */
+	t = time(NULL);		/* used unless -r overrides it */
+	while ((ch = getopt(argc, argv, "ucr:")) != EOF && ch != -1) {
+		switch (ch) {
+		default:
+			usage();	/* declared ATTRIBUTE_NORETURN */
+		case 'u':		/* do it in UT */
+		case 'c':
+			dogmt();
+			break;
+		case 'r':		/* seconds since 1970 */
+			if (rflag) {
+				fprintf(stderr,
+					_("date: error: multiple -r's used\n"));
+				usage();
+			}
+			rflag = true;
+			errno = 0;
+			secs = strtoimax(optarg, &endarg, 0);
+			if (*endarg || optarg == endarg)
+				errno = EINVAL;
+			else if (! (TIME_T_MIN <= secs && secs <= TIME_T_MAX))
+				errno = ERANGE;
+			if (errno) {
+				char const *e = strerror(errno);
+				fprintf(stderr, _("date: %s: %s\n"), optarg, e);
+				errensure();
+				exit(retval);
+			}
+			t = secs;
+			break;
+		}
+	}
+	if (optind < argc) {
+	  if (argc - optind != 1) {
+	    fprintf(stderr,
+		    _("date: error: multiple operands in command line\n"));
+	    usage();
+	  }
+	  format = argv[optind];
+	  if (*format != '+') {
+	    fprintf(stderr, _("date: unknown operand: %s\n"), format);
+	    usage();
+	  }
+	}
+
+	display(format, t);
+	return retval;
+}
+
+/* Use UTC: swap in a copy of environ whose only TZ setting is "TZ=UTC0".  */
+static void
+dogmt(void)
+{
+	static char **	fakeenv;	/* non-null once the swap is done */
+
+	if (fakeenv == NULL) {
+		static char	tzeutc0[] = "TZ=UTC0";
+		ptrdiff_t from, to = 0, n;
+		for (n = 0;  environ[n] != NULL;  ++n)
+			continue;
+#if defined ckd_add && defined ckd_mul
+		if (!ckd_add(&n, n, 2) && !ckd_mul(&n, n, sizeof *fakeenv)
+		    && n <= INDEX_MAX)
+		  fakeenv = malloc(n);
+#else
+		if (n <= INDEX_MAX / sizeof *fakeenv - 2)
+		  fakeenv = malloc((n + 2) * sizeof *fakeenv);
+#endif
+		if (fakeenv == NULL) {
+			fprintf(stderr, _("date: Memory exhausted\n"));
+			errensure();
+			exit(retval);
+		}
+		fakeenv[to++] = tzeutc0;
+		/* Start at 0 so environ[0] is copied, not skipped.  */
+		for (from = 0; environ[from] != NULL; ++from)
+			if (strncmp(environ[from], "TZ=", 3) != 0)
+				fakeenv[to++] = environ[from];
+		fakeenv[to] = NULL;
+		environ = fakeenv;
+	}
+}
+
+/* Downgrade the pending exit status from success to failure.  */
+static void
+errensure(void)
+{
+	retval = retval == EXIT_SUCCESS ? EXIT_FAILURE : retval;
+}
+
+/* Print the command synopsis on stderr, then exit with a failure status.  */
+static void
+usage(void)
+{
+	fprintf(stderr,
+		_("date: usage: date [-u] [-c] [-r seconds] [+format]\n"));
+	errensure();
+	exit(retval);
+}
+
+/* Convert NOW with localtime and write it to stdout per FORMAT, followed by
+   a newline.  On conversion or output failure, complain and record failure.  */
+static void
+display(char const *format, time_t now)
+{
+	struct tm const *broken_down = localtime(&now);
+
+	if (broken_down == NULL) {
+		fprintf(stderr, _("date: error: time out of range\n"));
+		errensure();
+		return;
+	}
+	timeout(stdout, format, broken_down);
+	putchar('\n');
+	/* Flush both streams before asking ferror about them.  */
+	fflush(stdout);
+	fflush(stderr);
+	if (!ferror(stdout) && !ferror(stderr))
+		return;
+	fprintf(stderr, _("date: error: couldn't write results\n"));
+	errensure();
+}
+
+/* Format *TMP per FORMAT via strftime, doubling the buffer until the result
+   is nonempty, then write it to FP minus its leading byte.  */
+static void
+timeout(FILE *fp, char const *format, struct tm const *tmp)
+{
+	char *buf = NULL;
+	ptrdiff_t cap = 1024 / 2;
+	ptrdiff_t outlen;
+
+	do {
+		char *grown;
+#ifdef ckd_mul
+		bool bigger = !ckd_mul(&cap, cap, 2) && cap <= INDEX_MAX;
+#else
+		bool bigger = cap <= INDEX_MAX / 2 && (cap *= 2, true);
+#endif
+		grown = bigger ? realloc(buf, cap) : NULL;
+		if (grown == NULL) {
+			fprintf(stderr, _("date: error: can't get memory\n"));
+			errensure();
+			exit(retval);
+		}
+		buf = grown;
+		outlen = strftime(buf, cap, format, tmp);
+	} while (outlen == 0);
+	fwrite(buf + 1, 1, outlen - 1, fp);
+	free(buf);
+}
diff --git a/lib-tzcode/difftime.c b/lib-tzcode/difftime.c
new file mode 100644
index 0000000..ff78f03
--- /dev/null
+++ b/lib-tzcode/difftime.c
@@ -0,0 +1,60 @@
+/* Return the difference between two timestamps.  */
+
+/*
+** This file is in the public domain, so clarified as of
+** 1996-06-05 by Arthur David Olson.
+*/
+
+/*LINTLIBRARY*/
+
+#include "private.h"	/* for time_t and TYPE_SIGNED */
+
+/* Return -X as a double; the parameter's type takes the place of a cast.  */
+static double
+dminus(double x)
+{
+  return -x;
+}
+
+/* Return TIME1 minus TIME0 as a double, picking the arithmetic that suits
+   time_t's width and signedness.  */
+double
+difftime(time_t time1, time_t time0)
+{
+	/* When double is wider than time_t, converting each operand first
+	   and then subtracting is good enough (this assumes that the wider
+	   type also has more precision).  */
+	if (sizeof(time_t) < sizeof(double)) {
+	  double d1 = time1, d0 = time0;
+	  return d1 - d0;
+	}
+
+	/* Unsigned time_t: subtracting the smaller value from the larger
+	   one cannot overflow, so do that and negate the result when
+	   TIME1 is the smaller.  */
+	if (!TYPE_SIGNED(time_t)) {
+	  if (time0 <= time1)
+	    return time1 - time0;
+	  return dminus(time0 - time1);
+	}
+
+	/* Signed time_t no wider than uintmax_t: do the same in uintmax_t.  */
+	if (sizeof(time_t) <= sizeof(uintmax_t)) {
+	  uintmax_t u1 = time1, u0 = time0;
+	  if (time0 <= time1)
+	    return u1 - u0;
+	  return dminus(u0 - u1);
+	}
+
+	/* Operands with the same sign: their difference cannot overflow,
+	   so subtract directly.  */
+	if ((time1 < 0) == (time0 < 0))
+	  return time1 - time0;
+
+	/* Opposite signs and uintmax_t too narrow.  This rounds twice;
+	   the long double temporaries are an attempt to lessen that.  */
+	{
+	  long double w1 = time1, w0 = time0;
+	  return w1 - w0;
+	}
+}
diff --git a/lib-tzcode/localtime.c b/lib-tzcode/localtime.c
new file mode 100644
index 0000000..af73d19
--- /dev/null
+++ b/lib-tzcode/localtime.c
@@ -0,0 +1,2497 @@
+/* Convert timestamp from time_t to struct tm.  */
+
+/*
+** This file is in the public domain, so clarified as of
+** 1996-06-05 by Arthur David Olson.
+*/
+
+/*
+** Leap second handling from Bradley White.
+** POSIX-style TZ environment variable handling from Guy Harris.
+*/
+
+/*LINTLIBRARY*/
+
+#define LOCALTIME_IMPLEMENTATION
+#include "private.h"
+
+#include "tzfile.h"
+#include 
+
+#if defined THREAD_SAFE && THREAD_SAFE
+# include 
+static pthread_mutex_t locallock = PTHREAD_MUTEX_INITIALIZER;
+static int lock(void) { return pthread_mutex_lock(&locallock); }
+static void unlock(void) { pthread_mutex_unlock(&locallock); }
+#else
+static int lock(void) { return 0; }
+static void unlock(void) { }
+#endif
+
+#ifndef TZ_ABBR_CHAR_SET
+# define TZ_ABBR_CHAR_SET \
+	"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 :+-._"
+#endif /* !defined TZ_ABBR_CHAR_SET */
+
+#ifndef TZ_ABBR_ERR_CHAR
+# define TZ_ABBR_ERR_CHAR '_'
+#endif /* !defined TZ_ABBR_ERR_CHAR */
+
+/*
+** Support non-POSIX platforms that distinguish between text and binary files.
+*/
+
+#ifndef O_BINARY
+# define O_BINARY 0
+#endif
+
+#ifndef WILDABBR
+/*
+** Someone might make incorrect use of a time zone abbreviation:
+**	1.	They might reference tzname[0] before calling tzset (explicitly
+**		or implicitly).
+**	2.	They might reference tzname[1] before calling tzset (explicitly
+**		or implicitly).
+**	3.	They might reference tzname[1] after setting to a time zone
+**		in which Daylight Saving Time is never observed.
+**	4.	They might reference tzname[0] after setting to a time zone
+**		in which Standard Time is never observed.
+**	5.	They might reference tm.TM_ZONE after calling offtime.
+** What's best to do in the above cases is open to debate;
+** for now, we just set things up so that in any of the five cases
+** WILDABBR is used. Another possibility: initialize tzname[0] to the
+** string "tzname[0] used before set", and similarly for the other cases.
+** And another: initialize tzname[0] to "ERA", with an explanation in the
+** manual page of what this "time zone abbreviation" means (doing this so
+** that tzname[0] has the "normal" length of three characters).
+*/
+# define WILDABBR "   "
+#endif /* !defined WILDABBR */
+
+static const char	wildabbr[] = WILDABBR;
+
+static char const etc_utc[] = "Etc/UTC";
+static char const *utc = etc_utc + sizeof "Etc/" - 1;
+
+/*
+** The DST rules to use if TZ has no rules and we can't load TZDEFRULES.
+** Default to US rules as of 2017-05-07.
+** POSIX does not specify the default DST rules;
+** for historical reasons, US rules are a common default.
+*/
+#ifndef TZDEFRULESTRING
+# define TZDEFRULESTRING ",M3.2.0,M11.1.0"
+#endif
+
+struct ttinfo {				/* time type information */
+	int_fast32_t	tt_utoff;	/* UT offset in seconds */
+	bool		tt_isdst;	/* used to set tm_isdst */
+	int		tt_desigidx;	/* abbreviation list index */
+	bool		tt_ttisstd;	/* transition is std time */
+	bool		tt_ttisut;	/* transition is UT */
+};
+
+struct lsinfo {				/* leap second information */
+	time_t		ls_trans;	/* transition time */
+	int_fast32_t	ls_corr;	/* correction to apply */
+};
+
+/* This abbreviation means local time is unspecified.  */
+static char const UNSPEC[] = "-00";
+
+/* How many extra bytes are needed at the end of struct state's chars array.
+   This needs to be at least 1 for null termination in case the input
+   data isn't properly terminated, and it also needs to be big enough
+   for ttunspecified to work without crashing.  */
+enum { CHARS_EXTRA = max(sizeof UNSPEC, 2) - 1 };
+
+/* Limit to time zone abbreviation length in POSIX-style TZ strings.
+   This is distinct from TZ_MAX_CHARS, which limits TZif file contents.  */
+#ifndef TZNAME_MAXIMUM
+# define TZNAME_MAXIMUM 255
+#endif
+
+struct state {
+	int		leapcnt;	/* entries in use in lsis */
+	int		timecnt;	/* entries in use in ats and types */
+	int		typecnt;	/* entries in use in ttis */
+	int		charcnt;	/* bytes in use in chars */
+	bool		goback;		/* set by tzloadbody; see SECSPERREPEAT */
+	bool		goahead;	/* set by tzloadbody; see SECSPERREPEAT */
+	time_t		ats[TZ_MAX_TIMES];	/* transition times */
+	unsigned char	types[TZ_MAX_TIMES];	/* ttis index per transition */
+	struct ttinfo	ttis[TZ_MAX_TYPES];	/* time types */
+	char chars[max(max(TZ_MAX_CHARS + CHARS_EXTRA, sizeof "UTC"),
+		       2 * (TZNAME_MAXIMUM + 1))]; /* abbreviations, by desigidx */
+	struct lsinfo	lsis[TZ_MAX_LEAPS];	/* leap second records */
+
+	/* The time type to use for early times or if no transitions.
+	   It is always zero for recent tzdb releases.
+	   It might be nonzero for data from tzdb 2018e or earlier.  */
+	int defaulttype;
+};
+
+enum r_type {
+  JULIAN_DAY,		/* Jn = Julian day */
+  DAY_OF_YEAR,		/* n = day of year */
+  MONTH_NTH_DAY_OF_WEEK	/* Mm.n.d = month, week, day of week */
+};
+
+struct rule {
+	enum r_type	r_type;		/* type of rule */
+	int		r_day;		/* day number of rule */
+	int		r_week;		/* week number of rule */
+	int		r_mon;		/* month number of rule */
+	int_fast32_t	r_time;		/* transition time of rule */
+};
+
+static struct tm *gmtsub(struct state const *, time_t const *, int_fast32_t,
+			 struct tm *);
+static bool increment_overflow(int *, int);
+static bool increment_overflow_time(time_t *, int_fast32_t);
+static int_fast32_t leapcorr(struct state const *, time_t);
+static bool normalize_overflow32(int_fast32_t *, int *, int);
+static struct tm *timesub(time_t const *, int_fast32_t, struct state const *,
+			  struct tm *);
+static bool typesequiv(struct state const *, int, int);
+static bool tzparse(char const *, struct state *, struct state *);
+
+#ifdef ALL_STATE
+static struct state *	lclptr;
+static struct state *	gmtptr;
+#endif /* defined ALL_STATE */
+
+#ifndef ALL_STATE
+static struct state	lclmem;
+static struct state	gmtmem;
+static struct state *const lclptr = &lclmem;
+static struct state *const gmtptr = &gmtmem;
+#endif /* State Farm */
+
+#ifndef TZ_STRLEN_MAX
+# define TZ_STRLEN_MAX 255
+#endif /* !defined TZ_STRLEN_MAX */
+
+static char		lcl_TZname[TZ_STRLEN_MAX + 1];
+static int		lcl_is_set;
+
+/*
+** Section 4.12.3 of X3.159-1989 requires that
+**	Except for the strftime function, these functions [asctime,
+**	ctime, gmtime, localtime] return values in one of two static
+**	objects: a broken-down time structure and an array of char.
+** Thanks to Paul Eggert for noting this.
+**
+** This requirement was removed in C99, so support it only if requested,
+** as support is more likely to lead to bugs in badly written programs.
+*/
+
+#if SUPPORT_C89
+static struct tm	tm;
+#endif
+
+#if 2 <= HAVE_TZNAME + TZ_TIME_T
+char *			tzname[2] = {
+	(char *) wildabbr,
+	(char *) wildabbr
+};
+#endif
+#if 2 <= USG_COMPAT + TZ_TIME_T
+long			timezone;
+int			daylight;
+#endif
+#if 2 <= ALTZONE + TZ_TIME_T
+long			altzone;
+#endif
+
+/* Set *S from UTOFF, ISDST and DESIGIDX; clear its std and UT indicators.  */
+static void
+init_ttinfo(struct ttinfo *s, int_fast32_t utoff, bool isdst, int desigidx)
+{
+  s->tt_ttisut = false;
+  s->tt_ttisstd = false;
+  s->tt_desigidx = desigidx;
+  s->tt_isdst = isdst;
+  s->tt_utoff = utoff;
+}
+
+/* Is the abbreviation of SP's time type I "-00", i.e. local time unknown?  */
+static bool
+ttunspecified(struct state const *sp, int i)
+{
+  int desigidx = sp->ttis[i].tt_desigidx;
+  /* Comparing sizeof UNSPEC bytes stays inside chars: see CHARS_EXTRA.  */
+  return !memcmp(sp->chars + desigidx, UNSPEC, sizeof UNSPEC);
+}
+
+/* Decode the 4-byte big-endian two's-complement integer at CODEP.  */
+static int_fast32_t
+detzcode(const char *const codep)
+{
+	int_fast32_t const one = 1;
+	int_fast32_t const halfmax = one << (32 - 2);
+	int_fast32_t const max32 = halfmax - 1 + halfmax;
+	int_fast32_t const min32 = -1 - max32;
+	int_fast32_t value = codep[0] & 0x7f;	/* top byte, sign bit off */
+	int k;
+
+	for (k = 1; k < 4; ++k)
+		value = (value << 8) | (codep[k] & 0xff);
+
+	if (!(codep[0] & 0x80))
+		return value;
+
+	/* Do two's-complement negation even on non-two's-complement
+	   machines.  If the result would be min32 - 1, return min32.  */
+	value -= !TWOS_COMPLEMENT(int_fast32_t) && value != 0;
+	return value + min32;
+}
+
+/* Decode the 8-byte big-endian two's-complement integer at CODEP.  */
+static int_fast64_t
+detzcode64(const char *const codep)
+{
+	int_fast64_t const one = 1;
+	int_fast64_t const halfmax = one << (64 - 2);
+	int_fast64_t const max64 = halfmax - 1 + halfmax;
+	int_fast64_t const min64 = -TWOS_COMPLEMENT(int_fast64_t) - max64;
+	int_fast64_t value = codep[0] & 0x7f;	/* top byte, sign bit off */
+	int k;
+
+	for (k = 1; k < 8; ++k)
+		value = (value << 8) | (codep[k] & 0xff);
+
+	if (!(codep[0] & 0x80))
+		return value;
+
+	/* Do two's-complement negation even on non-two's-complement
+	   machines.  If the result would be min64 - 1, return min64.  */
+	value -= !TWOS_COMPLEMENT(int_fast64_t) && value != 0;
+	return value + min64;
+}
+
+static void
+update_tzname_etc(struct state const *sp, struct ttinfo const *ttisp)
+{
+#if HAVE_TZNAME
+  tzname[ttisp->tt_isdst] = (char *) &sp->chars[ttisp->tt_desigidx];
+#endif /* HAVE_TZNAME: slot 0 is for non-DST types, slot 1 for DST types */
+#if USG_COMPAT
+  if (!ttisp->tt_isdst)
+    timezone = - ttisp->tt_utoff;
+#endif /* USG_COMPAT: timezone is the negated UT offset of a non-DST type */
+#if ALTZONE
+  if (ttisp->tt_isdst)
+    altzone = - ttisp->tt_utoff;
+#endif /* ALTZONE: altzone is the negated UT offset of a DST type */
+}
+
+/* If STDDST_MASK still asks for the kind of time (standard or DST) that
+   SP's TYPE is, update tzname, timezone, and/or altzone from TYPE, and
+   clear that kind's bit in the result unless TYPE is an unspecified local
+   time.  Otherwise return STDDST_MASK unchanged.  See settzname.  */
+static int
+may_update_tzname_etc(int stddst_mask, struct state *sp, int type)
+{
+  struct ttinfo *info = &sp->ttis[type];
+  int wanted = stddst_mask & (1 << info->tt_isdst);
+  if (!wanted)
+    return stddst_mask;
+  update_tzname_etc(sp, info);
+  if (ttunspecified(sp, type))
+    return stddst_mask;
+  return stddst_mask & ~wanted;
+}
+
+static void
+settzname(void)
+{
+	register struct state * const	sp = lclptr;
+	register int			i;
+
+	/* If STDDST_MASK & 1 we need info about a standard time.
+	   If STDDST_MASK & 2 we need info about a daylight saving time.
+	   When STDDST_MASK becomes zero we can stop looking.  */
+	int stddst_mask = 0;
+
+#if HAVE_TZNAME
+	tzname[0] = tzname[1] = (char *) (sp ? wildabbr : utc);
+	stddst_mask = 3;	/* tzname wants both kinds */
+#endif
+#if USG_COMPAT
+	timezone = 0;
+	stddst_mask = 3;	/* daylight below needs the DST bit too */
+#endif
+#if ALTZONE
+	altzone = 0;
+	stddst_mask |= 2;	/* altzone wants a DST type */
+#endif
+	/*
+	** Latest first: walk the transitions backward, then all time types.
+	*/
+	if (sp) {
+	  for (i = sp->timecnt - 1; stddst_mask && 0 <= i; i--)
+	    stddst_mask = may_update_tzname_etc(stddst_mask, sp, sp->types[i]);
+	  for (i = sp->typecnt - 1; stddst_mask && 0 <= i; i--)
+	    stddst_mask = may_update_tzname_etc(stddst_mask, sp, i);
+	}
+#if USG_COMPAT
+	daylight = stddst_mask >> 1 ^ 1;	/* 1 iff the DST bit got cleared */
+#endif
+}
+
+/* Reject time zone abbreviations longer than TZNAME_MAXIMUM (returning
+   EOVERFLOW), then replace bogus characters in the rest and return 0.  */
+static int
+scrub_abbrs(struct state *sp)
+{
+	char *cp;
+	int pos = 0;
+
+	/* Reject overlong abbreviations.  */
+	while (pos < sp->charcnt - (TZNAME_MAXIMUM + 1)) {
+	  int len = strlen(&sp->chars[pos]);
+	  if (TZNAME_MAXIMUM < len)
+	    return EOVERFLOW;
+	  pos += len + 1;
+	}
+
+	/* Replace bogus characters.  strchr also matches the '\0' that ends
+	   TZ_ABBR_CHAR_SET, so null bytes in chars are left alone.  */
+	for (cp = sp->chars; cp < sp->chars + sp->charcnt; cp++)
+		if (!strchr(TZ_ABBR_CHAR_SET, *cp))
+			*cp = TZ_ABBR_ERR_CHAR;
+	return 0;
+}
+
+/* Input buffer for data read from a compiled tz file.  */
+union input_buffer {
+  /* The first part of the buffer, interpreted as a header.  */
+  struct tzhead tzhead;
+
+  /* The entire buffer.  */
+  char buf[2 * sizeof(struct tzhead) + 2 * sizeof(struct state)
+	   + 4 * TZ_MAX_TIMES];
+};
+
+/* TZDIR with a trailing '/' rather than a trailing '\0'.  */
+static char const tzdirslash[sizeof TZDIR] = TZDIR "/";
+
+/* Local storage needed for 'tzloadbody'.  */
+union local_storage {
+  /* The results of analyzing the file's contents after it is opened.  */
+  struct file_analysis {
+    /* The input buffer.  */
+    union input_buffer u;
+
+    /* A temporary state used for parsing a TZ string in the file.  */
+    struct state st;
+  } u;
+
+  /* The file name to be opened.  */
+  char fullname[max(sizeof(struct file_analysis), sizeof tzdirslash + 1024)];
+};
+
+/* Load tz data from the file named NAME into *SP.  Read extended
+   format if DOEXTEND.  Use *LSP for temporary storage.  Return 0 on
+   success, an errno value on failure.  */
+static int
+tzloadbody(char const *name, struct state *sp, bool doextend,
+	   union local_storage *lsp)
+{
+	register int			i;
+	register int			fid;	/* descriptor from open */
+	register int			stored;	/* bytes per time value: 4, then 8 */
+	register ssize_t		nread;	/* bytes read, later bytes left */
+	register bool doaccess;		/* call access before open? */
+	register union input_buffer *up = &lsp->u.u;
+	register int tzheadsize = sizeof(struct tzhead);
+
+	sp->goback = sp->goahead = false;
+
+	if (! name) {
+		name = TZDEFAULT;
+		if (! name)
+		  return EINVAL;
+	}
+
+	if (name[0] == ':')
+		++name;
+#ifdef SUPPRESS_TZDIR
+	/* Do not prepend TZDIR.  This is intended for specialized
+	   applications only, due to its security implications.  */
+	doaccess = true;
+#else
+	doaccess = name[0] == '/';
+#endif
+	if (!doaccess) {
+		char const *dot;
+		if (sizeof lsp->fullname - sizeof tzdirslash <= strlen(name))
+		  return ENAMETOOLONG;
+
+		/* Create a string "TZDIR/NAME".  Using sprintf here
+		   would pull in stdio (and would fail if the
+		   resulting string length exceeded INT_MAX!).  */
+		memcpy(lsp->fullname, tzdirslash, sizeof tzdirslash);
+		strcpy(lsp->fullname + sizeof tzdirslash, name);
+
+		/* Set doaccess if NAME contains a ".." file name
+		   component, as such a name could read a file outside
+		   the TZDIR virtual subtree.  */
+		for (dot = name; (dot = strchr(dot, '.')); dot++)
+		  if ((dot == name || dot[-1] == '/') && dot[1] == '.'
+		      && (dot[2] == '/' || !dot[2])) {
+		    doaccess = true;
+		    break;
+		  }
+
+		name = lsp->fullname;
+	}
+	if (doaccess && access(name, R_OK) != 0)
+	  return errno;
+	fid = open(name, O_RDONLY | O_BINARY);
+	if (fid < 0)
+	  return errno;
+
+	nread = read(fid, up->buf, sizeof up->buf);
+	if (nread < tzheadsize) {
+	  int err = nread < 0 ? errno : EINVAL;
+	  close(fid);
+	  return err;
+	}
+	if (close(fid) < 0)
+	  return errno;
+	for (stored = 4; stored <= 8; stored *= 2) {
+	    char version = up->tzhead.tzh_version[0];
+	    bool skip_datablock = stored == 4 && version;
+	    int_fast32_t datablock_size;
+	    int_fast32_t ttisstdcnt = detzcode(up->tzhead.tzh_ttisstdcnt);
+	    int_fast32_t ttisutcnt = detzcode(up->tzhead.tzh_ttisutcnt);
+	    int_fast64_t prevtr = -1;
+	    int_fast32_t prevcorr;
+	    int_fast32_t leapcnt = detzcode(up->tzhead.tzh_leapcnt);
+	    int_fast32_t timecnt = detzcode(up->tzhead.tzh_timecnt);
+	    int_fast32_t typecnt = detzcode(up->tzhead.tzh_typecnt);
+	    int_fast32_t charcnt = detzcode(up->tzhead.tzh_charcnt);
+	    char const *p = up->buf + tzheadsize;
+	    /* Although tzfile(5) currently requires typecnt to be nonzero,
+	       support future formats that may allow zero typecnt
+	       in files that have a TZ string and no transitions.  */
+	    if (! (0 <= leapcnt && leapcnt < TZ_MAX_LEAPS
+		   && 0 <= typecnt && typecnt < TZ_MAX_TYPES
+		   && 0 <= timecnt && timecnt < TZ_MAX_TIMES
+		   && 0 <= charcnt && charcnt < TZ_MAX_CHARS
+		   && 0 <= ttisstdcnt && ttisstdcnt < TZ_MAX_TYPES
+		   && 0 <= ttisutcnt && ttisutcnt < TZ_MAX_TYPES))
+	      return EINVAL;
+	    datablock_size
+		    = (timecnt * stored		/* ats */
+		       + timecnt		/* types */
+		       + typecnt * 6		/* ttinfos */
+		       + charcnt		/* chars */
+		       + leapcnt * (stored + 4)	/* lsinfos */
+		       + ttisstdcnt		/* ttisstds */
+		       + ttisutcnt);		/* ttisuts */
+	    if (nread < tzheadsize + datablock_size)
+	      return EINVAL;
+	    if (skip_datablock)
+		p += datablock_size;
+	    else {
+		if (! ((ttisstdcnt == typecnt || ttisstdcnt == 0)
+		       && (ttisutcnt == typecnt || ttisutcnt == 0)))
+		  return EINVAL;
+
+		sp->leapcnt = leapcnt;
+		sp->timecnt = timecnt;
+		sp->typecnt = typecnt;
+		sp->charcnt = charcnt;
+
+		/* Read transitions, discarding those out of time_t range.
+		   But pretend the last transition before TIME_T_MIN
+		   occurred at TIME_T_MIN.  */
+		timecnt = 0;
+		for (i = 0; i < sp->timecnt; ++i) {
+			int_fast64_t at
+			  = stored == 4 ? detzcode(p) : detzcode64(p);
+			sp->types[i] = at <= TIME_T_MAX;
+			if (sp->types[i]) {
+			  time_t attime
+			    = ((TYPE_SIGNED(time_t) ? at < TIME_T_MIN : at < 0)
+			       ? TIME_T_MIN : at);
+			  if (timecnt && attime <= sp->ats[timecnt - 1]) {
+			    if (attime < sp->ats[timecnt - 1])
+			      return EINVAL;
+			    sp->types[i - 1] = 0;
+			    timecnt--;
+			  }
+			  sp->ats[timecnt++] = attime;
+			}
+			p += stored;
+		}
+
+		timecnt = 0;
+		for (i = 0; i < sp->timecnt; ++i) {
+			unsigned char typ = *p++;
+			if (sp->typecnt <= typ)
+			  return EINVAL;
+			if (sp->types[i])
+				sp->types[timecnt++] = typ;
+		}
+		sp->timecnt = timecnt;
+		for (i = 0; i < sp->typecnt; ++i) {
+			register struct ttinfo *	ttisp;
+			unsigned char isdst, desigidx;
+
+			ttisp = &sp->ttis[i];
+			ttisp->tt_utoff = detzcode(p);
+			p += 4;
+			isdst = *p++;
+			if (! (isdst < 2))
+			  return EINVAL;
+			ttisp->tt_isdst = isdst;
+			desigidx = *p++;
+			if (! (desigidx < sp->charcnt))
+			  return EINVAL;
+			ttisp->tt_desigidx = desigidx;
+		}
+		for (i = 0; i < sp->charcnt; ++i)
+			sp->chars[i] = *p++;
+		/* Ensure '\0'-terminated, and make it safe to call
+		   ttunspecified later.  */
+		memset(&sp->chars[i], 0, CHARS_EXTRA);
+
+		/* Read leap seconds, discarding those out of time_t range.  */
+		leapcnt = 0;
+		for (i = 0; i < sp->leapcnt; ++i) {
+		  int_fast64_t tr = stored == 4 ? detzcode(p) : detzcode64(p);
+		  int_fast32_t corr = detzcode(p + stored);
+		  p += stored + 4;
+
+		  /* Leap seconds cannot occur before the Epoch,
+		     or out of order.  */
+		  if (tr <= prevtr)
+		    return EINVAL;
+
+		  /* To avoid other botches in this code, each leap second's
+		     correction must differ from the previous one's by 1
+		     second or less, except that the first correction can be
+		     any value; these requirements are more generous than
+		     RFC 8536, to allow future RFC extensions.  */
+		  if (! (i == 0
+			 || (prevcorr < corr
+			     ? corr == prevcorr + 1
+			     : (corr == prevcorr
+				|| corr == prevcorr - 1))))
+		    return EINVAL;
+		  prevtr = tr;
+		  prevcorr = corr;
+
+		  if (tr <= TIME_T_MAX) {
+		    sp->lsis[leapcnt].ls_trans = tr;
+		    sp->lsis[leapcnt].ls_corr = corr;
+		    leapcnt++;
+		  }
+		}
+		sp->leapcnt = leapcnt;
+
+		for (i = 0; i < sp->typecnt; ++i) {
+			register struct ttinfo *	ttisp;
+
+			ttisp = &sp->ttis[i];
+			if (ttisstdcnt == 0)
+				ttisp->tt_ttisstd = false;
+			else {
+				if (*p != true && *p != false)
+				  return EINVAL;
+				ttisp->tt_ttisstd = *p++;
+			}
+		}
+		for (i = 0; i < sp->typecnt; ++i) {
+			register struct ttinfo *	ttisp;
+
+			ttisp = &sp->ttis[i];
+			if (ttisutcnt == 0)
+				ttisp->tt_ttisut = false;
+			else {
+				if (*p != true && *p != false)
+						return EINVAL;
+				ttisp->tt_ttisut = *p++;
+			}
+		}
+	    }
+
+	    nread -= p - up->buf;	/* drop what was consumed ... */
+	    memmove(up->buf, p, nread);	/* ... and move the rest to the front */
+
+	    /* If this is an old file, we're done.  */
+	    if (!version)
+	      break;
+	}
+	if (doextend && nread > 2 &&
+		up->buf[0] == '\n' && up->buf[nread - 1] == '\n' &&
+		sp->typecnt + 2 <= TZ_MAX_TYPES) {
+			struct state	*ts = &lsp->u.st;
+
+			up->buf[nread - 1] = '\0';
+			if (tzparse(&up->buf[1], ts, sp)) {
+
+			  /* Attempt to reuse existing abbreviations.
+			     Without this, America/Anchorage would be right on
+			     the edge after 2037 when TZ_MAX_CHARS is 50, as
+			     sp->charcnt equals 40 (for LMT AST AWT APT AHST
+			     AHDT YST AKDT AKST) and ts->charcnt equals 10
+			     (for AKST AKDT).  Reusing means sp->charcnt can
+			     stay 40 in this example.  */
+			  int gotabbr = 0;
+			  int charcnt = sp->charcnt;
+			  for (i = 0; i < ts->typecnt; i++) {
+			    char *tsabbr = ts->chars + ts->ttis[i].tt_desigidx;
+			    int j;
+			    for (j = 0; j < charcnt; j++)
+			      if (strcmp(sp->chars + j, tsabbr) == 0) {
+				ts->ttis[i].tt_desigidx = j;
+				gotabbr++;
+				break;
+			      }
+			    if (! (j < charcnt)) {
+			      int tsabbrlen = strlen(tsabbr);
+			      if (j + tsabbrlen < TZ_MAX_CHARS) {
+				strcpy(sp->chars + j, tsabbr);
+				charcnt = j + tsabbrlen + 1;
+				ts->ttis[i].tt_desigidx = j;
+				gotabbr++;
+			      }
+			    }
+			  }
+			  if (gotabbr == ts->typecnt) {
+			    sp->charcnt = charcnt;
+
+			    /* Ignore any trailing, no-op transitions generated
+			       by zic as they don't help here and can run afoul
+			       of bugs in zic 2016j or earlier.  */
+			    while (1 < sp->timecnt
+				   && (sp->types[sp->timecnt - 1]
+				       == sp->types[sp->timecnt - 2]))
+			      sp->timecnt--;
+
+			    for (i = 0;
+				 i < ts->timecnt && sp->timecnt < TZ_MAX_TIMES;
+				 i++) {
+			      time_t t = ts->ats[i];
+			      if (increment_overflow_time(&t, leapcorr(sp, t))
+				  || (0 < sp->timecnt
+				      && t <= sp->ats[sp->timecnt - 1]))
+				continue;
+			      sp->ats[sp->timecnt] = t;
+			      sp->types[sp->timecnt] = (sp->typecnt
+							+ ts->types[i]);
+			      sp->timecnt++;
+			    }
+			    for (i = 0; i < ts->typecnt; i++)
+			      sp->ttis[sp->typecnt++] = ts->ttis[i];
+			  }
+			}
+	}
+	if (sp->typecnt == 0)
+	  return EINVAL;
+	if (sp->timecnt > 1) {
+	    if (sp->ats[0] <= TIME_T_MAX - SECSPERREPEAT) {
+		time_t repeatat = sp->ats[0] + SECSPERREPEAT;
+		int repeattype = sp->types[0];
+		for (i = 1; i < sp->timecnt; ++i)
+		  if (sp->ats[i] == repeatat
+		      && typesequiv(sp, sp->types[i], repeattype)) {
+					sp->goback = true;
+					break;
+		  }
+	    }
+	    if (TIME_T_MIN + SECSPERREPEAT <= sp->ats[sp->timecnt - 1]) {
+		time_t repeatat = sp->ats[sp->timecnt - 1] - SECSPERREPEAT;
+		int repeattype = sp->types[sp->timecnt - 1];
+		for (i = sp->timecnt - 2; i >= 0; --i)
+		  if (sp->ats[i] == repeatat
+		      && typesequiv(sp, sp->types[i], repeattype)) {
+					sp->goahead = true;
+					break;
+		  }
+	    }
+	}
+
+	/* Infer sp->defaulttype from the data.  Although this default
+	   type is always zero for data from recent tzdb releases,
+	   things are trickier for data from tzdb 2018e or earlier.
+
+	   The first set of heuristics work around bugs in 32-bit data
+	   generated by tzdb 2013c or earlier.  The workaround is for
+	   zones like Australia/Macquarie where timestamps before the
+	   first transition have a time type that is not the earliest
+	   standard-time type.  See:
+	   https://mm.icann.org/pipermail/tz/2013-May/019368.html */
+	/*
+	** If type 0 does not specify local time, or is unused in transitions,
+	** it's the type to use for early times.
+	*/
+	for (i = 0; i < sp->timecnt; ++i)
+		if (sp->types[i] == 0)
+			break;
+	i = i < sp->timecnt && ! ttunspecified(sp, 0) ? -1 : 0;
+	/*
+	** Absent the above,
+	** if there are transition times
+	** and the first transition is to a daylight time
+	** find the standard type less than and closest to
+	** the type of the first transition.
+	*/
+	if (i < 0 && sp->timecnt > 0 && sp->ttis[sp->types[0]].tt_isdst) {
+		i = sp->types[0];
+		while (--i >= 0)
+			if (!sp->ttis[i].tt_isdst)
+				break;
+	}
+	/* The next heuristics are for data generated by tzdb 2018e or
+	   earlier, for zones like EST5EDT where the first transition
+	   is to DST.  */
+	/*
+	** If no result yet, find the first standard type.
+	** If there is none, punt to type zero.
+	*/
+	if (i < 0) {
+		i = 0;
+		while (sp->ttis[i].tt_isdst)
+			if (++i >= sp->typecnt) {
+				i = 0;
+				break;
+			}
+	}
+	/* A simple 'sp->defaulttype = 0;' would suffice here if we
+	   didn't have to worry about 2018e-or-earlier data.  Even
+	   simpler would be to remove the defaulttype member and just
+	   use 0 in its place.  */
+	sp->defaulttype = i;
+
+	return 0;
+}
+
+/* Load tz data from the file named NAME into *SP, reading the extended
+   format if DOEXTEND.  Return 0 on success, an errno value on failure.  */
+static int
+tzload(char const *name, struct state *sp, bool doextend)
+{
+#ifdef ALL_STATE
+  int status;
+  union local_storage *scratch = tz_malloc(sizeof *scratch);
+
+  if (scratch == NULL)
+    return HAVE_MALLOC_ERRNO ? errno : ENOMEM;
+  status = tzloadbody(name, sp, doextend, scratch);
+  tz_free(scratch);
+  return status;
+#else
+  union local_storage scratch;
+  return tzloadbody(name, sp, doextend, &scratch);
+#endif
+}
+
+/* Return true if time types A and B of SP are both valid and agree in UT
+   offset, DST flag and abbreviation text.  */
+static bool
+typesequiv(const struct state *sp, int a, int b)
+{
+	const struct ttinfo *ta, *tb;
+
+	if (sp == NULL)
+		return false;
+	if (a < 0 || sp->typecnt <= a || b < 0 || sp->typecnt <= b)
+		return false;
+
+	/* tt_ttisstd and tt_ttisut are deliberately left out: they are
+	   irrelevant now, and counting them could cause sp->goahead to
+	   mistakenly remain false.  */
+	ta = &sp->ttis[a];
+	tb = &sp->ttis[b];
+	if (ta->tt_utoff != tb->tt_utoff)
+		return false;
+	if (ta->tt_isdst != tb->tt_isdst)
+		return false;
+	return strcmp(&sp->chars[ta->tt_desigidx],
+		      &sp->chars[tb->tt_desigidx]) == 0;
+}
+
+static const int	mon_lengths[2][MONSPERYEAR] = {	/* [leap year?][month] */
+	{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },	/* common year */
+	{ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }	/* leap year */
+};
+
+static const int	year_lengths[2] = {	/* days in a year, by leap flag */
+	DAYSPERNYEAR, DAYSPERLYEAR
+};
+
+/* Return true if C is one of the ASCII digits '0' through '9'.  */
+static bool
+is_digit(char c)
+{
+  return ! (c < '0' || '9' < c);
+}
+
+/*
+** Scan a timezone string from STRP and return a pointer to the first
+** character that cannot belong to an unquoted time zone abbreviation:
+** a digit, ',', '-', '+', or the terminating NUL.
+*/
+
+ATTRIBUTE_REPRODUCIBLE static const char *
+getzname(register const char *strp)
+{
+	for ( ; *strp != '\0'; ++strp) {
+		register char	ch = *strp;
+		if (is_digit(ch) || ch == ',' || ch == '-' || ch == '+')
+			break;
+	}
+	return strp;
+}
+
+/*
+** Given a pointer into an extended (quoted) timezone string, scan forward
+** to the delimiter DELIM that ends the time zone abbreviation.
+** Return a pointer to that delimiter, or to the terminating NUL if the
+** delimiter is missing.
+**
+** As with getzname, the legal character set is quite restricted, but no
+** checking is done here; it is done later in common-case code.
+*/
+
+ATTRIBUTE_REPRODUCIBLE static const char *
+getqzname(register const char *strp, const int delim)
+{
+	register const char *	p;
+
+	for (p = strp; *p != '\0' && *p != delim; ++p)
+		continue;
+	return p;
+}
+
+/*
+** Parse an unsigned decimal number at STRP and store it in *NUMP.
+** Return a pointer to the first character after the number, or NULL if
+** STRP is null, does not start with a digit, or the value lies outside
+** the range MIN..MAX.  *NUMP is written only on success.
+*/
+
+static const char *
+getnum(register const char *strp, int *const nump, const int min, const int max)
+{
+	register int	value = 0;
+
+	if (strp == NULL)
+		return NULL;
+	if (!is_digit(*strp))
+		return NULL;
+	for ( ; is_digit(*strp); ++strp) {
+		value = value * 10 + (*strp - '0');
+		/* Reject as soon as MAX is exceeded, before reading on.  */
+		if (value > max)
+			return NULL;	/* illegal value */
+	}
+	if (value < min)
+		return NULL;		/* illegal value */
+	*nump = value;
+	return strp;
+}
+
+/*
+** Given a pointer into a timezone string, extract a number of seconds,
+** in hh[:mm[:ss]] form, from the string and store it in *SECSP.
+** Hours may range up to one week less an hour (see below).
+** If any error occurs, return NULL; *SECSP may then hold a partial
+** result, so callers should ignore it.
+** Otherwise, return a pointer to the first character not part of the
+** number of seconds.
+*/
+
+static const char *
+getsecs(register const char *strp, int_fast32_t *const secsp)
+{
+	int_fast32_t const secsperhour = SECSPERHOUR;
+	int	field;
+
+	/*
+	** The hour limit 'HOURSPERDAY * DAYSPERWEEK - 1' allows quasi-Posix
+	** rules like "M10.4.6/26", which does not conform to Posix,
+	** but which specifies the equivalent of
+	** "02:00 on the first Sunday on or after 23 Oct".
+	*/
+	strp = getnum(strp, &field, 0, HOURSPERDAY * DAYSPERWEEK - 1);
+	if (strp == NULL)
+		return NULL;
+	*secsp = field * secsperhour;
+	if (*strp != ':')
+		return strp;		/* hours only */
+	strp = getnum(strp + 1, &field, 0, MINSPERHOUR - 1);
+	if (strp == NULL)
+		return NULL;
+	*secsp += field * SECSPERMIN;
+	if (*strp != ':')
+		return strp;		/* hours and minutes */
+	/* 'SECSPERMIN' allows for leap seconds.  */
+	strp = getnum(strp + 1, &field, 0, SECSPERMIN);
+	if (strp == NULL)
+		return NULL;
+	*secsp += field;
+	return strp;
+}
+
+/*
+** Given a pointer into a timezone string, extract an offset, in
+** [+-]hh[:mm[:ss]] form, from the string and store it, in seconds,
+** in *OFFSETP; a leading '-' negates the value.
+** If any error occurs, return NULL.
+** Otherwise, return a pointer to the first character not part of the
+** offset.
+*/
+
+static const char *
+getoffset(register const char *strp, int_fast32_t *const offsetp)
+{
+	register bool const negative = *strp == '-';
+
+	/* Step over an explicit sign, if any.  */
+	if (negative || *strp == '+')
+		++strp;
+	strp = getsecs(strp, offsetp);
+	if (strp == NULL)
+		return NULL;		/* illegal time */
+	if (negative)
+		*offsetp = -*offsetp;
+	return strp;
+}
+
+/*
+** Given a pointer into a timezone string, extract a rule in the form
+** date[/time] into *RULEP.  See POSIX section 8 for the format of "date"
+** and "time".  If a valid rule is not found, return NULL.
+** Otherwise, return a pointer to the first character not part of the rule.
+*/
+
+static const char *
+getrule(const char *strp, register struct rule *const rulep)
+{
+	switch (*strp) {
+	case 'J':
+		/*
+		** Julian day.
+		*/
+		rulep->r_type = JULIAN_DAY;
+		strp = getnum(strp + 1, &rulep->r_day, 1, DAYSPERNYEAR);
+		break;
+	case 'M':
+		/*
+		** Month, week, day.
+		*/
+		rulep->r_type = MONTH_NTH_DAY_OF_WEEK;
+		strp = getnum(strp + 1, &rulep->r_mon, 1, MONSPERYEAR);
+		if (strp == NULL || *strp++ != '.')
+			return NULL;
+		strp = getnum(strp, &rulep->r_week, 1, 5);
+		if (strp == NULL || *strp++ != '.')
+			return NULL;
+		strp = getnum(strp, &rulep->r_day, 0, DAYSPERWEEK - 1);
+		break;
+	default:
+		if (!is_digit(*strp))
+			return NULL;		/* invalid format */
+		/*
+		** Day of year.
+		*/
+		rulep->r_type = DAY_OF_YEAR;
+		strp = getnum(strp, &rulep->r_day, 0, DAYSPERLYEAR - 1);
+		break;
+	}
+	if (strp == NULL)
+		return NULL;
+	if (*strp != '/') {
+		rulep->r_time = 2 * SECSPERHOUR;	/* default = 2:00:00 */
+		return strp;
+	}
+	/*
+	** Time specified.
+	*/
+	return getoffset(strp + 1, &rulep->r_time);
+}
+
+/*
+** Given a year, a rule, and the offset from UT at the time that rule takes
+** effect, return the rule's effect time in seconds after 00:00 UT, January 1.
+*/
+
+static int_fast32_t
+transtime(const int year, register const struct rule *const rulep,
+	  const int_fast32_t offset)
+{
+	register bool	leapyear;
+	register int_fast32_t value;
+	register int	i;
+	int		d, m1, yy0, yy1, yy2, dow;
+
+	leapyear = isleap(year);
+	switch (rulep->r_type) {
+
+	case JULIAN_DAY:
+		/*
+		** Jn - Julian day, 1 == January 1, 60 == March 1 even in leap
+		** years.
+		** In non-leap years, or if the day number is 59 or less, just
+		** add SECSPERDAY times the day number-1 to the time of
+		** January 1, midnight, to get the day.
+		*/
+		value = (rulep->r_day - 1) * SECSPERDAY;
+		if (leapyear && rulep->r_day >= 60)
+			value += SECSPERDAY;	/* step over February 29 */
+		break;
+
+	case DAY_OF_YEAR:
+		/*
+		** n - day of year.
+		** Just add SECSPERDAY times the day number to the time of
+		** January 1, midnight, to get the day.
+		*/
+		value = rulep->r_day * SECSPERDAY;
+		break;
+
+	case MONTH_NTH_DAY_OF_WEEK:
+		/*
+		** Mm.n.d - nth "dth day" of month m.
+		*/
+
+		/*
+		** Use Zeller's Congruence to get day-of-week of first day of
+		** month.
+		*/
+		m1 = (rulep->r_mon + 9) % 12 + 1;	/* month, March == 1 */
+		yy0 = (rulep->r_mon <= 2) ? (year - 1) : year;	/* Jan, Feb: prior year */
+		yy1 = yy0 / 100;	/* century part */
+		yy2 = yy0 % 100;	/* year within the century */
+		dow = ((26 * m1 - 2) / 10 +
+			1 + yy2 + yy2 / 4 + yy1 / 4 - 2 * yy1) % 7;
+		if (dow < 0)
+			dow += DAYSPERWEEK;
+
+		/*
+		** "dow" is the day-of-week of the first day of the month. Get
+		** the day-of-month (zero-origin) of the first "dow" day of the
+		** month.
+		*/
+		d = rulep->r_day - dow;
+		if (d < 0)
+			d += DAYSPERWEEK;
+		for (i = 1; i < rulep->r_week; ++i) {	/* advance to week r_week */
+			if (d + DAYSPERWEEK >=
+				mon_lengths[leapyear][rulep->r_mon - 1])
+					break;	/* stay on the month's last such day */
+			d += DAYSPERWEEK;
+		}
+
+		/*
+		** "d" is the day-of-month (zero-origin) of the day we want.
+		*/
+		value = d * SECSPERDAY;
+		for (i = 0; i < rulep->r_mon - 1; ++i)	/* add the earlier months */
+			value += mon_lengths[leapyear][i] * SECSPERDAY;
+		break;
+
+	default: unreachable();
+	}
+
+	/*
+	** "value" is the year-relative time of 00:00:00 UT on the day in
+	** question. To get the year-relative time of the specified local
+	** time on that day, add the transition time and the current offset
+	** from UT.
+	*/
+	return value + rulep->r_time + offset;
+}
+
+/*
+** Given a POSIX section 8-style TZ string NAME, fill in the rule tables of
+** *SP as appropriate.  Return true on success, false if NAME is malformed.
+*/
+
+static bool
+tzparse(const char *name, struct state *sp, struct state *basep)
+{
+	const char *			stdname;
+	const char *			dstname;
+	int_fast32_t			stdoffset;
+	int_fast32_t			dstoffset;
+	register char *			cp;
+	register bool			load_ok;
+	ptrdiff_t stdlen, dstlen, charcnt;
+	time_t atlo = TIME_T_MIN, leaplo = TIME_T_MIN;
+
+	stdname = name;
+	if (*name == '<') {	/* quoted form: <abbr> */
+	  name++;
+	  stdname = name;
+	  name = getqzname(name, '>');
+	  if (*name != '>')
+	    return false;
+	  stdlen = name - stdname;
+	  name++;
+	} else {
+	  name = getzname(name);
+	  stdlen = name - stdname;
+	}
+	if (! (0 < stdlen && stdlen <= TZNAME_MAXIMUM))
+	  return false;
+	name = getoffset(name, &stdoffset);
+	if (name == NULL)
+	  return false;
+	charcnt = stdlen + 1;	/* the abbreviation plus its NUL */
+	if (basep) {
+	  if (0 < basep->timecnt)
+	    atlo = basep->ats[basep->timecnt - 1];	/* BASEP's last transition */
+	  load_ok = false;
+	  sp->leapcnt = basep->leapcnt;
+	  memcpy(sp->lsis, basep->lsis, sp->leapcnt * sizeof *sp->lsis);
+	} else {
+	  load_ok = tzload(TZDEFRULES, sp, false) == 0;
+	  if (!load_ok)
+	    sp->leapcnt = 0;	/* So, we're off a little.  */
+	}
+	if (0 < sp->leapcnt)
+	  leaplo = sp->lsis[sp->leapcnt - 1].ls_trans;	/* last leap second */
+	if (*name != '\0') {
+		if (*name == '<') {
+			dstname = ++name;
+			name = getqzname(name, '>');
+			if (*name != '>')
+			  return false;
+			dstlen = name - dstname;
+			name++;
+		} else {
+			dstname = name;
+			name = getzname(name);
+			dstlen = name - dstname; /* length of DST abbr. */
+		}
+		if (! (0 < dstlen && dstlen <= TZNAME_MAXIMUM))
+		  return false;
+		charcnt += dstlen + 1;
+		if (*name != '\0' && *name != ',' && *name != ';') {
+			name = getoffset(name, &dstoffset);
+			if (name == NULL)
+			  return false;
+		} else	dstoffset = stdoffset - SECSPERHOUR;	/* default: 1 h ahead */
+		if (*name == '\0' && !load_ok)
+			name = TZDEFRULESTRING;
+		if (*name == ',' || *name == ';') {	/* explicit start,end rules */
+			struct rule	start;
+			struct rule	end;
+			register int	year;
+			register int	timecnt;
+			time_t		janfirst;
+			int_fast32_t janoffset = 0;
+			int yearbeg, yearlim;
+
+			++name;
+			if ((name = getrule(name, &start)) == NULL)
+			  return false;
+			if (*name++ != ',')
+			  return false;
+			if ((name = getrule(name, &end)) == NULL)
+			  return false;
+			if (*name != '\0')
+			  return false;
+			sp->typecnt = 2;	/* standard time and DST */
+			/*
+			** Two transitions per year, from EPOCH_YEAR forward.
+			*/
+			init_ttinfo(&sp->ttis[0], -stdoffset, false, 0);
+			init_ttinfo(&sp->ttis[1], -dstoffset, true, stdlen + 1);
+			sp->defaulttype = 0;
+			timecnt = 0;
+			janfirst = 0;
+			yearbeg = EPOCH_YEAR;
+
+			do {	/* step JANFIRST back a year at a time */
+			  int_fast32_t yearsecs
+			    = year_lengths[isleap(yearbeg - 1)] * SECSPERDAY;
+			  yearbeg--;
+			  if (increment_overflow_time(&janfirst, -yearsecs)) {
+			    janoffset = -yearsecs;
+			    break;
+			  }
+			} while (atlo < janfirst
+				 && EPOCH_YEAR - YEARSPERREPEAT / 2 < yearbeg);
+
+			while (true) {	/* step forward while still before ATLO */
+			  int_fast32_t yearsecs
+			    = year_lengths[isleap(yearbeg)] * SECSPERDAY;
+			  int yearbeg1 = yearbeg;
+			  time_t janfirst1 = janfirst;
+			  if (increment_overflow_time(&janfirst1, yearsecs)
+			      || increment_overflow(&yearbeg1, 1)
+			      || atlo <= janfirst1)
+			    break;
+			  yearbeg = yearbeg1;
+			  janfirst = janfirst1;
+			}
+
+			yearlim = yearbeg;
+			if (increment_overflow(&yearlim, YEARSPERREPEAT + 1))
+			  yearlim = INT_MAX;
+			for (year = yearbeg; year < yearlim; year++) {
+				int_fast32_t
+				  starttime = transtime(year, &start, stdoffset),
+				  endtime = transtime(year, &end, dstoffset);
+				int_fast32_t
+				  yearsecs = (year_lengths[isleap(year)]
+					      * SECSPERDAY);
+				bool reversed = endtime < starttime;
+				if (reversed) {
+					int_fast32_t swap = starttime;
+					starttime = endtime;
+					endtime = swap;
+				}
+				if (reversed
+				    || (starttime < endtime
+					&& endtime - starttime < yearsecs)) {
+					if (TZ_MAX_TIMES - 2 < timecnt)
+						break;	/* no room for two more */
+					sp->ats[timecnt] = janfirst;
+					if (! increment_overflow_time
+					    (&sp->ats[timecnt],
+					     janoffset + starttime)
+					    && atlo <= sp->ats[timecnt])
+					  sp->types[timecnt++] = !reversed;
+					sp->ats[timecnt] = janfirst;
+					if (! increment_overflow_time
+					    (&sp->ats[timecnt],
+					     janoffset + endtime)
+					    && atlo <= sp->ats[timecnt]) {
+					  sp->types[timecnt++] = reversed;
+					}
+				}
+				if (endtime < leaplo) {
+				  yearlim = year;
+				  if (increment_overflow(&yearlim,
+							 YEARSPERREPEAT + 1))
+				    yearlim = INT_MAX;
+				}
+				if (increment_overflow_time
+				    (&janfirst, janoffset + yearsecs))
+					break;
+				janoffset = 0;
+			}
+			sp->timecnt = timecnt;
+			if (! timecnt) {
+				sp->ttis[0] = sp->ttis[1];
+				sp->typecnt = 1;	/* Perpetual DST.  */
+			} else if (YEARSPERREPEAT < year - yearbeg)
+				sp->goback = sp->goahead = true;
+		} else {	/* no rules given: adapt the loaded transitions */
+			register int_fast32_t	theirstdoffset;
+			register int_fast32_t	theirdstoffset;
+			register int_fast32_t	theiroffset;
+			register bool		isdst;
+			register int		i;
+			register int		j;
+
+			if (*name != '\0')
+			  return false;
+			/*
+			** Initial values of theirstdoffset and theirdstoffset.
+			*/
+			theirstdoffset = 0;
+			for (i = 0; i < sp->timecnt; ++i) {
+				j = sp->types[i];
+				if (!sp->ttis[j].tt_isdst) {
+					theirstdoffset =
+						- sp->ttis[j].tt_utoff;
+					break;
+				}
+			}
+			theirdstoffset = 0;
+			for (i = 0; i < sp->timecnt; ++i) {
+				j = sp->types[i];
+				if (sp->ttis[j].tt_isdst) {
+					theirdstoffset =
+						- sp->ttis[j].tt_utoff;
+					break;
+				}
+			}
+			/*
+			** Initially we're assumed to be in standard time.
+			*/
+			isdst = false;
+			/*
+			** Now juggle transition times and types
+			** tracking offsets as you do.
+			*/
+			for (i = 0; i < sp->timecnt; ++i) {
+				j = sp->types[i];
+				sp->types[i] = sp->ttis[j].tt_isdst;
+				if (sp->ttis[j].tt_ttisut) {
+					/* No adjustment to transition time */
+				} else {
+					/*
+					** If daylight saving time is in
+					** effect, and the transition time was
+					** not specified as standard time, add
+					** the daylight saving time offset to
+					** the transition time; otherwise, add
+					** the standard time offset to the
+					** transition time.
+					*/
+					/*
+					** Transitions from DST to DDST
+					** will effectively disappear since
+					** POSIX provides for only one DST
+					** offset.
+					*/
+					if (isdst && !sp->ttis[j].tt_ttisstd) {
+						sp->ats[i] += dstoffset -
+							theirdstoffset;
+					} else {
+						sp->ats[i] += stdoffset -
+							theirstdoffset;
+					}
+				}
+				theiroffset = -sp->ttis[j].tt_utoff;
+				if (sp->ttis[j].tt_isdst)
+					theirdstoffset = theiroffset;
+				else	theirstdoffset = theiroffset;
+			}
+			/*
+			** Finally, fill in ttis.
+			*/
+			init_ttinfo(&sp->ttis[0], -stdoffset, false, 0);
+			init_ttinfo(&sp->ttis[1], -dstoffset, true, stdlen + 1);
+			sp->typecnt = 2;
+			sp->defaulttype = 0;
+		}
+	} else {
+		dstlen = 0;
+		sp->typecnt = 1;		/* only standard time */
+		sp->timecnt = 0;
+		init_ttinfo(&sp->ttis[0], -stdoffset, false, 0);
+		sp->defaulttype = 0;
+	}
+	sp->charcnt = charcnt;
+	cp = sp->chars;	/* store "STD\0" followed, if present, by "DST\0" */
+	memcpy(cp, stdname, stdlen);
+	cp += stdlen;
+	*cp++ = '\0';
+	if (dstlen != 0) {
+		memcpy(cp, dstname, dstlen);
+		*(cp + dstlen) = '\0';
+	}
+	return true;
+}
+
+static void
+gmtload(struct state *const sp)
+{
+	bool const loaded = tzload(etc_utc, sp, true) == 0;
+	if (!loaded) tzparse("UTC0", sp, NULL);	/* no data file: parse instead */
+}
+
+/* Initialize *SP to a value appropriate for the TZ setting NAME.
+   An empty NAME selects a bare UTC state with no transitions and no
+   leap seconds; any other NAME is handed to tzload and, if that fails,
+   to tzparse.  Return 0 on success, an errno value on failure.  */
+static int
+zoneinit(struct state *sp, char const *name)
+{
+  int status;
+
+  if (!name || name[0]) {
+    status = tzload(name, sp, true);
+    /* NAME is tried as a TZ string only if it does not begin with ':'.  */
+    if (status != 0 && name && name[0] != ':' && tzparse(name, sp, NULL))
+      status = 0;
+    return status == 0 ? scrub_abbrs(sp) : status;
+  }
+  /* User wants it fast rather than right.  */
+  sp->leapcnt = 0;		/* so, we're off a little */
+  sp->timecnt = 0;
+  sp->typecnt = 0;
+  sp->charcnt = 0;
+  sp->goback = sp->goahead = false;
+  init_ttinfo(&sp->ttis[0], 0, false, 0);
+  strcpy(sp->chars, utc);
+  sp->defaulttype = 0;
+  return 0;
+}
+
+static void
+tzset_unlocked(void)
+{
+  char const *name = getenv("TZ");
+  struct state *sp = lclptr;
+  /* -1 if TZ is unset, 1 if it fits in lcl_TZname, 0 if it is too long.  */
+  int fits = name ? strlen(name) < sizeof lcl_TZname : -1;
+  /* Nothing to do when the setting is the one already in effect.  */
+  if (fits < 0
+      ? lcl_is_set < 0
+      : 0 < lcl_is_set && strcmp(lcl_TZname, name) == 0)
+    return;
+#ifdef ALL_STATE
+  if (! sp)
+    lclptr = sp = tz_malloc(sizeof *lclptr);
+#endif /* defined ALL_STATE */
+  if (sp && zoneinit(sp, name) != 0)
+    zoneinit(sp, "");
+  if (sp && 0 < fits)
+    strcpy(lcl_TZname, name);
+  settzname();
+  lcl_is_set = fits;
+}
+
+void
+tzset(void)
+{
+  if (lock() == 0) {	/* do nothing if the lock cannot be taken */
+    tzset_unlocked();
+    unlock();
+  }
+}
+
+static void
+gmtcheck(void)
+{
+  static bool gmt_is_set;	/* true once the one-time setup has run */
+  if (lock() == 0) {
+    if (! gmt_is_set) {
+#ifdef ALL_STATE
+      gmtptr = tz_malloc(sizeof *gmtptr);
+#endif
+      if (gmtptr)
+	gmtload(gmtptr);
+      gmt_is_set = true;
+    }
+    unlock();
+  }
+}
+
+#if NETBSD_INSPIRED
+
+/* Allocator hooks: assign to these to override the malloc and free
+   used by this file.  They default to the C library functions.  */
+void *(*tz_malloc)(size_t size) = malloc;
+void  (*tz_free)(void *ptr)     = free;
+
+timezone_t
+tzalloc(char const *name)
+{
+  /* Return a new zone for NAME, or NULL with errno set on failure.  */
+  timezone_t sp = tz_malloc(sizeof *sp);
+  int err = sp ? zoneinit(sp, name) : 0;
+  if (err != 0) {
+    tz_free(sp);
+    errno = err;
+    return NULL;
+  }
+  if (!sp && (!HAVE_MALLOC_ERRNO || errno == 0))
+    errno = ENOMEM;	/* a replacement tz_malloc need not set errno */
+  return sp;
+}
+
+void
+tzfree(timezone_t sp)
+{
+  tz_free(sp);	/* release through the same hook pair tzalloc allocates with */
+}
+
+/*
+** NetBSD 6.1.4 has ctime_rz, but omit it because POSIX says ctime and
+** ctime_r are obsolescent and have potential security problems that
+** ctime_rz would share.  Callers can instead use localtime_rz + strftime.
+**
+** NetBSD 6.1.4 has tzgetname, but omit it because it doesn't work
+** in zones with three or more time zone abbreviations.
+** Callers can instead use localtime_rz + strftime.
+*/
+
+#endif
+
+/*
+** The easy way to behave "as if no library function calls" localtime
+** is to not call it, so we drop its guts into "localsub", which can be
+** freely called. (And no, the PANS doesn't require the above behavior,
+** but it *is* desirable.)
+**
+** If successful and SETNAME is nonzero,
+** set the applicable parts of tzname, timezone and altzone;
+** however, it's OK to omit this step if the timezone is POSIX-compatible,
+** since in that case tzset should have already done this step correctly.
+** SETNAME's type is int_fast32_t for compatibility with gmtsub,
+** but it is actually a boolean and its value should be 0 or 1.
+*/
+
+/*ARGSUSED*/
+static struct tm *
+localsub(struct state const *sp, time_t const *timep, int_fast32_t setname,
+	 struct tm *const tmp)
+{
+	register const struct ttinfo *	ttisp;
+	register int			i;
+	register struct tm *		result;
+	const time_t			t = *timep;
+
+	if (sp == NULL) {
+	  /* Don't bother to set tzname etc.; tzset has already done it.  */
+	  return gmtsub(gmtptr, timep, 0, tmp);
+	}
+	if ((sp->goback && t < sp->ats[0]) ||
+		(sp->goahead && t > sp->ats[sp->timecnt - 1])) {
+			time_t newt;
+			register time_t		seconds;
+			register time_t		years;
+
+			if (t < sp->ats[0])
+				seconds = sp->ats[0] - t;
+			else	seconds = t - sp->ats[sp->timecnt - 1];
+			--seconds;
+
+			/* Beware integer overflow, as SECONDS might
+			   be close to the maximum time_t.  */
+			years = seconds / SECSPERREPEAT * YEARSPERREPEAT;
+			seconds = years * AVGSECSPERYEAR;
+			years += YEARSPERREPEAT;
+			if (t < sp->ats[0])
+			  newt = t + seconds + SECSPERREPEAT;
+			else
+			  newt = t - seconds - SECSPERREPEAT;
+
+			if (newt < sp->ats[0] ||
+				newt > sp->ats[sp->timecnt - 1])
+					return NULL;	/* "cannot happen" */
+			result = localsub(sp, &newt, setname, tmp);	/* shifted time */
+			if (result) {
+#if defined ckd_add && defined ckd_sub
+				if (t < sp->ats[0]
+				    ? ckd_sub(&result->tm_year,
+					      result->tm_year, years)
+				    : ckd_add(&result->tm_year,
+					      result->tm_year, years))
+				  return NULL;
+#else
+				register int_fast64_t newy;
+
+				newy = result->tm_year;
+				if (t < sp->ats[0])
+					newy -= years;
+				else	newy += years;
+				if (! (INT_MIN <= newy && newy <= INT_MAX))
+					return NULL;
+				result->tm_year = newy;	/* undo the year shift */
+#endif
+			}
+			return result;
+	}
+	if (sp->timecnt == 0 || t < sp->ats[0]) {	/* before any transition */
+		i = sp->defaulttype;
+	} else {
+		register int	lo = 1;
+		register int	hi = sp->timecnt;
+
+		while (lo < hi) {	/* binary search for first ats[] > t */
+			register int	mid = (lo + hi) >> 1;
+
+			if (t < sp->ats[mid])
+				hi = mid;
+			else	lo = mid + 1;
+		}
+		i = sp->types[lo - 1];	/* type of the latest transition <= t */
+	}
+	ttisp = &sp->ttis[i];
+	/*
+	** To get (wrong) behavior that's compatible with System V Release 2.0
+	** you'd replace the statement below with
+	**	t += ttisp->tt_utoff;
+	**	timesub(&t, 0L, sp, tmp);
+	*/
+	result = timesub(&t, ttisp->tt_utoff, sp, tmp);
+	if (result) {
+	  result->tm_isdst = ttisp->tt_isdst;
+#ifdef TM_ZONE
+	  result->TM_ZONE = (char *) &sp->chars[ttisp->tt_desigidx];
+#endif /* defined TM_ZONE */
+	  if (setname)
+	    update_tzname_etc(sp, ttisp);
+	}
+	return result;
+}
+
+#if NETBSD_INSPIRED
+
+struct tm *
+localtime_rz(struct state *restrict sp, time_t const *restrict timep,
+	     struct tm *restrict tmp)
+{
+  return localsub(sp, timep, 0, tmp);	/* 0: do not update tzname etc. */
+}
+
+#endif
+
+static struct tm *
+localtime_tzset(time_t const *timep, struct tm *tmp, bool setname)
+{
+  int const lockerr = lock();
+  if (lockerr == 0) {
+    if (setname || !lcl_is_set)
+      tzset_unlocked();
+    tmp = localsub(lclptr, timep, setname, tmp);
+    unlock();
+    return tmp;
+  }
+  errno = lockerr;	/* the lock failed: report its error code */
+  return NULL;
+}
+
+struct tm *
+localtime(const time_t *timep)
+{
+#if !SUPPORT_C89
+  static struct tm tm;	/* result buffer reused by every call */
+#endif
+  return localtime_tzset(timep, &tm, true);	/* true: also set tzname etc. */
+}
+
+struct tm *
+localtime_r(const time_t *restrict timep, struct tm *restrict tmp)
+{
+  return localtime_tzset(timep, tmp, false);	/* false: no forced tzset */
+}
+
+/*
+** gmtsub is to gmtime as localsub is to localtime.
+*/
+
+static struct tm *
+gmtsub(ATTRIBUTE_MAYBE_UNUSED struct state const *sp, time_t const *timep,
+       int_fast32_t offset, struct tm *tmp)
+{
+	register struct tm *	result;
+
+	result = timesub(timep, offset, gmtptr, tmp);	/* SP is not consulted */
+#ifdef TM_ZONE
+	/*
+	** Could get fancy here and deliver something such as
+	** "+xx" or "-xx" if offset is non-zero,
+	** but this is no time for a treasure hunt.
+	*/
+	tmp->TM_ZONE = ((char *)
+			(offset ? wildabbr : gmtptr ? gmtptr->chars : utc));
+#endif /* defined TM_ZONE */
+	return result;
+}
+
+/*
+** Re-entrant version of gmtime: the result is stored in *TMP.
+*/
+
+struct tm *
+gmtime_r(time_t const *restrict timep, struct tm *restrict tmp)
+{
+  gmtcheck();	/* run the one-time setup of gmtptr if still needed */
+  return gmtsub(gmtptr, timep, 0, tmp);	/* 0: no offset from UT */
+}
+
+struct tm *
+gmtime(const time_t *timep)
+{
+#if !SUPPORT_C89
+  static struct tm tm;	/* result buffer reused by every call */
+#endif
+  return gmtime_r(timep, &tm);
+}
+
+#if STD_INSPIRED
+
+struct tm *
+offtime(const time_t *timep, long offset)
+{
+  gmtcheck();	/* run the one-time setup of gmtptr if still needed */
+
+#if !SUPPORT_C89
+  static struct tm tm;	/* result buffer reused by every call */
+#endif
+  return gmtsub(gmtptr, timep, offset, &tm);	/* OFFSET is added to *TIMEP */
+}
+
+#endif
+
+/*
+** Return the number of leap years through the end of the given year
+** where, to make the math easy, the answer for year zero is defined as zero.
+** This helper handles nonnegative years; leaps_thru_end_of wraps it.
+*/
+static time_t
+leaps_thru_end_of_nonneg(time_t y)
+{
+  return (y / 4 + y / 400) - y / 100;	/* Gregorian leap-year rule */
+}
+
+static time_t
+leaps_thru_end_of(time_t y)
+{
+  if (0 <= y)
+    return leaps_thru_end_of_nonneg(y);
+  return -1 - leaps_thru_end_of_nonneg(-1 - y);	/* mirror negative years */
+}
+
+static struct tm *	/* TMP filled in, or NULL with errno == EOVERFLOW */
+timesub(const time_t *timep, int_fast32_t offset,
+	const struct state *sp, struct tm *tmp)
+{
+	register const struct lsinfo *	lp;
+	register time_t			tdays;
+	register const int *		ip;
+	register int_fast32_t		corr;
+	register int			i;
+	int_fast32_t idays, rem, dayoff, dayrem;
+	time_t y;
+
+	/* If less than SECSPERMIN, the number of seconds since the
+	   most recent positive leap second; otherwise, do not add 1
+	   to localtime tm_sec because of leap seconds.  */
+	time_t secs_since_posleap = SECSPERMIN;
+
+	corr = 0;
+	i = (sp == NULL) ? 0 : sp->leapcnt;	/* no leap table without a state */
+	while (--i >= 0) {	/* scan leap seconds from last to first */
+		lp = &sp->lsis[i];
+		if (*timep >= lp->ls_trans) {
+			corr = lp->ls_corr;
+			if ((i == 0 ? 0 : lp[-1].ls_corr) < corr)
+			  secs_since_posleap = *timep - lp->ls_trans;
+			break;
+		}
+	}
+
+	/* Calculate the year, avoiding integer overflow even if
+	   time_t is unsigned.  */
+	tdays = *timep / SECSPERDAY;
+	rem = *timep % SECSPERDAY;
+	rem += offset % SECSPERDAY - corr % SECSPERDAY + 3 * SECSPERDAY;
+	dayoff = offset / SECSPERDAY - corr / SECSPERDAY + rem / SECSPERDAY - 3;
+	rem %= SECSPERDAY;
+	/* y = (EPOCH_YEAR
+	        + floor((tdays + dayoff) / DAYSPERREPEAT) * YEARSPERREPEAT),
+	   sans overflow.  But calculate against 1570 (EPOCH_YEAR -
+	   YEARSPERREPEAT) instead of against 1970 so that things work
+	   for localtime values before 1970 when time_t is unsigned.  */
+	dayrem = tdays % DAYSPERREPEAT;
+	dayrem += dayoff % DAYSPERREPEAT;
+	y = (EPOCH_YEAR - YEARSPERREPEAT
+	     + ((1 + dayoff / DAYSPERREPEAT + dayrem / DAYSPERREPEAT
+		 - ((dayrem % DAYSPERREPEAT) < 0)
+		 + tdays / DAYSPERREPEAT)
+		* YEARSPERREPEAT));
+	/* idays = (tdays + dayoff) mod DAYSPERREPEAT, sans overflow.  */
+	idays = tdays % DAYSPERREPEAT;
+	idays += dayoff % DAYSPERREPEAT + 2 * DAYSPERREPEAT;
+	idays %= DAYSPERREPEAT;
+	/* Increase Y and decrease IDAYS until IDAYS is in range for Y.  */
+	while (year_lengths[isleap(y)] <= idays) {
+		int tdelta = idays / DAYSPERLYEAR;
+		int_fast32_t ydelta = tdelta + !tdelta;	/* always at least 1 */
+		time_t newy = y + ydelta;
+		register int	leapdays;
+		leapdays = leaps_thru_end_of(newy - 1) -
+			leaps_thru_end_of(y - 1);
+		idays -= ydelta * DAYSPERNYEAR;
+		idays -= leapdays;
+		y = newy;
+	}
+
+#ifdef ckd_add
+	if (ckd_add(&tmp->tm_year, y, -TM_YEAR_BASE)) {
+	  errno = EOVERFLOW;
+	  return NULL;
+	}
+#else
+	if (!TYPE_SIGNED(time_t) && y < TM_YEAR_BASE) {
+	  int signed_y = y;
+	  tmp->tm_year = signed_y - TM_YEAR_BASE;
+	} else if ((!TYPE_SIGNED(time_t) || INT_MIN + TM_YEAR_BASE <= y)
+		   && y - TM_YEAR_BASE <= INT_MAX)
+	  tmp->tm_year = y - TM_YEAR_BASE;
+	else {
+	  errno = EOVERFLOW;
+	  return NULL;
+	}
+#endif
+	tmp->tm_yday = idays;
+	/*
+	** The "extra" mods below avoid overflow problems.
+	*/
+	tmp->tm_wday = (TM_WDAY_BASE
+			+ ((tmp->tm_year % DAYSPERWEEK)
+			   * (DAYSPERNYEAR % DAYSPERWEEK))
+			+ leaps_thru_end_of(y - 1)
+			- leaps_thru_end_of(TM_YEAR_BASE - 1)
+			+ idays);
+	tmp->tm_wday %= DAYSPERWEEK;
+	if (tmp->tm_wday < 0)
+		tmp->tm_wday += DAYSPERWEEK;
+	tmp->tm_hour = rem / SECSPERHOUR;
+	rem %= SECSPERHOUR;
+	tmp->tm_min = rem / SECSPERMIN;
+	tmp->tm_sec = rem % SECSPERMIN;
+
+	/* Use "... ??:??:60" at the end of the localtime minute containing
+	   the second just before the positive leap second.  */
+	tmp->tm_sec += secs_since_posleap <= tmp->tm_sec;
+
+	ip = mon_lengths[isleap(y)];
+	for (tmp->tm_mon = 0; idays >= ip[tmp->tm_mon]; ++(tmp->tm_mon))
+		idays -= ip[tmp->tm_mon];	/* peel off each whole month */
+	tmp->tm_mday = idays + 1;
+	tmp->tm_isdst = 0;
+#ifdef TM_GMTOFF
+	tmp->TM_GMTOFF = offset;
+#endif /* defined TM_GMTOFF */
+	return tmp;
+}
+
+/*
+** Adapted from code provided by Robert Elz, who writes:
+**	The "best" way to do mktime I think is based on an idea of Bob
+**	Kridle's (so its said...) from a long time ago.
+**	It does a binary search of the time_t space. Since time_t's are
+**	just 32 bits, its a max of 32 iterations (even at 64 bits it
+**	would still be very reasonable).
+*/
+
+#ifndef WRONG
+# define WRONG (-1)
+#endif /* !defined WRONG */
+
+/*
+** Normalize logic courtesy Paul Eggert.
+*/
+
+static bool
+increment_overflow(int *ip, int j)
+{
+#ifdef ckd_add
+	return ckd_add(ip, *ip, j);
+#else
+	register int const	start = *ip;
+	register bool		overflowed;
+
+	/*
+	** A nonnegative start can only overflow past INT_MAX, and a
+	** negative start can only overflow below INT_MIN; in each case
+	** the limit test below cannot itself overflow.
+	*/
+	overflowed = start < 0 ? j < INT_MIN - start : j > INT_MAX - start;
+	if (!overflowed)
+		*ip = start + j;
+	return overflowed;
+#endif
+}
+
+static bool
+increment_overflow32(int_fast32_t *const lp, int const m)
+{
+#ifdef ckd_add
+	return ckd_add(lp, *lp, m);
+#else
+	register int_fast32_t const	start = *lp;
+	register bool const overflowed = (start < 0	/* same test as for int */
+		? m < INT_FAST32_MIN - start : m > INT_FAST32_MAX - start);
+	if (!overflowed)
+		*lp = start + m;
+	return overflowed;
+#endif
+}
+
+static bool
+increment_overflow_time(time_t *tp, int_fast32_t j)
+{
+#ifdef ckd_add
+	return ckd_add(tp, *tp, j);	/* true if the sum does not fit */
+#else
+	/*
+	** This is like
+	** 'if (! (TIME_T_MIN <= *tp + j && *tp + j <= TIME_T_MAX)) ...',
+	** except that it does the right thing even if *tp + j would overflow.
+	*/
+	if (! (j < 0
+	       ? (TYPE_SIGNED(time_t) ? TIME_T_MIN - j <= *tp : -1 - j < *tp)
+	       : *tp <= TIME_T_MAX - j))
+		return true;	/* out of range: *tp is left unchanged */
+	*tp += j;
+	return false;
+#endif
+}
+
+static bool
+normalize_overflow(int *const tensptr, int *const unitsptr, const int base)
+{
+	/* Carry floor(*UNITSPTR / BASE) into *TENSPTR; true on overflow.  */
+	register int const	units = *unitsptr;
+	register int const	carry = (units < 0
+					 ? -1 - (-1 - units) / base
+					 : units / base);
+	*unitsptr = units - carry * base;
+	return increment_overflow(tensptr, carry);
+}
+
+static bool
+normalize_overflow32(int_fast32_t *tensptr, int *unitsptr, int base)
+{
+	/* Carry floor(*UNITSPTR / BASE) into *TENSPTR; true on overflow.  */
+	register int const	units = *unitsptr;
+	register int const	carry = (units < 0
+					 ? -1 - (-1 - units) / base
+					 : units / base);
+	*unitsptr = units - carry * base;
+	return increment_overflow32(tensptr, carry);
+}
+
+static int
+tmcomp(register const struct tm *const atmp,
+       register const struct tm *const btmp)
+{
+	register int	diff;
+
+	/* Years are compared rather than subtracted; the rest use differences.  */
+	if (atmp->tm_year != btmp->tm_year)
+		return atmp->tm_year < btmp->tm_year ? -1 : 1;
+	return ((diff = atmp->tm_mon - btmp->tm_mon) != 0 ? diff
+		: (diff = atmp->tm_mday - btmp->tm_mday) != 0 ? diff
+		: (diff = atmp->tm_hour - btmp->tm_hour) != 0 ? diff
+		: (diff = atmp->tm_min - btmp->tm_min) != 0 ? diff
+		: atmp->tm_sec - btmp->tm_sec);
+}
+
+/* Copy from *SRC to *DEST the members that mktime needs, and only
+   those, as the other members of *SRC might not be initialized.  */
+static void
+mktmcpy(struct tm *dest, struct tm const *src)
+{
+  dest->tm_year = src->tm_year;
+  dest->tm_mon = src->tm_mon;
+  dest->tm_mday = src->tm_mday;
+  dest->tm_hour = src->tm_hour;
+  dest->tm_min = src->tm_min;
+  dest->tm_sec = src->tm_sec;
+  dest->tm_isdst = src->tm_isdst;
+#if defined TM_GMTOFF && ! UNINIT_TRAP
+  dest->TM_GMTOFF = src->TM_GMTOFF;
+#endif
+}
+
+static time_t
+time2sub(struct tm *const tmp,	/* in: time to convert; rewritten by the final funcp call */
+	 struct tm *(*funcp)(struct state const *, time_t const *,
+			     int_fast32_t, struct tm *),	/* maps a time_t to a struct tm; null on failure */
+	 struct state const *sp,	/* zone state handed to funcp; may be null */
+	 const int_fast32_t offset,	/* handed to funcp unchanged */
+	 bool *okayp,	/* out: true only if the final funcp call succeeds */
+	 bool do_norm_secs)	/* true: normalize tm_sec against tm_min before searching */
+{
+	register int			dir;	/* >0: probe too large (lower hi); <0: raise lo */
+	register int			i, j;
+	register int			saved_seconds;	/* seconds set aside for the search, re-added at the end */
+	register int_fast32_t		li;
+	register time_t			lo;	/* lower bound of the binary search */
+	register time_t			hi;	/* upper bound of the binary search */
+	int_fast32_t			y;	/* year, kept wider than int while normalizing */
+	time_t				newt;
+	time_t				t;	/* current probe, and finally the answer */
+	struct tm			yourtm, mytm;	/* yourtm: normalized copy of *tmp; mytm: funcp's view of t */
+
+	*okayp = false;
+	mktmcpy(&yourtm, tmp);	/* work on a copy; *tmp is written only by the last funcp call */
+
+	if (do_norm_secs) {
+		if (normalize_overflow(&yourtm.tm_min, &yourtm.tm_sec,
+			SECSPERMIN))
+				return WRONG;
+	}
+	if (normalize_overflow(&yourtm.tm_hour, &yourtm.tm_min, MINSPERHOUR))
+		return WRONG;
+	if (normalize_overflow(&yourtm.tm_mday, &yourtm.tm_hour, HOURSPERDAY))
+		return WRONG;
+	y = yourtm.tm_year;
+	if (normalize_overflow32(&y, &yourtm.tm_mon, MONSPERYEAR))
+		return WRONG;
+	/*
+	** Turn y into an actual year number for now.
+	** It is converted back to an offset from TM_YEAR_BASE later.
+	*/
+	if (increment_overflow32(&y, TM_YEAR_BASE))
+		return WRONG;
+	while (yourtm.tm_mday <= 0) {	/* day is before the 1st: borrow whole years */
+		if (increment_overflow32(&y, -1))
+			return WRONG;
+		li = y + (1 < yourtm.tm_mon);	/* from March on, the February crossed belongs to year y+1 */
+		yourtm.tm_mday += year_lengths[isleap(li)];
+	}
+	while (yourtm.tm_mday > DAYSPERLYEAR) {	/* more than DAYSPERLYEAR days: shed whole years */
+		li = y + (1 < yourtm.tm_mon);	/* same choice of year as in the loop above */
+		yourtm.tm_mday -= year_lengths[isleap(li)];
+		if (increment_overflow32(&y, 1))
+			return WRONG;
+	}
+	for ( ; ; ) {	/* shed whole months until tm_mday fits the current month */
+		i = mon_lengths[isleap(y)][yourtm.tm_mon];
+		if (yourtm.tm_mday <= i)
+			break;
+		yourtm.tm_mday -= i;
+		if (++yourtm.tm_mon >= MONSPERYEAR) {
+			yourtm.tm_mon = 0;
+			if (increment_overflow32(&y, 1))
+				return WRONG;
+		}
+	}
+#ifdef ckd_add
+	if (ckd_add(&yourtm.tm_year, y, -TM_YEAR_BASE))
+	  return WRONG;
+#else
+	if (increment_overflow32(&y, -TM_YEAR_BASE))
+		return WRONG;
+	if (! (INT_MIN <= y && y <= INT_MAX))
+		return WRONG;
+	yourtm.tm_year = y;
+#endif
+	if (yourtm.tm_sec >= 0 && yourtm.tm_sec < SECSPERMIN)
+		saved_seconds = 0;	/* tm_sec is in range: search with it as given */
+	else if (yourtm.tm_year < EPOCH_YEAR - TM_YEAR_BASE) {
+		/*
+		** We can't set tm_sec to 0, because that might push the
+		** time below the minimum representable time.
+		** Set tm_sec to 59 instead.
+		** This assumes that the minimum representable time is
+		** not in the same minute that a leap second was deleted from,
+		** which is a safer assumption than using 58 would be.
+		*/
+		if (increment_overflow(&yourtm.tm_sec, 1 - SECSPERMIN))
+			return WRONG;
+		saved_seconds = yourtm.tm_sec;
+		yourtm.tm_sec = SECSPERMIN - 1;
+	} else {
+		saved_seconds = yourtm.tm_sec;
+		yourtm.tm_sec = 0;
+	}
+	/*
+	** Do a binary search (this works whatever time_t's type is).
+	*/
+	lo = TIME_T_MIN;
+	hi = TIME_T_MAX;
+	for ( ; ; ) {
+		t = lo / 2 + hi / 2;	/* midpoint formed from the halves, never from lo + hi */
+		if (t < lo)
+			t = lo;
+		else if (t > hi)
+			t = hi;
+		if (! funcp(sp, &t, offset, &mytm)) {
+			/*
+			** Assume that t is too extreme to be represented in
+			** a struct tm; arrange things so that it is less
+			** extreme on the next pass.
+			*/
+			dir = (t > 0) ? 1 : -1;
+		} else	dir = tmcomp(&mytm, &yourtm);
+		if (dir != 0) {
+			if (t == lo) {	/* probe sits on the lower bound: step both up by one */
+				if (t == TIME_T_MAX)
+					return WRONG;
+				++t;
+				++lo;
+			} else if (t == hi) {	/* probe sits on the upper bound: step both down by one */
+				if (t == TIME_T_MIN)
+					return WRONG;
+				--t;
+				--hi;
+			}
+			if (lo > hi)	/* bounds crossed: nothing matches */
+				return WRONG;
+			if (dir > 0)
+				hi = t;
+			else	lo = t;
+			continue;
+		}
+#if defined TM_GMTOFF && ! UNINIT_TRAP
+		if (mytm.TM_GMTOFF != yourtm.TM_GMTOFF
+		    && (yourtm.TM_GMTOFF < 0
+			? (-SECSPERDAY <= yourtm.TM_GMTOFF
+			   && (mytm.TM_GMTOFF <=
+			       (min(INT_FAST32_MAX, LONG_MAX)
+				+ yourtm.TM_GMTOFF)))
+			: (yourtm.TM_GMTOFF <= SECSPERDAY
+			   && ((max(INT_FAST32_MIN, LONG_MIN)
+				+ yourtm.TM_GMTOFF)
+			       <= mytm.TM_GMTOFF)))) {
+		  /* MYTM matches YOURTM except with the wrong UT offset.
+		     YOURTM.TM_GMTOFF is plausible, so try it instead.
+		     It's OK if YOURTM.TM_GMTOFF contains uninitialized data,
+		     since the guess gets checked.  */
+		  time_t altt = t;
+		  int_fast32_t diff = mytm.TM_GMTOFF - yourtm.TM_GMTOFF;
+		  if (!increment_overflow_time(&altt, diff)) {
+		    struct tm alttm;
+		    if (funcp(sp, &altt, offset, &alttm)
+			&& alttm.tm_isdst == mytm.tm_isdst
+			&& alttm.TM_GMTOFF == yourtm.TM_GMTOFF
+			&& tmcomp(&alttm, &yourtm) == 0) {
+		      t = altt;
+		      mytm = alttm;
+		    }
+		  }
+		}
+#endif
+		if (yourtm.tm_isdst < 0 || mytm.tm_isdst == yourtm.tm_isdst)
+			break;	/* tm_isdst was left open by the caller, or it agrees */
+		/*
+		** Right time, wrong type.
+		** Hunt for right time, right type.
+		** It's okay to guess wrong since the guess
+		** gets checked.
+		*/
+		if (sp == NULL)
+			return WRONG;
+		for (i = sp->typecnt - 1; i >= 0; --i) {	/* i: types with the requested tm_isdst */
+			if (sp->ttis[i].tt_isdst != yourtm.tm_isdst)
+				continue;
+			for (j = sp->typecnt - 1; j >= 0; --j) {	/* j: types with the other tm_isdst */
+				if (sp->ttis[j].tt_isdst == yourtm.tm_isdst)
+					continue;
+				if (ttunspecified(sp, j))
+				  continue;
+				newt = (t + sp->ttis[j].tt_utoff
+					- sp->ttis[i].tt_utoff);	/* shift t by the gap between the two UT offsets */
+				if (! funcp(sp, &newt, offset, &mytm))
+					continue;
+				if (tmcomp(&mytm, &yourtm) != 0)
+					continue;
+				if (mytm.tm_isdst != yourtm.tm_isdst)
+					continue;
+				/*
+				** We have a match.
+				*/
+				t = newt;
+				goto label;
+			}
+		}
+		return WRONG;
+	}
+label:
+	newt = t + saved_seconds;	/* put back the seconds set aside before the search */
+	if ((newt < t) != (saved_seconds < 0))	/* the sum moved the wrong way: treat as overflow */
+		return WRONG;
+	t = newt;
+	if (funcp(sp, &t, offset, tmp))	/* rewrite *tmp from the final t */
+		*okayp = true;
+	return t;
+}
+
+static time_t
+time2(struct tm * const	tmp,
+      struct tm *(*funcp)(struct state const *, time_t const *,
+			  int_fast32_t, struct tm *),
+      struct state const *sp,
+      const int_fast32_t offset,
+      bool *okayp)
+{
+	/*
+	** Pass 1 leaves tm_sec alone, so that a value naming a leap
+	** second can still be matched as given.
+	*/
+	time_t	first = time2sub(tmp, funcp, sp, offset, okayp, false);
+
+	if (*okayp)
+		return first;
+	return time2sub(tmp, funcp, sp, offset, okayp, true);	/* pass 2: normalize tm_sec */
+}
+
+static time_t
+time1(struct tm *const tmp,	/* time to convert; null yields EINVAL and WRONG */
+      struct tm *(*funcp)(struct state const *, time_t const *,
+			  int_fast32_t, struct tm *),	/* converter handed on to time2 */
+      struct state const *sp,	/* zone state; the retry loop below needs it non-null */
+      const int_fast32_t offset)	/* handed on to time2 unchanged */
+{
+	register time_t			t;
+	register int			samei, otheri;	/* type indexes: same / opposite tt_isdst as *tmp */
+	register int			sameind, otherind;	/* positions within types[] */
+	register int			i;
+	register int			nseen;	/* number of entries filled in types[] */
+	char				seen[TZ_MAX_TYPES];	/* seen[k]: type k is already in types[] */
+	unsigned char			types[TZ_MAX_TYPES];	/* distinct types, in the order met scanning sp->types backwards */
+	bool				okay;
+
+	if (tmp == NULL) {
+		errno = EINVAL;
+		return WRONG;
+	}
+	if (tmp->tm_isdst > 1)
+		tmp->tm_isdst = 1;	/* any value above 1 is treated as 1 */
+	t = time2(tmp, funcp, sp, offset, &okay);	/* first attempt: take *tmp as given */
+	if (okay)
+		return t;
+	if (tmp->tm_isdst < 0)
+#ifdef PCTS
+		/*
+		** POSIX Conformance Test Suite code courtesy Grant Sullivan.
+		*/
+		tmp->tm_isdst = 0;	/* reset to std and try again */
+#else
+		return t;	/* negative tm_isdst: return time2's result without retrying */
+#endif /* !defined PCTS */
+	/*
+	** We're supposed to assume that somebody took a time of one type
+	** and did some math on it that yielded a "struct tm" that's bad.
+	** We try to divine the type they started from and adjust to the
+	** type they need.
+	*/
+	if (sp == NULL)
+		return WRONG;
+	for (i = 0; i < sp->typecnt; ++i)
+		seen[i] = false;
+	nseen = 0;
+	for (i = sp->timecnt - 1; i >= 0; --i)	/* collect each specified type once */
+		if (!seen[sp->types[i]] && !ttunspecified(sp, sp->types[i])) {
+			seen[sp->types[i]] = true;
+			types[nseen++] = sp->types[i];
+		}
+	for (sameind = 0; sameind < nseen; ++sameind) {
+		samei = types[sameind];
+		if (sp->ttis[samei].tt_isdst != tmp->tm_isdst)
+			continue;
+		for (otherind = 0; otherind < nseen; ++otherind) {
+			otheri = types[otherind];
+			if (sp->ttis[otheri].tt_isdst == tmp->tm_isdst)
+				continue;
+			tmp->tm_sec += (sp->ttis[otheri].tt_utoff
+					- sp->ttis[samei].tt_utoff);	/* shift by the gap between the two UT offsets */
+			tmp->tm_isdst = !tmp->tm_isdst;	/* and retry with the opposite tm_isdst */
+			t = time2(tmp, funcp, sp, offset, &okay);
+			if (okay)
+				return t;
+			tmp->tm_sec -= (sp->ttis[otheri].tt_utoff
+					- sp->ttis[samei].tt_utoff);	/* no luck: undo the shift */
+			tmp->tm_isdst = !tmp->tm_isdst;	/* and restore tm_isdst */
+		}
+	}
+	return WRONG;
+}
+
+static time_t
+mktime_tzname(struct state *sp, struct tm *tmp, bool setname)
+{
+  /* No zone state: call gmtcheck, then convert with gmtsub and gmtptr.  */
+  if (sp == NULL) {
+    gmtcheck();
+    return time1(tmp, gmtsub, gmtptr, 0);
+  }
+  return time1(tmp, localsub, sp, setname);	/* SETNAME travels in time1's offset argument */
+}
+
+#if NETBSD_INSPIRED
+
+time_t
+mktime_z(struct state *restrict sp, struct tm *restrict tmp)	/* mktime for an explicit zone state SP */
+{
+  return mktime_tzname(sp, tmp, false);	/* setname is false here; mktime passes true */
+}
+
+#endif
+
+time_t
+mktime(struct tm *tmp)
+{
+  time_t result = -1;	/* returned as is when the lock cannot be taken */
+  int lock_error = lock();
+  if (lock_error == 0) {
+    /* Call tzset_unlocked, then convert, all while holding the lock.  */
+    tzset_unlocked();
+    result = mktime_tzname(lclptr, tmp, true);
+    unlock();
+  } else
+    errno = lock_error;
+  return result;
+}
+
+#if STD_INSPIRED
+time_t
+timelocal(struct tm *tmp)	/* mktime, but with tm_isdst forced to -1 first */
+{
+	if (tmp != NULL)	/* a null TMP is passed straight on to mktime */
+		tmp->tm_isdst = -1;	/* in case the caller did not initialize it */
+	return mktime(tmp);
+}
+#else
+static
+#endif
+time_t
+timeoff(struct tm *tmp, long offset)	/* convert *TMP via gmtsub, passing OFFSET on to time1 */
+{
+  if (tmp)	/* a null TMP is left for time1 to reject */
+    tmp->tm_isdst = 0;
+  gmtcheck();
+  return time1(tmp, gmtsub, gmtptr, offset);
+}
+
+time_t
+timegm(struct tm *tmp)
+{
+  struct tm scratch;
+  time_t result;
+  mktmcpy(&scratch, tmp);	/* NOTE(review): reads *TMP, so a null TMP is not tolerated here */
+  scratch.tm_wday = -1;	/* sentinel; nonnegative afterwards means timeoff rewrote SCRATCH */
+  result = timeoff(&scratch, 0);
+  if (scratch.tm_wday >= 0)
+    *tmp = scratch;	/* hand the rewritten fields back to the caller */
+  return result;
+}
+
+/* Return the ls_corr of the highest-indexed entry of SP->lsis whose
+   ls_trans is at or before T; return 0 when no entry qualifies.  */
+static int_fast32_t
+leapcorr(struct state const *sp, time_t t)
+{
+	register int	idx;
+
+	/* Walk the table backwards; the first match found is the
+	   one with the highest index.  */
+	for (idx = sp->leapcnt - 1; idx >= 0; --idx)
+		if (sp->lsis[idx].ls_trans <= t)
+			return sp->lsis[idx].ls_corr;
+	return 0;
+}
+
+/*
+** XXX--is the below the right way to conditionalize??
+*/
+
+#if STD_INSPIRED
+
+/* NETBSD_INSPIRED_EXTERN functions are exported to callers if
+   NETBSD_INSPIRED is defined, and are private otherwise.  */
+# if NETBSD_INSPIRED
+#  define NETBSD_INSPIRED_EXTERN
+# else
+#  define NETBSD_INSPIRED_EXTERN static
+# endif
+
+/*
+** IEEE Std 1003.1 (POSIX) says that 536457599
+** shall correspond to "Wed Dec 31 23:59:59 UTC 1986", which
+** is not the case if we are accounting for leap seconds.
+** So, we provide the following conversion routines for use
+** when exchanging timestamps with POSIX conforming systems.
+*/
+
+NETBSD_INSPIRED_EXTERN time_t
+time2posix_z(struct state *sp, time_t t)	/* T minus the leap correction that SP gives for T */
+{
+  return t - leapcorr(sp, t);	/* leapcorr returns 0 when no table entry is at or before T */
+}
+
+time_t
+time2posix(time_t t)
+{
+  int lock_error = lock();
+  if (lock_error == 0) {
+    if (!lcl_is_set)
+      tzset_unlocked();	/* runs only while lcl_is_set is still zero */
+    if (lclptr != NULL)	/* with no local state, T is returned unchanged */
+      t = time2posix_z(lclptr, t);
+    unlock();
+    return t;
+  }
+  errno = lock_error;	/* lock() failed: pass its code on through errno */
+  return -1;
+}
+
+NETBSD_INSPIRED_EXTERN time_t
+posix2time_z(struct state *sp, time_t t)	/* inverse of time2posix_z: find x with x - leapcorr(sp, x) == t */
+{
+	time_t	x;	/* candidate result */
+	time_t	y;	/* candidate converted back: x - leapcorr(sp, x) */
+	/*
+	** For a positive leap second hit, the result
+	** is not unique. For a negative leap second
+	** hit, the corresponding time doesn't exist,
+	** so we return an adjacent second.
+	*/
+	x = t + leapcorr(sp, t);	/* first guess: add the correction found for t itself */
+	y = x - leapcorr(sp, x);
+	if (y < t) {	/* guess maps too low: walk x upwards */
+		do {
+			x++;
+			y = x - leapcorr(sp, x);
+		} while (y < t);
+		x -= y != t;	/* overshot (y > t): step back one second */
+	} else if (y > t) {	/* guess maps too high: walk x downwards */
+		do {
+			--x;
+			y = x - leapcorr(sp, x);
+		} while (y > t);
+		x += y != t;	/* undershot (y < t): step forward one second */
+	}
+	return x;
+}
+
+time_t
+posix2time(time_t t)
+{
+  int lock_error = lock();
+  if (lock_error == 0) {
+    if (!lcl_is_set)
+      tzset_unlocked();	/* runs only while lcl_is_set is still zero */
+    if (lclptr != NULL)	/* with no local state, T is returned unchanged */
+      t = posix2time_z(lclptr, t);
+    unlock();
+    return t;
+  }
+  errno = lock_error;	/* lock() failed: pass its code on through errno */
+  return -1;
+}
+
+#endif /* STD_INSPIRED */
+
+#if TZ_TIME_T
+
+# if !USG_COMPAT
+#  define daylight 0
+#  define timezone 0
+# endif
+# if !ALTZONE
+#  define altzone 0
+# endif
+
+/* Convert from the underlying system's time_t to the ersatz time_tz,
+   which is called 'time_t' in this file.  Typically, this merely
+   converts the time's integer width.  On some platforms, the system
+   time is local time not UT, or uses some epoch other than the POSIX
+   epoch.
+
+   Although this code appears to define a function named 'time' that
+   returns time_t, the macros in private.h cause this code to actually
+   define a function named 'tz_time' that returns tz_time_t.  The call
+   to sys_time invokes the underlying system's 'time' function.  */
+
+time_t
+time(time_t *p)
+{
+  time_t now = sys_time(0);
+  if (now != (time_t) -1) {	/* a failed reading stays -1 and is not adjusted */
+    int_fast32_t shift = EPOCH_LOCAL ? (daylight ? timezone : altzone) : 0;
+    if (increment_overflow32(&shift, -EPOCH_OFFSET)
+	|| increment_overflow_time(&now, shift)) {
+      errno = EOVERFLOW;	/* one of the two additions overflowed */
+      now = -1;
+    }
+  }
+  if (p != NULL)
+    *p = now;
+  return now;
+}
+
+#endif
diff --git a/lib-tzcode/newctime.3 b/lib-tzcode/newctime.3
new file mode 100644
index 0000000..05bb7de
--- /dev/null
+++ b/lib-tzcode/newctime.3
@@ -0,0 +1,358 @@
+.\" This file is in the public domain, so clarified as of
+.\" 2009-05-17 by Arthur David Olson.
+.TH newctime 3 "" "Time Zone Database"
+.SH NAME
+asctime, ctime, difftime, gmtime, localtime, mktime \- convert date and time
+.SH SYNOPSIS
+.nf
+.ie \n(.g .ds - \f(CR-\fP
+.el .ds - \-
+.B #include <time.h>
+.PP
+.BR "extern char *tzname[];" " /\(** (optional) \(**/"
+.PP
+.B [[deprecated]] char *ctime(time_t const *clock);
+.PP
+.B char *ctime_r(time_t const *clock, char *buf);
+.PP
+.B double difftime(time_t time1, time_t time0);
+.PP
+.B [[deprecated]] char *asctime(struct tm const *tm);
+.PP
+.B "char *asctime_r(struct tm const *restrict tm,"
+.B "    char *restrict result);"
+.PP
+.B struct tm *localtime(time_t const *clock);
+.PP
+.B "struct tm *localtime_r(time_t const *restrict clock,"
+.B "    struct tm *restrict result);"
+.PP
+.B "struct tm *localtime_rz(timezone_t restrict zone,"
+.B "    time_t const *restrict clock,"
+.B "    struct tm *restrict result);"
+.PP
+.B struct tm *gmtime(time_t const *clock);
+.PP
+.B "struct tm *gmtime_r(time_t const *restrict clock,"
+.B "    struct tm *restrict result);"
+.PP
+.B time_t mktime(struct tm *tm);
+.PP
+.B "time_t mktime_z(timezone_t restrict zone,"
+.B "    struct tm *restrict tm);"
+.PP
+.B cc ... \*-ltz
+.fi
+.SH DESCRIPTION
+.ie '\(en'' .ds en \-
+.el .ds en \(en
+.ie '\(lq'' .ds lq \&"\"
+.el .ds lq \(lq\"
+.ie '\(rq'' .ds rq \&"\"
+.el .ds rq \(rq\"
+.de q
+\\$3\*(lq\\$1\*(rq\\$2
+..
+The
+.B ctime
+function
+converts a long integer, pointed to by
+.IR clock ,
+and returns a pointer to a
+string of the form
+.br
+.ce
+.eo
+Thu Nov 24 18:22:48 1986\n\0
+.br
+.ec
+Years requiring fewer than four characters are padded with leading zeroes.
+For years longer than four characters, the string is of the form
+.br
+.ce
+.eo
+Thu Nov 24 18:22:48     81986\n\0
+.ec
+.br
+with five spaces before the year.
+These unusual formats are designed to make it less likely that older
+software that expects exactly 26 bytes of output will mistakenly output
+misleading values for out-of-range years.
+.PP
+The
+.BI * clock
+timestamp represents the time in seconds since 1970-01-01 00:00:00
+Coordinated Universal Time (UTC).
+The POSIX standard says that timestamps must be nonnegative
+and must ignore leap seconds.
+Many implementations extend POSIX by allowing negative timestamps,
+and can therefore represent timestamps that predate the
+introduction of UTC and are some other flavor of Universal Time (UT).
+Some implementations support leap seconds, in contradiction to POSIX.
+.PP
+The
+.B ctime
+function is deprecated starting in C23.
+Callers can use
+.B localtime_r
+and
+.B strftime
+instead.
+.PP
+The
+.B localtime
+and
+.B gmtime
+functions
+return pointers to
+.q "tm"
+structures, described below.
+The
+.B localtime
+function
+corrects for the time zone and any time zone adjustments
+(such as Daylight Saving Time in the United States).
+After filling in the
+.q "tm"
+structure,
+.B localtime
+sets the
+.BR tm_isdst 'th
+element of
+.B tzname
+to a pointer to a string that's the time zone abbreviation to be used with
+.BR localtime 's
+return value.
+.PP
+The
+.B gmtime
+function
+converts to Coordinated Universal Time.
+.PP
+The
+.B asctime
+function
+converts a time value contained in a
+.q "tm"
+structure to a string,
+as shown in the above example,
+and returns a pointer to the string.
+This function is deprecated starting in C23.
+Callers can use
+.B strftime
+instead.
+.PP
+The
+.B mktime
+function
+converts the broken-down time,
+expressed as local time,
+in the structure pointed to by
+.I tm
+into a calendar time value with the same encoding as that of the values
+returned by the
+.B time
+function.
+The original values of the
+.B tm_wday
+and
+.B tm_yday
+components of the structure are ignored,
+and the original values of the other components are not restricted
+to their normal ranges.
+(A positive or zero value for
+.B tm_isdst
+causes
+.B mktime
+to presume initially that daylight saving time,
+respectively,
+is or is not in effect for the specified time.
+A negative value for
+.B tm_isdst
+causes the
+.B mktime
+function to attempt to divine whether daylight saving time is in effect
+for the specified time; in this case it does not use a consistent
+rule and may give a different answer when later
+presented with the same argument.)
+On successful completion, the values of the
+.B tm_wday
+and
+.B tm_yday
+components of the structure are set appropriately,
+and the other components are set to represent the specified calendar time,
+but with their values forced to their normal ranges; the final value of
+.B tm_mday
+is not set until
+.B tm_mon
+and
+.B tm_year
+are determined.
+The
+.B mktime
+function
+returns the specified calendar time;
+if the calendar time cannot be represented,
+it returns \-1.
+.PP
+The
+.B difftime
+function
+returns the difference between two calendar times,
+.RI ( time1
+\-
+.IR time0 ),
+expressed in seconds.
+.PP
+The
+.BR ctime_r ,
+.BR localtime_r ,
+.BR gmtime_r ,
+and
+.B asctime_r
+functions
+are like their unsuffixed counterparts, except that they accept an
+additional argument specifying where to store the result if successful.
+.PP
+The
+.B localtime_rz
+and
+.B mktime_z
+functions
+are like their unsuffixed counterparts, except that they accept an
+extra initial
+.B zone
+argument specifying the timezone to be used for conversion.
+If
+.B zone
+is null, UT is used; otherwise,
+.B zone
+should have been allocated by
+.B tzalloc
+and should not be freed until after all uses (e.g., by calls to
+.BR strftime )
+of the filled-in
+.B tm_zone
+fields.
+.PP
+Declarations of all the functions and externals, and the
+.q "tm"
+structure,
+are in the
+.B <time.h>
+header file.
+The structure (of type)
+.B struct tm
+includes the following fields:
+.RS
+.PP
+.nf
+.ta 2n +\w'long tm_gmtoff;nn'u
+	int tm_sec;	/\(** seconds (0\*(en60) \(**/
+	int tm_min;	/\(** minutes (0\*(en59) \(**/
+	int tm_hour;	/\(** hours (0\*(en23) \(**/
+	int tm_mday;	/\(** day of month (1\*(en31) \(**/
+	int tm_mon;	/\(** month of year (0\*(en11) \(**/
+	int tm_year;	/\(** year \- 1900 \(**/
+	int tm_wday;	/\(** day of week (Sunday = 0) \(**/
+	int tm_yday;	/\(** day of year (0\*(en365) \(**/
+	int tm_isdst;	/\(** is daylight saving time in effect? \(**/
+	char \(**tm_zone;	/\(** time zone abbreviation (optional) \(**/
+	long tm_gmtoff;	/\(** offset from UT in seconds (optional) \(**/
+.fi
+.RE
+.PP
+The
+.B tm_isdst
+field
+is non-zero if daylight saving time is in effect.
+.PP
+The
+.B tm_gmtoff
+field
+is the offset (in seconds) of the time represented
+from UT, with positive values indicating east
+of the Prime Meridian.
+The field's name is derived from Greenwich Mean Time, a precursor of UT.
+.PP
+In
+.B "struct tm"
+the
+.B tm_zone
+and
+.B tm_gmtoff
+fields exist, and are filled in, only if arrangements to do
+so were made when the library containing these functions was
+created.
+Similarly, the
+.B tzname
+variable is optional; also, there is no guarantee that
+.B tzname
+will
+continue to exist in this form in future releases of this code.
+.SH FILES
+.ta \w'/usr/share/zoneinfo/posixrules\0\0'u
+/usr/share/zoneinfo	timezone information directory
+.br
+/usr/share/zoneinfo/localtime	local timezone file
+.br
+/usr/share/zoneinfo/posixrules	default DST rules (obsolete,
+	and can cause bugs if present)
+.br
+/usr/share/zoneinfo/GMT	for UTC leap seconds
+.sp
+If
+.B /usr/share/zoneinfo/GMT
+is absent,
+UTC leap seconds are loaded from
+.BR /usr/share/zoneinfo/posixrules .
+.SH SEE ALSO
+getenv(3),
+newstrftime(3),
+newtzset(3),
+time(2),
+tzfile(5)
+.SH NOTES
+The return values of
+.BR asctime ,
+.BR ctime ,
+.BR gmtime ,
+and
+.B localtime
+point to static data
+overwritten by each call.
+The
+.B tzname
+variable (once set) and the
+.B tm_zone
+field of a returned
+.B "struct tm"
+both point to an array of characters that
+can be freed or overwritten by later calls to the functions
+.BR localtime ,
+.BR tzfree ,
+and
+.BR tzset ,
+if these functions affect the timezone information that specifies the
+abbreviation in question.
+The remaining functions and data are thread-safe.
+.PP
+The
+.BR asctime ,
+.BR asctime_r ,
+.BR ctime ,
+and
+.B ctime_r
+functions
+behave strangely for years before 1000 or after 9999.
+The 1989 and 1999 editions of the C Standard say
+that years from \-99 through 999 are converted without
+extra spaces, but this conflicts with longstanding
+tradition and with this implementation.
+The 2011 edition says that the behavior
+is undefined if the year is before 1000 or after 9999.
+Traditional implementations of these two functions are
+restricted to years in the range 1900 through 2099.
+To avoid this portability mess, new programs should use
+.B strftime
+instead.
diff --git a/lib-tzcode/newctime.3.txt b/lib-tzcode/newctime.3.txt
new file mode 100644
index 0000000..7a59bbe
--- /dev/null
+++ b/lib-tzcode/newctime.3.txt
@@ -0,0 +1,177 @@
+newctime(3)                Library Functions Manual                newctime(3)
+
+NAME
+       asctime, ctime, difftime, gmtime, localtime, mktime - convert date and
+       time
+
+SYNOPSIS
+       #include <time.h>
+
+       extern char *tzname[]; /* (optional) */
+
+       [[deprecated]] char *ctime(time_t const *clock);
+
+       char *ctime_r(time_t const *clock, char *buf);
+
+       double difftime(time_t time1, time_t time0);
+
+       [[deprecated]] char *asctime(struct tm const *tm);
+
+       char *asctime_r(struct tm const *restrict tm,
+           char *restrict result);
+
+       struct tm *localtime(time_t const *clock);
+
+       struct tm *localtime_r(time_t const *restrict clock,
+           struct tm *restrict result);
+
+       struct tm *localtime_rz(timezone_t restrict zone,
+           time_t const *restrict clock,
+           struct tm *restrict result);
+
+       struct tm *gmtime(time_t const *clock);
+
+       struct tm *gmtime_r(time_t const *restrict clock,
+           struct tm *restrict result);
+
+       time_t mktime(struct tm *tm);
+
+       time_t mktime_z(timezone_t restrict zone,
+           struct tm *restrict tm);
+
+       cc ... -ltz
+
+DESCRIPTION
+       The ctime function converts a long integer, pointed to by clock, and
+       returns a pointer to a string of the form
+                            Thu Nov 24 18:22:48 1986\n\0
+       Years requiring fewer than four characters are padded with leading
+       zeroes.  For years longer than four characters, the string is of the
+       form
+                          Thu Nov 24 18:22:48     81986\n\0
+       with five spaces before the year.  These unusual formats are designed
+       to make it less likely that older software that expects exactly 26
+       bytes of output will mistakenly output misleading values for out-of-
+       range years.
+
+       The *clock timestamp represents the time in seconds since 1970-01-01
+       00:00:00 Coordinated Universal Time (UTC).  The POSIX standard says
+       that timestamps must be nonnegative and must ignore leap seconds.  Many
+       implementations extend POSIX by allowing negative timestamps, and can
+       therefore represent timestamps that predate the introduction of UTC and
+       are some other flavor of Universal Time (UT).  Some implementations
+       support leap seconds, in contradiction to POSIX.
+
+       The ctime function is deprecated starting in C23.  Callers can use
+       localtime_r and strftime instead.
+
+       The localtime and gmtime functions return pointers to "tm" structures,
+       described below.  The localtime function corrects for the time zone and
+       any time zone adjustments (such as Daylight Saving Time in the United
+       States).  After filling in the "tm" structure, localtime sets the
+       tm_isdst'th element of tzname to a pointer to a string that's the time
+       zone abbreviation to be used with localtime's return value.
+
+       The gmtime function converts to Coordinated Universal Time.
+
+       The asctime function converts a time value contained in a "tm"
+       structure to a string, as shown in the above example, and returns a
+       pointer to the string.  This function is deprecated starting in C23.
+       Callers can use strftime instead.
+
+       The mktime function converts the broken-down time, expressed as local
+       time, in the structure pointed to by tm into a calendar time value with
+       the same encoding as that of the values returned by the time function.
+       The original values of the tm_wday and tm_yday components of the
+       structure are ignored, and the original values of the other components
+       are not restricted to their normal ranges.  (A positive or zero value
+       for tm_isdst causes mktime to presume initially that daylight saving
+       time, respectively, is or is not in effect for the specified time.  A
+       negative value for tm_isdst causes the mktime function to attempt to
+       divine whether daylight saving time is in effect for the specified
+       time; in this case it does not use a consistent rule and may give a
+       different answer when later presented with the same argument.)  On
+       successful completion, the values of the tm_wday and tm_yday components
+       of the structure are set appropriately, and the other components are
+       set to represent the specified calendar time, but with their values
+       forced to their normal ranges; the final value of tm_mday is not set
+       until tm_mon and tm_year are determined.  The mktime function returns
+       the specified calendar time; if the calendar time cannot be
+       represented, it returns -1.
+
+       The difftime function returns the difference between two calendar
+       times, (time1 - time0), expressed in seconds.
+
+       The ctime_r, localtime_r, gmtime_r, and asctime_r functions are like
+       their unsuffixed counterparts, except that they accept an additional
+       argument specifying where to store the result if successful.
+
+       The localtime_rz and mktime_z functions are like their unsuffixed
+       counterparts, except that they accept an extra initial zone argument
+       specifying the timezone to be used for conversion.  If zone is null, UT
+       is used; otherwise, zone should have been allocated by tzalloc and
+       should not be freed until after all uses (e.g., by calls to strftime)
+       of the filled-in tm_zone fields.
+
+       Declarations of all the functions and externals, and the "tm"
+       structure, are in the <time.h> header file.  The structure (of type)
+       struct tm includes the following fields:
+
+                int tm_sec;      /* seconds (0-60) */
+                int tm_min;      /* minutes (0-59) */
+                int tm_hour;     /* hours (0-23) */
+                int tm_mday;     /* day of month (1-31) */
+                int tm_mon;      /* month of year (0-11) */
+                int tm_year;     /* year - 1900 */
+                int tm_wday;     /* day of week (Sunday = 0) */
+                int tm_yday;     /* day of year (0-365) */
+                int tm_isdst;    /* is daylight saving time in effect? */
+                char *tm_zone;   /* time zone abbreviation (optional) */
+                long tm_gmtoff;  /* offset from UT in seconds (optional) */
+
+       The tm_isdst field is non-zero if daylight saving time is in effect.
+
+       The tm_gmtoff field is the offset (in seconds) of the time represented
+       from UT, with positive values indicating east of the Prime Meridian.
+       The field's name is derived from Greenwich Mean Time, a precursor of
+       UT.
+
+       In struct tm the tm_zone and tm_gmtoff fields exist, and are filled in,
+       only if arrangements to do so were made when the library containing
+       these functions was created.  Similarly, the tzname variable is
+       optional; also, there is no guarantee that tzname will continue to
+       exist in this form in future releases of this code.
+
+FILES
+       /usr/share/zoneinfo             timezone information directory
+       /usr/share/zoneinfo/localtime   local timezone file
+       /usr/share/zoneinfo/posixrules  default DST rules (obsolete,
+                                       and can cause bugs if present)
+       /usr/share/zoneinfo/GMT         for UTC leap seconds
+
+       If /usr/share/zoneinfo/GMT is absent, UTC leap seconds are loaded from
+       /usr/share/zoneinfo/posixrules.
+
+SEE ALSO
+       getenv(3), newstrftime(3), newtzset(3), time(2), tzfile(5)
+
+NOTES
+       The return values of asctime, ctime, gmtime, and localtime point to
+       static data overwritten by each call.  The tzname variable (once set)
+       and the tm_zone field of a returned struct tm both point to an array of
+       characters that can be freed or overwritten by later calls to the
+       functions localtime, tzfree, and tzset, if these functions affect the
+       timezone information that specifies the abbreviation in question.  The
+       remaining functions and data are thread-safe.
+
+       The asctime, asctime_r, ctime, and ctime_r functions behave strangely
+       for years before 1000 or after 9999.  The 1989 and 1999 editions of the
+       C Standard say that years from -99 through 999 are converted without
+       extra spaces, but this conflicts with longstanding tradition and with
+       this implementation.  The 2011 edition says that the behavior is
+       undefined if the year is before 1000 or after 9999.  Traditional
+       implementations of these two functions are restricted to years in the
+       range 1900 through 2099.  To avoid this portability mess, new programs
+       should use strftime instead.
+
+Time Zone Database                                                 newctime(3)
diff --git a/lib-tzcode/newstrftime.3 b/lib-tzcode/newstrftime.3
new file mode 100644
index 0000000..432c3e8
--- /dev/null
+++ b/lib-tzcode/newstrftime.3
@@ -0,0 +1,290 @@
+.\" strftime man page
+.\"
+.\" Based on the UCB file whose corrected copyright information appears below.
+.\" Copyright 1989, 1991 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the American National Standards Committee X3, on Information
+.\" Processing Systems.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     from: @(#)strftime.3	5.12 (Berkeley) 6/29/91
+.\"	$Id: strftime.3,v 1.4 1993/12/15 20:33:00 jtc Exp $
+.\"
+.TH newstrftime 3 "" "Time Zone Database"
+.SH NAME
+strftime \- format date and time
+.SH SYNOPSIS
+.nf
+.ie \n(.g .ds - \f(CR-\fP
+.el .ds - \-
+.B #include <time.h>
+.PP
+.B "size_t strftime(char *restrict buf, size_t maxsize,"
+.B "    char const *restrict format, struct tm const *restrict timeptr);"
+.PP
+.B cc ... \-ltz
+.fi
+.SH DESCRIPTION
+.ie '\(lq'' .ds lq \&"\"
+.el .ds lq \(lq\"
+.ie '\(rq'' .ds rq \&"\"
+.el .ds rq \(rq\"
+.de c
+.ie \n(.g \f(CR\\$1\fP\\$2
+.el \\$1\\$2
+..
+.de q
+\\$3\*(lq\\$1\*(rq\\$2
+..
+The
+.B strftime
+function formats the information from
+.I timeptr
+into the array pointed to by
+.I buf
+according to the string pointed to by
+.IR format .
+.PP
+The
+.I format
+string consists of zero or more conversion specifications and
+ordinary characters.
+All ordinary characters are copied directly into the array.
+A conversion specification consists of a percent sign
+.Ql %
+and one other character.
+.PP
+No more than
+.I maxsize
+bytes are placed into the array.
+.PP
+Each conversion specification is replaced by the characters as
+follows which are then copied into the array.
+.TP
+%A
+is replaced by the locale's full weekday name.
+.TP
+%a
+is replaced by the locale's abbreviated weekday name.
+.TP
+%B
+is replaced by the locale's full month name.
+.TP
+%b or %h
+is replaced by the locale's abbreviated month name.
+.TP
+%C
+is replaced by the century (a year divided by 100 and truncated to an integer)
+as a decimal number [00,99].
+.TP
+%c
+is replaced by the locale's appropriate date and time representation.
+.TP
+%D
+is equivalent to
+.c %m/%d/%y .
+.TP
+%d
+is replaced by the day of the month as a decimal number [01,31].
+.TP
+%e
+is replaced by the day of month as a decimal number [1,31];
+single digits are preceded by a blank.
+.TP
+%F
+is equivalent to
+.c %Y-%m-%d
+(the ISO 8601 date format).
+.TP
+%G
+is replaced by the ISO 8601 year with century as a decimal number.
+See also the
+.c %V
+conversion specification.
+.TP
+%g
+is replaced by the ISO 8601 year without century as a decimal number [00,99].
+This is the year that includes the greater part of the week
+(Monday as the first day of a week).
+See also the
+.c %V
+conversion specification.
+.TP
+%H
+is replaced by the hour (24-hour clock) as a decimal number [00,23].
+.TP
+%I
+is replaced by the hour (12-hour clock) as a decimal number [01,12].
+.TP
+%j
+is replaced by the day of the year as a decimal number [001,366].
+.TP
+%k
+is replaced by the hour (24-hour clock) as a decimal number [0,23];
+single digits are preceded by a blank.
+.TP
+%l
+is replaced by the hour (12-hour clock) as a decimal number [1,12];
+single digits are preceded by a blank.
+.TP
+%M
+is replaced by the minute as a decimal number [00,59].
+.TP
+%m
+is replaced by the month as a decimal number [01,12].
+.TP
+%n
+is replaced by a newline.
+.TP
+%p
+is replaced by the locale's equivalent of either
+.q AM
+or
+.q PM .
+.TP
+%R
+is replaced by the time in the format
+.c %H:%M .
+.TP
+%r
+is replaced by the locale's representation of 12-hour clock time
+using AM/PM notation.
+.TP
+%S
+is replaced by the second as a decimal number [00,60].
+The range of
+seconds is [00,60] instead of [00,59] to allow for the periodic occurrence
+of leap seconds.
+.TP
+%s
+is replaced by the number of seconds since the Epoch (see
+.BR ctime (3)).
+.TP
+%T
+is replaced by the time in the format
+.c %H:%M:%S .
+.TP
+%t
+is replaced by a tab.
+.TP
+%U
+is replaced by the week number of the year (Sunday as the first day of
+the week) as a decimal number [00,53].
+.TP
+%u
+is replaced by the weekday (Monday as the first day of the week)
+as a decimal number [1,7].
+.TP
+%V
+is replaced by the week number of the year (Monday as the first day of
+the week) as a decimal number [01,53].  If the week containing January
+1 has four or more days in the new year, then it is week 1; otherwise
+it is the last week (52 or 53) of the previous year, and the next week is week 1.
+The year is given by the
+.c %G
+conversion specification.
+.TP
+%W
+is replaced by the week number of the year (Monday as the first day of
+the week) as a decimal number [00,53].
+.TP
+%w
+is replaced by the weekday (Sunday as the first day of the week)
+as a decimal number [0,6].
+.TP
+%X
+is replaced by the locale's appropriate time representation.
+.TP
+%x
+is replaced by the locale's appropriate date representation.
+.TP
+%Y
+is replaced by the year with century as a decimal number.
+.TP
+%y
+is replaced by the year without century as a decimal number [00,99].
+.TP
+%Z
+is replaced by the time zone abbreviation,
+or by the empty string if this is not determinable.
+.TP
+%z
+is replaced by the offset from the Prime Meridian
+in the format +HHMM or \*-HHMM (ISO 8601) as appropriate,
+with positive values representing locations east of Greenwich,
+or by the empty string if this is not determinable.
+The numeric time zone abbreviation \*-0000 is used when the time is
+Universal Time
+but local time is indeterminate; by convention this is used for
+locations while uninhabited, and corresponds to a zero offset when the
+time zone abbreviation begins with
+.q "\*-" .
+.TP
+%%
+is replaced by a single %.
+.TP
+%+
+is replaced by the locale's date and time in
+.BR date (1)
+format.
+.SH "RETURN VALUE"
+If the conversion is successful,
+.B strftime
+returns the number of bytes placed into the array, not counting the
+terminating NUL;
+.B errno
+is unchanged if the returned value is zero.
+Otherwise,
+.B errno
+is set to indicate the error, zero is returned,
+and the array contents are unspecified.
+.SH ERRORS
+This function fails if:
+.TP
+[ERANGE]
+The total number of resulting bytes, including the terminating
+NUL character, is more than
+.IR maxsize .
+.PP
+This function may fail if:
+.TP
+[EOVERFLOW]
+The format includes an
+.c %s
+conversion and the number of seconds since the Epoch cannot be represented
+in a
+.c time_t .
+.SH SEE ALSO
+date(1),
+getenv(3),
+newctime(3),
+newtzset(3),
+time(2),
+tzfile(5)
+.SH BUGS
+There is no conversion specification for the phase of the moon.
diff --git a/lib-tzcode/newstrftime.3.txt b/lib-tzcode/newstrftime.3.txt
new file mode 100644
index 0000000..9227a8e
--- /dev/null
+++ b/lib-tzcode/newstrftime.3.txt
@@ -0,0 +1,169 @@
+newstrftime(3)             Library Functions Manual             newstrftime(3)
+
+NAME
+       strftime - format date and time
+
+SYNOPSIS
+       #include <time.h>
+
+       size_t strftime(char *restrict buf, size_t maxsize,
+           char const *restrict format, struct tm const *restrict timeptr);
+
+       cc ... -ltz
+
+DESCRIPTION
+       The strftime function formats the information from timeptr into the
+       array pointed to by buf according to the string pointed to by format.
+
+       The format string consists of zero or more conversion specifications
+       and ordinary characters.  All ordinary characters are copied directly
+       into the array.  A conversion specification consists of a percent sign
+       and one other character.
+
+       No more than maxsize bytes are placed into the array.
+
+       Each conversion specification is replaced by the characters as follows
+       which are then copied into the array.
+
+       %A     is replaced by the locale's full weekday name.
+
+       %a     is replaced by the locale's abbreviated weekday name.
+
+       %B     is replaced by the locale's full month name.
+
+       %b or %h
+              is replaced by the locale's abbreviated month name.
+
+       %C     is replaced by the century (a year divided by 100 and truncated
+              to an integer) as a decimal number [00,99].
+
+       %c     is replaced by the locale's appropriate date and time
+              representation.
+
+       %D     is equivalent to %m/%d/%y.
+
+       %d     is replaced by the day of the month as a decimal number [01,31].
+
+       %e     is replaced by the day of month as a decimal number [1,31];
+              single digits are preceded by a blank.
+
+       %F     is equivalent to %Y-%m-%d (the ISO 8601 date format).
+
+       %G     is replaced by the ISO 8601 year with century as a decimal
+              number.  See also the %V conversion specification.
+
+       %g     is replaced by the ISO 8601 year without century as a decimal
+              number [00,99].  This is the year that includes the greater part
+              of the week (Monday as the first day of a week).  See also the
+              %V conversion specification.
+
+       %H     is replaced by the hour (24-hour clock) as a decimal number
+              [00,23].
+
+       %I     is replaced by the hour (12-hour clock) as a decimal number
+              [01,12].
+
+       %j     is replaced by the day of the year as a decimal number
+              [001,366].
+
+       %k     is replaced by the hour (24-hour clock) as a decimal number
+              [0,23]; single digits are preceded by a blank.
+
+       %l     is replaced by the hour (12-hour clock) as a decimal number
+              [1,12]; single digits are preceded by a blank.
+
+       %M     is replaced by the minute as a decimal number [00,59].
+
+       %m     is replaced by the month as a decimal number [01,12].
+
+       %n     is replaced by a newline.
+
+       %p     is replaced by the locale's equivalent of either "AM" or "PM".
+
+       %R     is replaced by the time in the format %H:%M.
+
+       %r     is replaced by the locale's representation of 12-hour clock time
+              using AM/PM notation.
+
+       %S     is replaced by the second as a decimal number [00,60].  The
+              range of seconds is [00,60] instead of [00,59] to allow for the
+              periodic occurrence of leap seconds.
+
+       %s     is replaced by the number of seconds since the Epoch (see
+              ctime(3)).
+
+       %T     is replaced by the time in the format %H:%M:%S.
+
+       %t     is replaced by a tab.
+
+       %U     is replaced by the week number of the year (Sunday as the first
+              day of the week) as a decimal number [00,53].
+
+       %u     is replaced by the weekday (Monday as the first day of the week)
+              as a decimal number [1,7].
+
+       %V     is replaced by the week number of the year (Monday as the first
+              day of the week) as a decimal number [01,53].  If the week
+              containing January 1 has four or more days in the new year, then
+              it is week 1; otherwise it is the last week (52 or 53) of the
+              previous year, and the next week is week 1.  The year is given
+              by the %G conversion specification.
+
+       %W     is replaced by the week number of the year (Monday as the first
+              day of the week) as a decimal number [00,53].
+
+       %w     is replaced by the weekday (Sunday as the first day of the week)
+              as a decimal number [0,6].
+
+       %X     is replaced by the locale's appropriate time representation.
+
+       %x     is replaced by the locale's appropriate date representation.
+
+       %Y     is replaced by the year with century as a decimal number.
+
+       %y     is replaced by the year without century as a decimal number
+              [00,99].
+
+       %Z     is replaced by the time zone abbreviation, or by the empty
+              string if this is not determinable.
+
+       %z     is replaced by the offset from the Prime Meridian in the format
+              +HHMM or -HHMM (ISO 8601) as appropriate, with positive values
+              representing locations east of Greenwich, or by the empty string
+              if this is not determinable.  The numeric time zone abbreviation
+              -0000 is used when the time is Universal Time but local time is
+              indeterminate; by convention this is used for locations while
+              uninhabited, and corresponds to a zero offset when the time zone
+              abbreviation begins with "-".
+
+       %%     is replaced by a single %.
+
+       %+     is replaced by the locale's date and time in date(1) format.
+
+RETURN VALUE
+       If the conversion is successful, strftime returns the number of bytes
+       placed into the array, not counting the terminating NUL; errno is
+       unchanged if the returned value is zero.  Otherwise, errno is set to
+       indicate the error, zero is returned, and the array contents are
+       unspecified.
+
+ERRORS
+       This function fails if:
+
+       [ERANGE]
+              The total number of resulting bytes, including the terminating
+              NUL character, is more than maxsize.
+
+       This function may fail if:
+
+       [EOVERFLOW]
+              The format includes an %s conversion and the number of seconds
+              since the Epoch cannot be represented in a time_t.
+
+SEE ALSO
+       date(1), getenv(3), newctime(3), newtzset(3), time(2), tzfile(5)
+
+BUGS
+       There is no conversion specification for the phase of the moon.
+
+Time Zone Database                                              newstrftime(3)
diff --git a/lib-tzcode/newtzset.3 b/lib-tzcode/newtzset.3
new file mode 100644
index 0000000..78b6b6c
--- /dev/null
+++ b/lib-tzcode/newtzset.3
@@ -0,0 +1,351 @@
+.\" This file is in the public domain, so clarified as of
+.\" 2009-05-17 by Arthur David Olson.
+.TH newtzset 3 "" "Time Zone Database"
+.SH NAME
+tzset \- initialize time conversion information
+.SH SYNOPSIS
+.nf
+.ie \n(.g .ds - \f(CR-\fP
+.el .ds - \-
+.B #include <time.h>
+.PP
+.B timezone_t tzalloc(char const *TZ);
+.PP
+.B void tzfree(timezone_t tz);
+.PP
+.B void tzset(void);
+.PP
+.B cc ... \*-ltz
+.fi
+.SH DESCRIPTION
+.ie '\(en'' .ds en \-
+.el .ds en \(en
+.ie '\(lq'' .ds lq \&"\"
+.el .ds lq \(lq\"
+.ie '\(rq'' .ds rq \&"\"
+.el .ds rq \(rq\"
+.de q
+\\$3\*(lq\\$1\*(rq\\$2
+..
+The
+.B tzalloc
+function
+allocates and returns a timezone object described by
+.BR TZ .
+If
+.B TZ
+is not a valid timezone description, or if the object cannot be allocated,
+.B tzalloc
+returns a null pointer and sets
+.BR errno .
+.PP
+The
+.B tzfree
+function
+frees a timezone object
+.BR tz ,
+which should have been successfully allocated by
+.BR tzalloc .
+This invalidates any
+.B tm_zone
+pointers that
+.B tz
+was used to set.
+.PP
+The
+.B tzset
+function
+acts like
+.BR tzalloc(getenv("TZ")) ,
+except it saves any resulting timezone object into internal
+storage that is accessed by
+.BR localtime ,
+.BR localtime_r ,
+and
+.BR mktime .
+The anonymous shared timezone object is freed by the next call to
+.BR tzset .
+If the implied call to
+.B tzalloc
+fails,
+.B tzset
+falls back on Universal Time (UT).
+.PP
+If
+.B TZ
+is null, the best available approximation to local (wall
+clock) time, as specified by the
+.BR tzfile (5)-format
+file
+.B localtime
+in the system time conversion information directory, is used.
+If
+.B TZ
+is the empty string,
+UT is used, with the abbreviation "UTC"
+and without leap second correction; please see
+.BR newctime (3)
+for more about UT, UTC, and leap seconds.  If
+.B TZ
+is nonnull and nonempty:
+.IP
+if the value begins with a colon, it is used as a pathname of a file
+from which to read the time conversion information;
+.IP
+if the value does not begin with a colon, it is first used as the
+pathname of a file from which to read the time conversion information,
+and, if that file cannot be read, is used directly as a specification of
+the time conversion information.
+.PP
+When
+.B TZ
+is used as a pathname, if it begins with a slash,
+it is used as an absolute pathname; otherwise,
+it is used as a pathname relative to a system time conversion information
+directory.
+The file must be in the format specified in
+.BR tzfile (5).
+.PP
+When
+.B TZ
+is used directly as a specification of the time conversion information,
+it must have the following syntax (spaces inserted for clarity):
+.IP
+\fIstd\|offset\fR[\fIdst\fR[\fIoffset\fR][\fB,\fIrule\fR]]
+.PP
+Where:
+.RS
+.TP 15
+.IR std " and " dst
+Three or more bytes that are the designation for the standard
+.RI ( std )
+or the alternative
+.RI ( dst ,
+such as daylight saving time)
+time zone.  Only
+.I std
+is required; if
+.I dst
+is missing, then daylight saving time does not apply in this locale.
+Upper- and lowercase letters are explicitly allowed.  Any characters
+except a leading colon
+.RB ( : ),
+digits, comma
+.RB ( , ),
+ASCII minus
+.RB ( \*- ),
+ASCII plus
+.RB ( + ),
+and NUL bytes are allowed.
+Alternatively, a designation can be surrounded by angle brackets
+.B <
+and
+.BR > ;
+in this case, the designation can contain any characters other than
+.B >
+and NUL.
+.TP
+.I offset
+Indicates the value one must add to the local time to arrive at
+Coordinated Universal Time.  The
+.I offset
+has the form:
+.RS
+.IP
+\fIhh\fR[\fB:\fImm\fR[\fB:\fIss\fR]]
+.RE
+.IP
+The minutes
+.RI ( mm )
+and seconds
+.RI ( ss )
+are optional.  The hour
+.RI ( hh )
+is required and may be a single digit.  The
+.I offset
+following
+.I std
+is required.  If no
+.I offset
+follows
+.IR dst ,
+daylight saving time is assumed to be one hour ahead of standard time.  One or
+more digits may be used; the value is always interpreted as a decimal
+number.  The hour must be between zero and 24, and the minutes (and
+seconds) \*(en if present \*(en between zero and 59.  If preceded by a
+.q "\*-" ,
+the time zone shall be east of the Prime Meridian; otherwise it shall be
+west (which may be indicated by an optional preceding
+.q "+" ).
+.TP
+.I rule
+Indicates when to change to and back from daylight saving time.  The
+.I rule
+has the form:
+.RS
+.IP
+\fIdate\fB/\fItime\fB,\fIdate\fB/\fItime\fR
+.RE
+.IP
+where the first
+.I date
+describes when the change from standard to daylight saving time occurs and the
+second
+.I date
+describes when the change back happens.  Each
+.I time
+field describes when, in current local time, the change to the other
+time is made.
+As an extension to POSIX, daylight saving is assumed to be in effect
+all year if it begins January 1 at 00:00 and ends December 31 at
+24:00 plus the difference between daylight saving and standard time,
+leaving no room for standard time in the calendar.
+.IP
+The format of
+.I date
+is one of the following:
+.RS
+.TP 10
+.BI J n
+The Julian day
+.I n
+.RI "(1\ \(<=" "\ n\ " "\(<=\ 365).
+Leap days are not counted; that is, in all years \*(en including leap
+years \*(en February 28 is day 59 and March 1 is day 60.  It is
+impossible to explicitly refer to the occasional February 29.
+.TP
+.I n
+The zero-based Julian day
+.RI "(0\ \(<=" "\ n\ " "\(<=\ 365).
+Leap days are counted, and it is possible to refer to February 29.
+.TP
+.BI M m . n . d
+The
+.IR d' th
+day
+.RI "(0\ \(<=" "\ d\ " "\(<=\ 6)
+of week
+.I n
+of month
+.I m
+of the year
+.RI "(1\ \(<=" "\ n\ " "\(<=\ 5,
+.RI "1\ \(<=" "\ m\ " "\(<=\ 12,
+where week 5 means
+.q "the last \fId\fP day in month \fIm\fP"
+which may occur in either the fourth or the fifth week).  Week 1 is the
+first week in which the
+.IR d' th
+day occurs.  Day zero is Sunday.
+.RE
+.IP "" 15
+The
+.I time
+has the same format as
+.I offset
+except that POSIX does not allow a leading sign (\c
+.q "\*-"
+or
+.q "+" ).
+As an extension to POSIX, the hours part of
+.I time
+can range from \-167 through 167; this allows for unusual rules such
+as
+.q "the Saturday before the first Sunday of March" .
+The default, if
+.I time
+is not given, is
+.BR 02:00:00 .
+.RE
+.LP
+Here are some examples of
+.B TZ
+values that directly specify the timezone; they use some of the
+extensions to POSIX.
+.TP
+.B EST5
+stands for US Eastern Standard
+Time (EST), 5 hours behind UT, without daylight saving.
+.TP
+.B <+12>\*-12<+13>,M11.1.0,M1.2.1/147
+stands for Fiji time, 12 hours ahead
+of UT, springing forward on November's first Sunday at 02:00, and
+falling back on January's second Monday at 147:00 (i.e., 03:00 on the
+first Sunday on or after January 14).  The abbreviations for standard
+and daylight saving time are
+.q "+12"
+and
+.q "+13".
+.TP
+.B IST\*-2IDT,M3.4.4/26,M10.5.0
+stands for Israel Standard Time (IST) and Israel Daylight Time (IDT),
+2 hours ahead of UT, springing forward on March's fourth
+Thursday at 26:00 (i.e., 02:00 on the first Friday on or after March
+23), and falling back on October's last Sunday at 02:00.
+.TP
+.B <\*-04>4<\*-03>,J1/0,J365/25
+stands for permanent daylight saving time, 3 hours behind UT with
+abbreviation
+.q "\*-03".
+There is a dummy fall-back transition on December 31 at 25:00 daylight
+saving time (i.e., 24:00 standard time, equivalent to January 1 at
+00:00 standard time), and a simultaneous spring-forward transition on
+January 1 at 00:00 standard time, so daylight saving time is in effect
+all year and the initial
+.B <\*-04>
+is a placeholder.
+.TP
+.B <\*-03>3<\*-02>,M3.5.0/\*-2,M10.5.0/\*-1
+stands for time in western Greenland, 3 hours behind UT, where clocks
+follow the EU rules of
+springing forward on March's last Sunday at 01:00 UT (\-02:00 local
+time, i.e., 22:00 the previous day) and falling back on October's last
+Sunday at 01:00 UT (\-01:00 local time, i.e., 23:00 the previous day).
+The abbreviations for standard and daylight saving time are
+.q "\*-03"
+and
+.q "\*-02".
+.PP
+If no
+.I rule
+is present in
+.BR TZ ,
+the rules specified
+by the
+.BR tzfile (5)-format
+file
+.B posixrules
+in the system time conversion information directory are used, with the
+standard and daylight saving time offsets from UT replaced by those specified by
+the
+.I offset
+values in
+.BR TZ .
+.PP
+For compatibility with System V Release 3.1, a semicolon
+.RB ( ; )
+may be used to separate the
+.I rule
+from the rest of the specification.
+.SH FILES
+.ta \w'/usr/share/zoneinfo/posixrules\0\0'u
+/usr/share/zoneinfo	timezone information directory
+.br
+/usr/share/zoneinfo/localtime	local timezone file
+.br
+/usr/share/zoneinfo/posixrules	default DST rules (obsolete,
+	and can cause bugs if present)
+.br
+/usr/share/zoneinfo/GMT	for UTC leap seconds
+.sp
+If
+.B /usr/share/zoneinfo/GMT
+is absent,
+UTC leap seconds are loaded from
+.BR /usr/share/zoneinfo/posixrules .
+.SH SEE ALSO
+getenv(3),
+newctime(3),
+newstrftime(3),
+time(2),
+tzfile(5)
diff --git a/lib-tzcode/newtzset.3.txt b/lib-tzcode/newtzset.3.txt
new file mode 100644
index 0000000..ee5c125
--- /dev/null
+++ b/lib-tzcode/newtzset.3.txt
@@ -0,0 +1,200 @@
+newtzset(3)                Library Functions Manual                newtzset(3)
+
+NAME
+       tzset - initialize time conversion information
+
+SYNOPSIS
+       #include <time.h>
+
+       timezone_t tzalloc(char const *TZ);
+
+       void tzfree(timezone_t tz);
+
+       void tzset(void);
+
+       cc ... -ltz
+
+DESCRIPTION
+       The tzalloc function allocates and returns a timezone object described
+       by TZ.  If TZ is not a valid timezone description, or if the object
+       cannot be allocated, tzalloc returns a null pointer and sets errno.
+
+       The tzfree function frees a timezone object tz, which should have been
+       successfully allocated by tzalloc.  This invalidates any tm_zone
+       pointers that tz was used to set.
+
+       The tzset function acts like tzalloc(getenv("TZ")), except it saves any
+       resulting timezone object into internal storage that is accessed by
+       localtime, localtime_r, and mktime.  The anonymous shared timezone
+       object is freed by the next call to tzset.  If the implied call to
+       tzalloc fails, tzset falls back on Universal Time (UT).
+
+       If TZ is null, the best available approximation to local (wall clock)
+       time, as specified by the tzfile(5)-format file localtime in the system
+       time conversion information directory, is used.  If TZ is the empty
+       string, UT is used, with the abbreviation "UTC" and without leap second
+       correction; please see newctime(3) for more about UT, UTC, and leap
+       seconds.  If TZ is nonnull and nonempty:
+
+              if the value begins with a colon, it is used as a pathname of a
+              file from which to read the time conversion information;
+
+              if the value does not begin with a colon, it is first used as
+              the pathname of a file from which to read the time conversion
+              information, and, if that file cannot be read, is used directly
+              as a specification of the time conversion information.
+
+       When TZ is used as a pathname, if it begins with a slash, it is used as
+       an absolute pathname; otherwise, it is used as a pathname relative to a
+       system time conversion information directory.  The file must be in the
+       format specified in tzfile(5).
+
+       When TZ is used directly as a specification of the time conversion
+       information, it must have the following syntax (spaces inserted for
+       clarity):
+
+              stdoffset[dst[offset][,rule]]
+
+       Where:
+
+              std and dst    Three or more bytes that are the designation for
+                             the standard (std) or the alternative (dst, such
+                             as daylight saving time) time zone.  Only std is
+                             required; if dst is missing, then daylight saving
+                             time does not apply in this locale.  Upper- and
+                             lowercase letters are explicitly allowed.  Any
+                             characters except a leading colon (:), digits,
+                             comma (,), ASCII minus (-), ASCII plus (+), and
+                             NUL bytes are allowed.  Alternatively, a
+                             designation can be surrounded by angle brackets <
+                             and >; in this case, the designation can contain
+                             any characters other than > and NUL.
+
+              offset         Indicates the value one must add to the local
+                             time to arrive at Coordinated Universal Time.
+                             The offset has the form:
+
+                                    hh[:mm[:ss]]
+
+                             The minutes (mm) and seconds (ss) are optional.
+                             The hour (hh) is required and may be a single
+                             digit.  The offset following std is required.  If
+                             no offset follows dst, daylight saving time is
+                             assumed to be one hour ahead of standard time.
+                             One or more digits may be used; the value is
+                             always interpreted as a decimal number.  The hour
+                             must be between zero and 24, and the minutes (and
+                             seconds) - if present - between zero and 59.  If
+                             preceded by a "-", the time zone shall be east of
+                             the Prime Meridian; otherwise it shall be west
+                             (which may be indicated by an optional preceding
+                             "+").
+
+              rule           Indicates when to change to and back from
+                             daylight saving time.  The rule has the form:
+
+                                    date/time,date/time
+
+                             where the first date describes when the change
+                             from standard to daylight saving time occurs and
+                             the second date describes when the change back
+                             happens.  Each time field describes when, in
+                             current local time, the change to the other time
+                             is made.  As an extension to POSIX, daylight
+                             saving is assumed to be in effect all year if it
+                             begins January 1 at 00:00 and ends December 31 at
+                             24:00 plus the difference between daylight saving
+                             and standard time, leaving no room for standard
+                             time in the calendar.
+
+                             The format of date is one of the following:
+
+                             Jn        The Julian day n (1 <= n <= 365).  Leap
+                                       days are not counted; that is, in all
+                                       years - including leap years - February
+                                       28 is day 59 and March 1 is day 60.  It
+                                       is impossible to explicitly refer to
+                                       the occasional February 29.
+
+                             n         The zero-based Julian day
+                                       (0 <= n <= 365).  Leap days are
+                                       counted, and it is possible to refer to
+                                       February 29.
+
+                             Mm.n.d    The d'th day (0 <= d <= 6) of week n of
+                                       month m of the year (1 <= n <= 5,
+                                       1 <= m <= 12, where week 5 means "the
+                                       last d day in month m" which may occur
+                                       in either the fourth or the fifth
+                                       week).  Week 1 is the first week in
+                                       which the d'th day occurs.  Day zero is
+                                       Sunday.
+
+                             The time has the same format as offset except
+                             that POSIX does not allow a leading sign ("-" or
+                             "+").  As an extension to POSIX, the hours part
+                             of time can range from -167 through 167; this
+                             allows for unusual rules such as "the Saturday
+                             before the first Sunday of March".  The default,
+                             if time is not given, is 02:00:00.
+
+       Here are some examples of TZ values that directly specify the timezone;
+       they use some of the extensions to POSIX.
+
+       EST5   stands for US Eastern Standard Time (EST), 5 hours behind UT,
+              without daylight saving.
+
+       <+12>-12<+13>,M11.1.0,M1.2.1/147
+              stands for Fiji time, 12 hours ahead of UT, springing forward on
+              November's first Sunday at 02:00, and falling back on January's
+              second Monday at 147:00 (i.e., 03:00 on the first Sunday on or
+              after January 14).  The abbreviations for standard and daylight
+              saving time are "+12" and "+13".
+
+       IST-2IDT,M3.4.4/26,M10.5.0
+              stands for Israel Standard Time (IST) and Israel Daylight Time
+              (IDT), 2 hours ahead of UT, springing forward on March's fourth
+              Thursday at 26:00 (i.e., 02:00 on the first Friday on or after
+              March 23), and falling back on October's last Sunday at 02:00.
+
+       <-04>4<-03>,J1/0,J365/25
+              stands for permanent daylight saving time, 3 hours behind UT
+              with abbreviation "-03".  There is a dummy fall-back transition
+              on December 31 at 25:00 daylight saving time (i.e., 24:00
+              standard time, equivalent to January 1 at 00:00 standard time),
+              and a simultaneous spring-forward transition on January 1 at
+              00:00 standard time, so daylight saving time is in effect all
+              year and the initial <-04> is a placeholder.
+
+       <-03>3<-02>,M3.5.0/-2,M10.5.0/-1
+              stands for time in western Greenland, 3 hours behind UT, where
+              clocks follow the EU rules of springing forward on March's last
+              Sunday at 01:00 UT (-02:00 local time, i.e., 22:00 the previous
+              day) and falling back on October's last Sunday at 01:00 UT
+              (-01:00 local time, i.e., 23:00 the previous day).  The
+              abbreviations for standard and daylight saving time are "-03"
+              and "-02".
+
+       If no rule is present in TZ, the rules specified by the
+       tzfile(5)-format file posixrules in the system time conversion
+       information directory are used, with the standard and daylight saving
+       time offsets from UT replaced by those specified by the offset values
+       in TZ.
+
+       For compatibility with System V Release 3.1, a semicolon (;) may be
+       used to separate the rule from the rest of the specification.
+
+FILES
+       /usr/share/zoneinfo             timezone information directory
+       /usr/share/zoneinfo/localtime   local timezone file
+       /usr/share/zoneinfo/posixrules  default DST rules (obsolete,
+                                       and can cause bugs if present)
+       /usr/share/zoneinfo/GMT         for UTC leap seconds
+
+       If /usr/share/zoneinfo/GMT is absent, UTC leap seconds are loaded from
+       /usr/share/zoneinfo/posixrules.
+
+SEE ALSO
+       getenv(3), newctime(3), newstrftime(3), time(2), tzfile(5)
+
+Time Zone Database                                                 newtzset(3)
diff --git a/lib-tzcode/private.h b/lib-tzcode/private.h
new file mode 100644
index 0000000..838ab2b
--- /dev/null
+++ b/lib-tzcode/private.h
@@ -0,0 +1,1006 @@
+/* Private header for tzdb code.  */
+
+#ifndef PRIVATE_H
+
+#define PRIVATE_H
+
+/*
+** This file is in the public domain, so clarified as of
+** 1996-06-05 by Arthur David Olson.
+*/
+
+/*
+** This header is for use ONLY with the time conversion code.
+** There is no guarantee that it will remain unchanged,
+** or that it will remain at all.
+** Do NOT copy it to any system include directory.
+** Thank you!
+*/
+
+/* PORT_TO_C89 means the code should work even if the underlying
+   compiler and library support only C89.  SUPPORT_C89 means the
+   tzcode library should support C89 callers in addition to the usual
+   support for C99-and-later callers.  These macros are obsolescent,
+   and the plan is to remove them along with any code needed only when
+   they are nonzero.  */
+#ifndef PORT_TO_C89
+# define PORT_TO_C89 0
+#endif
+#ifndef SUPPORT_C89
+# define SUPPORT_C89 0
+#endif
+
+#ifndef __STDC_VERSION__
+# define __STDC_VERSION__ 0
+#endif
+
+/* Get bool/true/false: macros for C89, <stdbool.h> for C99..C17, built in for C23.  */
+#if PORT_TO_C89 && __STDC_VERSION__ < 199901
+# define true 1
+# define false 0
+# define bool int
+#elif __STDC_VERSION__ < 202311
+# include <stdbool.h>
+#endif
+
+#if __STDC_VERSION__ < 202311
+# define static_assert(cond) extern int static_assert_check[(cond) ? 1 : -1]
+#endif
+
+/*
+** zdump has been made independent of the rest of the time
+** conversion package to increase confidence in the verification it provides.
+** You can use zdump to help in verifying other implementations.
+** To do this, compile with -DUSE_LTZ=0 and link without the tz library.
+*/
+#ifndef USE_LTZ
+# define USE_LTZ 1
+#endif
+
+/* This string was in the Factory zone through version 2016f.  */
+#define GRANDPARENTED	"Local time zone must be set--see zic manual page"
+
+/*
+** Defaults for preprocessor symbols.
+** You can override these in your C compiler options, e.g. '-DHAVE_GETTEXT=1'.
+*/
+
+#ifndef HAVE_DECL_ASCTIME_R
+# define HAVE_DECL_ASCTIME_R 1
+#endif
+
+#if !defined HAVE__GENERIC && defined __has_extension
+# if __has_extension(c_generic_selections)
+#  define HAVE__GENERIC 1
+# else
+#  define HAVE__GENERIC 0
+# endif
+#endif
+/* _Generic is buggy in pre-4.9 GCC.  */
+#if !defined HAVE__GENERIC && defined __GNUC__ && !defined __STRICT_ANSI__
+# define HAVE__GENERIC (4 < __GNUC__ + (9 <= __GNUC_MINOR__))
+#endif
+#ifndef HAVE__GENERIC
+# define HAVE__GENERIC (201112 <= __STDC_VERSION__)
+#endif
+
+#if !defined HAVE_GETTEXT && defined __has_include
+# if __has_include(<libintl.h>)
+#  define HAVE_GETTEXT true /* <libintl.h> exists, so default to using gettext */
+# endif
+#endif
+#ifndef HAVE_GETTEXT
+# define HAVE_GETTEXT false
+#endif
+
+#ifndef HAVE_INCOMPATIBLE_CTIME_R
+# define HAVE_INCOMPATIBLE_CTIME_R 0
+#endif
+
+#ifndef HAVE_LINK
+# define HAVE_LINK 1
+#endif /* !defined HAVE_LINK */
+
+#ifndef HAVE_MALLOC_ERRNO
+# define HAVE_MALLOC_ERRNO 1
+#endif
+
+#ifndef HAVE_POSIX_DECLS
+# define HAVE_POSIX_DECLS 1
+#endif
+
+#ifndef HAVE_SETENV
+# define HAVE_SETENV 1
+#endif
+
+#ifndef HAVE_STRDUP
+# define HAVE_STRDUP 1
+#endif
+
+#ifndef HAVE_SYMLINK
+# define HAVE_SYMLINK 1
+#endif /* !defined HAVE_SYMLINK */
+
+#if !defined HAVE_SYS_STAT_H && defined __has_include
+# if !__has_include(<sys/stat.h>)
+#  define HAVE_SYS_STAT_H false
+# endif
+#endif
+#ifndef HAVE_SYS_STAT_H
+# define HAVE_SYS_STAT_H true /* assumed present unless shown to be missing */
+#endif
+
+#if !defined HAVE_UNISTD_H && defined __has_include
+# if !__has_include(<unistd.h>)
+#  define HAVE_UNISTD_H false
+# endif
+#endif
+#ifndef HAVE_UNISTD_H
+# define HAVE_UNISTD_H true /* assumed present unless shown to be missing */
+#endif
+
+#ifndef NETBSD_INSPIRED
+# define NETBSD_INSPIRED 1
+#endif
+
+#if HAVE_INCOMPATIBLE_CTIME_R
+# define asctime_r _incompatible_asctime_r
+# define ctime_r _incompatible_ctime_r
+#endif /* HAVE_INCOMPATIBLE_CTIME_R */
+
+/* Enable tm_gmtoff, tm_zone, and environ on GNUish systems.  */
+#define _GNU_SOURCE 1
+/* Fix asctime_r on Solaris 11.  */
+#define _POSIX_PTHREAD_SEMANTICS 1
+/* Enable strtoimax on pre-C99 Solaris 11.  */
+#define __EXTENSIONS__ 1
+
+/* On GNUish systems where time_t might be 32 or 64 bits, use 64.
+   On these platforms _FILE_OFFSET_BITS must also be 64; otherwise
+   setting _TIME_BITS to 64 does not work.  The code does not
+   otherwise rely on _FILE_OFFSET_BITS being 64, since it does not
+   use off_t or functions like 'stat' that depend on off_t.  */
+#ifndef _FILE_OFFSET_BITS
+# define _FILE_OFFSET_BITS 64
+#endif
+#if !defined _TIME_BITS && _FILE_OFFSET_BITS == 64
+# define _TIME_BITS 64
+#endif
+
+/*
+** Nested includes
+*/
+
+/* Avoid clashes with NetBSD by renaming NetBSD's declarations.
+   If defining the 'timezone' variable, avoid a clash with FreeBSD's
+   'timezone' function by renaming its declaration.  */
+#define localtime_rz sys_localtime_rz
+#define mktime_z sys_mktime_z
+#define posix2time_z sys_posix2time_z
+#define time2posix_z sys_time2posix_z
+#if defined USG_COMPAT && USG_COMPAT == 2
+# define timezone sys_timezone
+#endif
+#define timezone_t sys_timezone_t
+#define tzalloc sys_tzalloc
+#define tzfree sys_tzfree
+#include <time.h> /* system declarations are seen under the sys_ names above */
+#undef localtime_rz
+#undef mktime_z
+#undef posix2time_z
+#undef time2posix_z
+#if defined USG_COMPAT && USG_COMPAT == 2
+# undef timezone
+#endif
+#undef timezone_t
+#undef tzalloc
+#undef tzfree
+
+#include <stddef.h>
+#include <string.h>
+#if !PORT_TO_C89
+# include <inttypes.h>
+#endif
+#include <limits.h>	/* for CHAR_BIT et al. */
+#include <stdlib.h>
+
+#include <errno.h>
+
+#ifndef EINVAL
+# define EINVAL ERANGE
+#endif
+
+#ifndef ELOOP
+# define ELOOP EINVAL
+#endif
+#ifndef ENAMETOOLONG
+# define ENAMETOOLONG EINVAL
+#endif
+#ifndef ENOMEM
+# define ENOMEM EINVAL
+#endif
+#ifndef ENOTSUP
+# define ENOTSUP EINVAL
+#endif
+#ifndef EOVERFLOW
+# define EOVERFLOW EINVAL
+#endif
+
+#if HAVE_GETTEXT
+# include <libintl.h> /* for gettext, used by the _() macro */
+#endif /* HAVE_GETTEXT */
+
+#if HAVE_UNISTD_H
+# include <unistd.h> /* for R_OK, and other POSIX goodness */
+#endif /* HAVE_UNISTD_H */
+
+#ifndef HAVE_STRFTIME_L
+# if _POSIX_VERSION < 200809
+#  define HAVE_STRFTIME_L 0
+# else
+#  define HAVE_STRFTIME_L 1
+# endif
+#endif
+
+#ifndef USG_COMPAT
+# ifndef _XOPEN_VERSION
+#  define USG_COMPAT 0
+# else
+#  define USG_COMPAT 1
+# endif
+#endif
+
+#ifndef HAVE_TZNAME
+# if _POSIX_VERSION < 198808 && !USG_COMPAT
+#  define HAVE_TZNAME 0
+# else
+#  define HAVE_TZNAME 1
+# endif
+#endif
+
+#ifndef ALTZONE
+# if defined __sun || defined _M_XENIX
+#  define ALTZONE 1
+# else
+#  define ALTZONE 0
+# endif
+#endif
+
+#ifndef R_OK
+# define R_OK 4
+#endif /* !defined R_OK */
+
+#if PORT_TO_C89
+
+/*
+** Define HAVE_STDINT_H's default value here, rather than at the
+** start, since __GLIBC__ and INTMAX_MAX's values depend on
+** previously included files.  glibc 2.1 and Solaris 10 and later have
+** stdint.h, even with pre-C99 compilers.
+*/
+#if !defined HAVE_STDINT_H && defined __has_include
+# define HAVE_STDINT_H true /* C23 __has_include implies C99 stdint.h.  */
+#endif
+#ifndef HAVE_STDINT_H
+# define HAVE_STDINT_H \
+   (199901 <= __STDC_VERSION__ \
+    || 2 < __GLIBC__ + (1 <= __GLIBC_MINOR__) \
+    || __CYGWIN__ || INTMAX_MAX)
+#endif /* !defined HAVE_STDINT_H */
+
+#if HAVE_STDINT_H
+# include <stdint.h>
+#endif /* HAVE_STDINT_H */
+
+#ifndef HAVE_INTTYPES_H
+# define HAVE_INTTYPES_H HAVE_STDINT_H /* default: present whenever <stdint.h> is */
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+
+/* Pre-C99 GCC compilers define __LONG_LONG_MAX__ instead of LLONG_MAX.  */
+#if defined __LONG_LONG_MAX__ && !defined __STRICT_ANSI__
+# ifndef LLONG_MAX
+#  define LLONG_MAX __LONG_LONG_MAX__
+# endif
+# ifndef LLONG_MIN
+#  define LLONG_MIN (-1 - LLONG_MAX)
+# endif
+# ifndef ULLONG_MAX
+#  define ULLONG_MAX (LLONG_MAX * 2ull + 1)
+# endif
+#endif
+
+#ifndef INT_FAST64_MAX
+# if 1 <= LONG_MAX >> 31 >> 31
+typedef long int_fast64_t;
+#  define INT_FAST64_MIN LONG_MIN
+#  define INT_FAST64_MAX LONG_MAX
+# else
+/* If this fails, compile with -DHAVE_STDINT_H or with a better compiler.  */
+typedef long long int_fast64_t;
+#  define INT_FAST64_MIN LLONG_MIN
+#  define INT_FAST64_MAX LLONG_MAX
+# endif
+#endif
+
+#ifndef PRIdFAST64
+# if INT_FAST64_MAX == LONG_MAX
+#  define PRIdFAST64 "ld"
+# else
+#  define PRIdFAST64 "lld"
+# endif
+#endif
+
+#ifndef SCNdFAST64
+# define SCNdFAST64 PRIdFAST64
+#endif
+
+#ifndef INT_FAST32_MAX
+# if INT_MAX >> 31 == 0
+typedef long int_fast32_t;
+#  define INT_FAST32_MAX LONG_MAX
+#  define INT_FAST32_MIN LONG_MIN
+# else
+typedef int int_fast32_t;
+#  define INT_FAST32_MAX INT_MAX
+#  define INT_FAST32_MIN INT_MIN
+# endif
+#endif
+
+#ifndef INTMAX_MAX
+# ifdef LLONG_MAX
+typedef long long intmax_t;
+#  ifndef HAVE_STRTOLL
+#   define HAVE_STRTOLL true
+#  endif
+#  if HAVE_STRTOLL
+#   define strtoimax strtoll
+#  endif
+#  define INTMAX_MAX LLONG_MAX
+#  define INTMAX_MIN LLONG_MIN
+# else
+typedef long intmax_t;
+#  define INTMAX_MAX LONG_MAX
+#  define INTMAX_MIN LONG_MIN
+# endif
+# ifndef strtoimax
+#  define strtoimax strtol
+# endif
+#endif
+
+#ifndef PRIdMAX
+# if INTMAX_MAX == LLONG_MAX
+#  define PRIdMAX "lld"
+# else
+#  define PRIdMAX "ld"
+# endif
+#endif
+
+#ifndef PTRDIFF_MAX
+# define PTRDIFF_MAX MAXVAL(ptrdiff_t, TYPE_BIT(ptrdiff_t))
+#endif
+
+#ifndef UINT_FAST32_MAX
+typedef unsigned long uint_fast32_t;
+#endif
+
+#ifndef UINT_FAST64_MAX
+# if 3 <= ULONG_MAX >> 31 >> 31
+typedef unsigned long uint_fast64_t;
+#  define UINT_FAST64_MAX ULONG_MAX
+# else
+/* If this fails, compile with -DHAVE_STDINT_H or with a better compiler.  */
+typedef unsigned long long uint_fast64_t;
+#  define UINT_FAST64_MAX ULLONG_MAX
+# endif
+#endif
+
+#ifndef UINTMAX_MAX
+# ifdef ULLONG_MAX
+typedef unsigned long long uintmax_t;
+#  define UINTMAX_MAX ULLONG_MAX
+# else
+typedef unsigned long uintmax_t;
+#  define UINTMAX_MAX ULONG_MAX
+# endif
+#endif
+
+#ifndef PRIuMAX
+# ifdef ULLONG_MAX
+#  define PRIuMAX "llu"
+# else
+#  define PRIuMAX "lu"
+# endif
+#endif
+
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#endif /* PORT_TO_C89 */
+
+/* The maximum size of any created object, as a signed integer.
+   Although the C standard does not outright prohibit larger objects,
+   behavior is undefined if the result of pointer subtraction does not
+   fit into ptrdiff_t, and the code assumes in several places that
+   pointer subtraction works.  As a practical matter it's OK to not
+   support objects larger than this.  */
+#define INDEX_MAX ((ptrdiff_t) min(PTRDIFF_MAX, SIZE_MAX))
+
+/* Support ckd_add, ckd_sub, ckd_mul on C23 or recent-enough GCC-like
+   hosts, unless compiled with -DHAVE_STDCKDINT_H=0 or with pre-C23 EDG.  */
+#if !defined HAVE_STDCKDINT_H && defined __has_include
+# if __has_include(<stdckdint.h>)
+#  define HAVE_STDCKDINT_H true
+# endif
+#endif
+#ifdef HAVE_STDCKDINT_H
+# if HAVE_STDCKDINT_H
+#  include <stdckdint.h>
+# endif
+#elif defined __EDG__
+/* Do nothing, to work around EDG bug <https://bugs.gnu.org/53256>.  */
+#elif defined __has_builtin
+# if __has_builtin(__builtin_add_overflow)
+#  define ckd_add(r, a, b) __builtin_add_overflow(a, b, r)
+# endif
+# if __has_builtin(__builtin_sub_overflow)
+#  define ckd_sub(r, a, b) __builtin_sub_overflow(a, b, r)
+# endif
+# if __has_builtin(__builtin_mul_overflow)
+#  define ckd_mul(r, a, b) __builtin_mul_overflow(a, b, r)
+# endif
+#elif 7 <= __GNUC__
+# define ckd_add(r, a, b) __builtin_add_overflow(a, b, r)
+# define ckd_sub(r, a, b) __builtin_sub_overflow(a, b, r)
+# define ckd_mul(r, a, b) __builtin_mul_overflow(a, b, r)
+#endif
+
+#if 3 <= __GNUC__
+# define ATTRIBUTE_MALLOC __attribute__((malloc))
+# define ATTRIBUTE_FORMAT(spec) __attribute__((format spec))
+#else
+# define ATTRIBUTE_MALLOC /* empty */
+# define ATTRIBUTE_FORMAT(spec) /* empty */
+#endif
+
+#if (defined __has_c_attribute \
+     && (202311 <= __STDC_VERSION__ || !defined __STRICT_ANSI__))
+# define HAVE___HAS_C_ATTRIBUTE true
+#else
+# define HAVE___HAS_C_ATTRIBUTE false
+#endif
+
+#if HAVE___HAS_C_ATTRIBUTE
+# if __has_c_attribute(deprecated)
+#  define ATTRIBUTE_DEPRECATED [[deprecated]]
+# endif
+#endif
+#ifndef ATTRIBUTE_DEPRECATED
+# if 3 < __GNUC__ + (2 <= __GNUC_MINOR__)
+#  define ATTRIBUTE_DEPRECATED __attribute__((deprecated))
+# else
+#  define ATTRIBUTE_DEPRECATED /* empty */
+# endif
+#endif
+
+#if HAVE___HAS_C_ATTRIBUTE
+# if __has_c_attribute(fallthrough)
+#  define ATTRIBUTE_FALLTHROUGH [[fallthrough]]
+# endif
+#endif
+#ifndef ATTRIBUTE_FALLTHROUGH
+# if 7 <= __GNUC__
+#  define ATTRIBUTE_FALLTHROUGH __attribute__((fallthrough))
+# else
+#  define ATTRIBUTE_FALLTHROUGH ((void) 0)
+# endif
+#endif
+
+#if HAVE___HAS_C_ATTRIBUTE
+# if __has_c_attribute(maybe_unused)
+#  define ATTRIBUTE_MAYBE_UNUSED [[maybe_unused]]
+# endif
+#endif
+#ifndef ATTRIBUTE_MAYBE_UNUSED
+# if 2 < __GNUC__ + (7 <= __GNUC_MINOR__)
+#  define ATTRIBUTE_MAYBE_UNUSED __attribute__((unused))
+# else
+#  define ATTRIBUTE_MAYBE_UNUSED /* empty */
+# endif
+#endif
+
+#if HAVE___HAS_C_ATTRIBUTE
+# if __has_c_attribute(noreturn)
+#  define ATTRIBUTE_NORETURN [[noreturn]]
+# endif
+#endif
+#ifndef ATTRIBUTE_NORETURN
+# if 201112 <= __STDC_VERSION__
+#  define ATTRIBUTE_NORETURN _Noreturn
+# elif 2 < __GNUC__ + (8 <= __GNUC_MINOR__)
+#  define ATTRIBUTE_NORETURN __attribute__((noreturn))
+# else
+#  define ATTRIBUTE_NORETURN /* empty */
+# endif
+#endif
+
+#if HAVE___HAS_C_ATTRIBUTE
+# if __has_c_attribute(reproducible)
+#  define ATTRIBUTE_REPRODUCIBLE [[reproducible]]
+# endif
+#endif
+#ifndef ATTRIBUTE_REPRODUCIBLE
+# if 3 <= __GNUC__
+#  define ATTRIBUTE_REPRODUCIBLE __attribute__((pure))
+# else
+#  define ATTRIBUTE_REPRODUCIBLE /* empty */
+# endif
+#endif
+
+#if HAVE___HAS_C_ATTRIBUTE
+# if __has_c_attribute(unsequenced)
+#  define ATTRIBUTE_UNSEQUENCED [[unsequenced]]
+# endif
+#endif
+#ifndef ATTRIBUTE_UNSEQUENCED
+# if 3 <= __GNUC__
+#  define ATTRIBUTE_UNSEQUENCED __attribute__((const))
+# else
+#  define ATTRIBUTE_UNSEQUENCED /* empty */
+# endif
+#endif
+
+#if (__STDC_VERSION__ < 199901 && !defined restrict \
+     && (PORT_TO_C89 || defined _MSC_VER))
+# define restrict /* empty */
+#endif
+
+/*
+** Workarounds for compilers/systems.
+*/
+
+#ifndef EPOCH_LOCAL
+# define EPOCH_LOCAL 0
+#endif
+#ifndef EPOCH_OFFSET
+# define EPOCH_OFFSET 0
+#endif
+#ifndef RESERVE_STD_EXT_IDS
+# define RESERVE_STD_EXT_IDS 0
+#endif
+
+/* If standard C identifiers with external linkage (e.g., localtime)
+   are reserved and are not already being renamed anyway, rename them
+   as if compiling with '-Dtime_tz=time_t'.  */
+#if !defined time_tz && RESERVE_STD_EXT_IDS && USE_LTZ
+# define time_tz time_t
+#endif
+
+/*
+** Compile with -Dtime_tz=T to build the tz package with a private
+** time_t type equivalent to T rather than the system-supplied time_t.
+** This debugging feature can test unusual design decisions
+** (e.g., time_t wider than 'long', or unsigned time_t) even on
+** typical platforms.
+*/
+#if defined time_tz || EPOCH_LOCAL || EPOCH_OFFSET != 0
+# define TZ_TIME_T 1
+#else
+# define TZ_TIME_T 0
+#endif
+
+#if defined LOCALTIME_IMPLEMENTATION && TZ_TIME_T
+static time_t sys_time(time_t *x) { return time(x); }
+#endif
+
+#if TZ_TIME_T
+
+typedef time_tz tz_time_t;
+
+# undef  asctime
+# define asctime tz_asctime
+# undef  asctime_r
+# define asctime_r tz_asctime_r
+# undef  ctime
+# define ctime tz_ctime
+# undef  ctime_r
+# define ctime_r tz_ctime_r
+# undef  difftime
+# define difftime tz_difftime
+# undef  gmtime
+# define gmtime tz_gmtime
+# undef  gmtime_r
+# define gmtime_r tz_gmtime_r
+# undef  localtime
+# define localtime tz_localtime
+# undef  localtime_r
+# define localtime_r tz_localtime_r
+# undef  localtime_rz
+# define localtime_rz tz_localtime_rz
+# undef  mktime
+# define mktime tz_mktime
+# undef  mktime_z
+# define mktime_z tz_mktime_z
+# undef  offtime
+# define offtime tz_offtime
+# undef  posix2time
+# define posix2time tz_posix2time
+# undef  posix2time_z
+# define posix2time_z tz_posix2time_z
+# undef  strftime
+# define strftime tz_strftime
+# undef  time
+# define time tz_time
+# undef  time2posix
+# define time2posix tz_time2posix
+# undef  time2posix_z
+# define time2posix_z tz_time2posix_z
+# undef  time_t
+# define time_t tz_time_t
+# undef  timegm
+# define timegm tz_timegm
+# undef  timelocal
+# define timelocal tz_timelocal
+# undef  timeoff
+# define timeoff tz_timeoff
+# undef  tzalloc
+# define tzalloc tz_tzalloc
+# undef  tzfree
+# define tzfree tz_tzfree
+# undef  tzset
+# define tzset tz_tzset
+# if HAVE_STRFTIME_L
+#  undef  strftime_l
+#  define strftime_l tz_strftime_l
+# endif
+# if HAVE_TZNAME
+#  undef  tzname
+#  define tzname tz_tzname
+# endif
+# if USG_COMPAT
+#  undef  daylight
+#  define daylight tz_daylight
+#  undef  timezone
+#  define timezone tz_timezone
+# endif
+# if ALTZONE
+#  undef  altzone
+#  define altzone tz_altzone
+# endif
+
+# if __STDC_VERSION__ < 202311
+#  define DEPRECATED_IN_C23 /* empty */
+# else
+#  define DEPRECATED_IN_C23 ATTRIBUTE_DEPRECATED
+# endif
+DEPRECATED_IN_C23 char *asctime(struct tm const *);
+char *asctime_r(struct tm const *restrict, char *restrict);
+DEPRECATED_IN_C23 char *ctime(time_t const *);
+char *ctime_r(time_t const *, char *);
+ATTRIBUTE_UNSEQUENCED double difftime(time_t, time_t);
+size_t strftime(char *restrict, size_t, char const *restrict,
+		struct tm const *restrict);
+# if HAVE_STRFTIME_L
+size_t strftime_l(char *restrict, size_t, char const *restrict,
+		  struct tm const *restrict, locale_t);
+# endif
+struct tm *gmtime(time_t const *);
+struct tm *gmtime_r(time_t const *restrict, struct tm *restrict);
+struct tm *localtime(time_t const *);
+struct tm *localtime_r(time_t const *restrict, struct tm *restrict);
+time_t mktime(struct tm *);
+time_t time(time_t *);
+time_t timegm(struct tm *);
+void tzset(void);
+#endif
+
+#ifndef HAVE_DECL_TIMEGM
+# if (202311 <= __STDC_VERSION__ \
+      || defined __GLIBC__ || defined __tm_zone /* musl */ \
+      || defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ \
+      || (defined __APPLE__ && defined __MACH__))
+#  define HAVE_DECL_TIMEGM true
+# else
+#  define HAVE_DECL_TIMEGM false
+# endif
+#endif
+#if !HAVE_DECL_TIMEGM && !defined timegm
+time_t timegm(struct tm *);
+#endif
+
+#if !HAVE_DECL_ASCTIME_R && !defined asctime_r
+extern char *asctime_r(struct tm const *restrict, char *restrict);
+#endif
+
+#ifndef HAVE_DECL_ENVIRON
+# if defined environ || defined __USE_GNU
+#  define HAVE_DECL_ENVIRON 1
+# else
+#  define HAVE_DECL_ENVIRON 0
+# endif
+#endif
+
+#if !HAVE_DECL_ENVIRON
+extern char **environ;
+#endif
+
+#if 2 <= HAVE_TZNAME + (TZ_TIME_T || !HAVE_POSIX_DECLS)
+extern char *tzname[];
+#endif
+#if 2 <= USG_COMPAT + (TZ_TIME_T || !HAVE_POSIX_DECLS)
+extern long timezone;
+extern int daylight;
+#endif
+#if 2 <= ALTZONE + (TZ_TIME_T || !HAVE_POSIX_DECLS)
+extern long altzone;
+#endif
+
+/*
+** The STD_INSPIRED functions are similar, but most also need
+** declarations if time_tz is defined.
+*/
+
+#ifndef STD_INSPIRED
+# define STD_INSPIRED 0
+#endif
+#if STD_INSPIRED
+# if TZ_TIME_T || !defined offtime
+struct tm *offtime(time_t const *, long);
+# endif
+# if TZ_TIME_T || !defined timelocal
+time_t timelocal(struct tm *);
+# endif
+# if TZ_TIME_T || !defined timeoff
+time_t timeoff(struct tm *, long);
+# endif
+# if TZ_TIME_T || !defined time2posix
+time_t time2posix(time_t);
+# endif
+# if TZ_TIME_T || !defined posix2time
+time_t posix2time(time_t);
+# endif
+#endif
+
+/* Infer TM_ZONE on systems where this information is known, but suppress
+   guessing if NO_TM_ZONE is defined.  Similarly for TM_GMTOFF.  */
+#if (defined __GLIBC__ \
+     || defined __tm_zone /* musl */ \
+     || defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ \
+     || (defined __APPLE__ && defined __MACH__))
+# if !defined TM_GMTOFF && !defined NO_TM_GMTOFF
+#  define TM_GMTOFF tm_gmtoff
+# endif
+# if !defined TM_ZONE && !defined NO_TM_ZONE
+#  define TM_ZONE tm_zone
+# endif
+#endif
+
+/*
+** Define functions that are ABI compatible with NetBSD but have
+** better prototypes.  NetBSD 6.1.4 defines a pointer type timezone_t
+** and labors under the misconception that 'const timezone_t' is a
+** pointer to a constant.  This use of 'const' is ineffective, so it
+** is not done here.  What we call 'struct state' NetBSD calls
+** 'struct __state', but this is a private name so it doesn't matter.
+*/
+#if NETBSD_INSPIRED
+typedef struct state *timezone_t;
+struct tm *localtime_rz(timezone_t restrict, time_t const *restrict,
+			struct tm *restrict);
+time_t mktime_z(timezone_t restrict, struct tm *restrict);
+timezone_t tzalloc(char const *);
+void tzfree(timezone_t);
+# if STD_INSPIRED
+#  if TZ_TIME_T || !defined posix2time_z
+ATTRIBUTE_REPRODUCIBLE time_t posix2time_z(timezone_t, time_t);
+#  endif
+#  if TZ_TIME_T || !defined time2posix_z
+ATTRIBUTE_REPRODUCIBLE time_t time2posix_z(timezone_t, time_t);
+#  endif
+# endif
+#endif
+
+/*
+** Finally, some convenience items.
+*/
+
+#define TYPE_BIT(type) (CHAR_BIT * (ptrdiff_t) sizeof(type))
+#define TYPE_SIGNED(type) (((type) -1) < 0)
+#define TWOS_COMPLEMENT(t) ((t) ~ (t) 0 < 0)
+
+/* Minimum and maximum of two values.  Use lower case to avoid
+   naming clashes with standard include files.  */
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+/* Max and min values of the integer type T, of which only the bottom
+   B bits are used, and where the highest-order used bit is considered
+   to be a sign bit if T is signed.  */
+#define MAXVAL(t, b)						\
+  ((t) (((t) 1 << ((b) - 1 - TYPE_SIGNED(t)))			\
+	- 1 + ((t) 1 << ((b) - 1 - TYPE_SIGNED(t)))))
+#define MINVAL(t, b)						\
+  ((t) (TYPE_SIGNED(t) ? - TWOS_COMPLEMENT(t) - MAXVAL(t, b) : 0))
+
+/* The extreme time values, assuming no padding.  */
+#define TIME_T_MIN_NO_PADDING MINVAL(time_t, TYPE_BIT(time_t))
+#define TIME_T_MAX_NO_PADDING MAXVAL(time_t, TYPE_BIT(time_t))
+
+/* The extreme time values.  These are macros, not constants, so that
+   any portability problems occur only when compiling .c files that use
+   the macros, which is safer for applications that need only zdump and zic.
+   This implementation assumes no padding if time_t is signed and
+   either the compiler lacks support for _Generic or time_t is not one
+   of the standard signed integer types.  */
+#if HAVE__GENERIC
+# define TIME_T_MIN \
+    _Generic((time_t) 0, \
+	     signed char: SCHAR_MIN, short: SHRT_MIN, \
+	     int: INT_MIN, long: LONG_MIN, long long: LLONG_MIN, \
+	     default: TIME_T_MIN_NO_PADDING)
+# define TIME_T_MAX \
+    (TYPE_SIGNED(time_t) \
+     ? _Generic((time_t) 0, \
+		signed char: SCHAR_MAX, short: SHRT_MAX, \
+		int: INT_MAX, long: LONG_MAX, long long: LLONG_MAX, \
+		default: TIME_T_MAX_NO_PADDING)			    \
+     : (time_t) -1)
+enum { SIGNED_PADDING_CHECK_NEEDED
+         = _Generic((time_t) 0,
+		    signed char: false, short: false,
+		    int: false, long: false, long long: false,
+		    default: true) };
+#else
+# define TIME_T_MIN TIME_T_MIN_NO_PADDING
+# define TIME_T_MAX TIME_T_MAX_NO_PADDING
+enum { SIGNED_PADDING_CHECK_NEEDED = true };
+#endif
+/* Try to check the padding assumptions.  Although TIME_T_MAX and the
+   following check can both have undefined behavior on oddball
+   platforms due to shifts exceeding widths of signed integers, these
+   platforms' compilers are likely to diagnose these issues in integer
+   constant expressions, so it shouldn't hurt to check statically.  */
+static_assert(! TYPE_SIGNED(time_t) || ! SIGNED_PADDING_CHECK_NEEDED
+	      || TIME_T_MAX >> (TYPE_BIT(time_t) - 2) == 1);
+
+/*
+** 302 / 1000 is log10(2.0) rounded up.
+** Subtract one for the sign bit if the type is signed;
+** add one for integer division truncation;
+** add one more for a minus sign if the type is signed.
+*/
+#define INT_STRLEN_MAXIMUM(type) \
+	((TYPE_BIT(type) - TYPE_SIGNED(type)) * 302 / 1000 + \
+	1 + TYPE_SIGNED(type))
+
+/*
+** INITIALIZE(x)
+*/
+
+#ifdef GCC_LINT
+# define INITIALIZE(x)	((x) = 0)
+#else
+# define INITIALIZE(x)
+#endif
+
+/* Whether memory access must strictly follow the C standard.
+   If 0, it's OK to read uninitialized storage so long as the value is
+   not relied upon.  Defining it to 0 lets mktime access parts of
+   struct tm that might be uninitialized, as a heuristic when the
+   standard doesn't say what to return and when tm_gmtoff can help
+   mktime likely infer a better value.  */
+#ifndef UNINIT_TRAP
+# define UNINIT_TRAP 0
+#endif
+
+#ifdef DEBUG
+# undef unreachable
+# define unreachable() abort()
+#elif !defined unreachable
+# ifdef __has_builtin
+#  if __has_builtin(__builtin_unreachable)
+#   define unreachable() __builtin_unreachable()
+#  endif
+# elif 4 < __GNUC__ + (5 <= __GNUC_MINOR__)
+#  define unreachable() __builtin_unreachable()
+# endif
+# ifndef unreachable
+#  define unreachable() ((void) 0)
+# endif
+#endif
+
+/*
+** For the benefit of GNU folk...
+** '_(MSGID)' uses the current locale's message library string for MSGID.
+** The default is to use gettext if available, and use MSGID otherwise.
+*/
+
+#if HAVE_GETTEXT
+#define _(msgid) gettext(msgid)
+#else /* !HAVE_GETTEXT */
+#define _(msgid) msgid
+#endif /* !HAVE_GETTEXT */
+
+#if !defined TZ_DOMAIN && defined HAVE_GETTEXT
+# define TZ_DOMAIN "tz"
+#endif
+
+#if HAVE_INCOMPATIBLE_CTIME_R
+#undef asctime_r
+#undef ctime_r
+char *asctime_r(struct tm const *restrict, char *restrict);
+char *ctime_r(time_t const *, char *);
+#endif /* HAVE_INCOMPATIBLE_CTIME_R */
+
+/* Handy macros that are independent of tzfile implementation.  */
+
+enum {
+  SECSPERMIN = 60,
+  MINSPERHOUR = 60,
+  SECSPERHOUR = SECSPERMIN * MINSPERHOUR,
+  HOURSPERDAY = 24,
+  DAYSPERWEEK = 7,
+  DAYSPERNYEAR = 365,
+  DAYSPERLYEAR = DAYSPERNYEAR + 1,
+  MONSPERYEAR = 12,
+  YEARSPERREPEAT = 400	/* years before a Gregorian repeat */
+};
+
+#define SECSPERDAY	((int_fast32_t) SECSPERHOUR * HOURSPERDAY)
+
+#define DAYSPERREPEAT		((int_fast32_t) 400 * 365 + 100 - 4 + 1)
+#define SECSPERREPEAT		((int_fast64_t) DAYSPERREPEAT * SECSPERDAY)
+#define AVGSECSPERYEAR		(SECSPERREPEAT / YEARSPERREPEAT)
+
+enum {
+  TM_SUNDAY,
+  TM_MONDAY,
+  TM_TUESDAY,
+  TM_WEDNESDAY,
+  TM_THURSDAY,
+  TM_FRIDAY,
+  TM_SATURDAY
+};
+
+enum {
+  TM_JANUARY,
+  TM_FEBRUARY,
+  TM_MARCH,
+  TM_APRIL,
+  TM_MAY,
+  TM_JUNE,
+  TM_JULY,
+  TM_AUGUST,
+  TM_SEPTEMBER,
+  TM_OCTOBER,
+  TM_NOVEMBER,
+  TM_DECEMBER
+};
+
+enum {
+  TM_YEAR_BASE = 1900,
+  TM_WDAY_BASE = TM_MONDAY,
+  EPOCH_YEAR = 1970,
+  EPOCH_WDAY = TM_THURSDAY
+};
+
+#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
+
+/*
+** Since everything in isleap is modulo 400 (or a factor of 400), we know that
+**	isleap(y) == isleap(y % 400)
+** and so
+**	isleap(a + b) == isleap((a + b) % 400)
+** or
+**	isleap(a + b) == isleap(a % 400 + b % 400)
+** This is true even if % means modulo rather than Fortran remainder
+** (which is allowed by C89 but not by C99 or later).
+** We use this to avoid addition overflow problems.
+*/
+
+#define isleap_sum(a, b)	isleap((a) % 400 + (b) % 400)
+
+#endif /* !defined PRIVATE_H */
diff --git a/lib-tzcode/strftime.c b/lib-tzcode/strftime.c
new file mode 100644
index 0000000..df16983
--- /dev/null
+++ b/lib-tzcode/strftime.c
@@ -0,0 +1,659 @@
+/* Convert a broken-down timestamp to a string.  */
+
+/* Copyright 1989 The Regents of the University of California.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+   2. Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+   3. Neither the name of the University nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
+   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+   OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+   OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+   SUCH DAMAGE.  */
+
+/*
+** Based on the UCB version with the copyright notice appearing above.
+**
+** This is ANSIish only when "multibyte character == plain character".
+*/
+
+#include "private.h"
+
+#include <fcntl.h>
+#include <locale.h>
+#include <stdio.h>
+
+#ifndef DEPRECATE_TWO_DIGIT_YEARS
+# define DEPRECATE_TWO_DIGIT_YEARS false
+#endif
+
+struct lc_time_T {
+	const char *	mon[MONSPERYEAR];	/* abbreviated month names (%b, %h) */
+	const char *	month[MONSPERYEAR];	/* full month names (%B) */
+	const char *	wday[DAYSPERWEEK];	/* abbreviated weekday names (%a) */
+	const char *	weekday[DAYSPERWEEK];	/* full weekday names (%A) */
+	const char *	X_fmt;	/* time format; "%H:%M:%S" in C_time_locale */
+	const char *	x_fmt;	/* date format; "%m/%d/%y" in C_time_locale */
+	const char *	c_fmt;	/* date-and-time format expanded for %c */
+	const char *	am;	/* "AM" in C_time_locale */
+	const char *	pm;	/* "PM" in C_time_locale */
+	const char *	date_fmt;	/* date(1)-style format; presumably for %+ -- TODO confirm */
+};
+
+static const struct lc_time_T	C_time_locale = {
+	{
+		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+	}, {
+		"January", "February", "March", "April", "May", "June",
+		"July", "August", "September", "October", "November", "December"
+	}, {
+		"Sun", "Mon", "Tue", "Wed",
+		"Thu", "Fri", "Sat"
+	}, {
+		"Sunday", "Monday", "Tuesday", "Wednesday",
+		"Thursday", "Friday", "Saturday"
+	},
+
+	/* X_fmt */
+	"%H:%M:%S",
+
+	/*
+	** x_fmt
+	** C99 and later require this format.
+	** Using just numbers (as here) makes Quakers happier;
+	** it's also compatible with SVR4.
+	*/
+	"%m/%d/%y",
+
+	/*
+	** c_fmt
+	** C99 and later require this format.
+	** Previously this code used "%D %X", but we now conform to C99.
+	** Note that
+	**	"%a %b %d %H:%M:%S %Y"
+	** is used by Solaris 2.3.
+	*/
+	"%a %b %e %T %Y",
+
+	/* am */
+	"AM",
+
+	/* pm */
+	"PM",
+
+	/* date_fmt */
+	"%a %b %e %H:%M:%S %Z %Y"
+};
+
+enum warn { IN_NONE, IN_SOME, IN_THIS, IN_ALL };
+
+static char *	_add(const char *, char *, const char *);
+static char *	_conv(int, const char *, char *, const char *);
+static char *	_fmt(const char *, const struct tm *, char *, const char *,
+		     enum warn *);
+static char *	_yconv(int, int, bool, bool, char *, char const *);
+
+#ifndef YEAR_2000_NAME
+# define YEAR_2000_NAME "CHECK_STRFTIME_FORMATS_FOR_TWO_DIGIT_YEARS"
+#endif /* !defined YEAR_2000_NAME */
+
+#if HAVE_STRFTIME_L
+size_t
+strftime_l(char *restrict s, size_t maxsize, char const *restrict format,
+	   struct tm const *restrict t,
+	   ATTRIBUTE_MAYBE_UNUSED locale_t locale)
+{
+  /* LOCALE is ignored: only the C locale is supported, so defer to strftime.  */
+  return strftime(s, maxsize, format, t);
+}
+#endif
+
+size_t
+strftime(char *restrict s, size_t maxsize, char const *restrict format,
+	 struct tm const *restrict t)
+{
+	int const errno_on_entry = errno;	/* put back on success */
+	char *const limit = s + maxsize;	/* one past the end of S */
+	enum warn level = IN_NONE;
+	char *end;
+
+	tzset();
+	end = _fmt(format, t, s, limit, &level);
+	if (end == NULL) {	/* _fmt produced no result */
+		errno = EOVERFLOW;
+		return 0;
+	}
+	if (DEPRECATE_TWO_DIGIT_YEARS
+	    && level != IN_NONE && getenv(YEAR_2000_NAME)) {
+		char const *scope = (level == IN_SOME ? "some locales"
+				     : level == IN_THIS ? "the current locale"
+				     : "all locales");
+		fprintf(stderr, "\n");
+		fprintf(stderr, "strftime format \"%s\" ", format);
+		fprintf(stderr, "yields only two digits of years in ");
+		fputs(scope, stderr);
+		fprintf(stderr, "\n");
+	}
+	if (end == limit) {	/* no room left for the terminating NUL */
+		errno = ERANGE;
+		return 0;
+	}
+	*end = '\0';
+	errno = errno_on_entry;
+	return end - s;
+}
+
+/*
+** Expand the conversion specifications in format for the broken-down
+** time *t, appending the result at pt and never writing at or beyond
+** ptlim. Characters that are not part of a conversion specification are
+** copied unchanged. *warnp is raised when a conversion can produce a
+** two-digit year. Several conversions are implemented by recursing on
+** an equivalent format string.
+**
+** Returns the advanced output pointer; it equals ptlim once the output
+** buffer has been filled. No terminating NUL is stored here.
+*/
+static char *
+_fmt(const char *format, const struct tm *t, char *pt,
+     const char *ptlim, enum warn *warnp)
+{
+	struct lc_time_T const *Locale = &C_time_locale;
+
+	for ( ; *format; ++format) {
+		if (*format == '%') {
+			/*
+			** The E and O cases jump back here to dispatch on
+			** the character that follows the modifier.
+			*/
+label:
+			switch (*++format) {
+			case '\0':
+				/*
+				** The format ends right after '%': step back
+				** so that the '%' itself is copied below; the
+				** loop then advances onto the NUL and stops.
+				*/
+				--format;
+				break;
+			case 'A':
+				pt = _add((t->tm_wday < 0 ||
+					t->tm_wday >= DAYSPERWEEK) ?
+					"?" : Locale->weekday[t->tm_wday],
+					pt, ptlim);
+				continue;
+			case 'a':
+				pt = _add((t->tm_wday < 0 ||
+					t->tm_wday >= DAYSPERWEEK) ?
+					"?" : Locale->wday[t->tm_wday],
+					pt, ptlim);
+				continue;
+			case 'B':
+				pt = _add((t->tm_mon < 0 ||
+					t->tm_mon >= MONSPERYEAR) ?
+					"?" : Locale->month[t->tm_mon],
+					pt, ptlim);
+				continue;
+			case 'b':
+			case 'h':
+				pt = _add((t->tm_mon < 0 ||
+					t->tm_mon >= MONSPERYEAR) ?
+					"?" : Locale->mon[t->tm_mon],
+					pt, ptlim);
+				continue;
+			case 'C':
+				/*
+				** %C used to do a...
+				**	_fmt("%a %b %e %X %Y", t);
+				** ...whereas now POSIX 1003.2 calls for
+				** something completely different.
+				** (ado, 1993-05-24)
+				*/
+				pt = _yconv(t->tm_year, TM_YEAR_BASE,
+					    true, false, pt, ptlim);
+				continue;
+			case 'c':
+				{
+				/*
+				** The nested format's warning is collected
+				** separately: IN_ALL reported by it is
+				** recorded as IN_THIS, and *warnp is only
+				** ever raised, never lowered.
+				*/
+				enum warn warn2 = IN_SOME;
+
+				pt = _fmt(Locale->c_fmt, t, pt, ptlim, &warn2);
+				if (warn2 == IN_ALL)
+					warn2 = IN_THIS;
+				if (warn2 > *warnp)
+					*warnp = warn2;
+				}
+				continue;
+			case 'D':
+				pt = _fmt("%m/%d/%y", t, pt, ptlim, warnp);
+				continue;
+			case 'd':
+				pt = _conv(t->tm_mday, "%02d", pt, ptlim);
+				continue;
+			case 'E':
+			case 'O':
+				/*
+				** Locale modifiers of C99 and later.
+				** The sequences
+				**	%Ec %EC %Ex %EX %Ey %EY
+				**	%Od %Oe %OH %OI %Om %OM
+				**	%OS %Ou %OU %OV %Ow %OW %Oy
+				** are supposed to provide alternative
+				** representations.
+				** Here the modifier is skipped and the
+				** character after it is handled as if it
+				** had been given without a modifier.
+				*/
+				goto label;
+			case 'e':
+				pt = _conv(t->tm_mday, "%2d", pt, ptlim);
+				continue;
+			case 'F':
+				pt = _fmt("%Y-%m-%d", t, pt, ptlim, warnp);
+				continue;
+			case 'H':
+				pt = _conv(t->tm_hour, "%02d", pt, ptlim);
+				continue;
+			case 'I':
+				/* 12-hour clock: hour 0 and hour 12 both print as 12. */
+				pt = _conv((t->tm_hour % 12) ?
+					(t->tm_hour % 12) : 12,
+					"%02d", pt, ptlim);
+				continue;
+			case 'j':
+				pt = _conv(t->tm_yday + 1, "%03d", pt, ptlim);
+				continue;
+			case 'k':
+				/*
+				** This used to be...
+				**	_conv(t->tm_hour % 12 ?
+				**		t->tm_hour % 12 : 12, 2, ' ');
+				** ...and has been changed to the below to
+				** match SunOS 4.1.1 and Arnold Robbins'
+				** strftime version 3.0. That is, "%k" and
+				** "%l" have been swapped.
+				** (ado, 1993-05-24)
+				*/
+				pt = _conv(t->tm_hour, "%2d", pt, ptlim);
+				continue;
+#ifdef KITCHEN_SINK
+			case 'K':
+				/*
+				** After all this time, still unclaimed!
+				*/
+				pt = _add("kitchen sink", pt, ptlim);
+				continue;
+#endif /* defined KITCHEN_SINK */
+			case 'l':
+				/*
+				** This used to be...
+				**	_conv(t->tm_hour, 2, ' ');
+				** ...and has been changed to the below to
+				** match SunOS 4.1.1 and Arnold Robbins'
+				** strftime version 3.0. That is, "%k" and
+				** "%l" have been swapped.
+				** (ado, 1993-05-24)
+				*/
+				pt = _conv((t->tm_hour % 12) ?
+					(t->tm_hour % 12) : 12,
+					"%2d", pt, ptlim);
+				continue;
+			case 'M':
+				pt = _conv(t->tm_min, "%02d", pt, ptlim);
+				continue;
+			case 'm':
+				pt = _conv(t->tm_mon + 1, "%02d", pt, ptlim);
+				continue;
+			case 'n':
+				pt = _add("\n", pt, ptlim);
+				continue;
+			case 'p':
+				pt = _add((t->tm_hour >= (HOURSPERDAY / 2)) ?
+					Locale->pm :
+					Locale->am,
+					pt, ptlim);
+				continue;
+			case 'R':
+				pt = _fmt("%H:%M", t, pt, ptlim, warnp);
+				continue;
+			case 'r':
+				pt = _fmt("%I:%M:%S %p", t, pt, ptlim, warnp);
+				continue;
+			case 'S':
+				pt = _conv(t->tm_sec, "%02d", pt, ptlim);
+				continue;
+			case 's':
+				{
+					/*
+					** mktime takes a modifiable struct tm
+					** and *t is const, so the fields it
+					** needs are copied into a scratch
+					** structure first.
+					*/
+					struct tm	tm;
+					char		buf[INT_STRLEN_MAXIMUM(
+								time_t) + 1];
+					time_t		mkt;
+
+					tm.tm_sec = t->tm_sec;
+					tm.tm_min = t->tm_min;
+					tm.tm_hour = t->tm_hour;
+					tm.tm_mday = t->tm_mday;
+					tm.tm_mon = t->tm_mon;
+					tm.tm_year = t->tm_year;
+					tm.tm_isdst = t->tm_isdst;
+#if defined TM_GMTOFF && ! UNINIT_TRAP
+					tm.TM_GMTOFF = t->TM_GMTOFF;
+#endif
+					mkt = mktime(&tm);
+					/* If mktime fails, %s expands to the
+					   value of (time_t) -1 as a failure
+					   marker; this is better in practice
+					   than strftime failing.  */
+					if (TYPE_SIGNED(time_t)) {
+					  intmax_t n = mkt;
+					  sprintf(buf, "%"PRIdMAX, n);
+					} else {
+					  uintmax_t n = mkt;
+					  sprintf(buf, "%"PRIuMAX, n);
+					}
+					pt = _add(buf, pt, ptlim);
+				}
+				continue;
+			case 'T':
+				pt = _fmt("%H:%M:%S", t, pt, ptlim, warnp);
+				continue;
+			case 't':
+				pt = _add("\t", pt, ptlim);
+				continue;
+			case 'U':
+				/*
+				** Week number counted from tm_yday with the
+				** week aligned on tm_wday == 0.
+				*/
+				pt = _conv((t->tm_yday + DAYSPERWEEK -
+					t->tm_wday) / DAYSPERWEEK,
+					"%02d", pt, ptlim);
+				continue;
+			case 'u':
+				/*
+				** From Arnold Robbins' strftime version 3.0:
+				** "ISO 8601: Weekday as a decimal number
+				** [1 (Monday) - 7]"
+				** (ado, 1993-05-24)
+				*/
+				pt = _conv((t->tm_wday == 0) ?
+					DAYSPERWEEK : t->tm_wday,
+					"%d", pt, ptlim);
+				continue;
+			case 'V':	/* ISO 8601 week number */
+			case 'G':	/* ISO 8601 year (four digits) */
+			case 'g':	/* ISO 8601 year (two digits) */
+/*
+** From Arnold Robbins' strftime version 3.0: "the week number of the
+** year (the first Monday as the first day of week 1) as a decimal number
+** (01-53)."
+** (ado, 1993-05-24)
+**
+** From <https://www.cl.cam.ac.uk/~mgk25/iso-time.html> by Markus Kuhn:
+** "Week 01 of a year is per definition the first week which has the
+** Thursday in this year, which is equivalent to the week which contains
+** the fourth day of January. In other words, the first week of a new year
+** is the week which has the majority of its days in the new year. Week 01
+** might also contain days from the previous year and the week before week
+** 01 of a year is the last week (52 or 53) of the previous year even if
+** it contains days from the new year. A week starts with Monday (day 1)
+** and ends with Sunday (day 7). For example, the first week of the year
+** 1997 lasts from 1996-12-30 to 1997-01-05..."
+** (ado, 1996-01-02)
+*/
+				{
+					int	year;
+					int	base;
+					int	yday;
+					int	wday;
+					int	w;
+
+					year = t->tm_year;
+					base = TM_YEAR_BASE;
+					yday = t->tm_yday;
+					wday = t->tm_wday;
+					/*
+					** Each pass tests the ISO year
+					** year + base. The loop ends once
+					** yday falls inside that ISO year or
+					** in week 1 of the following one;
+					** otherwise it steps back one year
+					** and re-expresses yday relative to
+					** that year.
+					*/
+					for ( ; ; ) {
+						int	len;
+						int	bot;
+						int	top;
+
+						len = isleap_sum(year, base) ?
+							DAYSPERLYEAR :
+							DAYSPERNYEAR;
+						/*
+						** What yday (-3 ... 3) does
+						** the ISO year begin on?
+						*/
+						bot = ((yday + 11 - wday) %
+							DAYSPERWEEK) - 3;
+						/*
+						** What yday does the NEXT
+						** ISO year begin on?
+						*/
+						top = bot -
+							(len % DAYSPERWEEK);
+						if (top < -3)
+							top += DAYSPERWEEK;
+						top += len;
+						if (yday >= top) {
+							++base;
+							w = 1;
+							break;
+						}
+						if (yday >= bot) {
+							w = 1 + ((yday - bot) /
+								DAYSPERWEEK);
+							break;
+						}
+						--base;
+						yday += isleap_sum(year, base) ?
+							DAYSPERLYEAR :
+							DAYSPERNYEAR;
+					}
+#ifdef XPG4_1994_04_09
+					if ((w == 52 &&
+						t->tm_mon == TM_JANUARY) ||
+						(w == 1 &&
+						t->tm_mon == TM_DECEMBER))
+							w = 53;
+#endif /* defined XPG4_1994_04_09 */
+					if (*format == 'V')
+						pt = _conv(w, "%02d",
+							pt, ptlim);
+					else if (*format == 'g') {
+						*warnp = IN_ALL;
+						pt = _yconv(year, base,
+							false, true,
+							pt, ptlim);
+					} else	pt = _yconv(year, base,
+							true, true,
+							pt, ptlim);
+				}
+				continue;
+			case 'v':
+				/*
+				** From Arnold Robbins' strftime version 3.0:
+				** "date as dd-bbb-YYYY"
+				** (ado, 1993-05-24)
+				*/
+				pt = _fmt("%e-%b-%Y", t, pt, ptlim, warnp);
+				continue;
+			case 'W':
+				/*
+				** Same computation as %U, but with the week
+				** aligned on tm_wday == 1: the weekday is
+				** first shifted so that 1 maps to 0 and 0
+				** maps to DAYSPERWEEK - 1.
+				*/
+				pt = _conv((t->tm_yday + DAYSPERWEEK -
+					(t->tm_wday ?
+					(t->tm_wday - 1) :
+					(DAYSPERWEEK - 1))) / DAYSPERWEEK,
+					"%02d", pt, ptlim);
+				continue;
+			case 'w':
+				pt = _conv(t->tm_wday, "%d", pt, ptlim);
+				continue;
+			case 'X':
+				pt = _fmt(Locale->X_fmt, t, pt, ptlim, warnp);
+				continue;
+			case 'x':
+				{
+				/* Same warning merge as for %c above. */
+				enum warn warn2 = IN_SOME;
+
+				pt = _fmt(Locale->x_fmt, t, pt, ptlim, &warn2);
+				if (warn2 == IN_ALL)
+					warn2 = IN_THIS;
+				if (warn2 > *warnp)
+					*warnp = warn2;
+				}
+				continue;
+			case 'y':
+				*warnp = IN_ALL;
+				pt = _yconv(t->tm_year, TM_YEAR_BASE,
+					false, true,
+					pt, ptlim);
+				continue;
+			case 'Y':
+				pt = _yconv(t->tm_year, TM_YEAR_BASE,
+					true, true,
+					pt, ptlim);
+				continue;
+			case 'Z':
+#ifdef TM_ZONE
+				pt = _add(t->TM_ZONE, pt, ptlim);
+#elif HAVE_TZNAME
+				if (t->tm_isdst >= 0)
+					pt = _add(tzname[t->tm_isdst != 0],
+						pt, ptlim);
+#endif
+				/*
+				** C99 and later say that %Z must be
+				** replaced by the empty string if the
+				** time zone abbreviation is not
+				** determinable.
+				*/
+				continue;
+			case 'z':
+#if defined TM_GMTOFF || USG_COMPAT || ALTZONE
+				{
+				long		diff;
+				char const *	sign;
+				bool negative;
+
+# ifdef TM_GMTOFF
+				diff = t->TM_GMTOFF;
+# else
+				/*
+				** C99 and later say that the UT offset must
+				** be computed by looking only at
+				** tm_isdst. This requirement is
+				** incorrect, since it means the code
+				** must rely on magic (in this case
+				** altzone and timezone), and the
+				** magic might not have the correct
+				** offset. Doing things correctly is
+				** tricky and requires disobeying the standard;
+				** see GNU C strftime for details.
+				** For now, punt and conform to the
+				** standard, even though it's incorrect.
+				**
+				** C99 and later say that %z must be replaced by
+				** the empty string if the time zone is not
+				** determinable, so output nothing if the
+				** appropriate variables are not available.
+				*/
+				if (t->tm_isdst < 0)
+					continue;
+				if (t->tm_isdst == 0)
+#  if USG_COMPAT
+					diff = -timezone;
+#  else
+					continue;
+#  endif
+				else
+#  if ALTZONE
+					diff = -altzone;
+#  else
+					continue;
+#  endif
+# endif
+				negative = diff < 0;
+				/*
+				** A zero offset is still printed with a '-'
+				** sign when the zone abbreviation itself
+				** starts with '-'.
+				*/
+				if (diff == 0) {
+# ifdef TM_ZONE
+				  negative = t->TM_ZONE[0] == '-';
+# else
+				  negative = t->tm_isdst < 0;
+#  if HAVE_TZNAME
+				  if (tzname[t->tm_isdst != 0][0] == '-')
+				    negative = true;
+#  endif
+# endif
+				}
+				if (negative) {
+					sign = "-";
+					diff = -diff;
+				} else	sign = "+";
+				pt = _add(sign, pt, ptlim);
+				/*
+				** Turn the offset in seconds into the decimal
+				** number hhmm for printing.
+				*/
+				diff /= SECSPERMIN;
+				diff = (diff / MINSPERHOUR) * 100 +
+					(diff % MINSPERHOUR);
+				pt = _conv(diff, "%04d", pt, ptlim);
+				}
+#endif
+				continue;
+			case '+':
+				pt = _fmt(Locale->date_fmt, t, pt, ptlim,
+					warnp);
+				continue;
+			case '%':
+			/*
+			** X311J/88-090 (4.12.3.5): if conversion char is
+			** undefined, behavior is undefined. Print out the
+			** character itself as printf(3) also does.
+			*/
+			default:
+				break;
+			}
+		}
+		/*
+		** Copy the current format character literally. This is
+		** reached for ordinary characters and for the switch cases
+		** above that end in break rather than continue. Stop as
+		** soon as the output buffer is full.
+		*/
+		if (pt == ptlim)
+			break;
+		*pt++ = *format;
+	}
+	return pt;
+}
+
+/*
+** Render the integer n with the printf-style format and append the
+** resulting text to the output at pt, bounded by ptlim.
+** Returns the advanced output pointer.
+*/
+static char *
+_conv(int n, const char *format, char *pt, const char *ptlim)
+{
+	char	digits[INT_STRLEN_MAXIMUM(int) + 1];
+
+	sprintf(digits, format, n);
+	pt = _add(digits, pt, ptlim);
+	return pt;
+}
+
+/*
+** Append the NUL-terminated string str to the output at pt without
+** writing at or beyond ptlim. When str fits, its terminating NUL is
+** stored too but not counted, so the returned pointer addresses that
+** NUL. When the buffer fills first, the copy is cut short and ptlim is
+** returned.
+*/
+static char *
+_add(const char *str, char *pt, const char *ptlim)
+{
+	for ( ; pt < ptlim; ++pt) {
+		*pt = *str++;
+		if (*pt == '\0')
+			break;
+	}
+	return pt;
+}
+
+/*
+** Append the century and/or the two-digit year of the year a + b.
+**
+** POSIX and the C Standard are unclear or inconsistent about what %C
+** and %y do when the year is negative or exceeds 9999. The convention
+** used here is that %C followed by %y gives the same output as %Y, and
+** that %Y is at least 4 bytes long, growing only when it has to.
+**
+** The year is passed as two addends and split into hundreds before the
+** parts are summed. convert_top selects the century part and convert_yy
+** the last two digits; pt and ptlim bound the output as for _add.
+** Returns the advanced output pointer.
+*/
+
+static char *
+_yconv(int a, int b, bool convert_top, bool convert_yy,
+       char *pt, const char *ptlim)
+{
+	int const	hundred = 100;
+	int		yy = a % hundred + b % hundred;
+	int		century = a / hundred + b / hundred + yy / hundred;
+
+	yy %= hundred;
+	/* Give both parts the same sign so that they read as one number.  */
+	if (yy < 0 && century > 0) {
+		--century;
+		yy += hundred;
+	} else if (century < 0 && yy > 0) {
+		++century;
+		yy -= hundred;
+	}
+	if (convert_top) {
+		/*
+		** A negative year with a zero century would lose its sign
+		** under "%02d", so "-0" is emitted for it instead.
+		*/
+		if (century != 0 || yy >= 0)
+			pt = _conv(century, "%02d", pt, ptlim);
+		else
+			pt = _add("-0", pt, ptlim);
+	}
+	if (convert_yy) {
+		if (yy < 0)
+			yy = -yy;
+		pt = _conv(yy, "%02d", pt, ptlim);
+	}
+	return pt;
+}
diff --git a/lib-tzcode/test/test-tzcode.c b/lib-tzcode/test/test-tzcode.c
new file mode 100644
index 0000000..6dc950c
--- /dev/null
+++ b/lib-tzcode/test/test-tzcode.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <tap.h>
+
+#include "tzcode.h"
+
+int
+main(void)
+{
+    char       libc_text[TZ_ASCTIME_BUF_SIZE];
+    char       tz_text[TZ_ASCTIME_BUF_SIZE];
+    struct tm  zone_tm;
+    struct tm  utc_tm;
+    time_t     now;
+    time_t     as_utc;
+    timezone_t zone;
+
+    plan_tests(9);
+    tzset();          // Set the timezone using the TZ variable passed by make, which must match the one allocated below
+
+    diag("Test the tz_tzalloc, tz_tzfree, tz_localtime_rz and tz_asctime_r");
+
+    // Render the current time once with libc and once with lib-tzcode, then compare the two strings
+    now = time(NULL);
+    isnt(now, (time_t)-1,                                 "Got the time");
+    is(ctime_r(&now, libc_text), libc_text,               "Converted it to a string using libc");
+    zone = tz_tzalloc(":America/Vancouver");
+    ok(zone,                                              "Got the timezone definition using lib-tzcode");
+    is(tz_localtime_rz(zone, &now, &zone_tm), &zone_tm,   "Got the broken down time using the timezone definition");
+    is(tz_asctime_r(&zone_tm, tz_text), tz_text,          "Converted it to a string using lib-tzcode");
+    is_eq(libc_text, tz_text,                             "String representations match");
+
+    // Convert the same broken down fields with a NULL zone (UTC, per the test description below) and
+    // check that libc's gmtime_r gives back the same hour
+    zone_tm.tm_isdst = 0;                                 // No daylight savings time for UTC
+    as_utc = tz_mktime_z(NULL, &zone_tm);
+    isnt(as_utc, (time_t)-1,                              "Converted local time using the UTC timezone definition");
+    is(gmtime_r(&as_utc, &utc_tm), &utc_tm,               "Converted back to broken down time using libc");
+    is(zone_tm.tm_hour, utc_tm.tm_hour,                   "The hours are the same");
+
+    tz_tzfree(zone);
+    return exit_status();
+}
diff --git a/lib-tzcode/theory.html b/lib-tzcode/theory.html
new file mode 100644
index 0000000..369c754
--- /dev/null
+++ b/lib-tzcode/theory.html
@@ -0,0 +1,1501 @@
+
+
+
+  Theory and pragmatics of the tz code and data
+  
+  
+
+
+
+

Theory and pragmatics of the tz code and data

+

Outline

+ + +
+

Scope of the tz database

+

+The tz +database attempts to record the history and predicted future of +civil time scales. +It organizes time zone and daylight saving time +data by partitioning the world into timezones +whose clocks all agree about timestamps that occur after the POSIX Epoch +(1970-01-01 00:00:00 UTC). +Although 1970 is a somewhat-arbitrary cutoff, there are significant +challenges to moving the cutoff earlier even by a decade or two, due +to the wide variety of local practices before computer timekeeping +became prevalent. +Most timezones correspond to a notable location and the database +records all known clock transitions for that location; +some timezones correspond instead to a fixed UTC offset. +

+ +

+Each timezone typically corresponds to a geographical region that is +smaller than a traditional time zone, because clocks in a timezone +all agree after 1970 whereas a traditional time zone merely +specifies current standard time. For example, applications that deal +with current and future timestamps in the traditional North +American mountain time zone can choose from the timezones +America/Denver which observes US-style daylight saving +time (DST), +and America/Phoenix which does not observe DST. +Applications that also deal with past timestamps in the mountain time +zone can choose from over a dozen timezones, such as +America/Boise, America/Edmonton, and +America/Hermosillo, each of which currently uses mountain +time but differs from other timezones for some timestamps after 1970. +

+ +

+Clock transitions before 1970 are recorded for location-based timezones, +because most systems support timestamps before 1970 and could +misbehave if data entries were omitted for pre-1970 transitions. +However, the database is not designed for and does not suffice for +applications requiring accurate handling of all past times everywhere, +as it would take far too much effort and guesswork to record all +details of pre-1970 civil timekeeping. +Although some information outside the scope of the database is +collected in a file backzone that is distributed along +with the database proper, this file is less reliable and does not +necessarily follow database guidelines. +

+ +

+As described below, reference source code for using the +tz database is also available. +The tz code is upwards compatible with POSIX, an international +standard for UNIX-like systems. +As of this writing, the current edition of POSIX is: The Open +Group Base Specifications Issue 7, IEEE Std 1003.1-2017, 2018 +Edition. +Because the database's scope encompasses real-world changes to civil +timekeeping, its model for describing time is more complex than the +standard and daylight saving times supported by POSIX. +A tz timezone corresponds to a ruleset that can +have more than two changes per year, these changes need not merely +flip back and forth between two alternatives, and the rules themselves +can change at times. +Whether and when a timezone changes its clock, +and even the timezone's notional base offset from UTC, +are variable. +It does not always make sense to talk about a timezone's +"base offset", which is not necessarily a single number. +

+ +
+ +
+

Timezone identifiers

+

+Each timezone has a name that uniquely identifies the timezone. +Inexperienced users are not expected to select these names unaided. +Distributors should provide documentation and/or a simple selection +interface that explains each name via a map or via descriptive text like +"Czech Republic" instead of the timezone name "Europe/Prague". +If geolocation information is available, a selection interface can +locate the user on a timezone map or prioritize names that are +geographically close. For an example selection interface, see the +tzselect program in the tz code. +The Unicode Common Locale Data +Repository contains data that may be useful for other selection +interfaces; it maps timezone names like Europe/Prague to +locale-dependent strings like "Prague", "Praha", "Прага", and "布拉格". +

+ +

+The naming conventions attempt to strike a balance +among the following goals: +

+ +
    +
  • + Uniquely identify every timezone where clocks have agreed since 1970. + This is essential for the intended use: static clocks keeping local + civil time. +
  • +
  • + Indicate to experts where the timezone's clocks typically are. +
  • +
  • + Be robust in the presence of political changes. + For example, names are typically not tied to countries, to avoid + incompatibilities when countries change their name (e.g., + Swaziland→Eswatini) or when locations change countries (e.g., Hong + Kong from UK colony to China). + There is no requirement that every country or national + capital must have a timezone name. +
  • +
  • + Be portable to a wide variety of implementations. +
  • +
  • + Use consistent naming conventions over the entire world. +
  • +
+ +

+Names normally have the form +AREA/LOCATION, where +AREA is a continent or ocean, and +LOCATION is a specific location within the area. +North and South America share the same area, 'America'. +Typical names are 'Africa/Cairo', +'America/New_York', and 'Pacific/Honolulu'. +Some names are further qualified to help avoid confusion; for example, +'America/Indiana/Petersburg' distinguishes Petersburg, +Indiana from other Petersburgs in America. +

+ +

+Here are the general guidelines used for +choosing timezone names, +in decreasing order of importance: +

+ +
    +
  • + Use only valid POSIX file name components (i.e., the parts of + names other than '/'). + Do not use the file name components '.' and + '..'. + Within a file name component, use only ASCII letters, + '.', '-' and '_'. + Do not use digits, as that might create an ambiguity with POSIX + TZ strings. + A file name component must not exceed 14 characters or start with + '-'. + E.g., prefer America/Noronha to + America/Fernando_de_Noronha. + Exceptions: see the discussion of legacy names below. +
  • +
  • + A name must not be empty, or contain '//', or + start or end with '/'. +
  • +
  • + Do not use names that differ only in case. + Although the reference implementation is case-sensitive, some + other implementations are not, and they would mishandle names + differing only in case. +
  • +
  • + If one name A is an initial prefix of another + name AB (ignoring case), then B must not + start with '/', as a regular file cannot have the + same name as a directory in POSIX. + For example, America/New_York precludes + America/New_York/Bronx. +
  • +
  • + Uninhabited regions like the North Pole and Bouvet Island + do not need locations, since local time is not defined there. +
  • +
  • + If all the clocks in a timezone have agreed since 1970, + do not bother to include more than one timezone + even if some of the clocks disagreed before 1970. + Otherwise these tables would become annoyingly large. +
  • +
  • + If boundaries between regions are fluid, such as during a war or + insurrection, do not bother to create a new timezone merely + because of yet another boundary change. This helps prevent table + bloat and simplifies maintenance. +
  • +
  • + If a name is ambiguous, use a less ambiguous alternative; + e.g., many cities are named San José and Georgetown, so + prefer America/Costa_Rica to + America/San_Jose and America/Guyana + to America/Georgetown. +
  • +
  • + Keep locations compact. + Use cities or small islands, not countries or regions, so that any + future changes do not split individual locations into different + timezones. + E.g., prefer Europe/Paris to Europe/France, + since + France + has had multiple time zones. +
  • +
  • + Use mainstream English spelling, e.g., prefer + Europe/Rome to Europa/Roma, and + prefer Europe/Athens to the Greek + Ευρώπη/Αθήνα or the Romanized + Evrópi/Athína. + The POSIX file name restrictions encourage this guideline. +
  • +
  • + Use the most populous among locations in a region, + e.g., prefer Asia/Shanghai to + Asia/Beijing. + Among locations with similar populations, pick the best-known + location, e.g., prefer Europe/Rome to + Europe/Milan. +
  • +
  • + Use the singular form, e.g., prefer Atlantic/Canary to + Atlantic/Canaries. +
  • +
  • + Omit common suffixes like '_Islands' and + '_City', unless that would lead to ambiguity. + E.g., prefer America/Cayman to + America/Cayman_Islands and + America/Guatemala to + America/Guatemala_City, but prefer + America/Mexico_City to + America/Mexico + because the + country of Mexico has several time zones. +
  • +
  • + Use '_' to represent a space. +
  • +
  • + Omit '.' from abbreviations in names. + E.g., prefer Atlantic/St_Helena to + Atlantic/St._Helena. +
  • +
  • + Do not change established names if they only marginally violate + the above guidelines. + For example, do not change the existing name Europe/Rome to + Europe/Milan merely because Milan's population has grown + to be somewhat greater than Rome's. +
  • +
  • + If a name is changed, put its old spelling in the + 'backward' file as a link to the new spelling. + This means old spellings will continue to work. + Ordinarily a name change should occur only in the rare case when + a location's consensus English-language spelling changes; for example, + in 2008 Asia/Calcutta was renamed to Asia/Kolkata + due to long-time widespread use of the new city name instead of the old. +
  • +
+ +

+Guidelines have evolved with time, and names following old versions of +these guidelines might not follow the current version. When guidelines +have changed, old names continue to be supported. Guideline changes +have included the following: +

+ +
    +
  • +Older versions of this package used a different naming scheme. +See the file 'backward' for most of these older names +(e.g., 'US/Eastern' instead of 'America/New_York'). +The other old-fashioned names still supported are +'WET', 'CET', 'MET', and +'EET' (see the file 'europe'). +
  • + +
  • +Older versions of this package defined legacy names that are +incompatible with the first guideline of location names, but which are +still supported. +These legacy names are mostly defined in the file +'etcetera'. +Also, the file 'backward' defines the legacy names +'Etc/GMT0', 'Etc/GMT-0', 'Etc/GMT+0', +'GMT0', 'GMT-0' and 'GMT+0', +and the file 'northamerica' defines the legacy names +'EST5EDT', 'CST6CDT', +'MST7MDT', and 'PST8PDT'. +
  • + +
  • +Older versions of these guidelines said that +there should typically be at least one name for each ISO +3166-1 officially assigned two-letter code for an inhabited +country or territory. +This old guideline has been dropped, as it was not needed to handle +timestamps correctly and it increased maintenance burden. +
  • +
+ +

+The file zone1970.tab lists geographical locations used +to name timezones. +It is intended to be an exhaustive list of names for geographic +regions as described above; this is a subset of the timezones in the data. +Although a zone1970.tab location's +longitude +corresponds to +its local mean +time (LMT) offset with one hour for every 15° +east longitude, this relationship is not exact. +The backward-compatibility file zone.tab is similar +but conforms to the older-version guidelines related to ISO 3166-1; +it lists only one country code per entry and unlike zone1970.tab +it can list names defined in backward. +

+ +

+The database defines each timezone name to be a zone, or a link to a zone. +The source file backward defines links for backward +compatibility; it does not define zones. +Although backward was originally designed to be optional, +nowadays distributions typically use it +and no great weight should be attached to whether a link +is defined in backward or in some other file. +The source file etcetera defines names that may be useful +on platforms that do not support POSIX-style TZ strings; +no other source file other than backward +contains links to its zones. +One of etcetera's names is Etc/UTC, +used by functions like gmtime to obtain leap +second information on platforms that support leap seconds. +Another etcetera name, GMT, +is used by older code releases. +

+
+ +
+

Time zone abbreviations

+

+When this package is installed, it generates time zone abbreviations +like 'EST' to be compatible with human tradition and POSIX. +Here are the general guidelines used for choosing time zone abbreviations, +in decreasing order of importance: +

+ +
    +
  • + Use three to six characters that are ASCII alphanumerics or + '+' or '-'. + Previous editions of this database also used characters like + space and '?', but these characters have a + special meaning to the + UNIX shell + and cause commands like + 'set + `date`' + to have unexpected effects. + Previous editions of this guideline required upper-case letters, but the + Congressman who introduced + Chamorro + Standard Time preferred "ChST", so lower-case letters are now + allowed. + Also, POSIX from 2001 on relaxed the rule to allow '-', + '+', and alphanumeric characters from the portable + character set in the current locale. + In practice ASCII alphanumerics and '+' and + '-' are safe in all locales. + +

    + In other words, in the C locale the POSIX extended regular + expression [-+[:alnum:]]{3,6} should match the + abbreviation. + This guarantees that all abbreviations could have been specified by a + POSIX TZ string. +

    +
  • +
  • + Use abbreviations that are in common use among English-speakers, + e.g., 'EST' for Eastern Standard Time in North America. + We assume that applications translate them to other languages + as part of the normal localization process; for example, + a French application might translate 'EST' to 'HNE'. + +

    + These abbreviations (for standard/daylight/etc. time) are: + ACST/ACDT Australian Central, + AST/ADT/APT/AWT/ADDT Atlantic, + AEST/AEDT Australian Eastern, + AHST/AHDT Alaska-Hawaii, + AKST/AKDT Alaska, + AWST/AWDT Australian Western, + BST/BDT Bering, + CAT/CAST Central Africa, + CET/CEST/CEMT Central European, + ChST Chamorro, + CST/CDT/CWT/CPT Central [North America], + CST/CDT China, + GMT/BST/IST/BDST Greenwich, + EAT East Africa, + EST/EDT/EWT/EPT Eastern [North America], + EET/EEST Eastern European, + GST/GDT Guam, + HST/HDT/HWT/HPT Hawaii, + HKT/HKST/HKWT Hong Kong, + IST India, + IST/GMT Irish, + IST/IDT/IDDT Israel, + JST/JDT Japan, + KST/KDT Korea, + MET/MEST Middle European (a backward-compatibility alias for + Central European), + MSK/MSD Moscow, + MST/MDT/MWT/MPT Mountain, + NST/NDT/NWT/NPT/NDDT Newfoundland, + NST/NDT/NWT/NPT Nome, + NZMT/NZST New Zealand through 1945, + NZST/NZDT New Zealand 1946–present, + PKT/PKST Pakistan, + PST/PDT/PWT/PPT Pacific, + PST/PDT Philippine, + SAST South Africa, + SST Samoa, + UTC Universal, + WAT/WAST West Africa, + WET/WEST/WEMT Western European, + WIB Waktu Indonesia Barat, + WIT Waktu Indonesia Timur, + WITA Waktu Indonesia Tengah, + YST/YDT/YWT/YPT/YDDT Yukon. +

    +
  • +
  • +

    + For times taken from a city's longitude, use the + traditional xMT notation. + The only abbreviation like this in current use is 'GMT'. + The others are for timestamps before 1960, + except that Monrovia Mean Time persisted until 1972. + Typically, numeric abbreviations (e.g., '-004430' for + MMT) would cause trouble here, as the numeric strings would exceed + the POSIX length limit. +

    + +

    + These abbreviations are: + AMT Asunción, Athens; + BMT Baghdad, Bangkok, Batavia, Bermuda, Bern, Bogotá, + Brussels, Bucharest; + CMT Calamarca, Caracas, Chisinau, Colón, Córdoba; + DMT Dublin/Dunsink; + EMT Easter; + FFMT Fort-de-France; + FMT Funchal; + GMT Greenwich; + HMT Havana, Helsinki, Horta, Howrah; + IMT Irkutsk, Istanbul; + JMT Jerusalem; + KMT Kaunas, Kyiv, Kingston; + LMT Lima, Lisbon, local; + MMT Macassar, Madras, Malé, Managua, Minsk, Monrovia, Montevideo, + Moratuwa, Moscow; + PLMT Phù Liễn; + PMT Paramaribo, Paris, Perm, Pontianak, Prague; + PMMT Port Moresby; + PPMT Port-au-Prince; + QMT Quito; + RMT Rangoon, Riga, Rome; + SDMT Santo Domingo; + SJMT San José; + SMT Santiago, Simferopol, Singapore, Stanley; + TBMT Tbilisi; + TMT Tallinn, Tehran; + WMT Warsaw. +

    + +

    + A few abbreviations also follow the pattern that + GMT/BST established for time in the UK. + They are: + BMT/BST for Bermuda 1890–1930, + CMT/BST for Calamarca Mean Time and Bolivian Summer Time + 1890–1932, + DMT/IST for Dublin/Dunsink Mean Time and Irish Summer Time + 1880–1916, + MMT/MST/MDST for Moscow 1880–1919, and + RMT/LST for Riga Mean Time and Latvian Summer Time 1880–1926. +

    +
  • +
  • + Use 'LMT' for local mean time of locations before the + introduction of standard time; see "Scope of the + tz database". +
  • +
  • + If there is no common English abbreviation, use numeric offsets like + -05 and +0530 that are generated + by zic's %z notation. +
  • +
  • + Use current abbreviations for older timestamps to avoid confusion. + For example, in 1910 a common English abbreviation for time + in central Europe was 'MEZ' (short for both "Middle European + Zone" and for "Mitteleuropäische Zeit" in German). + Nowadays 'CET' ("Central European Time") is more common in + English, and the database uses 'CET' even for circa-1910 + timestamps as this is less confusing for modern users and avoids + the need for determining when 'CET' supplanted 'MEZ' in common + usage. +
  • +
  • + Use a consistent style in a timezone's history. + For example, if a history tends to use numeric + abbreviations and a particular entry could go either way, use a + numeric abbreviation. +
  • +
  • + Use + Universal Time + (UT) (with time zone abbreviation '-00') for + locations while uninhabited. + The leading '-' is a flag that the UT offset is in + some sense undefined; this notation is derived + from Internet + RFC 3339. +
  • +
+ +

+Application writers should note that these abbreviations are ambiguous +in practice: e.g., 'CST' means one thing in China and something else +in North America, and 'IST' can refer to time in India, Ireland or +Israel. +To avoid ambiguity, use numeric UT offsets like +'-0600' instead of time zone abbreviations like 'CST'. +

+
+ +
+

Accuracy of the tz database

+

+The tz database is not authoritative, and it +surely has errors. +Corrections are welcome and encouraged; see the file CONTRIBUTING. +Users requiring authoritative data should consult national standards +bodies and the references cited in the database's comments. +

+ +

+Errors in the tz database arise from many sources: +

+ +
    +
  • + The tz database predicts future + timestamps, and current predictions + will be incorrect after future governments change the rules. + For example, if today someone schedules a meeting for 13:00 next + October 1, Casablanca time, and tomorrow Morocco changes its + daylight saving rules, software can mess up after the rule change + if it blithely relies on conversions made before the change. +
  • +
  • + The pre-1970 entries in this database cover only a tiny sliver of how + clocks actually behaved; the vast majority of the necessary + information was lost or never recorded. + Thousands more timezones would be needed if + the tz database's scope were extended to + cover even just the known or guessed history of standard time; for + example, the current single entry for France would need to split + into dozens of entries, perhaps hundreds. + And in most of the world even this approach would be misleading + due to widespread disagreement or indifference about what times + should be observed. + In her 2015 book + The + Global Transformation of Time, 1870–1950, + Vanessa Ogle writes + "Outside of Europe and North America there was no system of time + zones at all, often not even a stable landscape of mean times, + prior to the middle decades of the twentieth century". + See: Timothy Shenk, Booked: + A Global History of Time. Dissent 2015-12-17. +
  • +
  • + Most of the pre-1970 data entries come from unreliable sources, often + astrology books that lack citations and whose compilers evidently + invented entries when the true facts were unknown, without + reporting which entries were known and which were invented. + These books often contradict each other or give implausible entries, + and on the rare occasions when they are checked they are + typically found to be incorrect. +
  • +
  • + For the UK the tz database relies on + years of first-class work done by + Joseph Myers and others; see + "History of + legal time in Britain". + Other countries are not done nearly as well. +
  • +
  • + Sometimes, different people in the same city maintain clocks + that differ significantly. + Historically, railway time was used by railroad companies (which + did not always + agree with each other), church-clock time was used for birth + certificates, etc. + More recently, competing political groups might disagree about + clock settings. Often this is merely common practice, but + sometimes it is set by law. + For example, from 1891 to 1911 the UT offset in France + was legally UT +00:09:21 outside train stations and + UT +00:04:21 inside. Other examples include + Chillicothe in 1920, Palm Springs in 1946/7, and Jerusalem and + Ürümqi to this day. +
  • +
  • + Although a named location in the tz + database stands for the containing region, its pre-1970 data + entries are often accurate for only a small subset of that region. + For example, Europe/London stands for the United + Kingdom, but its pre-1847 times are valid only for locations that + have London's exact meridian, and its 1847 transition + to GMT is known to be valid only for the L&NW and + the Caledonian railways. +
  • +
  • + The tz database does not record the + earliest time for which a timezone's + data entries are thereafter valid for every location in the region. + For example, Europe/London is valid for all locations + in its region after GMT was made the standard time, + but the date of standardization (1880-08-02) is not in the + tz database, other than in commentary. + For many timezones the earliest time of + validity is unknown. +
  • +
  • + The tz database does not record a + region's boundaries, and in many cases the boundaries are not known. + For example, the timezone + America/Kentucky/Louisville represents a region + around the city of Louisville, the boundaries of which are + unclear. +
  • +
  • + Changes that are modeled as instantaneous transitions in the + tz + database were often spread out over hours, days, or even decades. +
  • +
  • + Even if the time is specified by law, locations sometimes + deliberately flout the law. +
  • +
  • + Early timekeeping practices, even assuming perfect clocks, were + often not specified to the accuracy that the + tz database requires. +
  • +
  • + The tz database cannot represent stopped clocks. + However, on 1911-03-11 at 00:00, some public-facing French clocks + were changed by stopping them for a few minutes to effect a transition. + The tz database models this via a + backward transition; the relevant French legislation does not + specify exactly how the transition was to occur. +
  • +
  • + Sometimes historical timekeeping was specified more precisely + than what the tz code can handle. + For example, from 1880 to 1916 clocks in Ireland observed Dublin Mean + Time (estimated to be UT + −00:25:21.1); although the tz + source data can represent the .1 second, TZif files and the code cannot. + In practice these old specifications were rarely if ever + implemented to subsecond precision. +
  • +
  • + Even when all the timestamp transitions recorded by the + tz database are correct, the + tz rules that generate them may not + faithfully reflect the historical rules. + For example, from 1922 until World War II the UK moved clocks + forward the day following the third Saturday in April unless that + was Easter, in which case it moved clocks forward the previous + Sunday. + Because the tz database has no + way to specify Easter, these exceptional years are entered as + separate tz Rule lines, even though the + legal rules did not change. + When transitions are known but the historical rules behind them are not, + the database contains Zone and Rule + entries that are intended to represent only the generated + transitions, not any underlying historical rules; however, this + intent is recorded at best only in commentary. +
  • +
  • + The tz database models time + using the proleptic + Gregorian calendar with days containing 24 equal-length hours + numbered 00 through 23, except when clock transitions occur. + Pre-standard time is modeled as local mean time. + However, historically many people used other calendars and other timescales. + For example, the Roman Empire used + the Julian + calendar, + and Roman + timekeeping had twelve varying-length daytime hours with a + non-hour-based system at night. + And even today, some local practices diverge from the Gregorian + calendar with 24-hour days. These divergences range from + relatively minor, such as Japanese bars giving times like "24:30" for the + wee hours of the morning, to more-significant differences such as the + east African practice of starting the day at dawn, renumbering + the Western 06:00 to be 12:00. These practices are largely outside + the scope of the tz code and data, which + provide only limited support for date and time localization + such as that required by POSIX. + If DST is not used a different time zone + can often do the trick; for example, in Kenya a TZ setting + like <-03>3 or America/Cayenne starts + the day six hours later than Africa/Nairobi does. +
  • +
  • + Early clocks were less reliable, and data entries do not represent + clock error. +
  • +
  • + The tz database assumes Universal Time + (UT) as an origin, even though UT is not + standardized for older timestamps. + In the tz database commentary, + UT denotes a family of time standards that includes + Coordinated Universal Time (UTC) along with other + variants such as UT1 and GMT, + with days starting at midnight. + Although UT equals UTC for modern + timestamps, UTC was not defined until 1960, so + commentary uses the more general abbreviation UT for + timestamps that might predate 1960. + Since UT, UT1, etc. disagree slightly, + and since pre-1972 UTC seconds varied in length, + interpretation of older timestamps can be problematic when + subsecond accuracy is needed. +
  • +
  • + Civil time was not based on atomic time before 1972, and we do not + know the history of + earth's + rotation accurately enough to map SI seconds to + historical solar time + to more than about one-hour accuracy. + See: Stephenson FR, Morrison LV, Hohenkerk CY. + Measurement of + the Earth's rotation: 720 BC to AD 2015. + Proc Royal Soc A. 2016;472:20160404. + Also see: Espenak F. Uncertainty + in Delta T (ΔT). +
  • +
  • + The relationship between POSIX time (that is, UTC but + ignoring leap + seconds) and UTC is not agreed upon. + This affects timestamps during the leap second era (1972–2035). + Although the POSIX + clock officially stops during an inserted leap second, at least one + proposed standard has it jumping back a second instead; and in + practice POSIX clocks more typically either progress glacially during + a leap second, or are slightly slowed while near a leap second. +
  • +
  • + The tz database does not represent how + uncertain its information is. + Ideally it would contain information about when data entries are + incomplete or dicey. + Partial temporal knowledge is a field of active research, though, + and it is not clear how to apply it here. +
  • +
+ +

+In short, many, perhaps most, of the tz +database's pre-1970 and future timestamps are either wrong or +misleading. +Any attempt to pass the +tz database off as the definition of time +should be unacceptable to anybody who cares about the facts. +In particular, the tz database's +LMT offsets should not be considered meaningful, and +should not prompt creation of timezones +merely because two locations +differ in LMT or transitioned to standard time at +different dates. +

+
+ +
+

Time and date functions

+

+The tz code contains time and date functions +that are upwards compatible with those of POSIX. +Code compatible with this package is already +part of many platforms, where the +primary use of this package is to update obsolete time-related files. +To do this, you may need to compile the time zone compiler +'zic' supplied with this package instead of using the +system 'zic', since the format of zic's +input is occasionally extended, and a platform may still be shipping +an older zic. +

+ +

POSIX properties and limitations

+
    +
  • +

    + In POSIX, time display in a process is controlled by the + environment variable TZ. + Unfortunately, the POSIX + TZ string takes a form that is hard to describe and + is error-prone in practice. + Also, POSIX TZ strings cannot deal with daylight + saving time rules not based on the Gregorian calendar (as in + Morocco), or with situations where more than two time zone + abbreviations or UT offsets are used in an area. +

    + +

    + The POSIX TZ string takes the following form: +

    + +

    + stdoffset[dst[offset][,date[/time],date[/time]]] +

    + +

    + where: +

    + +
    +
    std and dst
    + are 3 or more characters specifying the standard + and daylight saving time (DST) zone abbreviations. + Starting with POSIX.1-2001, std and dst + may also be in a quoted form like '<+09>'; + this allows "+" and "-" in the names. +
    +
    offset
    + is of the form + '[±]hh[:mm[:ss]]' + and specifies the offset west of UT. + 'hh' may be a single digit; + 0≤hh≤24. + The default DST offset is one hour ahead of + standard time. +
    +
    date[/time],date[/time]
    + specifies the beginning and end of DST. + If this is absent, the system supplies its own ruleset + for DST, typically current US + DST rules. +
    +
    time
    + takes the form + 'hh[:mm[:ss]]' + and defaults to 02:00. + This is the same format as the offset, except that a + leading '+' or '-' is not allowed. +
    +
    date
    + takes one of the following forms: +
    +
    Jn (1≤n≤365)
    + origin-1 day number not counting February 29 +
    +
    n (0≤n≤365)
    + origin-0 day number counting February 29 if present +
    +
    Mm.n.d + (0[Sunday]≤d≤6[Saturday], 1≤n≤5, + 1≤m≤12)
    + for the dth day of week n of + month m of the year, where week 1 is the first + week in which day d appears, and + '5' stands for the last week in which + day d appears (which may be either the 4th or + 5th week). + Typically, this is the only useful form; the n + and Jn forms are rarely used. +
    +
    +
    +
    + +

    + Here is an example POSIX TZ string for New + Zealand after 2007. + It says that standard time (NZST) is 12 hours ahead + of UT, and that daylight saving time + (NZDT) is observed from September's last Sunday at + 02:00 until April's first Sunday at 03:00: +

    + +
    TZ='NZST-12NZDT,M9.5.0,M4.1.0/3'
    + +

    + This POSIX TZ string is hard to remember, and + mishandles some timestamps before 2008. + With this package you can use this instead: +

    + +
    TZ='Pacific/Auckland'
    +
  • +
  • + POSIX does not define the DST transitions + for TZ values like + "EST5EDT". + Traditionally the current US DST rules + were used to interpret such values, but this meant that the + US DST rules were compiled into each + time conversion package, and when + US time conversion rules changed (as in 1987 and again in 2007), + all packages that + interpreted TZ values had to be updated + to ensure proper results. +
  • +
  • + The TZ environment variable is process-global, which + makes it hard to write efficient, thread-safe applications that + need access to multiple timezones. +
  • +
  • + In POSIX, there is no tamper-proof way for a process to learn the + system's best idea of local (wall clock) time. + This is important for applications that an administrator wants + used only at certain times – without regard to whether the + user has fiddled the + TZ environment variable. + While an administrator can "do everything in UT" to + get around the problem, doing so is inconvenient and precludes + handling daylight saving time shifts – as might be required to + limit phone calls to off-peak hours. +
  • +
  • + POSIX provides no convenient and efficient way to determine + the UT offset and time zone abbreviation of arbitrary + timestamps, particularly for timezones + that do not fit into the POSIX model. +
  • +
  • + POSIX requires that time_t clock counts exclude leap + seconds. +
  • +
  • + The tz code attempts to support all the + time_t implementations allowed by POSIX. + The time_t type represents a nonnegative count of seconds + since 1970-01-01 00:00:00 UTC, ignoring leap seconds. + In practice, time_t is usually a signed 64- or 32-bit + integer; 32-bit signed time_t values stop working after + 2038-01-19 03:14:07 UTC, so new implementations these + days typically use a signed 64-bit integer. + Unsigned 32-bit integers are used on one or two platforms, and 36-bit + and 40-bit integers are also used occasionally. + Although earlier POSIX versions allowed time_t to be a + floating-point type, this was not supported by any practical system, + and POSIX.1-2013 and the tz code both + require time_t to be an integer type. +
  • +
+ +

Extensions to POSIX in the +tz code

+
    +
  • +

    + The TZ environment variable is used in generating + the name of a file from which time-related information is read + (or is interpreted à la POSIX); TZ is no longer + constrained to be a string containing abbreviations + and numeric data as described above. + The file's format is TZif, + a timezone information format that contains binary data; see + Internet + RFC 8536. + The daylight saving time rules to be used for a + particular timezone are encoded in the + TZif file; the format of the file allows US, + Australian, and other rules to be encoded, and + allows for situations where more than two time zone + abbreviations are used. +

    +

    + It was recognized that allowing the TZ environment + variable to take on values such as 'America/New_York' + might cause "old" programs (that expect TZ to have a + certain form) to operate incorrectly; consideration was given to using + some other environment variable (for example, TIMEZONE) + to hold the string used to generate the TZif file's name. + In the end, however, it was decided to continue using + TZ: it is widely used for time zone purposes; + separately maintaining both TZ + and TIMEZONE seemed a nuisance; and systems where + "new" forms of TZ might cause problems can simply + use legacy TZ values such as "EST5EDT" which + can be used by "new" programs as well as by "old" programs that + assume pre-POSIX TZ values. +

    +
  • +
  • + The code supports platforms with a UT offset member + in struct tm, e.g., tm_gmtoff, + or with a time zone abbreviation member in + struct tm, e.g., tm_zone. As noted + in Austin + Group defect 1533, a future version of POSIX is planned to + require tm_gmtoff and tm_zone. +
  • +
  • + Functions tzalloc, tzfree, + localtime_rz, and mktime_z for + more-efficient thread-safe applications that need to use multiple + timezones. + The tzalloc and tzfree functions + allocate and free objects of type timezone_t, + and localtime_rz and mktime_z are + like localtime_r and mktime with an + extra timezone_t argument. + The functions were inspired by NetBSD. +
  • +
  • + Negative time_t values are supported, on systems + where time_t is signed. +
  • +
  • + These functions can account for leap seconds; + see Leap seconds below. +
  • +
+ +

POSIX features no longer needed

+

+POSIX and ISO C +define some APIs that are vestigial: +they are not needed, and are relics of a too-simple model that does +not suffice to handle many real-world timestamps. +Although the tz code supports these +vestigial APIs for backwards compatibility, they should +be avoided in portable applications. +The vestigial APIs are: +

+
    +
  • + The POSIX tzname variable does not suffice and is no + longer needed. + To get a timestamp's time zone abbreviation, consult + the tm_zone member if available; otherwise, + use strftime's "%Z" conversion + specification. +
  • +
  • + The POSIX daylight and timezone + variables do not suffice and are no longer needed. + To get a timestamp's UT offset, consult + the tm_gmtoff member if available; otherwise, + subtract values returned by localtime + and gmtime using the rules of the Gregorian calendar, + or use strftime's "%z" conversion + specification if a string like "+0900" suffices. +
  • +
  • + The tm_isdst member is almost never needed and most of + its uses should be discouraged in favor of the abovementioned + APIs. + Although it can still be used in arguments to + mktime to disambiguate timestamps near + a DST transition when the clock jumps back on + platforms lacking tm_gmtoff, this + disambiguation does not work when standard time itself jumps back, + which can occur when a location changes to a time zone with a + lesser UT offset. +
  • +
+ +

Other portability notes

+
    +
  • + The 7th Edition + UNIX timezone function is not present in this + package; it is impossible to reliably map timezone's + arguments (a "minutes west of GMT" value and a + "daylight saving time in effect" flag) to a time zone + abbreviation, and we refuse to guess. + Programs that in the past used the timezone function + may now examine localtime(&clock)->tm_zone + (if TM_ZONE is defined) or + tzname[localtime(&clock)->tm_isdst] + (if HAVE_TZNAME is nonzero) to learn the correct time + zone abbreviation to use. +
  • +
  • + The 4.2BSD + gettimeofday function is not + used in this package. + This formerly let users obtain the current UTC offset + and DST flag, but this functionality was removed in + later versions of BSD. +
  • +
  • + In SVR2, time conversion fails for near-minimum or + near-maximum time_t values when doing conversions + for places that do not use UT. + This package takes care to do these conversions correctly. + A comment in the source code tells how to get compatibly wrong + results. +
  • +
  • + The functions that are conditionally compiled + if STD_INSPIRED is nonzero should, at this point, be + looked on primarily as food for thought. + They are not in any sense "standard compatible" – some are + not, in fact, specified in any standard. + They do, however, represent responses of various authors to + standardization proposals. +
  • +
  • + Other time conversion proposals, in particular those supported by the + Time Zone + Database Parser, offer a wider selection of functions + that provide capabilities beyond those provided here. + The absence of such functions from this package is not meant to + discourage the development, standardization, or use of such + functions. + Rather, their absence reflects the decision to make this package + contain valid extensions to POSIX, to ensure its broad + acceptability. + If more powerful time conversion functions can be standardized, so + much the better. +
  • +
+
+ +
+

Interface stability

+

+The tz code and data supply the following interfaces: +

+ +
    +
  • + A set of timezone names as per + "Timezone identifiers" above. +
  • +
  • + Library functions described in "Time and date + functions" above. +
  • +
  • + The programs tzselect, zdump, + and zic, documented in their man pages. +
  • +
  • + The format of zic input files, documented in + the zic man page. +
  • +
  • + The format of zic output files, documented in + the tzfile man page. +
  • +
  • + The format of zone table files, documented in zone1970.tab. +
  • +
  • + The format of the country code file, documented in iso3166.tab. +
  • +
  • + The version number of the code and data, as the first line of + the text file 'version' in each release. +
  • +
+ +

+Interface changes in a release attempt to preserve compatibility with +recent releases. +For example, tz data files typically do not +rely on recently added zic features, so that users can +run older zic versions to process newer data files. +Downloading +the tz database describes how releases +are tagged and distributed. +

+ +

+Interfaces not listed above are less stable. +For example, users should not rely on particular UT +offsets or abbreviations for timestamps, as data entries are often +based on guesswork and these guesses may be corrected or improved. +

+ +

+Timezone boundaries are not part of the stable interface. +For example, even though the Asia/Bangkok timezone +currently includes Chiang Mai, Hanoi, and Phnom Penh, this is not part +of the stable interface and the timezone can split at any time. +If a calendar application records a future event in some location other +than Bangkok by putting "Asia/Bangkok" in the event's record, +the application should be robust in the presence of timezone splits +between now and the future time. +

+
+ +
+

Leap seconds

+

+Leap seconds were introduced in 1972 to accommodate the +difference between atomic time and the less regular rotation of the earth. +Unfortunately they caused so many problems with civil +timekeeping that they +are planned +to be discontinued by 2035, with some as-yet-undetermined +mechanism replacing them, perhaps after the year 2135. +Despite their impending obsolescence, a record of leap seconds is still +needed to resolve timestamps from 1972 through 2035. +

+ +

+The tz code and data can account for leap seconds, +thanks to code contributed by Bradley White. +However, the leap second support of this package is rarely used directly +because POSIX requires leap seconds to be excluded and many +software packages would mishandle leap seconds if they were present. +Instead, leap seconds are more commonly handled by occasionally adjusting +the operating system kernel clock as described in +Precision timekeeping, +and this package by default installs a leapseconds file +commonly used by +NTP +software that adjusts the kernel clock. +However, kernel-clock twiddling approximates UTC only roughly, +and systems needing more precise UTC can use this package's leap +second support directly. +

+ +

+The directly supported mechanism assumes that time_t +counts of seconds since the POSIX epoch normally include leap seconds, +as opposed to POSIX time_t counts which exclude leap seconds. +This modified timescale is converted to UTC +at the same point that time zone and DST +adjustments are applied – +namely, at calls to localtime and analogous functions – +and the process is driven by leap second information +stored in alternate versions of the TZif files. +Because a leap second adjustment may be needed even +if no time zone correction is desired, +calls to gmtime-like functions +also need to consult a TZif file, +conventionally named Etc/UTC +(GMT in previous versions), +to see whether leap second corrections are needed. +To convert an application's time_t timestamps to or from +POSIX time_t timestamps (for use when, say, +embedding or interpreting timestamps in portable +tar +files), +the application can call the utility functions +time2posix and posix2time +included with this package. +

+ +

+If the POSIX-compatible TZif file set is installed +in a directory whose basename is zoneinfo, the +leap-second-aware file set is by default installed in a separate +directory zoneinfo-leaps. +Although each process can have its own time zone by setting +its TZ environment variable, there is no support for some +processes being leap-second aware while other processes are +POSIX-compatible; the leap-second choice is system-wide. +So if you configure your kernel to count leap seconds, you should also +discard zoneinfo and rename zoneinfo-leaps +to zoneinfo. +Alternatively, you can install just one set of TZif files +in the first place; see the REDO variable in this package's +makefile. +

+
+ +
+

Calendrical issues

+

+Calendrical issues are a bit out of scope for a time zone database, +but they indicate the sort of problems that we would run into if we +extended the time zone database further into the past. +An excellent resource in this area is Edward M. Reingold +and Nachum Dershowitz, Calendrical +Calculations: The Ultimate Edition, Cambridge University Press (2018). +Other information and sources are given in the file 'calendars' +in the tz distribution. +They sometimes disagree. +

+
+ +
+

Time and time zones off Earth

+

+The European Space Agency is considering +the establishment of a reference timescale for the Moon, which has +days roughly equivalent to 29.5 Earth days, and where relativistic +effects cause clocks to tick slightly faster than on Earth. +

+ +

+Some people's work schedules have used +Mars time. +Jet Propulsion Laboratory (JPL) coordinators kept Mars time on +and off during the +Mars +Pathfinder mission (1997). +Some of their family members also adapted to Mars time. +Dozens of special Mars watches were built for JPL workers who kept +Mars time during the +Mars +Exploration Rovers (MER) mission (2004–2018). +These timepieces looked like normal Seikos and Citizens but were adjusted +to use Mars seconds rather than terrestrial seconds, although +unfortunately the adjusted watches were unreliable and appear to have +had only limited use. +

+ +

+A Mars solar day is called a "sol" and has a mean period equal to +about 24 hours 39 minutes 35.244 seconds in terrestrial time. +It is divided into a conventional 24-hour clock, so each Mars second +equals about 1.02749125 terrestrial seconds. +(One MER worker noted, "If I am working Mars hours, and Mars hours are +2.5% more than Earth hours, shouldn't I get an extra 2.5% pay raise?") +

+ +

+The prime +meridian of Mars goes through the center of the crater +Airy-0, named in +honor of the British astronomer who built the Greenwich telescope that +defines Earth's prime meridian. +Mean solar time on the Mars prime meridian is +called Mars Coordinated Time (MTC). +

+ +

+Each landed mission on Mars has adopted a different reference for +solar timekeeping, so there is no real standard for Mars time zones. +For example, the MER mission defined two time zones "Local +Solar Time A" and "Local Solar Time B" for its two missions, each zone +designed so that its time equals local true solar time at +approximately the middle of the nominal mission. +The A and B zones differ enough so that an MER worker assigned to +the A zone might suffer "Mars lag" when switching to work in the B zone. +Such a "time zone" is not particularly suited for any application +other than the mission itself. +

+ +

+Many calendars have been proposed for Mars, but none have achieved +wide acceptance. +Astronomers often use Mars Sol Date (MSD) which is a +sequential count of Mars solar days elapsed since about 1873-12-29 +12:00 GMT. +

+ +

+In our solar system, Mars is the planet with time and calendar most +like Earth's. +On other planets, Sun-based time and calendars would work quite +differently. +For example, although Mercury's +sidereal +rotation period is 58.646 Earth days, Mercury revolves around the +Sun so rapidly that an observer on Mercury's equator would see a +sunrise only every 175.97 Earth days, i.e., a Mercury year is 0.5 of a +Mercury day. +Venus is more complicated, partly because its rotation is slightly +retrograde: +its year is 1.92 of its days. +Gas giants like Jupiter are trickier still, as their polar and +equatorial regions rotate at different rates, so that the length of a +day depends on latitude. +This effect is most pronounced on Neptune, where the day is about 12 +hours at the poles and 18 hours at the equator. +

+ +

+Although the tz database does not support +time on other planets, it is documented here in the hopes that support +will be added eventually. +

+ +

+Sources for time on other planets: +

+ + +
+ +
+
+ This file is in the public domain, so clarified as of 2009-05-17 by + Arthur David Olson. +
+ + diff --git a/lib-tzcode/time2posix.3 b/lib-tzcode/time2posix.3 new file mode 100644 index 0000000..f48402b --- /dev/null +++ b/lib-tzcode/time2posix.3 @@ -0,0 +1,133 @@ +.\" This file is in the public domain, so clarified as of +.\" 1996-06-05 by Arthur David Olson. +.TH time2posix 3 "" "Time Zone Database" +.SH NAME +time2posix, posix2time \- convert seconds since the Epoch +.SH SYNOPSIS +.nf +.ie \n(.g .ds - \f(CR-\fP +.el .ds - \- +.B #include +.PP +.B time_t time2posix(time_t t); +.PP +.B time_t posix2time(time_t t); +.PP +.B cc ... \*-ltz +.fi +.SH DESCRIPTION +.ie '\(en'' .ds en \- +.el .ds en \(en +.ie '\(lq'' .ds lq \&"\" +.el .ds lq \(lq\" +.ie '\(rq'' .ds rq \&"\" +.el .ds rq \(rq\" +.de q +\\$3\*(lq\\$1\*(rq\\$2 +.. +IEEE Standard 1003.1 +(POSIX) +requires the time_t value 536457599 to stand for 1986-12-31 23:59:59 UTC. +This effectively implies that POSIX time_t values cannot include leap +seconds and, +therefore, +that the system time must be adjusted as each leap occurs. +.PP +If the time package is configured with leap-second support +enabled, +however, +no such adjustment is needed and +time_t values continue to increase over leap events +(as a true +.q "seconds since...\&" +value). +This means that these values will differ from those required by POSIX +by the net number of leap seconds inserted since the Epoch. +.PP +Typically this is not a problem as the type time_t is intended +to be +(mostly) +opaque \*(en time_t values should only be obtained-from and +passed-to functions such as +.BR time(2) , +.BR localtime(3) , +.BR mktime(3) , +and +.BR difftime(3) . +However, +POSIX gives an arithmetic +expression for directly computing a time_t value from a given date/time, +and the same relationship is assumed by some +(usually older) +applications. +Any programs creating/dissecting time_t values +using such a relationship will typically not handle intervals +over leap seconds correctly. 
+.PP +The +.B time2posix +and +.B posix2time +functions are provided to address this time_t mismatch by converting +between local time_t values and their POSIX equivalents. +This is done by accounting for the number of time-base changes that +would have taken place on a POSIX system as leap seconds were inserted +or deleted. +These converted values can then be used in lieu of correcting the older +applications, +or when communicating with POSIX-compliant systems. +.PP +The +.B time2posix +function +is single-valued. +That is, +every local time_t +corresponds to a single POSIX time_t. +The +.B posix2time +function +is less well-behaved: +for a positive leap second hit the result is not unique, +and for a negative leap second hit the corresponding +POSIX time_t doesn't exist so an adjacent value is returned. +Both of these are good indicators of the inferiority of the +POSIX representation. +.PP +The following table summarizes the relationship between a time +T and its conversion to, +and back from, +the POSIX representation over the leap second inserted at the end of June, +1993. +.nf +.ta \w'93/06/30 'u +\w'23:59:59 'u +\w'A+0 'u +\w'X=time2posix(T) 'u +DATE TIME T X=time2posix(T) posix2time(X) +93/06/30 23:59:59 A+0 B+0 A+0 +93/06/30 23:59:60 A+1 B+1 A+1 or A+2 +93/07/01 00:00:00 A+2 B+1 A+1 or A+2 +93/07/01 00:00:01 A+3 B+2 A+3 + +A leap second deletion would look like... + +DATE TIME T X=time2posix(T) posix2time(X) +??/06/30 23:59:58 A+0 B+0 A+0 +??/07/01 00:00:00 A+1 B+2 A+1 +??/07/01 00:00:01 A+2 B+3 A+2 +.sp +.ce + [Note: posix2time(B+1) => A+0 or A+1] +.fi +.PP +If leap-second support is not enabled, +local time_t and +POSIX time_t values are equivalent, +and both +.B time2posix +and +.B posix2time +degenerate to the identity function. 
+.SH SEE ALSO +difftime(3), +localtime(3), +mktime(3), +time(2) diff --git a/lib-tzcode/time2posix.3.txt b/lib-tzcode/time2posix.3.txt new file mode 100644 index 0000000..87789b3 --- /dev/null +++ b/lib-tzcode/time2posix.3.txt @@ -0,0 +1,76 @@ +time2posix(3) Library Functions Manual time2posix(3) + +NAME + time2posix, posix2time - convert seconds since the Epoch + +SYNOPSIS + #include + + time_t time2posix(time_t t); + + time_t posix2time(time_t t); + + cc ... -ltz + +DESCRIPTION + IEEE Standard 1003.1 (POSIX) requires the time_t value 536457599 to + stand for 1986-12-31 23:59:59 UTC. This effectively implies that POSIX + time_t values cannot include leap seconds and, therefore, that the + system time must be adjusted as each leap occurs. + + If the time package is configured with leap-second support enabled, + however, no such adjustment is needed and time_t values continue to + increase over leap events (as a true "seconds since..." value). This + means that these values will differ from those required by POSIX by the + net number of leap seconds inserted since the Epoch. + + Typically this is not a problem as the type time_t is intended to be + (mostly) opaque - time_t values should only be obtained-from and + passed-to functions such as time(2), localtime(3), mktime(3), and + difftime(3). However, POSIX gives an arithmetic expression for + directly computing a time_t value from a given date/time, and the same + relationship is assumed by some (usually older) applications. Any + programs creating/dissecting time_t values using such a relationship + will typically not handle intervals over leap seconds correctly. + + The time2posix and posix2time functions are provided to address this + time_t mismatch by converting between local time_t values and their + POSIX equivalents. This is done by accounting for the number of time- + base changes that would have taken place on a POSIX system as leap + seconds were inserted or deleted. 
These converted values can then be + used in lieu of correcting the older applications, or when + communicating with POSIX-compliant systems. + + The time2posix function is single-valued. That is, every local time_t + corresponds to a single POSIX time_t. The posix2time function is less + well-behaved: for a positive leap second hit the result is not unique, + and for a negative leap second hit the corresponding POSIX time_t + doesn't exist so an adjacent value is returned. Both of these are good + indicators of the inferiority of the POSIX representation. + + The following table summarizes the relationship between a time T and + its conversion to, and back from, the POSIX representation over the + leap second inserted at the end of June, 1993. + DATE TIME T X=time2posix(T) posix2time(X) + 93/06/30 23:59:59 A+0 B+0 A+0 + 93/06/30 23:59:60 A+1 B+1 A+1 or A+2 + 93/07/01 00:00:00 A+2 B+1 A+1 or A+2 + 93/07/01 00:00:01 A+3 B+2 A+3 + + A leap second deletion would look like... + + DATE TIME T X=time2posix(T) posix2time(X) + ??/06/30 23:59:58 A+0 B+0 A+0 + ??/07/01 00:00:00 A+1 B+2 A+1 + ??/07/01 00:00:01 A+2 B+3 A+2 + + [Note: posix2time(B+1) => A+0 or A+1] + + If leap-second support is not enabled, local time_t and POSIX time_t + values are equivalent, and both time2posix and posix2time degenerate to + the identity function. + +SEE ALSO + difftime(3), localtime(3), mktime(3), time(2) + +Time Zone Database time2posix(3) diff --git a/lib-tzcode/tz-art.html b/lib-tzcode/tz-art.html new file mode 100644 index 0000000..c86c186 --- /dev/null +++ b/lib-tzcode/tz-art.html @@ -0,0 +1,649 @@ + + + + +Time and the Arts + + +

Time and the Arts

+

Documentaries

+ +

Movies

+
    +
  • +In the 1946 movie A Matter of Life and Death +(U.S. title Stairway to Heaven) +there is a reference to British Double Summer Time. +The time does not play a large part in the plot; +it's just a passing reference to the time when one of the +characters was supposed to have died (but didn't). +(IMDb entry.) +(Dave Cantor) +
  • +The 1953 railway comedy movie The Titfield Thunderbolt includes a +play on words on British Double Summer Time. Valentine's wife wants +him to leave the pub and asks him, "Do you know what time it is?" +And he, happy where he is, replies: "Yes, my love. Summer double time." +(IMDb entry.) +(Mark Brader, 2009-10-02) +
  • +
  • +The premise of the 1999 caper movie Entrapment involves computers +in an international banking network being shut down briefly at +midnight in each time zone to avoid any problems at the transition +from the year 1999 to 2000 in that zone. (Hmmmm.) If this shutdown +is extended by 10 seconds, it will create a one-time opportunity for +a gigantic computerized theft. To achieve this, at one location the +crooks interfere with the microwave system supplying time signals to +the computer, advancing the time by 0.1 second each minute over the +last hour of 1999. (So this movie teaches us that 0.1 × 60 = 10.) +(IMDb entry.) +(Mark Brader, 2009-10-02) +
  • +
  • +One mustn't forget the +trailer +(2014; 2:23) for the movie Daylight Saving. +
  • +
+

TV episodes

+
    +
  • +An episode of The Adventures of Superman entitled "The Mysterious +Cube," first aired 1958-02-24, had Superman convincing the controllers +of the Arlington Time Signal to broadcast ahead of actual time; +doing so got a crook trying to be declared dead to +emerge a bit too early from the titular enclosure. +(IMDb entry.) +
  • +
  • +"The Chimes +of Big Ben", The Prisoner, episode 2, ITC, 1967-10-06. +Our protagonist tumbles to +the fraudulent nature of a Poland-to-England escape upon hearing "Big +Ben" chiming on Polish local time. +(IMDb entry.) +
  • +
  • +"The Susie", Seinfeld, season 8, episode 15, NBC, 1997-02-13. +Kramer decides that daylight saving time +isn't coming fast enough, so he sets his watch ahead an hour. +
  • +
  • +"20 Hours in America", The West Wing, season 4, episodes 1–2, +2002-09-25, contained a scene that +saw White House staffers stranded in Indiana; they thought they had time to +catch Air Force One but were done in by intra-Indiana local time changes. +
  • +
  • +"In what time zone would you find New York City?" was a $200 question on +the 1999-11-13 United States airing of Who Wants to Be a Millionaire?, +and "In 1883, what industry led the movement to divide the U.S. into four time +zones?" was a $32,000 question on the 2001-05-23 United States airing of +the same show. At this rate, the million-dollar time-zone +question should have been asked 2002-06-04. +
  • +
  • +A private jet's mid-flight change of time zones distorts Alison Dubois' +premonition in the "We Had a Dream" episode of Medium +(originally aired 2007-02-28). +
  • +
  • +A criminal's failure to account for the start of daylight saving is pivotal +in "Mr. Monk +and the Rapper" (first aired 2007-07-20). +
  • +
  • +In the 30 Rock episode "Anna Howard Shaw Day" +(first broadcast 2010-02-11), +Jack Donaghy's date realizes that a Geneva-to-New-York business phone call +received in the evening must be fake given the difference in local times. +
  • +
  • +In the "Run by the Monkeys" episode of Da Vinci's Inquest +(first broadcast 2002-11-17), +a witness in a five-year-old fire case realizes they may not have set +their clock back when daylight saving ended on the day of the fire, +introducing the possibility of an hour when arson might have occurred. +
  • +
  • +In "The Todd Couple" episode of Outsourced (first aired 2011-02-10), +Manmeet sets up Valentine's Day teledates for 6:00 and 9:00pm; +since one is with a New Yorker and the other with a San Franciscan, +hilarity ensues. +(Never mind that this should be 7:30am in Mumbai, yet for some reason the show +proceeds as though it's also mid-evening there.) +
  • +
  • +In the "14 Days to Go"/"T Minus..." episode of +You, Me and the Apocalypse +(first aired 2015-11-11 in the UK, 2016-03-10 in the US), +the success of a mission to deal with a comet +hinges on whether or not Russia observes daylight saving time. +(In the US, +the episode first aired in the week before the switch to DST.) +
  • +
  • +"The Lost Hour", Eerie, Indiana, episode 10, NBC, 1991-12-01. +Despite Indiana's then-lack of DST, +Marshall changes his clock with unusual consequences. +See "Eerie, +Indiana was a few dimensions ahead of its time". +
  • +
  • +"Time Tunnel", The Adventures of Pete & Pete, season 2, episode 5, +Nickelodeon, 1994-10-23. +The two Petes travel back in time an hour +on the day that DST ends. +
  • +
  • +"King-Size Homer", The Simpsons, episode 135, Fox, 1995-11-05. +Homer, working from home, remarks "8:58, first +time I've ever been early for work. Except for all those daylight +savings days. Lousy farmers." +
  • +
  • +Last Week Tonight with John Oliver, season 2, episode 5, 2015-03-08, +asked, "Daylight Saving +Time – How Is This Still A Thing?" +
  • +
  • +"Tracks", The Good Wife, season 7, episode 12, +CBS, 2016-01-17. +The applicability of a contract hinges on the +time zone associated with a video timestamp. +
  • +
  • +"Justice", Veep, season 6, episode 4, HBO, 2017-05-07. +Jonah's inability to understand DST ends up impressing a wealthy +backer who sets him up for a 2020 presidential run. +
  • +
+

Books, plays, and magazines

+
    +
  • +Jules Verne, Around the World in Eighty Days +(Le tour du monde en quatre-vingts jours), 1873. +Wall-clock time plays a central role in the plot. +European readers of the 1870s clearly held the U.S. press in +deep contempt; the protagonists cross the U.S. without once +reading a paper. +Available versions include +an English +translation, and +the original French +"with illustrations from the original 1873 French-language edition". +
  • +
  • +Nick Enright, Daylight Saving, 1989. +A fast-paced comedy about love and loneliness as the clocks turn back. +
  • +
  • +Umberto Eco, +The +Island of the Day Before +(L'isola del giorno prima), 1994. +"...the story of a 17th century Italian nobleman trapped near an island +on the International Date Line. Time and time zones play an integral +part in the novel." (Paul Eggert, 2006-04-22) +
  • +
  • +John Dunning, Two +O'Clock, Eastern Wartime, 2001. +Mystery, history, daylight saving time, and old-time radio. +
  • +
  • +Surrealist artist Guy Billout's work "Date Line" appeared on page 103 +of the 1999-11 Atlantic Monthly. +
  • +
  • +"Gloom, Gloom, Go Away" by Walter Kirn appeared on page 106 of Time +magazine's 2002-11-11 issue; among other things, it proposed +year-round DST as a way of lessening wintertime despair. +
  • +
+

Music

+

+Data on recordings of "Save That Time," Russ Long, Serrob Publishing, BMI:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ArtistKarrin Allyson
CDI Didn't Know About You
Copyright Date1993
LabelConcord Jazz, Inc.
IDCCD-4543
Track Time3:44
PersonnelKarrin Allyson, vocal; +Russ Long, piano; +Gerald Spaits, bass; +Todd Strait, drums
NotesCD notes "additional lyric by Karrin Allyson; +arranged by Russ Long and Karrin Allyson"
ADO Rating1 star
AMG Rating4 stars
Penguin Rating3.5 stars
 
ArtistKevin Mahogany
CDDouble Rainbow
Copyright Date1993
LabelEnja Records
IDENJ-7097 2
Track Time6:27
PersonnelKevin Mahogany, vocal; +Kenny Barron, piano; +Ray Drummond, bass; +Ralph Moore, tenor saxophone; +Lewis Nash, drums
ADO Rating1.5 stars
AMG Rating3 stars
Penguin Rating3 stars
 
ArtistJoe Williams
CDHere's to Life
Copyright Date1994
LabelTelarc International Corporation
IDCD-83357
Track Time3:58
PersonnelJoe Williams, vocal; +The Robert Farnon [39 piece] Orchestra
NotesThis CD is also available as part of a 3-CD package from +Telarc, "Triple Play" (CD-83461)
ADO Ratingblack dot
AMG Rating2 stars
Penguin Rating3 stars
 
ArtistCharles Fambrough
CDKeeper of the Spirit
Copyright Date1995
LabelAudioQuest Music
IDAQ-CD1033
Track Time7:07
PersonnelCharles Fambrough, bass; +Joel Levine, tenor recorder; +Edward Simon, piano; +Lenny White, drums; +Marion Simon, percussion
ADO Rating2 stars
AMG Ratingunrated
Penguin Rating3 stars
+
+

Also of note:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ArtistHolly Cole Trio
CDBlame It On My Youth
Copyright Date1992
LabelManhattan
IDCDP 7 97349 2
Total Time37:45
PersonnelHolly Cole, voice; +Aaron Davis, piano; +David Piltch, string bass
NotesLyrical reference to "Eastern Standard Time" in +Tom Waits' "Purple Avenue"
ADO Rating2.5 stars
AMG Rating3 stars
Penguin Ratingunrated
 
ArtistMilt Hinton
CDOld Man Time
Copyright Date1990
LabelChiaroscuro
IDCR(D) 310
Total Time149:38 (two CDs)
PersonnelMilt Hinton, bass; +Doc Cheatham, Dizzy Gillespie, Clark Terry, trumpet; +Al Grey, trombone; +Eddie Barefield, Joe Camel (Flip Phillips), Buddy Tate, +clarinet and saxophone; +John Bunch, Red Richards, Norman Simmons, Derek Smith, +Ralph Sutton, piano; +Danny Barker, Al Casey, guitar; +Gus Johnson, Gerryck King, Bob Rosengarden, Jackie Williams, +drums; +Lionel Hampton, vibraphone; +Cab Calloway, Joe Williams, vocal; +Buck Clayton, arrangements
NotesTunes include Old Man Time, Time After Time, +Sometimes I'm Happy, +A Hot Time in the Old Town Tonight, +Four or Five Times, Now's the Time, +Time on My Hands, This Time It's Us, +and Good Time Charlie. +Album info +is available.
ADO Rating3 stars
AMG Rating4.5 stars
Penguin Rating3 stars
 
ArtistAlan Broadbent
CDPacific Standard Time
Copyright Date1995
LabelConcord Jazz, Inc.
IDCCD-4664
Total Time62:42
PersonnelAlan Broadbent, piano; +Putter Smith, bass; +Frank Gibson, Jr., drums
NotesThe CD cover features an analemma for equation-of-time fans
ADO Rating1 star
AMG Rating4 stars
Penguin Rating3.5 stars
 
ArtistAnthony Braxton/Richard Teitelbaum
CDSilence/Time Zones
Copyright Date1996
LabelBlack Lion
IDBLCD 760221
Total Time72:58
PersonnelAnthony Braxton, sopranino and alto saxophones, +contrebasse clarinet, miscellaneous instruments; +Leo Smith, trumpet and miscellaneous instruments; +Leroy Jenkins, violin and miscellaneous instruments; +Richard Teitelbaum, modular moog and micromoog synthesizer
ADO Ratingblack dot
AMG Rating4 stars
 
ArtistCharles Gayle
CDTime Zones
Copyright Date2006
LabelTompkins Square
IDTSQ2839
Total Time49:06
PersonnelCharles Gayle, piano
ADO Rating1 star
AMG Rating4.5 stars
 
ArtistThe Get Up Kids
CDEudora
Copyright Date2001
LabelVagrant
ID357
Total Time65:12
NotesIncludes the song "Central Standard Time." Thanks to Colin Bowern for this information.
AMG Rating2.5 stars
 
ArtistColdplay
SongClocks
Copyright Date2003
LabelCapitol Records
ID52608
Total Time4:13
NotesWon the 2004 Record of the Year honor at the +Grammy Awards. Co-written and performed by Chris Martin, +great-great-grandson of DST inventor William Willett. +The song's first line is "Lights go out and I can't be saved".
 
ArtistJaime Guevara
SongQué +hora es
Date1993
Total Time3:04
NotesThe song protested "Sixto Hour" in Ecuador +(1992–3). Its lyrics include "Amanecía en mitad de la noche, los +guaguas iban a clase sin sol" ("It was dawning in the middle of the +night, the kids went to class without sun"). +
 
ArtistIrving Kahal and Harry Richman
SongThere Ought to be a Moonlight Saving Time
Copyright Date1931
NotesThis musical standard was a No. 1 hit for Guy Lombardo +in 1931, and was also performed by Maurice Chevalier, Blossom Dearie +and many others. The phrase "Moonlight saving time" also appears in +the 1995 country song "Not Enough Hours in the Night" written by Aaron +Barker, Kim Williams and Rob Harbin and performed by Doug +Supernaw.
 
ArtistThe Microscopic Septet
CDLobster Leaps In
Copyright Date2008
LabelCuneiform
ID272
Total Time73:05
NotesIncludes the song "Twilight Time Zone."
AMG Rating3.5 stars
ADO Rating2 stars
 
ArtistBob Dylan
CDThe Times They Are a-Changin'
Copyright Date1964
LabelColumbia
IDCK-8905
Total Time45:36
AMG Rating4.5 stars
ADO Rating1.5 stars
NotesThe title song is also available on "Bob Dylan's Greatest Hits" and "The Essential Bob Dylan."
 
ArtistLuciana Souza
CDTide
Copyright Date2009
LabelUniversal Jazz France
IDB0012688-02
Total Time42:31
AMG Rating3.5 stars
ADO Rating2.5 stars
NotesIncludes the song "Fire and Wood" with the lyric +"The clocks were turned back you remember/Think it's still November." +
 
ArtistKen Nordine
CDYou're Getting Better: The Word Jazz Dot Masters
Copyright Date2005
LabelGeffen
IDB0005171-02
Total Time156:22
ADO Rating1 star
AMG Rating4.5 stars
NotesIncludes the piece "What Time Is It" +("He knew what time it was everywhere...that counted").
 
ArtistChicago
CDChicago Transit Authority
Copyright Date1969
LabelColumbia
ID64409
Total Time1:16:20
AMG Rating4 stars
NotesIncludes the song "Does Anybody Really Know What Time It Is?"
 
ArtistEmanuele Arciuli
ComposerWilliam Duckworth
CDThe Time Curve Preludes
Copyright Date2023
LabelNeuma
Total Time44:46
NotesThe first work of postminimal music. Unlike minimalism, it does not assume that the listener has plenty of time.
+

Comics

+ +

Jokes

+
    +
  • +The idea behind daylight saving time was first proposed as a joke by +Benjamin Franklin. To enforce it, he suggested, "Every +morning, as soon as the sun rises, let all the bells in every church +be set ringing; and if that is not sufficient, let cannon be fired in +every street, to wake the sluggards effectually, and make them open +their eyes to see their true interest. All the difficulty will be in +the first two or three days: after which the reformation will be as +natural and easy as the present irregularity; for, ce n'est que le +premier pas qui coûte." +Franklin's +joke was first published on 1784-04-26 by the +Journal de Paris as an +anonymous letter translated into French. +
  • +
  • +"We've been using the five-cent nickel in this country since 1492. +Now that's pretty near 100 years, daylight saving." +(Groucho Marx as Captain Spaulding in Animal Crackers, 1930, +as noted by Will Fitzgerald) +
  • +
  • +BRADY. ...[Bishop Usher] determined that the Lord began the Creation +on the 23rd of October in the Year 4,004 B.C. at – uh, 9 A.M.! +
    +DRUMMOND. That Eastern Standard Time? (Laughter.) Or Rocky Mountain +Time? (More laughter.) It wasn't daylight-saving time, was it? Because +the Lord didn't make the sun until the fourth day! +
    +(From the play Inherit the Wind by Jerome Lawrence and Robert E. Lee, +filmed in 1960 with Spencer Tracy as Drummond and Fredric March as +Brady, and several other times. Thanks to Mark Brader.) +
  • +
  • +"Good news." +"What did they do? Extend Daylight Saving Time year round?" +(Professional tanner George Hamilton, in dialog from a +May, 1999 episode of the syndicated television series Baywatch) +
  • +
  • +"A fundamental belief held by Americans is that if you are on land, you +cannot be killed by a fish...So most Americans remain on land, believing +they're safe. Unfortunately, this belief – like so many myths, such as that +there's a reason for 'Daylight Saving Time' – is false." +(Dave Barry column, 2000-07-02) +
  • +
  • +"I once had sex for an hour and five minutes, but that was on the day +when you turn the clocks ahead." +(Garry Shandling, 52nd Annual Emmys, 2000-09-10) +
  • +
  • +"Would it impress you if I told you I invented Daylight Savings Time?" +("Sahjhan" to "Lilah" in dialog from the "Loyalty" episode of Angel, +originally aired 2002-02-25) +
  • +
  • +"I thought you said Tulsa was a three-hour flight." +"Well, you're forgetting about the time difference." +("Joey" and "Chandler" in dialog from the episode of Friends +entitled "The One With Rachel's Phone Number," originally aired 2002-12-05) +
  • +
  • +"Is that a pertinent fact, +or are you just trying to dazzle me with your command of time zones?" +(Kelsey Grammer as "Frasier Crane" to "Roz" from the episode of Frasier +entitled "The Kid," originally aired 1997-11-04) +
  • +
  • +"I put myself and my staff through this crazy, huge ordeal, all because +I refused to go on at midnight, okay? And so I work, you know, and +then I get this job at eleven, supposed to be a big deal. Then +yesterday daylight [saving] time ended. Right now it's basically +midnight." (Conan O'Brien on the 2010-11-08 premiere of Conan.) +
  • +
  • +"The best method, I told folks, was to hang a large clock high on a +barn wall where all the cows could see it. If you have Holsteins, you +will need to use an analog clock." (Jerry Nelson, "How +to adjust dairy cows to daylight saving time", Successful Farming, +2017-10-09.) +
  • +
  • +"And now, driving to California, I find that I must enter a password +in order to change the time zone on my laptop clock. Evidently, +someone is out to mess up my schedule and my clock must be secured." +(Garrison Keillor, +"We've +never been here before", 2017-08-22) +
  • +
  • +"Well, in my time zone that's all the time I have, +but maybe in your time zone I haven't finished yet. So stay tuned!" +(Goldie Hawn, Rowan & Martin's Laugh-In No. 65, 1970-03-09) +
  • +
+

See also

+ +
+
+This web page is in the public domain, so clarified as of +2009-05-17 by Arthur David Olson. +
+Please send corrections to this web page to the +time zone mailing list. +
+ + diff --git a/lib-tzcode/tz-how-to.html b/lib-tzcode/tz-how-to.html new file mode 100644 index 0000000..9e438f9 --- /dev/null +++ b/lib-tzcode/tz-how-to.html @@ -0,0 +1,719 @@ + + + +How to Read the tz Database + + + + +

How to Read the tz +Database Source Files

+

by Bill Seymour

+

This guide uses the America/Chicago and +Pacific/Honolulu zones as examples of how to infer +times of day from the tz database +source files. It might be helpful, but not absolutely necessary, +for the reader to have already downloaded the +latest release of the database and become familiar with the basic layout +of the data files. The format is explained in the “man +page” for the zic compiler, zic.8.txt, in +the code subdirectory. +Although this guide covers many of the common cases, it is not a +complete summary of what zic accepts; the man page is the +authoritative reference.

+ +

We’ll begin by talking about the rules for changing between standard +and daylight saving time since we’ll need that information when we talk +about the zones.

+ +

First, let’s consider the special daylight saving time rules +for Chicago (from the northamerica file in +the data subdirectory):

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
From the Source File
+ +
+
+#Rule NAME    FROM TO    -   IN  ON      AT   SAVE LETTER
+Rule  Chicago 1920 only  -   Jun 13      2:00 1:00 D
+Rule  Chicago 1920 1921  -   Oct lastSun 2:00 0    S
+Rule  Chicago 1921 only  -   Mar lastSun 2:00 1:00 D
+Rule  Chicago 1922 1966  -   Apr lastSun 2:00 1:00 D
+Rule  Chicago 1922 1954  -   Sep lastSun 2:00 0    S
+Rule  Chicago 1955 1966  -   Oct lastSun 2:00 0    S
+
+
Reformatted a Bit
FromToOnAtAction
1920 onlyJune 13th02:00 localgo to daylight saving time
19201921last Sundayin Octoberreturn to standard time
1921 onlyin Marchgo to daylight saving time
19221966in April
1954in Septemberreturn to standard time
19551966in October
+ +

The FROM and TO columns, respectively, specify the +first and last calendar years defining a contiguous range over which a specific +Rule line is to apply. The keyword only can be used in the +TO field to repeat the value of the FROM field in the +event that a rule should only apply to a single year. Often, the keyword +max is used to extend a rule’s application into the +indefinite future; it is a platform-agnostic stand-in for the largest +representable year. + +

The next column, -, is reserved; for compatibility with earlier +releases, it always contains a hyphen, which acts as a kind of null value. +Prior to the 2020b release, it was called the TYPE field, though +it had not been used in the main data since the 2000e release. +An obsolescent supplementary file used the +field as a proof-of-concept to allow zic to apply a given Rule +line only to certain “types” of years within the specified range as +dictated by the output of a separate script, such as: only years which would +have a US presidential election, or only years which wouldn’t. + +

The SAVE column contains the local (wall clock) offset from +local standard time. +This is usually either zero for standard time or one hour for daylight +saving time; but there’s no reason, in principle, why it can’t +take on other values. + +

The LETTER (sometimes called LETTER/S) +column can contain a variable +part of the usual abbreviation of the time zone’s name, or it can just +be a hyphen if there’s no variable part. For example, the abbreviation +used in the central time zone will be either “CST” or +“CDT”. The variable part is ‘S’ or ‘D’; +and, sure enough, that’s just what we find in +the LETTER column +in the Chicago rules. More about this when we talk about +“Zone” lines. + +

One important thing to notice is that “Rule” lines +want at once to be both transitions and steady states: +

    +
  • On the one hand, they represent transitions between standard and +daylight saving time; and any number of Rule lines can be in effect +during a given period (which will always be a non-empty set of +contiguous calendar years).
  • +
  • On the other hand, the SAVE and LETTER +columns contain state that exists between transitions. More about this +when we talk about the US rules.
  • +
+ +

In the example above, the transition to daylight saving time +happened on the 13th of June in 1920, and on +the last Sunday in March in 1921; but the return to standard time +happened on the last Sunday in October in both of those +years. Similarly, the rule for changing to daylight saving time was +the same from 1922 to 1966; but the rule for returning to standard +time changed in 1955. Got it?

+ +

OK, now for the somewhat more interesting “US” rules:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
From the Source File
+ +
+
+#Rule NAME FROM TO    -   IN  ON        AT   SAVE LETTER/S
+Rule  US   1918 1919  -   Mar lastSun  2:00  1:00 D
+Rule  US   1918 1919  -   Oct lastSun  2:00  0    S
+Rule  US   1942 only  -   Feb 9        2:00  1:00 W # War
+Rule  US   1945 only  -   Aug 14      23:00u 1:00 P # Peace
+Rule  US   1945 only  -   Sep 30       2:00  0    S
+Rule  US   1967 2006  -   Oct lastSun  2:00  0    S
+Rule  US   1967 1973  -   Apr lastSun  2:00  1:00 D
+Rule  US   1974 only  -   Jan 6        2:00  1:00 D
+Rule  US   1975 only  -   Feb 23       2:00  1:00 D
+Rule  US   1976 1986  -   Apr lastSun  2:00  1:00 D
+Rule  US   1987 2006  -   Apr Sun>=1   2:00  1:00 D
+Rule  US   2007 max   -   Mar Sun>=8   2:00  1:00 D
+Rule  US   2007 max   -   Nov Sun>=1   2:00  0    S
+
+
Reformatted a Bit
FromToOnAtAction
19181919last Sundayin March02:00 localgo to daylight saving time
in Octoberreturn to standard time
1942 onlyFebruary 9thgo to “war time”
1945 onlyAugust 14th23:00 UT + rename “war time” to “peace
time;” + clocks don’t change +
September 30th02:00 localreturn to standard time
19672006last Sundayin October
1973in Aprilgo to daylight saving time
1974 onlyJanuary 6th
1975 onlyFebruary 23rd
19761986last Sundayin April
19872006first Sunday
2007presentsecond Sunday in March
first Sunday in Novemberreturn to standard time
+ +

There are two interesting things to note here.

+ +

First, the time that something happens (in the AT +column) is not necessarily the local (wall clock) time. The time can be +suffixed with ‘s’ (for “standard”) to mean +local standard time, different from local (wall clock) time when observing +daylight saving time; or it can be suffixed with ‘g’, +‘u’, or ‘z’, all three of which mean the +standard time at the +prime meridian. +‘g’ stands for “GMT”; +‘u’ stands for “UT” or “UTC” +(whichever was official at the time); ‘z’ stands for the +nautical time zone +Z (a.k.a. “Zulu” which, in turn, stands for ‘Z’). +The time can also be suffixed with ‘w’ meaning local (wall +clock) time; but it usually isn’t because that’s the +default.

+ +

Second, the day in the ON column, in addition to +“lastSun” or a particular day of the month, +can have the form, “Sun>=x” or +“Sun<=x,” where x is a day +of the month. For example, “Sun>=8” means +“the first Sunday on or after the eighth of the month,” in +other words, the second Sunday of the month. Furthermore, although +there are no examples above, the weekday needn’t be +“Sun” in either form, but can be the usual +three-character English abbreviation for any day of the week.

+ +

And the US rules give us more examples of a couple of things +already mentioned:

+ +
    +
  • The rules for changing to and from daylight saving time are +actually different sets of rules; and the two sets can change +independently. Consider, for example, that the rule for the return to +standard time stayed the same from 1967 to 2006; but the rule for the +transition to daylight saving time changed several times in the same +period. There can also be periods, 1946 to 1966 for example, when no +rule from this group is in effect, and so either no transition +happened in those years, or some other rule is in effect (perhaps a +state or other more local rule).
  • + +
  • The SAVE and LETTER columns +contain steady state, not transitions. Consider, for example, +the transition from “war time” to “peace time” +that happened on August 14, 1945. The “1:00” in +the SAVE column is not an instruction to advance +the clock an hour. It means that clocks should be one hour +ahead of standard time, which they already are because of the previous +rule, so there should be no change.
  • + +
+ +

OK, now let’s look at a Zone record:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
From the Source File
+ +
+
+#Zone       NAME      STDOFF   RULES FORMAT [UNTIL]
+Zone  America/Chicago -5:50:36 -       LMT  1883 Nov 18 12:09:24
+                      -6:00    US      C%sT 1920
+                      -6:00    Chicago C%sT 1936 Mar  1  2:00
+                      -5:00    -       EST  1936 Nov 15  2:00
+                      -6:00    Chicago C%sT 1942
+                      -6:00    US      C%sT 1946
+                      -6:00    Chicago C%sT 1967
+                      -6:00    US      C%sT
+
+
Columns Renamed
Standard Offset
+ from Prime + Meridian
Daylight
Saving Time
Abbreviation(s) | Ending at Local Time
Date | Time
−5:50:36 | not observed | LMT | 1883-11-18 | 12:09:24
−6:00:00 | US rules | CST or CDT | 1920-01-01 | 00:00:00
Chicago rules | 1936-03-01 | 02:00:00
−5:00:00 | not observed | EST | 1936-11-15
−6:00:00 | Chicago rules | CST or CDT | 1942-01-01 | 00:00:00
US rules | CST, CWT or CPT | 1946-01-01
Chicago rules | CST or CDT | 1967-01-01
US rules
+ +

There are a couple of interesting differences between Zones and Rules.

+ +

First, and somewhat trivially, whereas Rules are considered to +contain one or more records, a Zone is considered to be a single +record with zero or more continuation lines. Thus, the keyword, +“Zone,” and the zone name are not +repeated. The last line is the one without anything in +the [UNTIL] column.

+ +

Second, and more fundamentally, each line of a Zone represents a +steady state, not a transition between states. The state exists from +the date and time in the previous line’s [UNTIL] +column up to the date and time in the current +line’s [UNTIL] column. In other words, the date and +time in the [UNTIL] column is the instant that separates +this state from the next. Where that would be ambiguous because +we’re setting our clocks back, the [UNTIL] column +specifies the first occurrence of the instant. The state specified by +the last line, the one without anything in the [UNTIL] +column, continues to the present.

+ +

The first line typically specifies the mean solar time observed +before the introduction of standard time. Since there’s no line before +that, it has no beginning. 8-) For some places near the International +Date Line, the first two lines will show solar times +differing by 24 hours; this corresponds to a movement of the Date +Line. For example:

+ +
+#Zone NAME          STDOFF   RULES FORMAT [UNTIL]
+Zone America/Juneau 15:02:19 -     LMT    1867 Oct 18
+                    -8:57:41 -     LMT    ...
+
+ +

When Alaska was purchased from Russia in 1867, the Date Line moved +from the Alaska/Canada border to the Bering Strait; and the time in +Alaska was then 24 hours earlier than it had +been. <aside>(6 October in the Julian calendar, +which Russia was still using then for religious reasons, was followed +by a second instance of the same day with a different name, 18 +October in the Gregorian calendar. Isn’t civil time +wonderful? 8-))</aside>

+ +

The abbreviation, “LMT” stands for “local mean +time”, which is an invention of +the tz +database and was probably never actually used during the +period. Furthermore, the value is almost certainly wrong except in the +archetypal place after which the zone is named. (The tz database +usually doesn’t provide a separate Zone record for places where +nothing significant happened after 1970.)

+ +

The RULES column tells us whether daylight saving time is being observed: +

    +
  • A hyphen, a kind of null value, means that we have not set our +clocks ahead of standard time.
  • + +
  • An amount of time (usually but not necessarily “1:00” +meaning one hour) means that we have set our clocks ahead by that +amount.
  • + +
  • Some alphabetic string means that we might have set our +clocks ahead; and we need to check the rule the name of which is the +given alphabetic string.
  • +
+ +

An example of a specific amount of time is:

+
+#Zone NAME            STDOFF RULES FORMAT [UNTIL]
+Zone Pacific/Honolulu ...                 1933 Apr 30  2:00
+                      -10:30 1:00  HDT    1933 May 21 12:00
+                      ...
+
+ +

Hawaii tried daylight saving time for three weeks in 1933 and +decided they didn’t like it. 8-) Note that +the STDOFF column always contains the standard time +offset, so the local (wall clock) time during this period was GMT − +10:30 + 1:00 = GMT − 9:30.

+ +

The FORMAT column specifies the usual abbreviation of +the time zone name. It should have one of four forms:

+
    + +
  • a time zone abbreviation that is a string of three or more +characters that are either ASCII alphanumerics, +“+”, or “-”
  • + +
  • the string “%z”, in which case the +“%z” will be replaced by a numeric time zone +abbreviation
  • + +
  • a pair of time zone abbreviations separated by a slash +(‘/’), in which case the first string is the +abbreviation for the standard time name and the second string is the +abbreviation for the daylight saving time name
  • + +
  • a string containing “%s”, in which case +the “%s” will be replaced by the text in the +appropriate Rule’s LETTER column, and the resulting +string should be a time zone abbreviation
  • +
+ +

The last two make sense only if there’s a named rule in effect.

+ +

An example of a slash is:

+
+#Zone NAME          STDOFF RULES FORMAT  [UNTIL]
+Zone  Europe/London ...                  1996
+                    0:00   EU    GMT/BST
+
+ +

The current time in the UK is called either Greenwich mean time or +British summer time.

+ +

One wrinkle, not fully explained in zic.8.txt, is what +happens when switching to a named rule. To what values should +the SAVE and LETTER data be initialized?

+ +
    +
  • If at least one transition has happened, use +the SAVE and LETTER data from the most +recent.
  • + +
  • If switching to a named rule before any transition has happened, +assume standard time (SAVE zero), and use +the LETTER data from the earliest transition with +a SAVE of zero. + +
+ +

And three last things about the FORMAT column:

+
    + +
  • The tz +database gives abbreviations for time zones +in popular English-language usage. For +example, the last line in +Zone Pacific/Honolulu (shown below) gives +“HST” for “Hawaii standard time” even though the +legal +name for that time zone is “Hawaii-Aleutian standard time.” +This author has read that there are also some places in Australia where +popular time zone names differ from the legal ones. + +
  • No attempt is made to localize +the abbreviations. They are intended to be the values returned through the +"%Z" format specifier to +C’s +strftime +function in the +“C” locale. + +
  • If there is no generally accepted abbreviation for a time zone, +a numeric offset is used instead, e.g., +07 for 7 hours +ahead of Greenwich. By convention, -00 is used in a +zone while uninhabited, where the offset is zero but in some sense +the true offset is undefined. +
+ +

As a final example, here’s the complete history for Hawaii:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Relevant Excerpts from the US Rules
+ +
+
+#Rule NAME FROM TO   -    IN  ON      AT     SAVE LETTER/S
+Rule  US   1918 1919 -    Oct lastSun  2:00  0    S
+Rule  US   1942 only -    Feb  9       2:00  1:00 W # War
+Rule  US   1945 only -    Aug 14      23:00u 1:00 P # Peace
+Rule  US   1945 only -    Sep lastSun  2:00  0    S
+
+
The Zone Record
+ +
+
+#Zone NAME            STDOFF    RULES FORMAT [UNTIL]
+Zone Pacific/Honolulu -10:31:26 -     LMT    1896 Jan 13 12:00
+                      -10:30    -     HST    1933 Apr 30  2:00
+                      -10:30    1:00  HDT    1933 May 21 12:00
+                      -10:30    US    H%sT   1947 Jun  8  2:00
+                      -10:00    -     HST
+
+
What We Infer
Wall-Clock
Offset from
Prime Meridian
Adjust
Clocks
Time Zone | Ending at Local Time
Abbrv. | Name | Date | Time
−10:31:26 | LMT | local mean time | 1896-01-13 | 12:00
−10:30 | +0:01:26 | HST | Hawaii standard time | 1933-04-30 | 02:00
−9:30 | +1:00 | HDT | Hawaii daylight time | 1933-05-21 | 12:00
−10:30¹ | −1:00¹ | HST¹ | Hawaii standard time | 1942-02-09 | 02:00
−9:30 | +1:00 | HWT | Hawaii war time | 1945-08-14 | 13:30²
0 | HPT | Hawaii peace time | 1945-09-30 | 02:00
−10:30 | −1:00 | HST | Hawaii standard time | 1947-06-08
−10:00³ | +0:30³
+ ¹Switching to US rules…most recent transition (in 1919) was to standard time +
+ ²23:00 UT + + (−9:30) = 13:30 local +
+ ³Since 1947–06–08T12:30Z, + the civil time in Hawaii has been + UT/UTC + − 10:00 year-round. +
+ +

There will be a short quiz later. 8-)

+ +
+
+This web page is in the public domain, so clarified as of +2015-10-20 by Bill Seymour. +
+All suggestions and corrections will be welcome; all flames will be amusing. +Mail to was at pobox dot com. +
+ + diff --git a/lib-tzcode/tz-link.html b/lib-tzcode/tz-link.html new file mode 100644 index 0000000..43190dd --- /dev/null +++ b/lib-tzcode/tz-link.html @@ -0,0 +1,1216 @@ + + + +Time zone and daylight saving time data + + + + +

Time zone and daylight saving time data

+

+Time zone and +daylight-saving +rules are controlled by individual +governments. They are sometimes changed with little notice, and their +histories and planned futures are often recorded only fitfully. Here +is a summary of attempts to organize and record relevant data in this +area. +

+

Outline

+ + +
+

The tz database

+

+The public-domain +time zone database contains code and data +that represent the history of local time +for many representative locations around the globe. +It is updated periodically to reflect changes made by political bodies +to time zone boundaries and daylight saving rules. +This database (known as tz, +tzdb, or zoneinfo) +is used by several implementations, +including +the +GNU +C Library (used in +GNU/Linux), +Android, +FreeBSD, +NetBSD, +OpenBSD, +Chromium OS, +Cygwin, +MariaDB, +MINIX, +MySQL, +webOS, +AIX, +iOS, +macOS, +Microsoft Windows, +OpenVMS, +Oracle Database, and +Oracle Solaris.

+

+Each main entry in the database represents a timezone +for a set of civil-time clocks that have all agreed since 1970. +Timezones are typically identified by continent or ocean and then by the +name of the largest city within the region containing the clocks. +For example, America/New_York +represents most of the US eastern time zone; +America/Phoenix represents most of Arizona, which +uses mountain time without daylight saving time (DST); +America/Detroit represents most of Michigan, which uses +eastern time but with different DST rules in 1975; +and other entries represent smaller regions like Starke County, +Indiana, which switched from central to eastern time in 1991 +and switched back in 2006. +To use the database on an extended POSIX +implementation set the TZ +environment variable to the location's full name, +e.g., TZ="America/New_York".

+

+Associated with each timezone is a history of offsets from +Universal +Time (UT), which is Greenwich Mean +Time (GMT) with days beginning at midnight; +for timestamps after 1960 this is more precisely Coordinated +Universal Time (UTC). +The database also records when daylight saving time was in use, +along with some time zone abbreviations such as EST +for Eastern Standard Time in the US.

+
+ +
+

Downloading the tz database

+

+The following shell commands download +the latest release's two +tarballs +to a GNU/Linux or similar host.

+
mkdir tzdb
+cd tzdb
+wget https://www.iana.org/time-zones/repository/tzcode-latest.tar.gz
+wget https://www.iana.org/time-zones/repository/tzdata-latest.tar.gz
+gzip -dc tzcode-latest.tar.gz | tar -xf -
+gzip -dc tzdata-latest.tar.gz | tar -xf -
+
+

Alternatively, the following shell commands download the same +release in a single-tarball format containing extra data +useful for regression testing:

+
wget https://www.iana.org/time-zones/repository/tzdb-latest.tar.lz
+lzip -dc tzdb-latest.tar.lz | tar -xf -
+
+

These commands use convenience links to the latest release +of the tz database hosted by the +Time Zone Database website +of the Internet Assigned Numbers +Authority (IANA). +Older releases are in files named +tzcodeV.tar.gz, +tzdataV.tar.gz, and +tzdb-V.tar.lz, +where V is the version. +Since 1996, each version has been a four-digit year followed by +a lower-case letter (a through z, +then za through zz, then zza +through zzz, and so on). +Since version 2022a, each release has been distributed in +POSIX +ustar interchange format, compressed as described above; +older releases use a nearly compatible format. +Since version 2016h, each release has contained a text file named +"version" whose first (and currently only) line is the version. +Older releases are archived, +and are also available in an +FTP directory via a +less secure protocol.

+

Alternatively, a development repository of code and data can be +retrieved from GitHub via the shell +command:

+
git clone https://github.com/eggert/tz
+
+

+Since version 2012e, each release has been tagged in development repositories. +Untagged commits are less well tested and probably contain +more errors.

+

+After obtaining the code and data files, see the +README file for what to do next. +The code lets you compile the tz source files into +machine-readable binary files, one for each location. The binary files +are in a special timezone information format (TZif) +specified by Internet +RFC 8536. +The code also lets +you read a TZif file and interpret timestamps for that +location.

+
+ +
+

Changes to the tz database

+

+The tz code and data +are by no means authoritative. If you find errors, please +send changes to tz@iana.org, +the time zone mailing list. You can also subscribe to it +and browse the archive of old +messages. +Metadata for mailing list +discussions and corresponding data changes can be +generated automatically. +

+

+Changes to the tz code and data are often +propagated to clients via operating system updates, so +client tz data can often be corrected by +applying these updates. With GNU/Linux and similar systems, if your +maintenance provider has not yet adopted the +latest tz data, you can often short-circuit +the process by tailoring the generic instructions in +the tz README file and installing the latest +data yourself. System-specific instructions for installing the +latest tz data have also been published +for AIX, +Android, +ICU, +IBM +JDK, +Joda-Time, MySQL, +Noda Time, and OpenJDK/Oracle JDK. +

+

Since version 2013a, +sources for the tz database have been +UTF-8 +text files +with lines terminated by LF, +which can be modified by common text editors such +as GNU Emacs, +gedit, and +vim. +Specialized source-file editing can be done via the +Sublime +zoneinfo package for Sublime Text and the VSCode +zoneinfo extension for Visual +Studio Code. +

+

+For further information about updates, please see +Procedures for +Maintaining the Time Zone Database (Internet RFC 6557). More detail can be +found in Theory and pragmatics of the +tz code and data. +A0 TimeZone Migration +displays changes between recent tzdb versions. +

+
+ +
+

Coordinating with governments and distributors

+

+As discussed in +"How +Time Zones Are Coordinated", the time zone database relies on +collaboration among governments, the time zone database volunteer +community, and data distributors downstream. +

+If your government plans to change its time zone boundaries or +daylight saving rules, please send email to tz@iana.org well in advance, +as this will lessen confusion and will coordinate updates to many cell phones, +computers, and other devices around the world. +In your email, please cite the legislation or regulation that specifies +the change, so that it can be checked for details such as the exact times +when clock transitions occur. +It is OK if a rule change is planned to affect clocks +far into the future, as a long-planned change can easily be reverted +or otherwise altered with a year's notice before the change would have +affected clocks.

+

+There is no fixed schedule for tzdb releases. +However, typically a release occurs every few months. +Many downstream timezone data distributors wait for +a tzdb release before they produce an update +to time zone behavior in consumer devices and software products. +After a release, various parties must integrate, test, +and roll out an update before end users see changes. +These updates can be expensive, for both the quality +assurance process and the overall cost of shipping and installing +updates to each device's copy of tzdb. +Updates may be batched with other updates and may take substantial +time to reach end users after a release. +Older devices may no longer be supported and thus may never be updated, +which means they will continue to use out-of-date rules.

+

+For these reasons any rule change should be promulgated at least a +year before it affects how clocks operate; otherwise, there is a good +chance that many clocks will be wrong due to delays in propagating updates, +and that residents will be confused or even actively resist the change. +The shorter the notice, the more likely clock problems will arise; see "On +the Timing of Time Zone Changes" for examples. +

+
+ +
+

Commentary on the tz database

+ +
+ +
+

Web sites using recent versions of the +tz database

+

+These are listed roughly in ascending order of complexity and fanciness. +

+ +
+ +
+

Network protocols for tz data

+ +
+ +
+

Other tz compilers

+

Although some of these do not fully support +tz data, in recent tzdb +distributions you can generally work around compatibility problems by +running the command make rearguard_tarballs and compiling +from the resulting tarballs instead.

+ +
+ +
+

Other TZif readers

+
    +
  • The GNU C +Library +has an independent, thread-safe implementation of +a TZif file reader. +This library is freely available under the LGPL +and is widely used in GNU/Linux systems.
  • +
  • GNOME's +GLib has +a TZif file reader written in C that +creates a GTimeZone object representing sets +of UT offsets. +It is freely available under the LGPL.
  • +
  • The +BDE Standard Library's +baltzo::TimeZoneUtil component contains a C++ +implementation of a TZif file reader. It is freely available under +the Apache License.
  • +
  • CCTZ is a simple C++ +library that translates between UT and civil time and +can read TZif files. It is freely available under the Apache +License.
  • +
  • Timelib is a C +library that reads TZif files and converts +timestamps from one time zone or format to another. +It is used by PHP, +HHVM, +and MongoDB. +It is freely available under the MIT license.
  • +
  • Tcl, mentioned above, also contains a +TZif file reader.
  • +
  • +DateTime::TimeZone::Tzfile +is a TZif file reader written in Perl. +It is freely available under the same terms as Perl +(dual GPL and Artistic license).
  • +
  • Python has a zoneinfo.ZoneInfo +class that reads TZif data and creates objects +that represent tzdb timezones. +Python is freely available under the +Python Software Foundation +License. +A companion PyPI module +tzdata +supplies TZif data if the underlying system data cannot be found; +it is freely available under the Apache License.
  • +
  • The +public-domain tz.js +library contains a Python tool that +converts TZif data into +JSON-format data suitable for use +in its JavaScript library for time zone conversion. Dates before 1970 +are not supported.
  • +
  • The timezone-olson +package contains Haskell code that +parses and uses TZif data. It is freely +available under a BSD-style license.
  • +
+
+ +
+

Other tz-based time zone software

+ +
+ +
+

Other time zone databases

+ +
+ +
+

Maps

+ +
+ +
+

Time zone boundaries

+

Geographical boundaries between timezones are available +from several Internet +geolocation +services and other sources.

+ +
+ +
+

Civil time concepts and history

+ +
+ +
+

National histories of legal time

+
+
Australia
+
The Parliamentary Library commissioned a research +paper on daylight saving time in Australia. +The Bureau of Meteorology publishes a list of Implementation +Dates of Daylight Savings Time within Australia.
+
Belgium
+
The Royal Observatory of Belgium maintains a table of time in +Belgium (in +Dutch and French).
+
Brazil
+
The Time Service Department of the National Observatory +records Brazil's daylight saving time decrees (in +Portuguese).
+
Canada
+
National Research Council Canada publishes current +and some older information about time +zones and daylight saving time.
+
Chile
+
The Hydrographic and Oceanographic Service of the Chilean Navy publishes a +history of +Chile's official time (in Spanish).
+
China
+
The Hong Kong Observatory maintains a +history of + summer time in Hong Kong, +and Macau's Meteorological and Geophysical Bureau maintains a similar +history for Macau. +Unfortunately the latter is incomplete and has errors.
+
Czech Republic
+
When daylight saving time starts and ends (in Czech) +summarizes and cites historical DST regulations.
+
Germany
+
The National Institute for Science and Technology maintains the Realisation +of Legal Time in Germany.
+
Israel
+
The Interior Ministry periodically issues announcements (in Hebrew).
+
Malaysia
+
See Singapore below.
+
Mexico
+
The Investigation and Analysis Service of the Mexican Library of +Congress has published a history of Mexican local time (in Spanish).
+
Netherlands
+
Legal time in the Netherlands (in Dutch) +covers the history of local time in the Netherlands from ancient times.
+
New Zealand
+
The Department of Internal Affairs maintains a brief History of +Daylight Saving.
+
Palestine
+
The Ministry of Telecom and IT publishes a history of clock changes (in Arabic).
+
Portugal
+
The Lisbon Astronomical Observatory publishes a +history of +legal time (in Portuguese).
+
Singapore
+
Why +is Singapore in the "Wrong" Time Zone? details the +history of legal time in Singapore and Malaysia.
+
United Kingdom
+
History of +legal time in Britain discusses in detail the country +with perhaps the best-documented history of clock adjustments.
+
United States
+
The Department of Transportation's Recent +Time Zone Proceedings lists changes to time zone boundaries.
+
Uruguay
+
The Oceanography, Hydrography, and Meteorology Service of the Uruguayan +Navy (SOHMA) publishes an annual almanac +(in Spanish).
+
+
+ +
+

Costs and benefits of time shifts

+

Various sources argue for and against daylight saving time and time +zone shifts, and many scientific studies have been conducted. This +section summarizes reviews and position statements based on +scientific literature in the area.

+ +
+ +
+

Precision timekeeping

+
    +
  • The +Science of Timekeeping is a thorough introduction +to the theory and practice of precision timekeeping.
  • +
  • The Science of +Time 2016 contains several freely readable papers.
  • +
  • NTP: The Network +Time Protocol (Internet RFC 5905) +discusses how to synchronize clocks of +Internet hosts.
  • +
  • The Huygens +family of software algorithms can achieve accuracy to a few tens of +nanoseconds in scalable server farms without special hardware.
  • +
  • The Precision +Time Protocol (IEEE 1588) +can achieve submicrosecond clock accuracy on a local area network +with special-purpose hardware.
  • +
  • Timezone +Options for DHCP +(Internet RFC 4833) +specifies a DHCP +option for a server to configure +a client's time zone and daylight saving settings automatically.
  • +
  • Time +Scales describes astronomical time scales like +TDT, +TCG, and +TDB. +
  • The IAU's SOFA +collection contains C and Fortran +code for converting among time scales like +TAI, +TDB, TDT and +UTC. It is freely available under the +SOFA license.
  • +
  • Mars24 Sunclock +– Time on Mars describes Airy Mean Time (AMT) and the +diverse local time +scales used by each landed mission on Mars.
  • +
  • LeapSecond.com is +dedicated not only to leap seconds but to precise time and frequency +in general. It covers the state of the art in amateur timekeeping, and +how the art has progressed over the past few decades.
  • +
  • The rules for leap seconds are specified in Annex 1 (Time scales) of Standard-frequency +and time-signal emissions, International Telecommunication Union – +Radiocommunication Sector (ITU-R) Recommendation TF.460-6 (02/2002).
  • +
  • IERS +Bulletins contains official publications of the International +Earth Rotation and Reference Systems Service, which decides when leap +seconds occur. The tz code and data support leap seconds +via an optional "right" configuration where a computer's internal +time_t integer clock counts every TAI second, +as opposed to the default "posix" configuration +where the internal clock ignores leap seconds. +The two configurations agree for timestamps starting with 1972-01-01 00:00:00 +UTC (time_t 63 072 000) and diverge for +timestamps starting with time_t 78 796 800, +which corresponds to the first leap second +1972-06-30 23:59:60 UTC in the "right" configuration, +and to +1972-07-01 00:00:00 UTC in the "posix" configuration. +In practice the two configurations also agree for timestamps before +1972 even though the historical situation is messy, partly because +neither UTC nor TAI +is well-defined for sufficiently old timestamps.
  • +
  • Leap Smear +discusses how to gradually adjust POSIX clocks near a +leap second so that they disagree with UTC by at most a +half second, even though every POSIX minute has exactly +sixty seconds. This approach works with the default tz +"posix" configuration, is supported by +the NTP reference implementation, supports conversion between +UTC and smeared POSIX timestamps, and is used by major +cloud service providers. However, according to +§3.7.1 of +Network Time Protocol Best Current Practices +(Internet RFC 8633), leap smearing is not suitable for +applications requiring accurate UTC or civil time, +and is intended for use only in single, well-controlled environments.
  • +
  • The Leap +Second Discussion List covers McCarthy +and Klepczynski's 1999 proposal to discontinue leap seconds, +discussed further in +The +leap second: its history and possible future. +UTC +might be redefined +without Leap Seconds gives pointers on this +contentious issue. +The General Conference on Weights and Measures +voted in 2022 +to discontinue the use of leap seconds by 2035, replacing them with an +as-yet-undetermined scheme some time after the year 2135. +
  • +
+
+ +
+

Time notation

+
    +
  • The Unicode Common Locale Data +Repository (CLDR) Project has localizations for time +zone names, abbreviations, identifiers, and formats. For example, it +contains French translations for "Eastern European Summer Time", +"EEST", and +"Bucharest". Its +by-type +charts show these values for many locales. Data values are available in +both LDML +(an XML format) and JSON. +
  • +A summary of +the international standard date and time notation covers +ISO +8601-1:2019 – Date and time – Representations for information +interchange – Part 1: Basic rules.
  • +
  • +XML +Schema: Datatypes – dateTime specifies a format inspired by +ISO 8601 that is in common use in XML data.
  • +
  • §3.3 of +Internet Message Format (Internet RFC 5322) +specifies the time notation used in email and HTTP +headers.
  • +
  • +Date and Time +on the Internet: Timestamps (Internet RFC 3339) +specifies an ISO 8601 +profile for use in new Internet +protocols.
  • +
  • +Date & Time +Formats on the Web surveys web- and Internet-oriented date and time +formats.
  • +
  • Alphabetic time zone abbreviations should not be used as unique +identifiers for UT offsets as they are ambiguous in +practice. For example, in English-speaking North America +"CST" denotes 6 hours behind UT, +but in China it denotes 8 hours ahead of UT, +and French-speaking North Americans prefer +"HNC" to +"CST". The tz +database contains English abbreviations for many timestamps; +unfortunately some of these abbreviations were merely the database maintainers' +inventions, and these have been removed when possible.
  • +
  • Numeric time zone abbreviations typically count hours east of +UT, e.g., +09 for Japan and +−10 for Hawaii. However, the POSIX +TZ environment variable uses the opposite convention. +For example, one might use TZ="JST-9" and +TZ="HST10" +for Japan and Hawaii, respectively. If the +tz database is available, it is usually better to use +settings like TZ="Asia/Tokyo" and +TZ="Pacific/Honolulu" instead, as this should avoid +confusion, handle old timestamps better, and insulate you better from +any future changes to the rules. One should never set +POSIX TZ to a value like +"GMT-9", though, since this would incorrectly imply that +local time is nine hours ahead of UT and the time zone +is called "GMT".
  • +
+
+ +
+

See also

+ +
+ +
+
+This web page is in the public domain, so clarified as of +2009-05-17 by Arthur David Olson. +
+Please send corrections to this web page to the +time zone mailing list. +
+ + diff --git a/lib-tzcode/tzcode.h b/lib-tzcode/tzcode.h new file mode 100644 index 0000000..745b703 --- /dev/null +++ b/lib-tzcode/tzcode.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 Cisco Systems, Inc. and its affiliates + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + +/* This is the minimal public API. It must be in sync with private.h */ + +#include + +#define TZ_ASCTIME_BUF_SIZE 26 // Must be the same as STD_ASCTIME_BUF_SIZE in asctime.c + +typedef void *timezone_t; + +/* Patched in localtime.c to allow malloc and free to be overridden + */ +extern void *(*tz_malloc)(size_t size); +extern void (*tz_free)(void *ptr); + +/* Prototypes of useful functions. tz_asctime_r is provided for testing. Note the tz_ prefixing to prevent collisions. 
+ */ +char *tz_asctime_r(struct tm const *restrict timeptr, char *restrict buf); +struct tm *tz_localtime_rz(timezone_t restrict, time_t const *restrict, struct tm *restrict); +time_t tz_mktime_z(timezone_t restrict, struct tm *restrict); +timezone_t tz_tzalloc(char const *); +void tz_tzfree(timezone_t); diff --git a/lib-tzcode/tzfile.5 b/lib-tzcode/tzfile.5 new file mode 100644 index 0000000..59d9f6b --- /dev/null +++ b/lib-tzcode/tzfile.5 @@ -0,0 +1,496 @@ +.\" This file is in the public domain, so clarified as of +.\" 1996-06-05 by Arthur David Olson. +.TH tzfile 5 "" "Time Zone Database" +.SH NAME +tzfile \- timezone information +.SH DESCRIPTION +.ie '\(lq'' .ds lq \&"\" +.el .ds lq \(lq\" +.ie '\(rq'' .ds rq \&"\" +.el .ds rq \(rq\" +.de q +\\$3\*(lq\\$1\*(rq\\$2 +.. +.ie \n(.g .ds - \f(CR-\fP +.el .ds - \- +The timezone information files used by +.BR tzset (3) +are typically found under a directory with a name like +.IR /usr/share/zoneinfo . +These files use the format described in Internet RFC 8536. +Each file is a sequence of 8-bit bytes. +In a file, a binary integer is represented by a sequence of one or +more bytes in network order (bigendian, or high-order byte first), +with all bits significant, +a signed binary integer is represented using two's complement, +and a boolean is represented by a one-byte binary integer that is +either 0 (false) or 1 (true). +The format begins with a 44-byte header containing the following fields: +.IP * 2 +The magic four-byte ASCII sequence +.q "TZif" +identifies the file as a timezone information file. +.IP * +A byte identifying the version of the file's format +(as of 2021, either an ASCII NUL, +.q "2", +.q "3", +or +.q "4" ). +.IP * +Fifteen bytes containing zeros reserved for future use. +.IP * +Six four-byte integer values, in the following order: +.RS +.TP +.B tzh_ttisutcnt +The number of UT/local indicators stored in the file. +(UT is Universal Time.) 
+.TP +.B tzh_ttisstdcnt +The number of standard/wall indicators stored in the file. +.TP +.B tzh_leapcnt +The number of leap seconds for which data entries are stored in the file. +.TP +.B tzh_timecnt +The number of transition times for which data entries are stored +in the file. +.TP +.B tzh_typecnt +The number of local time types for which data entries are stored +in the file (must not be zero). +.TP +.B tzh_charcnt +The number of bytes of time zone abbreviation strings +stored in the file. +.RE +.PP +The above header is followed by the following fields, whose lengths +depend on the contents of the header: +.IP * 2 +.B tzh_timecnt +four-byte signed integer values sorted in ascending order. +These values are written in network byte order. +Each is used as a transition time (as returned by +.BR time (2)) +at which the rules for computing local time change. +.IP * +.B tzh_timecnt +one-byte unsigned integer values; +each one but the last tells which of the different types of local time types +described in the file is associated with the time period +starting with the same-indexed transition time +and continuing up to but not including the next transition time. +(The last time type is present only for consistency checking with the +POSIX-style TZ string described below.) +These values serve as indices into the next field. +.IP * +.B tzh_typecnt +.B ttinfo +entries, each defined as follows: +.in +.5i +.sp +.nf +.ta .5i +\w'unsigned char\0\0'u +struct ttinfo { + int32_t tt_utoff; + unsigned char tt_isdst; + unsigned char tt_desigidx; +}; +.in -.5i +.fi +.sp +Each structure is written as a four-byte signed integer value for +.BR tt_utoff , +in network byte order, followed by a one-byte boolean for +.B tt_isdst +and a one-byte value for +.BR tt_desigidx . 
+In each structure, +.B tt_utoff +gives the number of seconds to be added to UT, +.B tt_isdst +tells whether +.B tm_isdst +should be set by +.BR localtime (3) +and +.B tt_desigidx +serves as an index into the array of time zone abbreviation bytes +that follow the +.B ttinfo +entries in the file; if the designated string is "\*-00", the +.B ttinfo +entry is a placeholder indicating that local time is unspecified. +The +.B tt_utoff +value is never equal to \-2**31, to let 32-bit clients negate it without +overflow. +Also, in realistic applications +.B tt_utoff +is in the range [\-89999, 93599] (i.e., more than \-25 hours and less +than 26 hours); this allows easy support by implementations that +already support the POSIX-required range [\-24:59:59, 25:59:59]. +.IP * +.B tzh_charcnt +bytes that represent time zone designations, +which are null-terminated byte strings, each indexed by the +.B tt_desigidx +values mentioned above. +The byte strings can overlap if one is a suffix of the other. +The encoding of these strings is not specified. +.IP * +.B tzh_leapcnt +pairs of four-byte values, written in network byte order; +the first value of each pair gives the nonnegative time +(as returned by +.BR time (2)) +at which a leap second occurs or at which the leap second table expires; +the second is a signed integer specifying the correction, which is the +.I total +number of leap seconds to be applied during the time period +starting at the given time. +The pairs of values are sorted in strictly ascending order by time. +Each pair denotes one leap second, either positive or negative, +except that if the last pair has the same correction as the previous one, +the last pair denotes the leap second table's expiration time. +Each leap second is at the end of a UTC calendar month. 
+The first leap second has a nonnegative occurrence time, +and is a positive leap second if and only if its correction is positive; +the correction for each leap second after the first differs +from the previous leap second by either 1 for a positive leap second, +or \-1 for a negative leap second. +If the leap second table is empty, the leap-second correction is zero +for all timestamps; +otherwise, for timestamps before the first occurrence time, +the leap-second correction is zero if the first pair's correction is 1 or \-1, +and is unspecified otherwise (which can happen only in files +truncated at the start). +.IP * +.B tzh_ttisstdcnt +standard/wall indicators, each stored as a one-byte boolean; +they tell whether the transition times associated with local time types +were specified as standard time or local (wall clock) time. +.IP * +.B tzh_ttisutcnt +UT/local indicators, each stored as a one-byte boolean; +they tell whether the transition times associated with local time types +were specified as UT or local time. +If a UT/local indicator is set, the corresponding standard/wall indicator +must also be set. +.PP +The standard/wall and UT/local indicators were designed for +transforming a TZif file's transition times into transitions appropriate +for another time zone specified via a POSIX-style TZ string that lacks rules. +For example, when TZ="EET\*-2EEST" and there is no TZif file "EET\*-2EEST", +the idea was to adapt the transition times from a TZif file with the +well-known name "posixrules" that is present only for this purpose and +is a copy of the file "Europe/Brussels", a file with a different UT offset. 
+POSIX does not specify this obsolete transformational behavior, +the default rules are installation-dependent, and no implementation +is known to support this feature for timestamps past 2037, +so users desiring (say) Greek time should instead specify +TZ="Europe/Athens" for better historical coverage, falling back on +TZ="EET\*-2EEST,M3.5.0/3,M10.5.0/4" if POSIX conformance is required +and older timestamps need not be handled accurately. +.PP +The +.BR localtime (3) +function +normally uses the first +.B ttinfo +structure in the file +if either +.B tzh_timecnt +is zero or the time argument is less than the first transition time recorded +in the file. +.SS Version 2 format +For version-2-format timezone files, +the above header and data are followed by a second header and data, +identical in format except that +eight bytes are used for each transition time or leap second time. +(Leap second counts remain four bytes.) +After the second header and data comes a newline-enclosed, +POSIX-TZ-environment-variable-style string for use in handling instants +after the last transition time stored in the file +or for all instants if the file has no transitions. +The POSIX-style TZ string is empty (i.e., nothing between the newlines) +if there is no POSIX-style representation for such instants. +If nonempty, the POSIX-style TZ string must agree with the local time +type after the last transition time if present in the eight-byte data; +for example, given the string +.q "WET0WEST,M3.5.0/1,M10.5.0" +then if a last transition time is in July, the transition's local time +type must specify a daylight-saving time abbreviated +.q "WEST" +that is one hour east of UT. +Also, if there is at least one transition, time type 0 is associated +with the time period from the indefinite past up to but not including +the earliest transition time. 
+.SS Version 3 format +For version-3-format timezone files, the POSIX-TZ-style string may +use two minor extensions to the POSIX TZ format, as described in +.BR newtzset (3). +First, the hours part of its transition times may be signed and range from +\-167 through 167 instead of the POSIX-required unsigned values +from 0 through 24. +Second, DST is in effect all year if it starts +January 1 at 00:00 and ends December 31 at 24:00 plus the difference +between daylight saving and standard time. +.SS Version 4 format +For version-4-format TZif files, +the first leap second record can have a correction that is neither ++1 nor \-1, to represent truncation of the TZif file at the start. +Also, if two or more leap second transitions are present and the last +entry's correction equals the previous one, the last entry +denotes the expiration of the leap second table instead of a leap second; +timestamps after this expiration are unreliable in that future +releases will likely add leap second entries after the expiration, and +the added leap seconds will change how post-expiration timestamps are treated. +.SS Interoperability considerations +Future changes to the format may append more data. +.PP +Version 1 files are considered a legacy format and +should not be generated, as they do not support transition +times after the year 2038. +Readers that understand only Version 1 must ignore +any data that extends beyond the calculated end of the version +1 data block. +.PP +Other than version 1, writers should generate +the lowest version number needed by a file's data. +For example, a writer should generate a version 4 file +only if its leap second table either expires or is truncated at the start. +Likewise, a writer not generating a version 4 file +should generate a version 3 file only if +TZ string extensions are necessary to accurately +model transition times. 
+.PP +The sequence of time changes defined by the version 1 +header and data block should be a contiguous sub-sequence +of the time changes defined by the version 2+ header and data +block, and by the footer. +This guideline helps obsolescent version 1 readers +agree with current readers about timestamps within the +contiguous sub-sequence. It also lets writers not +supporting obsolescent readers use a +.B tzh_timecnt +of zero +in the version 1 data block to save space. +.PP +When a TZif file contains a leap second table expiration +time, TZif readers should either refuse to process +post-expiration timestamps, or process them as if the expiration +time did not exist (possibly with an error indication). +.PP +Time zone designations should consist of at least three (3) +and no more than six (6) ASCII characters from the set of +alphanumerics, +.q "\*-", +and +.q "+". +This is for compatibility with POSIX requirements for +time zone abbreviations. +.PP +When reading a version 2 or higher file, readers +should ignore the version 1 header and data block except for +the purpose of skipping over them. +.PP +Readers should calculate the total lengths of the +headers and data blocks and check that they all fit within +the actual file size, as part of a validity check for the file. +.PP +When a positive leap second occurs, readers should append an extra +second to the local minute containing the second just before the leap +second. If this occurs when the UTC offset is not a multiple of 60 +seconds, the leap second occurs earlier than the last second of the +local minute and the minute's remaining local seconds are numbered +through 60 instead of the usual 59; the UTC offset is unaffected. +.SS Common interoperability issues +This section documents common problems in reading or writing TZif files. +Most of these are problems in generating TZif files for use by +older readers. 
+The goals of this section are: +.IP * 2 +to help TZif writers output files that avoid common +pitfalls in older or buggy TZif readers, +.IP * +to help TZif readers avoid common pitfalls when reading +files generated by future TZif writers, and +.IP * +to help any future specification authors see what sort of +problems arise when the TZif format is changed. +.PP +When new versions of the TZif format have been defined, a +design goal has been that a reader can successfully use a TZif +file even if the file is of a later TZif version than what the +reader was designed for. +When complete compatibility was not achieved, an attempt was +made to limit glitches to rarely used timestamps and allow +simple partial workarounds in writers designed to generate +new-version data useful even for older-version readers. +This section attempts to document these compatibility issues and +workarounds, as well as to document other common bugs in +readers. +.PP +Interoperability problems with TZif include the following: +.IP * 2 +Some readers examine only version 1 data. +As a partial workaround, a writer can output as much version 1 +data as possible. +However, a reader should ignore version 1 data, and should use +version 2+ data even if the reader's native timestamps have only +32 bits. +.IP * +Some readers designed for version 2 might mishandle +timestamps after a version 3 or higher file's last transition, because +they cannot parse extensions to POSIX in the TZ-like string. +As a partial workaround, a writer can output more transitions +than necessary, so that only far-future timestamps are +mishandled by version 2 readers. +.IP * +Some readers designed for version 2 do not support +permanent daylight saving time with transitions after 24:00 +\(en e.g., a TZ string +.q "EST5EDT,0/0,J365/25" +denoting permanent Eastern Daylight Time +(\-04). 
+As a workaround, a writer can substitute standard time +for two time zones east, e.g., +.q "XXX3EDT4,0/0,J365/23" +for a time zone with a never-used standard time (XXX, \-03) +and negative daylight saving time (EDT, \-04) all year. +Alternatively, +as a partial workaround a writer can substitute standard time +for the next time zone east \(en e.g., +.q "AST4" +for permanent +Atlantic Standard Time (\-04). +.IP * +Some readers designed for version 2 or 3, and that require strict +conformance to RFC 8536, reject version 4 files whose leap second +tables are truncated at the start or that end in expiration times. +.IP * +Some readers ignore the footer, and instead predict future +timestamps from the time type of the last transition. +As a partial workaround, a writer can output more transitions +than necessary. +.IP * +Some readers do not use time type 0 for timestamps before +the first transition, in that they infer a time type using a +heuristic that does not always select time type 0. +As a partial workaround, a writer can output a dummy (no-op) +first transition at an early time. +.IP * +Some readers mishandle timestamps before the first +transition that has a timestamp not less than \-2**31. +Readers that support only 32-bit timestamps are likely to be +more prone to this problem, for example, when they process +64-bit transitions only some of which are representable in 32 +bits. +As a partial workaround, a writer can output a dummy +transition at timestamp \-2**31. +.IP * +Some readers mishandle a transition if its timestamp has +the minimum possible signed 64-bit value. +Timestamps less than \-2**59 are not recommended. +.IP * +Some readers mishandle POSIX-style TZ strings that +contain +.q "<" +or +.q ">". +As a partial workaround, a writer can avoid using +.q "<" +or +.q ">" +for time zone abbreviations containing only alphabetic +characters. +.IP * +Many readers mishandle time zone abbreviations that contain +non-ASCII characters. 
+These characters are not recommended. +.IP * +Some readers may mishandle time zone abbreviations that +contain fewer than 3 or more than 6 characters, or that +contain ASCII characters other than alphanumerics, +.q "\*-", +and +.q "+". +These abbreviations are not recommended. +.IP * +Some readers mishandle TZif files that specify +daylight-saving time UT offsets that are less than the UT +offsets for the corresponding standard time. +These readers do not support locations like Ireland, which +uses the equivalent of the POSIX TZ string +.q "IST\*-1GMT0,M10.5.0,M3.5.0/1", +observing standard time +(IST, +01) in summer and daylight saving time (GMT, +00) in winter. +As a partial workaround, a writer can output data for the +equivalent of the POSIX TZ string +.q "GMT0IST,M3.5.0/1,M10.5.0", +thus swapping standard and daylight saving time. +Although this workaround misidentifies which part of the year +uses daylight saving time, it records UT offsets and time zone +abbreviations correctly. +.IP * +Some readers generate ambiguous timestamps for positive leap seconds +that occur when the UTC offset is not a multiple of 60 seconds. +For example, in a timezone with UTC offset +01:23:45 and with +a positive leap second 78796801 (1972-06-30 23:59:60 UTC), some readers will +map both 78796800 and 78796801 to 01:23:45 local time the next day +instead of mapping the latter to 01:23:46, and they will map 78796815 to +01:23:59 instead of to 01:23:60. +This has not yet been a practical problem, since no civil authority +has observed such UTC offsets since leap seconds were +introduced in 1972. +.PP +Some interoperability problems are reader bugs that +are listed here mostly as warnings to developers of readers. +.IP * 2 +Some readers do not support negative timestamps. +Developers of distributed applications should keep this +in mind if they need to deal with pre-1970 data. +.IP * +Some readers mishandle timestamps before the first +transition that has a nonnegative timestamp. 
+Readers that do not support negative timestamps are likely to +be more prone to this problem. +.IP * +Some readers mishandle time zone abbreviations like +.q "\*-08" +that contain +.q "+", +.q "\*-", +or digits. +.IP * +Some readers mishandle UT offsets that are out of the +traditional range of \-12 through +12 hours, and so do not +support locations like Kiritimati that are outside this +range. +.IP * +Some readers mishandle UT offsets in the range [\-3599, \-1] +seconds from UT, because they integer-divide the offset by +3600 to get 0 and then display the hour part as +.q "+00". +.IP * +Some readers mishandle UT offsets that are not a multiple +of one hour, or of 15 minutes, or of 1 minute. +.SH SEE ALSO +.BR time (2), +.BR localtime (3), +.BR tzset (3), +.BR tzselect (8), +.BR zdump (8), +.BR zic (8). +.PP +Olson A, Eggert P, Murchison K. The Time Zone Information Format (TZif). +2019 Feb. +.UR https://\:datatracker.ietf.org/\:doc/\:html/\:rfc8536 +Internet RFC 8536 +.UE +.UR https://\:doi.org/\:10.17487/\:RFC8536 +doi:10.17487/RFC8536 +.UE . diff --git a/lib-tzcode/tzfile.5.txt b/lib-tzcode/tzfile.5.txt new file mode 100644 index 0000000..6607de7 --- /dev/null +++ b/lib-tzcode/tzfile.5.txt @@ -0,0 +1,369 @@ +tzfile(5) File Formats Manual tzfile(5) + +NAME + tzfile - timezone information + +DESCRIPTION + The timezone information files used by tzset(3) are typically found + under a directory with a name like /usr/share/zoneinfo. These files + use the format described in Internet RFC 8536. Each file is a sequence + of 8-bit bytes. In a file, a binary integer is represented by a + sequence of one or more bytes in network order (bigendian, or high- + order byte first), with all bits significant, a signed binary integer + is represented using two's complement, and a boolean is represented by + a one-byte binary integer that is either 0 (false) or 1 (true). 
The + format begins with a 44-byte header containing the following fields: + + * The magic four-byte ASCII sequence "TZif" identifies the file as a + timezone information file. + + * A byte identifying the version of the file's format (as of 2021, + either an ASCII NUL, "2", "3", or "4"). + + * Fifteen bytes containing zeros reserved for future use. + + * Six four-byte integer values, in the following order: + + tzh_ttisutcnt + The number of UT/local indicators stored in the file. (UT is + Universal Time.) + + tzh_ttisstdcnt + The number of standard/wall indicators stored in the file. + + tzh_leapcnt + The number of leap seconds for which data entries are stored + in the file. + + tzh_timecnt + The number of transition times for which data entries are + stored in the file. + + tzh_typecnt + The number of local time types for which data entries are + stored in the file (must not be zero). + + tzh_charcnt + The number of bytes of time zone abbreviation strings stored + in the file. + + The above header is followed by the following fields, whose lengths + depend on the contents of the header: + + * tzh_timecnt four-byte signed integer values sorted in ascending + order. These values are written in network byte order. Each is used + as a transition time (as returned by time(2)) at which the rules for + computing local time change. + + * tzh_timecnt one-byte unsigned integer values; each one but the last + tells which of the different types of local time types described in + the file is associated with the time period starting with the same- + indexed transition time and continuing up to but not including the + next transition time. (The last time type is present only for + consistency checking with the POSIX-style TZ string described below.) + These values serve as indices into the next field. 
+ + * tzh_typecnt ttinfo entries, each defined as follows: + + struct ttinfo { + int32_t tt_utoff; + unsigned char tt_isdst; + unsigned char tt_desigidx; + }; + + Each structure is written as a four-byte signed integer value for + tt_utoff, in network byte order, followed by a one-byte boolean for + tt_isdst and a one-byte value for tt_desigidx. In each structure, + tt_utoff gives the number of seconds to be added to UT, tt_isdst + tells whether tm_isdst should be set by localtime(3) and tt_desigidx + serves as an index into the array of time zone abbreviation bytes + that follow the ttinfo entries in the file; if the designated string + is "-00", the ttinfo entry is a placeholder indicating that local + time is unspecified. The tt_utoff value is never equal to -2**31, to + let 32-bit clients negate it without overflow. Also, in realistic + applications tt_utoff is in the range [-89999, 93599] (i.e., more + than -25 hours and less than 26 hours); this allows easy support by + implementations that already support the POSIX-required range + [-24:59:59, 25:59:59]. + + * tzh_charcnt bytes that represent time zone designations, which are + null-terminated byte strings, each indexed by the tt_desigidx values + mentioned above. The byte strings can overlap if one is a suffix of + the other. The encoding of these strings is not specified. + + * tzh_leapcnt pairs of four-byte values, written in network byte order; + the first value of each pair gives the nonnegative time (as returned + by time(2)) at which a leap second occurs or at which the leap second + table expires; the second is a signed integer specifying the + correction, which is the total number of leap seconds to be applied + during the time period starting at the given time. The pairs of + values are sorted in strictly ascending order by time. 
Each pair + denotes one leap second, either positive or negative, except that if + the last pair has the same correction as the previous one, the last + pair denotes the leap second table's expiration time. Each leap + second is at the end of a UTC calendar month. The first leap second + has a nonnegative occurrence time, and is a positive leap second if + and only if its correction is positive; the correction for each leap + second after the first differs from the previous leap second by + either 1 for a positive leap second, or -1 for a negative leap + second. If the leap second table is empty, the leap-second + correction is zero for all timestamps; otherwise, for timestamps + before the first occurrence time, the leap-second correction is zero + if the first pair's correction is 1 or -1, and is unspecified + otherwise (which can happen only in files truncated at the start). + + * tzh_ttisstdcnt standard/wall indicators, each stored as a one-byte + boolean; they tell whether the transition times associated with local + time types were specified as standard time or local (wall clock) + time. + + * tzh_ttisutcnt UT/local indicators, each stored as a one-byte boolean; + they tell whether the transition times associated with local time + types were specified as UT or local time. If a UT/local indicator is + set, the corresponding standard/wall indicator must also be set. + + The standard/wall and UT/local indicators were designed for + transforming a TZif file's transition times into transitions + appropriate for another time zone specified via a POSIX-style TZ string + that lacks rules. For example, when TZ="EET-2EEST" and there is no + TZif file "EET-2EEST", the idea was to adapt the transition times from + a TZif file with the well-known name "posixrules" that is present only + for this purpose and is a copy of the file "Europe/Brussels", a file + with a different UT offset. 
POSIX does not specify this obsolete + transformational behavior, the default rules are installation- + dependent, and no implementation is known to support this feature for + timestamps past 2037, so users desiring (say) Greek time should instead + specify TZ="Europe/Athens" for better historical coverage, falling back + on TZ="EET-2EEST,M3.5.0/3,M10.5.0/4" if POSIX conformance is required + and older timestamps need not be handled accurately. + + The localtime(3) function normally uses the first ttinfo structure in + the file if either tzh_timecnt is zero or the time argument is less + than the first transition time recorded in the file. + + Version 2 format + For version-2-format timezone files, the above header and data are + followed by a second header and data, identical in format except that + eight bytes are used for each transition time or leap second time. + (Leap second counts remain four bytes.) After the second header and + data comes a newline-enclosed, POSIX-TZ-environment-variable-style + string for use in handling instants after the last transition time + stored in the file or for all instants if the file has no transitions. + The POSIX-style TZ string is empty (i.e., nothing between the newlines) + if there is no POSIX-style representation for such instants. If + nonempty, the POSIX-style TZ string must agree with the local time type + after the last transition time if present in the eight-byte data; for + example, given the string "WET0WEST,M3.5.0/1,M10.5.0" then if a last + transition time is in July, the transition's local time type must + specify a daylight-saving time abbreviated "WEST" that is one hour east + of UT. Also, if there is at least one transition, time type 0 is + associated with the time period from the indefinite past up to but not + including the earliest transition time. 
+ + Version 3 format + For version-3-format timezone files, the POSIX-TZ-style string may use + two minor extensions to the POSIX TZ format, as described in + newtzset(3). First, the hours part of its transition times may be + signed and range from -167 through 167 instead of the POSIX-required + unsigned values from 0 through 24. Second, DST is in effect all year + if it starts January 1 at 00:00 and ends December 31 at 24:00 plus the + difference between daylight saving and standard time. + + Version 4 format + For version-4-format TZif files, the first leap second record can have + a correction that is neither +1 nor -1, to represent truncation of the + TZif file at the start. Also, if two or more leap second transitions + are present and the last entry's correction equals the previous one, + the last entry denotes the expiration of the leap second table instead + of a leap second; timestamps after this expiration are unreliable in + that future releases will likely add leap second entries after the + expiration, and the added leap seconds will change how post-expiration + timestamps are treated. + + Interoperability considerations + Future changes to the format may append more data. + + Version 1 files are considered a legacy format and should not be + generated, as they do not support transition times after the year 2038. + Readers that understand only Version 1 must ignore any data that + extends beyond the calculated end of the version 1 data block. + + Other than version 1, writers should generate the lowest version number + needed by a file's data. For example, a writer should generate a + version 4 file only if its leap second table either expires or is + truncated at the start. Likewise, a writer not generating a version 4 + file should generate a version 3 file only if TZ string extensions are + necessary to accurately model transition times. 
+ + The sequence of time changes defined by the version 1 header and data + block should be a contiguous sub-sequence of the time changes defined + by the version 2+ header and data block, and by the footer. This + guideline helps obsolescent version 1 readers agree with current + readers about timestamps within the contiguous sub-sequence. It also + lets writers not supporting obsolescent readers use a tzh_timecnt of + zero in the version 1 data block to save space. + + When a TZif file contains a leap second table expiration time, TZif + readers should either refuse to process post-expiration timestamps, or + process them as if the expiration time did not exist (possibly with an + error indication). + + Time zone designations should consist of at least three (3) and no more + than six (6) ASCII characters from the set of alphanumerics, "-", and + "+". This is for compatibility with POSIX requirements for time zone + abbreviations. + + When reading a version 2 or higher file, readers should ignore the + version 1 header and data block except for the purpose of skipping over + them. + + Readers should calculate the total lengths of the headers and data + blocks and check that they all fit within the actual file size, as part + of a validity check for the file. + + When a positive leap second occurs, readers should append an extra + second to the local minute containing the second just before the leap + second. If this occurs when the UTC offset is not a multiple of 60 + seconds, the leap second occurs earlier than the last second of the + local minute and the minute's remaining local seconds are numbered + through 60 instead of the usual 59; the UTC offset is unaffected. + + Common interoperability issues + This section documents common problems in reading or writing TZif + files. Most of these are problems in generating TZif files for use by + older readers. 
The goals of this section are: + + * to help TZif writers output files that avoid common pitfalls in older + or buggy TZif readers, + + * to help TZif readers avoid common pitfalls when reading files + generated by future TZif writers, and + + * to help any future specification authors see what sort of problems + arise when the TZif format is changed. + + When new versions of the TZif format have been defined, a design goal + has been that a reader can successfully use a TZif file even if the + file is of a later TZif version than what the reader was designed for. + When complete compatibility was not achieved, an attempt was made to + limit glitches to rarely used timestamps and allow simple partial + workarounds in writers designed to generate new-version data useful + even for older-version readers. This section attempts to document + these compatibility issues and workarounds, as well as to document + other common bugs in readers. + + Interoperability problems with TZif include the following: + + * Some readers examine only version 1 data. As a partial workaround, a + writer can output as much version 1 data as possible. However, a + reader should ignore version 1 data, and should use version 2+ data + even if the reader's native timestamps have only 32 bits. + + * Some readers designed for version 2 might mishandle timestamps after + a version 3 or higher file's last transition, because they cannot + parse extensions to POSIX in the TZ-like string. As a partial + workaround, a writer can output more transitions than necessary, so + that only far-future timestamps are mishandled by version 2 readers. + + * Some readers designed for version 2 do not support permanent daylight + saving time with transitions after 24:00 - e.g., a TZ string + "EST5EDT,0/0,J365/25" denoting permanent Eastern Daylight Time (-04). 
+ As a workaround, a writer can substitute standard time for two time + zones east, e.g., "XXX3EDT4,0/0,J365/23" for a time zone with a + never-used standard time (XXX, -03) and negative daylight saving time + (EDT, -04) all year. Alternatively, as a partial workaround a writer + can substitute standard time for the next time zone east - e.g., + "AST4" for permanent Atlantic Standard Time (-04). + + * Some readers designed for version 2 or 3, and that require strict + conformance to RFC 8536, reject version 4 files whose leap second + tables are truncated at the start or that end in expiration times. + + * Some readers ignore the footer, and instead predict future timestamps + from the time type of the last transition. As a partial workaround, + a writer can output more transitions than necessary. + + * Some readers do not use time type 0 for timestamps before the first + transition, in that they infer a time type using a heuristic that + does not always select time type 0. As a partial workaround, a + writer can output a dummy (no-op) first transition at an early time. + + * Some readers mishandle timestamps before the first transition that + has a timestamp not less than -2**31. Readers that support only + 32-bit timestamps are likely to be more prone to this problem, for + example, when they process 64-bit transitions only some of which are + representable in 32 bits. As a partial workaround, a writer can + output a dummy transition at timestamp -2**31. + + * Some readers mishandle a transition if its timestamp has the minimum + possible signed 64-bit value. Timestamps less than -2**59 are not + recommended. + + * Some readers mishandle POSIX-style TZ strings that contain "<" or + ">". As a partial workaround, a writer can avoid using "<" or ">" + for time zone abbreviations containing only alphabetic characters. + + * Many readers mishandle time zone abbreviations that contain non-ASCII + characters. These characters are not recommended. 
+ + * Some readers may mishandle time zone abbreviations that contain fewer + than 3 or more than 6 characters, or that contain ASCII characters + other than alphanumerics, "-", and "+". These abbreviations are not + recommended. + + * Some readers mishandle TZif files that specify daylight-saving time + UT offsets that are less than the UT offsets for the corresponding + standard time. These readers do not support locations like Ireland, + which uses the equivalent of the POSIX TZ string + "IST-1GMT0,M10.5.0,M3.5.0/1", observing standard time (IST, +01) in + summer and daylight saving time (GMT, +00) in winter. As a partial + workaround, a writer can output data for the equivalent of the POSIX + TZ string "GMT0IST,M3.5.0/1,M10.5.0", thus swapping standard and + daylight saving time. Although this workaround misidentifies which + part of the year uses daylight saving time, it records UT offsets and + time zone abbreviations correctly. + + * Some readers generate ambiguous timestamps for positive leap seconds + that occur when the UTC offset is not a multiple of 60 seconds. For + example, in a timezone with UTC offset +01:23:45 and with a positive + leap second 78796801 (1972-06-30 23:59:60 UTC), some readers will map + both 78796800 and 78796801 to 01:23:45 local time the next day + instead of mapping the latter to 01:23:46, and they will map 78796815 + to 01:23:59 instead of to 01:23:60. This has not yet been a + practical problem, since no civil authority has observed such UTC + offsets since leap seconds were introduced in 1972. + + Some interoperability problems are reader bugs that are listed here + mostly as warnings to developers of readers. + + * Some readers do not support negative timestamps. Developers of + distributed applications should keep this in mind if they need to + deal with pre-1970 data. + + * Some readers mishandle timestamps before the first transition that + has a nonnegative timestamp. 
Readers that do not support negative + timestamps are likely to be more prone to this problem. + + * Some readers mishandle time zone abbreviations like "-08" that + contain "+", "-", or digits. + + * Some readers mishandle UT offsets that are out of the traditional + range of -12 through +12 hours, and so do not support locations like + Kiritimati that are outside this range. + + * Some readers mishandle UT offsets in the range [-3599, -1] seconds + from UT, because they integer-divide the offset by 3600 to get 0 and + then display the hour part as "+00". + + * Some readers mishandle UT offsets that are not a multiple of one + hour, or of 15 minutes, or of 1 minute. + +SEE ALSO + time(2), localtime(3), tzset(3), tzselect(8), zdump(8), zic(8). + + Olson A, Eggert P, Murchison K. The Time Zone Information Format + (TZif). 2019 Feb. Internet RFC 8536 <https://datatracker.ietf.org/ + doc/html/rfc8536> doi:10.17487/RFC8536 <https://doi.org/10.17487/ + RFC8536>. + +Time Zone Database tzfile(5) diff --git a/lib-tzcode/tzfile.h b/lib-tzcode/tzfile.h new file mode 100644 index 0000000..9cbdcff --- /dev/null +++ b/lib-tzcode/tzfile.h @@ -0,0 +1,127 @@ +/* Layout and location of TZif files. */ + +#ifndef TZFILE_H + +#define TZFILE_H + +/* +** This file is in the public domain, so clarified as of +** 1996-06-05 by Arthur David Olson. +*/ + +/* +** This header is for use ONLY with the time conversion code. +** There is no guarantee that it will remain unchanged, +** or that it will remain at all. +** Do NOT copy it to any system include directory. +** Thank you! +*/ + +/* +** Information about time zone files. +*/ + +#ifndef TZDIR +# define TZDIR "/usr/share/zoneinfo" /* Time zone object file directory */ +#endif /* !defined TZDIR */ + +#ifndef TZDEFAULT +# define TZDEFAULT "/etc/localtime" +#endif /* !defined TZDEFAULT */ + +#ifndef TZDEFRULES +# define TZDEFRULES "posixrules" +#endif /* !defined TZDEFRULES */ + + +/* See Internet RFC 8536 for more details about the following format. */ + +/* +** Each file begins with. . .
+*/ + +#define TZ_MAGIC "TZif" + +struct tzhead { + char tzh_magic[4]; /* TZ_MAGIC */ + char tzh_version[1]; /* '\0' or '2'-'4' as of 2021 */ + char tzh_reserved[15]; /* reserved; must be zero */ + char tzh_ttisutcnt[4]; /* coded number of trans. time flags */ + char tzh_ttisstdcnt[4]; /* coded number of trans. time flags */ + char tzh_leapcnt[4]; /* coded number of leap seconds */ + char tzh_timecnt[4]; /* coded number of transition times */ + char tzh_typecnt[4]; /* coded number of local time types */ + char tzh_charcnt[4]; /* coded number of abbr. chars */ +}; + +/* +** . . .followed by. . . +** +** tzh_timecnt (char [4])s coded transition times a la time(2) +** tzh_timecnt (unsigned char)s types of local time starting at above +** tzh_typecnt repetitions of +** one (char [4]) coded UT offset in seconds +** one (unsigned char) used to set tm_isdst +** one (unsigned char) that's an abbreviation list index +** tzh_charcnt (char)s '\0'-terminated zone abbreviations +** tzh_leapcnt repetitions of +** one (char [4]) coded leap second transition times +** one (char [4]) total correction after above +** tzh_ttisstdcnt (char)s indexed by type; if 1, transition +** time is standard time, if 0, +** transition time is local (wall clock) +** time; if absent, transition times are +** assumed to be local time +** tzh_ttisutcnt (char)s indexed by type; if 1, transition +** time is UT, if 0, transition time is +** local time; if absent, transition +** times are assumed to be local time. +** When this is 1, the corresponding +** std/wall indicator must also be 1. 
+*/ + +/* +** If tzh_version is '2' or greater, the above is followed by a second instance +** of tzhead and a second instance of the data in which each coded transition +** time uses 8 rather than 4 chars, +** then a newline-enclosed, POSIX-TZ-environment-variable-style string for use in handling +** instants after the last transition time stored in the file +** (with nothing between the newlines if there is no POSIX representation for +** such instants). +** +** If tzh_version is '3' or greater, the above is extended as follows. +** First, the POSIX TZ string's hour offset may range from -167 +** through 167 as compared to the POSIX-required 0 through 24. +** Second, its DST start time may be January 1 at 00:00 and its stop +** time December 31 at 24:00 plus the difference between DST and +** standard time, indicating DST all year. +*/ + +/* +** In the current implementation, "tzset()" refuses to deal with files that +** exceed any of the limits below. +*/ + +#ifndef TZ_MAX_TIMES +/* This must be at least 242 for Europe/London with 'zic -b fat'. */ +# define TZ_MAX_TIMES 2000 +#endif /* !defined TZ_MAX_TIMES */ + +#ifndef TZ_MAX_TYPES +/* This must be at least 18 for Europe/Vilnius with 'zic -b fat'. */ +# define TZ_MAX_TYPES 256 /* Limited by what (unsigned char)'s can hold */ +#endif /* !defined TZ_MAX_TYPES */ + +#ifndef TZ_MAX_CHARS +/* This must be at least 40 for America/Anchorage. */ +# define TZ_MAX_CHARS 50 /* Maximum number of abbreviation characters */ + /* (limited by what unsigned chars can hold) */ +#endif /* !defined TZ_MAX_CHARS */ + +#ifndef TZ_MAX_LEAPS +/* This must be at least 27 for leap seconds from 1972 through mid-2023. + There's a plan to discontinue leap seconds by 2035.
*/ +# define TZ_MAX_LEAPS 50 /* Maximum number of leap second corrections */ +#endif /* !defined TZ_MAX_LEAPS */ + +#endif /* !defined TZFILE_H */ diff --git a/lib-tzcode/tzselect.8 b/lib-tzcode/tzselect.8 new file mode 100644 index 0000000..4578090 --- /dev/null +++ b/lib-tzcode/tzselect.8 @@ -0,0 +1,125 @@ +.\" This file is in the public domain, so clarified as of +.\" 2009-05-17 by Arthur David Olson. +.TH tzselect 8 "" "Time Zone Database" +.SH NAME +tzselect \- select a timezone +.SH SYNOPSIS +.ie \n(.g .ds - \f(CR-\fP +.el .ds - \- +.ds d " degrees +.ds m " minutes +.ds s " seconds +.ds _ " \& +.if t \{\ +. if \n(.g .if c \(de .if c \(fm .if c \(sd \{\ +. ds d \(de +. ds m \(fm +. ds s \(sd +. ds _ \| +. \} +.\} +.B tzselect +[ +.B \*-c +.I coord +] [ +.B \*-n +.I limit +] [ +.B \*-\*-help +] [ +.B \*-\*-version +] +.SH DESCRIPTION +The +.B tzselect +program asks the user for information about the current location, +and outputs the resulting timezone to standard output. +The output is suitable as a value for the TZ environment variable. +.PP +All interaction with the user is done via standard input and standard error. +.SH OPTIONS +.TP +.BI "\*-c " coord +Instead of asking for continent and then country and then city, +ask for selection from time zones whose largest cities +are closest to the location with geographical coordinates +.I coord. +Use ISO 6709 notation for +.I coord, +that is, a latitude immediately followed by a longitude. The latitude +and longitude should be signed integers followed by an optional +decimal point and fraction: positive numbers represent north and east, +negative south and west. 
Latitudes with two and longitudes with three +integer digits are treated as degrees; latitudes with four or six and +longitudes with five or seven integer digits are treated as +.I "DDMM, DDDMM, DDMMSS," +or +.I DDDMMSS +representing +.I DD +or +.I DDD +degrees, +.I MM +minutes, +and zero or +.I SS +seconds, with any trailing fractions representing fractional minutes or +(if +.I SS +is present) seconds. The decimal point is that of the current locale. +For example, in the (default) C locale, +.B "\*-c\ +40.689\*-074.045" +specifies 40.689\*d\*_N, 74.045\*d\*_W, +.B "\*-c\ +4041.4\*-07402.7" +specifies 40\*d\*_41.4\*m\*_N, 74\*d\*_2.7\*m\*_W, and +.B "\*-c\ +404121\*-0740240" +specifies 40\*d\*_41\*m\*_21\*s\*_N, 74\*d\*_2\*m\*_40\*s\*_W. +If +.I coord +is not one of the documented forms, the resulting behavior is unspecified. +.TP +.BI "\*-n " limit +When +.B \*-c +is used, display the closest +.I limit +locations (default 10). +.TP +.B "\*-\*-help" +Output help information and exit. +.TP +.B "\*-\*-version" +Output version information and exit. +.SH "ENVIRONMENT VARIABLES" +.TP +\f3AWK\fP +Name of a Posix-compliant +.B awk +program (default: +.BR awk ). +.TP +\f3TZDIR\fP +Name of the directory containing timezone data files (default: +.BR /usr/share/zoneinfo ). +.SH FILES +.TP +\f2TZDIR\fP\f3/iso3166.tab\fP +Table of ISO 3166 2-letter country codes and country names. +.TP +\f2TZDIR\fP\f3/zone1970.tab\fP +Table of country codes, latitude and longitude, timezones, and +descriptive comments. +.TP +\f2TZDIR\fP\f3/\fP\f2TZ\fP +Timezone data file for timezone \f2TZ\fP. +.SH "EXIT STATUS" +The exit status is zero if a timezone was successfully obtained from the user, +nonzero otherwise. +.SH "SEE ALSO" +newctime(3), tzfile(5), zdump(8), zic(8) +.SH NOTES +Applications should not assume that +.BR tzselect 's +output matches the user's political preferences.
diff --git a/lib-tzcode/tzselect.8.txt b/lib-tzcode/tzselect.8.txt new file mode 100644 index 0000000..1b626ab --- /dev/null +++ b/lib-tzcode/tzselect.8.txt @@ -0,0 +1,77 @@ +tzselect(8) System Manager's Manual tzselect(8) + +NAME + tzselect - select a timezone + +SYNOPSIS + tzselect [ -c coord ] [ -n limit ] [ --help ] [ --version ] + +DESCRIPTION + The tzselect program asks the user for information about the current + location, and outputs the resulting timezone to standard output. The + output is suitable as a value for the TZ environment variable. + + All interaction with the user is done via standard input and standard + error. + +OPTIONS + -c coord + Instead of asking for continent and then country and then city, + ask for selection from time zones whose largest cities are + closest to the location with geographical coordinates coord. + Use ISO 6709 notation for coord, that is, a latitude immediately + followed by a longitude. The latitude and longitude should be + signed integers followed by an optional decimal point and + fraction: positive numbers represent north and east, negative + south and west. Latitudes with two and longitudes with three + integer digits are treated as degrees; latitudes with four or + six and longitudes with five or seven integer digits are treated + as DDMM, DDDMM, DDMMSS, or DDDMMSS representing DD or DDD + degrees, MM minutes, and zero or SS seconds, with any trailing + fractions representing fractional minutes or (if SS is present) + seconds. The decimal point is that of the current locale. For + example, in the (default) C locale, -c +40.689-074.045 specifies + 40.689 degrees N, 74.045 degrees W, -c +4041.4-07402.7 specifies + 40 degrees 41.4 minutes N, 74 degrees 2.7 minutes W, and + -c +404121-0740240 specifies 40 degrees 41 minutes 21 seconds N, + 74 degrees 2 minutes 40 seconds W. If coord is not one of the + documented forms, the resulting behavior is unspecified.
+ + -n limit + When -c is used, display the closest limit locations (default + 10). + + --help Output help information and exit. + + --version + Output version information and exit. + +ENVIRONMENT VARIABLES + AWK Name of a Posix-compliant awk program (default: awk). + + TZDIR Name of the directory containing timezone data files (default: + /usr/share/zoneinfo). + +FILES + TZDIR/iso3166.tab + Table of ISO 3166 2-letter country codes and country names. + + TZDIR/zone1970.tab + Table of country codes, latitude and longitude, timezones, and + descriptive comments. + + TZDIR/TZ + Timezone data file for timezone TZ. + +EXIT STATUS + The exit status is zero if a timezone was successfully obtained from + the user, nonzero otherwise. + +SEE ALSO + newctime(3), tzfile(5), zdump(8), zic(8) + +NOTES + Applications should not assume that tzselect's output matches the + user's political preferences. + +Time Zone Database tzselect(8) diff --git a/lib-tzcode/tzselect.ksh b/lib-tzcode/tzselect.ksh new file mode 100644 index 0000000..9a91acf --- /dev/null +++ b/lib-tzcode/tzselect.ksh @@ -0,0 +1,693 @@ +#!/bin/bash +# Ask the user about the time zone, and output the resulting TZ value to stdout. +# Interact with the user via stderr and stdin. + +PKGVERSION='(tzcode) ' +TZVERSION=see_Makefile +REPORT_BUGS_TO=tz@iana.org + +# Contributed by Paul Eggert. This file is in the public domain. + +# Porting notes: +# +# This script requires a Posix-like shell and prefers the extension of a +# 'select' statement. The 'select' statement was introduced in the +# Korn shell and is available in Bash and other shell implementations. +# If your host lacks both Bash and the Korn shell, you can get their +# source from one of these locations: +# +# Bash <https://www.gnu.org/software/bash/> +# Korn Shell <http://www.kornshell.com/> +# MirBSD Korn Shell <http://www.mirbsd.org/mksh.htm> +# +# For portability to Solaris 10 /bin/sh (supported by Oracle through +# January 2024) this script avoids some POSIX features and common +# extensions, such as $(...)
(which works sometimes but not others), +# $((...)), ! CMD, ${#ID}, ${ID##PAT}, ${ID%%PAT}, and $10. + +# +# This script also uses several features of modern awk programs. +# If your host lacks awk, or has an old awk that does not conform to Posix, +# you can use either of the following free programs instead: +# +# Gawk (GNU awk) +# mawk +# nawk + + +# Specify default values for environment variables if they are unset. +: ${AWK=awk} +: ${TZDIR=`pwd`} + +# Output one argument as-is to standard output, with trailing newline. +# Safer than 'echo', which can mishandle '\' or leading '-'. +say() { + printf '%s\n' "$1" +} + +# Check for awk Posix compliance. +($AWK -v x=y 'BEGIN { exit 123 }') /dev/null 2>&1 +[ $? = 123 ] || { + say >&2 "$0: Sorry, your '$AWK' program is not Posix compatible." + exit 1 +} + +coord= +location_limit=10 +zonetabtype=zone1970 + +usage="Usage: tzselect [--version] [--help] [-c COORD] [-n LIMIT] +Select a timezone interactively. + +Options: + + -c COORD + Instead of asking for continent and then country and then city, + ask for selection from time zones whose largest cities + are closest to the location with geographical coordinates COORD. + COORD should use ISO 6709 notation, for example, '-c +4852+00220' + for Paris (in degrees and minutes, North and East), or + '-c -35-058' for Buenos Aires (in degrees, South and West). + + -n LIMIT + Display at most LIMIT locations when -c is used (default $location_limit). + + --version + Output version information. + + --help + Output this help. + +Report bugs to $REPORT_BUGS_TO." + +# Ask the user to select from the function's arguments, +# and assign the selected argument to the variable 'select_result'. +# Exit on EOF or I/O error. Use the shell's nicer 'select' builtin if +# available, falling back on a portable substitute otherwise. +if + case $BASH_VERSION in + ?*) : ;; + '') + # '; exit' should be redundant, but Dash doesn't properly fail without it. 
+ (eval 'set --; select x; do break; done; exit') /dev/null + esac +then + # Do this inside 'eval', as otherwise the shell might exit when parsing it + # even though it is never executed. + eval ' + doselect() { + select select_result + do + case $select_result in + "") echo >&2 "Please enter a number in range." ;; + ?*) break + esac + done || exit + } + ' +else + doselect() { + # Field width of the prompt numbers. + select_width=`expr $# : '.*'` + + select_i= + + while : + do + case $select_i in + '') + select_i=0 + for select_word + do + select_i=`expr $select_i + 1` + printf >&2 "%${select_width}d) %s\\n" $select_i "$select_word" + done ;; + *[!0-9]*) + echo >&2 'Please enter a number in range.' ;; + *) + if test 1 -le $select_i && test $select_i -le $#; then + shift `expr $select_i - 1` + select_result=$1 + break + fi + echo >&2 'Please enter a number in range.' + esac + + # Prompt and read input. + printf >&2 %s "${PS3-#? }" + read select_i || exit + done + } +fi + +while getopts c:n:t:-: opt +do + case $opt$OPTARG in + c*) + coord=$OPTARG ;; + n*) + location_limit=$OPTARG ;; + t*) # Undocumented option, used for developer testing. + zonetabtype=$OPTARG ;; + -help) + exec echo "$usage" ;; + -version) + exec echo "tzselect $PKGVERSION$TZVERSION" ;; + -*) + say >&2 "$0: -$opt$OPTARG: unknown option; try '$0 --help'"; exit 1 ;; + *) + say >&2 "$0: try '$0 --help'"; exit 1 ;; + esac +done + +shift `expr $OPTIND - 1` +case $# in +0) ;; +*) say >&2 "$0: $1: unknown argument"; exit 1 ;; +esac + +# Make sure the tables are readable. +TZ_COUNTRY_TABLE=$TZDIR/iso3166.tab +TZ_ZONE_TABLE=$TZDIR/$zonetabtype.tab +for f in $TZ_COUNTRY_TABLE $TZ_ZONE_TABLE +do + <"$f" || { + say >&2 "$0: time zone files are not set up correctly" + exit 1 + } +done + +# If the current locale does not support UTF-8, convert data to current +# locale's format if possible, as the shell aligns columns better that way. +# Check the UTF-8 of U+12345 CUNEIFORM SIGN URU TIMES KI. 
+$AWK 'BEGIN { u12345 = "\360\222\215\205"; exit length(u12345) != 1 }' || { + { tmp=`(mktemp -d) 2>/dev/null` || { + tmp=${TMPDIR-/tmp}/tzselect.$$ && + (umask 77 && mkdir -- "$tmp") + };} && + trap 'status=$?; rm -fr -- "$tmp"; exit $status' 0 HUP INT PIPE TERM && + (iconv -f UTF-8 -t //TRANSLIT <"$TZ_COUNTRY_TABLE" >$tmp/iso3166.tab) \ + 2>/dev/null && + TZ_COUNTRY_TABLE=$tmp/iso3166.tab && + iconv -f UTF-8 -t //TRANSLIT <"$TZ_ZONE_TABLE" >$tmp/$zonetabtype.tab && + TZ_ZONE_TABLE=$tmp/$zonetabtype.tab +} + +newline=' +' +IFS=$newline + +# Awk script to output a country list. +output_country_list=' + BEGIN { FS = "\t" } + /^#$/ { next } + /^#[^@]/ { next } + { + commentary = $0 ~ /^#@/ + if (commentary) { + col1ccs = substr($1, 3) + conts = $2 + } else { + col1ccs = $1 + conts = $3 + } + ncc = split(col1ccs, cc, /,/) + ncont = split(conts, cont, /,/) + for (i = 1; i <= ncc; i++) { + elsewhere = commentary + for (ci = 1; ci <= ncont; ci++) { + if (cont[ci] ~ continent_re) { + if (!cc_seen[cc[i]]++) cc_list[++ccs] = cc[i] + elsewhere = 0 + } + } + if (elsewhere) { + for (i = 1; i <= ncc; i++) { + cc_elsewhere[cc[i]] = 1 + } + } + } + } + END { + while (getline &2 'Please identify a location' \ + 'so that time zone rules can be set correctly.' + + continent= + country= + region= + + case $coord in + ?*) + continent=coord;; + '') + + # Ask the user for continent or ocean. + + echo >&2 'Please select a continent, ocean, "coord", "TZ", or "time".' 
+ + quoted_continents=` + $AWK ' + function handle_entry(entry) { + entry = substr(entry, 1, index(entry, "/") - 1) + if (entry == "America") + entry = entry "s" + if (entry ~ /^(Arctic|Atlantic|Indian|Pacific)$/) + entry = entry " Ocean" + printf "'\''%s'\''\n", entry + } + BEGIN { FS = "\t" } + /^[^#]/ { + handle_entry($3) + } + /^#@/ { + ncont = split($2, cont, /,/) + for (ci = 1; ci <= ncont; ci++) { + handle_entry(cont[ci]) + } + } + ' <"$TZ_ZONE_TABLE" | + sort -u | + tr '\n' ' ' + echo '' + ` + + eval ' + doselect '"$quoted_continents"' \ + "coord - I want to use geographical coordinates." \ + "TZ - I want to specify the timezone using the Posix TZ format." \ + "time - I know local time already." + continent=$select_result + case $continent in + Americas) continent=America;; + *" "*) continent=`expr "$continent" : '\''\([^ ]*\)'\''` + esac + ' + esac + + case $continent in + TZ) + # Ask the user for a Posix TZ string. Check that it conforms. + while + echo >&2 'Please enter the desired value' \ + 'of the TZ environment variable.' + echo >&2 'For example, AEST-10 is abbreviated' \ + 'AEST and is 10 hours' + echo >&2 'ahead (east) of Greenwich,' \ + 'with no daylight saving time.' + read TZ + $AWK -v TZ="$TZ" 'BEGIN { + tzname = "(<[[:alnum:]+-]{3,}>|[[:alpha:]]{3,})" + time = "(2[0-4]|[0-1]?[0-9])" \ + "(:[0-5][0-9](:[0-5][0-9])?)?" + offset = "[-+]?" time + mdate = "M([1-9]|1[0-2])\\.[1-5]\\.[0-6]" + jdate = "((J[1-9]|[0-9]|J?[1-9][0-9]" \ + "|J?[1-2][0-9][0-9])|J?3[0-5][0-9]|J?36[0-5])" + datetime = ",(" mdate "|" jdate ")(/" time ")?" + tzpattern = "^(:.*|" tzname offset "(" tzname \ + "(" offset ")?(" datetime datetime ")?)?)$" + if (TZ ~ tzpattern) exit 1 + exit 0 + }' + do + say >&2 "'$TZ' is not a conforming Posix timezone string." + done + TZ_for_date=$TZ;; + *) + case $continent in + coord) + case $coord in + '') + echo >&2 'Please enter coordinates' \ + 'in ISO 6709 notation.' 
+ echo >&2 'For example, +4042-07403 stands for' + echo >&2 '40 degrees 42 minutes north,' \ + '74 degrees 3 minutes west.' + read coord;; + esac + distance_table=`$AWK \ + -v coord="$coord" \ + -v TZ_COUNTRY_TABLE="$TZ_COUNTRY_TABLE" \ + "$output_distances_or_times" <"$TZ_ZONE_TABLE" | + sort -n | + sed "${location_limit}q" + ` + regions=`$AWK \ + -v distance_table="$distance_table" ' + BEGIN { + nlines = split(distance_table, line, /\n/) + for (nr = 1; nr <= nlines; nr++) { + nf = split(line[nr], f, /\t/) + print f[nf] + } + } + '` + echo >&2 'Please select one of the following timezones,' + echo >&2 'listed roughly in increasing order' \ + "of distance from $coord". + doselect $regions + region=$select_result + TZ=`$AWK \ + -v distance_table="$distance_table" \ + -v region="$region" ' + BEGIN { + nlines = split(distance_table, line, /\n/) + for (nr = 1; nr <= nlines; nr++) { + nf = split(line[nr], f, /\t/) + if (f[nf] == region) { + print f[4] + } + } + } + '` + ;; + *) + case $continent in + time) + minute_format='%a %b %d %H:%M' + old_minute=`TZ=UTC0 date +"$minute_format"` + for i in 1 2 3 + do + time_table_command=` + $AWK -v output_times=1 \ + "$output_distances_or_times" <"$TZ_ZONE_TABLE" + ` + time_table=`eval "$time_table_command"` + new_minute=`TZ=UTC0 date +"$minute_format"` + case $old_minute in + "$new_minute") break;; + esac + old_minute=$new_minute + done + echo >&2 "The system says Universal Time is $new_minute." + echo >&2 "Assuming that's correct, what is the local time?" 
+ eval doselect ` + say "$time_table" | + sort -k2n -k2,5 -k1n | + $AWK '{ + line = $6 " " $7 " " $4 " " $5 + if (line == oldline) next + oldline = line + gsub(/'\''/, "&\\\\&&", line) + printf "'\''%s'\''\n", line + }' + ` + time=$select_result + zone_table=` + say "$time_table" | + $AWK -v time="$time" '{ + if ($6 " " $7 " " $4 " " $5 == time) { + sub(/[^\t]*\t/, "") + print + } + }' + ` + countries=` + say "$zone_table" | + $AWK \ + -v continent_re='' \ + -v TZ_COUNTRY_TABLE="$TZ_COUNTRY_TABLE" \ + "$output_country_list" | + sort -f + ` + ;; + *) + zone_table=file + # Get list of names of countries in the continent or ocean. + countries=`$AWK \ + -v continent_re="^$continent/" \ + -v TZ_COUNTRY_TABLE="$TZ_COUNTRY_TABLE" \ + "$output_country_list" \ + <"$TZ_ZONE_TABLE" | sort -f + `;; + esac + + # If there's more than one country, ask the user which one. + case $countries in + *"$newline"*) + echo >&2 'Please select a country' \ + 'whose clocks agree with yours.' + doselect $countries + country_result=$select_result + country=$select_result;; + *) + country=$countries + esac + + + # Get list of timezones in the country. + regions=` + case $zone_table in + file) cat -- "$TZ_ZONE_TABLE";; + *) say "$zone_table";; + esac | + $AWK \ + -v country="$country" \ + -v TZ_COUNTRY_TABLE="$TZ_COUNTRY_TABLE" \ + ' + BEGIN { + FS = "\t" + cc = country + while (getline &2 'Please select one of the following timezones.' + doselect $regions + region=$select_result + esac + + # Determine TZ from country and region. + TZ=` + case $zone_table in + file) cat -- "$TZ_ZONE_TABLE";; + *) say "$zone_table";; + esac | + $AWK \ + -v country="$country" \ + -v region="$region" \ + -v TZ_COUNTRY_TABLE="$TZ_COUNTRY_TABLE" \ + ' + BEGIN { + FS = "\t" + cc = country + while (getline &2 "$0: time zone files are not set up correctly" + exit 1 + } + esac + + + # Use the proposed TZ to output the current date relative to UTC. + # Loop until they agree in seconds. 
+ # Give up after 8 unsuccessful tries. + + extra_info= + for i in 1 2 3 4 5 6 7 8 + do + TZdate=`LANG=C TZ="$TZ_for_date" date` + UTdate=`LANG=C TZ=UTC0 date` + TZsec=`expr "$TZdate" : '.*:\([0-5][0-9]\)'` + UTsec=`expr "$UTdate" : '.*:\([0-5][0-9]\)'` + case $TZsec in + $UTsec) + extra_info=" +Selected time is now: $TZdate. +Universal Time is now: $UTdate." + break + esac + done + + + # Output TZ info and ask the user to confirm. + + echo >&2 "" + echo >&2 "Based on the following information:" + echo >&2 "" + case $time%$country_result%$region%$coord in + ?*%?*%?*%) + say >&2 " $time$newline $country_result$newline $region";; + ?*%?*%%|?*%%?*%) say >&2 " $time$newline $country_result$region";; + ?*%%%) say >&2 " $time";; + %?*%?*%) say >&2 " $country_result$newline $region";; + %?*%%) say >&2 " $country_result";; + %%?*%?*) say >&2 " coord $coord$newline $region";; + %%%?*) say >&2 " coord $coord";; + *) say >&2 " TZ='$TZ'" + esac + say >&2 "" + say >&2 "TZ='$TZ' will be used.$extra_info" + say >&2 "Is the above information OK?" + + doselect Yes No + ok=$select_result + case $ok in + Yes) break + esac +do coord= +done + +case $SHELL in +*csh) file=.login line="setenv TZ '$TZ'";; +*) file=.profile line="TZ='$TZ'; export TZ" +esac + +test -t 1 && say >&2 " +You can make this change permanent for yourself by appending the line + $line +to the file '$file' in your home directory; then log out and log in again. + +Here is that TZ value again, this time on standard output so that you +can use the $0 command in shell scripts:" + +say "$TZ" diff --git a/lib-tzcode/version b/lib-tzcode/version new file mode 100644 index 0000000..7daa77e --- /dev/null +++ b/lib-tzcode/version @@ -0,0 +1 @@ +2023c diff --git a/lib-tzcode/workman.sh b/lib-tzcode/workman.sh new file mode 100644 index 0000000..6e2da3a --- /dev/null +++ b/lib-tzcode/workman.sh @@ -0,0 +1,41 @@ +#! /bin/sh +# Convert manual page troff stdin to formatted .txt stdout. 
+ +# This file is in the public domain, so clarified as of +# 2009-05-17 by Arthur David Olson. + +if (type nroff && type perl) >/dev/null 2>&1; then + + # Tell groff not to emit SGR escape sequences (ANSI color escapes). + GROFF_NO_SGR=1 + export GROFF_NO_SGR + + echo ".am TH +.hy 0 +.na +.. +.rm }H +.rm }F" | nroff -man - ${1+"$@"} | perl -ne ' + binmode STDIN, '\'':encoding(utf8)'\''; + binmode STDOUT, '\'':encoding(utf8)'\''; + chomp; + s/.\010//g; + s/\s*$//; + if (/^$/) { + $sawblank = 1; + next; + } else { + if ($sawblank && $didprint) { + print "\n"; + $sawblank = 0; + } + print "$_\n"; + $didprint = 1; + } + ' +elif (type mandoc && type col) >/dev/null 2>&1; then + mandoc -man -T ascii "$@" | col -bx +else + echo >&2 "$0: please install nroff and perl, or mandoc and col" + exit 1 +fi diff --git a/lib-tzcode/zdump.8 b/lib-tzcode/zdump.8 new file mode 100644 index 0000000..f77c0c7 --- /dev/null +++ b/lib-tzcode/zdump.8 @@ -0,0 +1,231 @@ +.\" This file is in the public domain, so clarified as of +.\" 2009-05-17 by Arthur David Olson. +.TH zdump 8 "" "Time Zone Database" +.SH NAME +zdump \- timezone dumper +.SH SYNOPSIS +.B zdump +[ +.I option +\&... ] [ +.I timezone +\&... ] +.SH DESCRIPTION +.ie '\(lq'' .ds lq \&"\" +.el .ds lq \(lq\" +.ie '\(rq'' .ds rq \&"\" +.el .ds rq \(rq\" +.de q +\\$3\*(lq\\$1\*(rq\\$2 +.. +.ie \n(.g .ds - \f(CR-\fP +.el .ds - \- +The +.B zdump +program prints the current time in each +.I timezone +named on the command line. +.SH OPTIONS +.TP +.B \*-\*-version +Output version information and exit. +.TP +.B \*-\*-help +Output short usage message and exit. +.TP +.B \*-i +Output a description of time intervals. For each +.I timezone +on the command line, output an interval-format description of the +timezone. See +.q "INTERVAL FORMAT" +below. +.TP +.B \*-v +Output a verbose description of time intervals. 
+For each +.I timezone +on the command line, +print the times at the two extreme time values, +the times (if present) at and just beyond the boundaries of years that +.BR localtime (3) +and +.BR gmtime (3) +can represent, and +the times both one second before and exactly at +each detected time discontinuity. +Each line is followed by +.BI isdst= D +where +.I D +is positive, zero, or negative depending on whether +the given time is daylight saving time, standard time, +or an unknown time type, respectively. +Each line is also followed by +.BI gmtoff= N +if the given local time is known to be +.I N +seconds east of Greenwich. +.TP +.B \*-V +Like +.BR \*-v , +except omit output concerning extreme time and year values. +This generates output that is easier to compare to that of +implementations with different time representations. +.TP +.BI "\*-c " \fR[\fIloyear , \fR]\fIhiyear +Cut off interval output at the given year(s). +Cutoff times are computed using the proleptic Gregorian calendar with year 0 +and with Universal Time (UT) ignoring leap seconds. +Cutoffs are at the start of each year, where the lower-bound +timestamp is inclusive and the upper is exclusive; for example, +.B "\*-c 1970,2070" +selects transitions on or after 1970-01-01 00:00:00 UTC +and before 2070-01-01 00:00:00 UTC. +The default cutoff is +.BR \*-500,2500 . +.TP +.BI "\*-t " \fR[\fIlotime , \fR]\fIhitime +Cut off interval output at the given time(s), +given in decimal seconds since 1970-01-01 00:00:00 +Coordinated Universal Time (UTC). +The +.I timezone +determines whether the count includes leap seconds. +As with +.BR \*-c , +the cutoff's lower bound is inclusive and its upper bound is exclusive. +.SH "INTERVAL FORMAT" +The interval format is a compact text representation that is intended +to be both human- and machine-readable. 
It consists of an empty line, +then a line +.q "TZ=\fIstring\fP" +where +.I string +is a double-quoted string giving the timezone, a second line +.q "\*- \*- \fIinterval\fP" +describing the time interval before the first transition if any, and +zero or more following lines +.q "\fIdate time interval\fP", +one line for each transition time and following interval. Fields are +separated by single tabs. +.PP +Dates are in +.IR yyyy - mm - dd +format and times are in 24-hour +.IR hh : mm : ss +format where +.IR hh <24. +Times are in local time immediately after the transition. A +time interval description consists of a UT offset in signed +.RI \(+- hhmmss +format, a time zone abbreviation, and an isdst flag. An abbreviation +that equals the UT offset is omitted; other abbreviations are +double-quoted strings unless they consist of one or more alphabetic +characters. An isdst flag is omitted for standard time, and otherwise +is a decimal integer that is unsigned and positive (typically 1) for +daylight saving time and negative for unknown. +.PP +In times and in UT offsets with absolute value less than 100 hours, +the seconds are omitted if they are zero, and +the minutes are also omitted if they are also zero. Positive UT +offsets are east of Greenwich. The UT offset \*-00 denotes a UT +placeholder in areas where the actual offset is unspecified; by +convention, this occurs when the UT offset is zero and the time zone +abbreviation begins with +.q "\*-" +or is +.q "zzz". +.PP +In double-quoted strings, escape sequences represent unusual +characters. The escape sequences are \es for space, and \e", \e\e, +\ef, \en, \er, \et, and \ev with their usual meaning in the C +programming language. E.g., the double-quoted string +\*(lq"CET\es\e"\e\e"\*(rq represents the character sequence \*(lqCET +"\e\*(rq.\"" +.PP +.ne 9 +Here is an example of the output, with the leading empty line omitted. 
+(This example is shown with tab stops set far enough apart so that the +tabbed columns line up.) +.nf +.sp +.if \n(.g .ft CR +.if t .in +.5i +.if n .in +2 +.nr w \w'1896-01-13 'u+\n(.i +.ta \w'1896-01-13 'u +\w'12:01:26 'u +\w'-103126 'u +\w'HWT 'u +TZ="Pacific/Honolulu" +- - -103126 LMT +1896-01-13 12:01:26 -1030 HST +1933-04-30 03 -0930 HDT 1 +1933-05-21 11 -1030 HST +1942-02-09 03 -0930 HWT 1 +1945-08-14 13:30 -0930 HPT 1 +1945-09-30 01 -1030 HST +1947-06-08 02:30 -10 HST +.in +.if \n(.g .ft +.sp +.fi +Here, local time begins 10 hours, 31 minutes and 26 seconds west of +UT, and is a standard time abbreviated LMT. Immediately after the +first transition, the date is 1896-01-13 and the time is 12:01:26, and +the following time interval is 10.5 hours west of UT, a standard time +abbreviated HST. Immediately after the second transition, the date is +1933-04-30 and the time is 03:00:00 and the following time interval is +9.5 hours west of UT, is abbreviated HDT, and is daylight saving time. +Immediately after the last transition the date is 1947-06-08 and the +time is 02:30:00, and the following time interval is 10 hours west of +UT, a standard time abbreviated HST. +.PP +.ne 10 +Here are excerpts from another example: +.nf +.sp +.if \n(.g .ft CR +.if t .in +.5i +.if n .in +2 +TZ="Europe/Astrakhan" +- - +031212 LMT +1924-04-30 23:47:48 +03 +1930-06-21 01 +04 +1981-04-01 01 +05 1 +1981-09-30 23 +04 +\&... +2014-10-26 01 +03 +2016-03-27 03 +04 +.in +.if \n(.g .ft +.sp +.fi +This time zone is east of UT, so its UT offsets are positive. Also, +many of its time zone abbreviations are omitted since they duplicate +the text of the UT offset. +.SH LIMITATIONS +Time discontinuities are found by sampling the results returned by +.BR localtime (3) +at twelve-hour intervals. +This works in all real-world cases; +one can construct artificial time zones for which this fails. 
+.PP +In the +.B \*-v +and +.B \*-V +output, +.q "UT" +denotes the value returned by +.BR gmtime (3), +which uses UTC for modern timestamps and some other UT flavor for +timestamps that predate the introduction of UTC. +No attempt is currently made to have the output use +.q "UTC" +for newer and +.q "UT" +for older timestamps, partly because the exact date of the +introduction of UTC is problematic. +.SH SEE ALSO +.BR tzfile (5), +.BR zic (8) diff --git a/lib-tzcode/zdump.8.txt b/lib-tzcode/zdump.8.txt new file mode 100644 index 0000000..bcdd99b --- /dev/null +++ b/lib-tzcode/zdump.8.txt @@ -0,0 +1,144 @@ +zdump(8) System Manager's Manual zdump(8) + +NAME + zdump - timezone dumper + +SYNOPSIS + zdump [ option ... ] [ timezone ... ] + +DESCRIPTION + The zdump program prints the current time in each timezone named on the + command line. + +OPTIONS + --version + Output version information and exit. + + --help Output short usage message and exit. + + -i Output a description of time intervals. For each timezone on + the command line, output an interval-format description of the + timezone. See "INTERVAL FORMAT" below. + + -v Output a verbose description of time intervals. For each + timezone on the command line, print the times at the two extreme + time values, the times (if present) at and just beyond the + boundaries of years that localtime(3) and gmtime(3) can + represent, and the times both one second before and exactly at + each detected time discontinuity. Each line is followed by + isdst=D where D is positive, zero, or negative depending on + whether the given time is daylight saving time, standard time, + or an unknown time type, respectively. Each line is also + followed by gmtoff=N if the given local time is known to be N + seconds east of Greenwich. + + -V Like -v, except omit output concerning extreme time and year + values. This generates output that is easier to compare to that + of implementations with different time representations. 
+ + -c [loyear,]hiyear + Cut off interval output at the given year(s). Cutoff times are + computed using the proleptic Gregorian calendar with year 0 and + with Universal Time (UT) ignoring leap seconds. Cutoffs are at + the start of each year, where the lower-bound timestamp is + inclusive and the upper is exclusive; for example, -c 1970,2070 + selects transitions on or after 1970-01-01 00:00:00 UTC and + before 2070-01-01 00:00:00 UTC. The default cutoff is + -500,2500. + + -t [lotime,]hitime + Cut off interval output at the given time(s), given in decimal + seconds since 1970-01-01 00:00:00 Coordinated Universal Time + (UTC). The timezone determines whether the count includes leap + seconds. As with -c, the cutoff's lower bound is inclusive and + its upper bound is exclusive. + +INTERVAL FORMAT + The interval format is a compact text representation that is intended + to be both human- and machine-readable. It consists of an empty line, + then a line "TZ=string" where string is a double-quoted string giving + the timezone, a second line "- - interval" describing the time interval + before the first transition if any, and zero or more following lines + "date time interval", one line for each transition time and following + interval. Fields are separated by single tabs. + + Dates are in yyyy-mm-dd format and times are in 24-hour hh:mm:ss format + where hh<24. Times are in local time immediately after the transition. + A time interval description consists of a UT offset in signed +-hhmmss + format, a time zone abbreviation, and an isdst flag. An abbreviation + that equals the UT offset is omitted; other abbreviations are double- + quoted strings unless they consist of one or more alphabetic + characters. An isdst flag is omitted for standard time, and otherwise + is a decimal integer that is unsigned and positive (typically 1) for + daylight saving time and negative for unknown. 
+ + In times and in UT offsets with absolute value less than 100 hours, the + seconds are omitted if they are zero, and the minutes are also omitted + if they are also zero. Positive UT offsets are east of Greenwich. The + UT offset -00 denotes a UT placeholder in areas where the actual offset + is unspecified; by convention, this occurs when the UT offset is zero + and the time zone abbreviation begins with "-" or is "zzz". + + In double-quoted strings, escape sequences represent unusual + characters. The escape sequences are \s for space, and \", \\, \f, \n, + \r, \t, and \v with their usual meaning in the C programming language. + E.g., the double-quoted string ""CET\s\"\\"" represents the character + sequence "CET "\". + + Here is an example of the output, with the leading empty line omitted. + (This example is shown with tab stops set far enough apart so that the + tabbed columns line up.) + + TZ="Pacific/Honolulu" + - - -103126 LMT + 1896-01-13 12:01:26 -1030 HST + 1933-04-30 03 -0930 HDT 1 + 1933-05-21 11 -1030 HST + 1942-02-09 03 -0930 HWT 1 + 1945-08-14 13:30 -0930 HPT 1 + 1945-09-30 01 -1030 HST + 1947-06-08 02:30 -10 HST + + Here, local time begins 10 hours, 31 minutes and 26 seconds west of UT, + and is a standard time abbreviated LMT. Immediately after the first + transition, the date is 1896-01-13 and the time is 12:01:26, and the + following time interval is 10.5 hours west of UT, a standard time + abbreviated HST. Immediately after the second transition, the date is + 1933-04-30 and the time is 03:00:00 and the following time interval is + 9.5 hours west of UT, is abbreviated HDT, and is daylight saving time. + Immediately after the last transition the date is 1947-06-08 and the + time is 02:30:00, and the following time interval is 10 hours west of + UT, a standard time abbreviated HST. 
+ + Here are excerpts from another example: + + TZ="Europe/Astrakhan" + - - +031212 LMT + 1924-04-30 23:47:48 +03 + 1930-06-21 01 +04 + 1981-04-01 01 +05 1 + 1981-09-30 23 +04 + ... + 2014-10-26 01 +03 + 2016-03-27 03 +04 + + This time zone is east of UT, so its UT offsets are positive. Also, + many of its time zone abbreviations are omitted since they duplicate + the text of the UT offset. + +LIMITATIONS + Time discontinuities are found by sampling the results returned by + localtime(3) at twelve-hour intervals. This works in all real-world + cases; one can construct artificial time zones for which this fails. + + In the -v and -V output, "UT" denotes the value returned by gmtime(3), + which uses UTC for modern timestamps and some other UT flavor for + timestamps that predate the introduction of UTC. No attempt is + currently made to have the output use "UTC" for newer and "UT" for + older timestamps, partly because the exact date of the introduction of + UTC is problematic. + +SEE ALSO + tzfile(5), zic(8) + +Time Zone Database zdump(8) diff --git a/lib-tzcode/zdump.c b/lib-tzcode/zdump.c new file mode 100644 index 0000000..6f9573e --- /dev/null +++ b/lib-tzcode/zdump.c @@ -0,0 +1,1267 @@ +/* Dump time zone data in a textual format. */ + +/* +** This file is in the public domain, so clarified as of +** 2009-05-17 by Arthur David Olson. 
+*/ + +#include "version.h" + +#ifndef NETBSD_INSPIRED +# define NETBSD_INSPIRED 1 +#endif + +#include "private.h" +#include + +#ifndef HAVE_SNPRINTF +# define HAVE_SNPRINTF (!PORT_TO_C89 || 199901 <= __STDC_VERSION__) +#endif + +#ifndef HAVE_LOCALTIME_R +# define HAVE_LOCALTIME_R 1 +#endif + +#ifndef HAVE_LOCALTIME_RZ +# ifdef TM_ZONE +# define HAVE_LOCALTIME_RZ (NETBSD_INSPIRED && USE_LTZ) +# else +# define HAVE_LOCALTIME_RZ 0 +# endif +#endif + +#ifndef HAVE_TZSET +# define HAVE_TZSET 1 +#endif + +#ifndef ZDUMP_LO_YEAR +# define ZDUMP_LO_YEAR (-500) +#endif /* !defined ZDUMP_LO_YEAR */ + +#ifndef ZDUMP_HI_YEAR +# define ZDUMP_HI_YEAR 2500 +#endif /* !defined ZDUMP_HI_YEAR */ + +#define SECSPERNYEAR (SECSPERDAY * DAYSPERNYEAR) +#define SECSPERLYEAR (SECSPERNYEAR + SECSPERDAY) +#define SECSPER400YEARS (SECSPERNYEAR * (intmax_t) (300 + 3) \ + + SECSPERLYEAR * (intmax_t) (100 - 3)) + +/* +** True if SECSPER400YEARS is known to be representable as an +** intmax_t. It's OK that SECSPER400YEARS_FITS can in theory be false +** even if SECSPER400YEARS is representable, because when that happens +** the code merely runs a bit more slowly, and this slowness doesn't +** occur on any practical platform. +*/ +enum { SECSPER400YEARS_FITS = SECSPERLYEAR <= INTMAX_MAX / 400 }; + +#if HAVE_GETTEXT +# include /* for setlocale */ +#endif /* HAVE_GETTEXT */ + +#if ! HAVE_LOCALTIME_RZ +# undef timezone_t +# define timezone_t char ** +#endif + +#if !HAVE_POSIX_DECLS +extern int getopt(int argc, char * const argv[], + const char * options); +extern char * optarg; +extern int optind; +#endif + +/* The minimum and maximum finite time values. */ +enum { atime_shift = CHAR_BIT * sizeof(time_t) - 2 }; +static time_t const absolute_min_time = + ((time_t) -1 < 0 + ? (- ((time_t) ~ (time_t) 0 < 0) + - (((time_t) 1 << atime_shift) - 1 + ((time_t) 1 << atime_shift))) + : 0); +static time_t const absolute_max_time = + ((time_t) -1 < 0 + ? 
(((time_t) 1 << atime_shift) - 1 + ((time_t) 1 << atime_shift)) + : -1); +static int longest; +static char const *progname; +static bool warned; +static bool errout; + +static char const *abbr(struct tm const *); +ATTRIBUTE_REPRODUCIBLE static intmax_t delta(struct tm *, struct tm *); +static void dumptime(struct tm const *); +static time_t hunt(timezone_t, time_t, time_t, bool); +static void show(timezone_t, char *, time_t, bool); +static void showextrema(timezone_t, char *, time_t, struct tm *, time_t); +static void showtrans(char const *, struct tm const *, time_t, char const *, + char const *); +static const char *tformat(void); +ATTRIBUTE_REPRODUCIBLE static time_t yeartot(intmax_t); + +/* Is C an ASCII digit? */ +static bool +is_digit(char c) +{ + return '0' <= c && c <= '9'; +} + +/* Is A an alphabetic character in the C locale? */ +static bool +is_alpha(char a) +{ + switch (a) { + default: + return false; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + return true; + } +} + +ATTRIBUTE_NORETURN static void +size_overflow(void) +{ + fprintf(stderr, _("%s: size overflow\n"), progname); + exit(EXIT_FAILURE); +} + +/* Return A + B, exiting if the result would overflow either ptrdiff_t + or size_t. A and B are both nonnegative. 
*/ +ATTRIBUTE_REPRODUCIBLE static ptrdiff_t +sumsize(ptrdiff_t a, ptrdiff_t b) +{ +#ifdef ckd_add + ptrdiff_t sum; + if (!ckd_add(&sum, a, b) && sum <= INDEX_MAX) + return sum; +#else + if (a <= INDEX_MAX && b <= INDEX_MAX - a) + return a + b; +#endif + size_overflow(); +} + +/* Return the size of the string STR, including its trailing NUL. + Report an error and exit if this would exceed INDEX_MAX which means + pointer subtraction wouldn't work. */ +static ptrdiff_t +xstrsize(char const *str) +{ + size_t len = strlen(str); + if (len < INDEX_MAX) + return len + 1; + size_overflow(); +} + +/* Return a pointer to a newly allocated buffer of size SIZE, exiting + on failure. SIZE should be positive. */ +ATTRIBUTE_MALLOC static void * +xmalloc(ptrdiff_t size) +{ + void *p = malloc(size); + if (!p) { + fprintf(stderr, _("%s: Memory exhausted\n"), progname); + exit(EXIT_FAILURE); + } + return p; +} + +#if ! HAVE_TZSET +# undef tzset +# define tzset zdump_tzset +static void tzset(void) { } +#endif + +/* Assume gmtime_r works if localtime_r does. + A replacement localtime_r is defined below if needed. */ +#if ! HAVE_LOCALTIME_R + +# undef gmtime_r +# define gmtime_r zdump_gmtime_r + +static struct tm * +gmtime_r(time_t *tp, struct tm *tmp) +{ + struct tm *r = gmtime(tp); + if (r) { + *tmp = *r; + r = tmp; + } + return r; +} + +#endif + +/* Platforms with TM_ZONE don't need tzname, so they can use the + faster localtime_rz or localtime_r if available. */ + +#if defined TM_ZONE && HAVE_LOCALTIME_RZ +# define USE_LOCALTIME_RZ true +#else +# define USE_LOCALTIME_RZ false +#endif + +#if ! USE_LOCALTIME_RZ + +# if !defined TM_ZONE || ! HAVE_LOCALTIME_R || !
HAVE_TZSET +# undef localtime_r +# define localtime_r zdump_localtime_r +static struct tm * +localtime_r(time_t *tp, struct tm *tmp) +{ + struct tm *r = localtime(tp); + if (r) { + *tmp = *r; + r = tmp; + } + return r; +} +# endif + +# undef localtime_rz +# define localtime_rz zdump_localtime_rz +static struct tm * +localtime_rz(ATTRIBUTE_MAYBE_UNUSED timezone_t rz, time_t *tp, struct tm *tmp) +{ + return localtime_r(tp, tmp); +} + +# ifdef TYPECHECK +# undef mktime_z +# define mktime_z zdump_mktime_z +static time_t +mktime_z(timezone_t tz, struct tm *tmp) +{ + return mktime(tmp); +} +# endif + +# undef tzalloc +# undef tzfree +# define tzalloc zdump_tzalloc +# define tzfree zdump_tzfree + +static timezone_t +tzalloc(char const *val) +{ +# if HAVE_SETENV + if (setenv("TZ", val, 1) != 0) { + char const *e = strerror(errno); + fprintf(stderr, _("%s: setenv: %s\n"), progname, e); + exit(EXIT_FAILURE); + } + tzset(); + return &optarg; /* Any valid non-null char ** will do. */ +# else + enum { TZeqlen = 3 }; + static char const TZeq[TZeqlen] = "TZ="; + static char **fakeenv; + static ptrdiff_t fakeenv0size; + void *freeable = NULL; + char **env = fakeenv, **initial_environ; + ptrdiff_t valsize = xstrsize(val); + if (fakeenv0size < valsize) { + char **e = environ, **to; + ptrdiff_t initial_nenvptrs = 1; /* Counting the trailing NULL pointer. 
*/ + + while (*e++) { +# ifdef ckd_add + if (ckd_add(&initial_nenvptrs, initial_nenvptrs, 1) + || INDEX_MAX < initial_nenvptrs) + size_overflow(); +# else + if (initial_nenvptrs == INDEX_MAX / sizeof *environ) + size_overflow(); + initial_nenvptrs++; +# endif + } + fakeenv0size = sumsize(valsize, valsize); + fakeenv0size = max(fakeenv0size, 64); + freeable = env; + fakeenv = env = + xmalloc(sumsize(sumsize(sizeof *environ, + initial_nenvptrs * sizeof *environ), + sumsize(TZeqlen, fakeenv0size))); + to = env + 1; + for (e = environ; (*to = *e); e++) + to += strncmp(*e, TZeq, TZeqlen) != 0; + env[0] = memcpy(to + 1, TZeq, TZeqlen); + } + memcpy(env[0] + TZeqlen, val, valsize); + initial_environ = environ; + environ = env; + tzset(); + free(freeable); + return initial_environ; +# endif +} + +static void +tzfree(ATTRIBUTE_MAYBE_UNUSED timezone_t initial_environ) +{ +# if !HAVE_SETENV + environ = initial_environ; + tzset(); +# endif +} +#endif /* ! USE_LOCALTIME_RZ */ + +/* A UT time zone, and its initializer. */ +static timezone_t gmtz; +static void +gmtzinit(void) +{ + if (USE_LOCALTIME_RZ) { + /* Try "GMT" first to find out whether this is one of the rare + platforms where time_t counts leap seconds; this works due to + the "Zone GMT 0 - GMT" line in the "etcetera" file. If "GMT" + fails, fall back on "GMT0" which might be similar due to the + "Link GMT GMT0" line in the "backward" file, and which + should work on all POSIX platforms. The rest of zdump does not + use the "GMT" abbreviation that comes from this setting, so it + is OK to use "GMT" here rather than the modern "UTC" which + would not work on platforms that omit the "backward" file. 
*/ + gmtz = tzalloc("GMT"); + if (!gmtz) { + static char const gmt0[] = "GMT0"; + gmtz = tzalloc(gmt0); + if (!gmtz) { + char const *e = strerror(errno); + fprintf(stderr, _("%s: unknown timezone '%s': %s\n"), + progname, gmt0, e); + exit(EXIT_FAILURE); + } + } + } +} + +/* Convert *TP to UT, storing the broken-down time into *TMP. + Return TMP if successful, NULL otherwise. This is like gmtime_r(TP, TMP), + except typically faster if USE_LOCALTIME_RZ. */ +static struct tm * +my_gmtime_r(time_t *tp, struct tm *tmp) +{ + return USE_LOCALTIME_RZ ? localtime_rz(gmtz, tp, tmp) : gmtime_r(tp, tmp); +} + +#ifndef TYPECHECK +# define my_localtime_rz localtime_rz +#else /* !defined TYPECHECK */ + +static struct tm * +my_localtime_rz(timezone_t tz, time_t *tp, struct tm *tmp) +{ + tmp = localtime_rz(tz, tp, tmp); + if (tmp) { + struct tm tm; + register time_t t; + + tm = *tmp; + t = mktime_z(tz, &tm); + if (t != *tp) { + fflush(stdout); + fprintf(stderr, "\n%s: ", progname); + fprintf(stderr, tformat(), *tp); + fprintf(stderr, " ->"); + fprintf(stderr, " year=%d", tmp->tm_year); + fprintf(stderr, " mon=%d", tmp->tm_mon); + fprintf(stderr, " mday=%d", tmp->tm_mday); + fprintf(stderr, " hour=%d", tmp->tm_hour); + fprintf(stderr, " min=%d", tmp->tm_min); + fprintf(stderr, " sec=%d", tmp->tm_sec); + fprintf(stderr, " isdst=%d", tmp->tm_isdst); + fprintf(stderr, " -> "); + fprintf(stderr, tformat(), t); + fprintf(stderr, "\n"); + errout = true; + } + } + return tmp; +} +#endif /* !defined TYPECHECK */ + +static void +abbrok(const char *const abbrp, const char *const zone) +{ + register const char * cp; + register const char * wp; + + if (warned) + return; + cp = abbrp; + while (is_alpha(*cp) || is_digit(*cp) || *cp == '-' || *cp == '+') + ++cp; + if (*cp) + wp = _("has characters other than ASCII alphanumerics, '-' or '+'"); + else if (cp - abbrp < 3) + wp = _("has fewer than 3 characters"); + else if (cp - abbrp > 6) + wp = _("has more than 6 characters"); + else + return; + 
fflush(stdout); + fprintf(stderr, + _("%s: warning: zone \"%s\" abbreviation \"%s\" %s\n"), + progname, zone, abbrp, wp); + warned = errout = true; +} + +/* Return a time zone abbreviation. If the abbreviation needs to be + saved, use *BUF (of size *BUFALLOC) to save it, and return the + abbreviation in the possibly reallocated *BUF. Otherwise, just + return the abbreviation. Get the abbreviation from TMP. + Exit on memory allocation failure. */ +static char const * +saveabbr(char **buf, ptrdiff_t *bufalloc, struct tm const *tmp) +{ + char const *ab = abbr(tmp); + if (HAVE_LOCALTIME_RZ) + return ab; + else { + ptrdiff_t absize = xstrsize(ab); + if (*bufalloc < absize) { + free(*buf); + + /* Make the new buffer at least twice as long as the old, + to avoid O(N**2) behavior on repeated calls. */ + *bufalloc = sumsize(*bufalloc, absize); + + *buf = xmalloc(*bufalloc); + } + return strcpy(*buf, ab); + } +} + +static void +close_file(FILE *stream) +{ + char const *e = (ferror(stream) ? _("I/O error") + : fclose(stream) != 0 ? strerror(errno) : NULL); + if (e) { + fprintf(stderr, "%s: %s\n", progname, e); + exit(EXIT_FAILURE); + } +} + +static void +usage(FILE * const stream, const int status) +{ + fprintf(stream, +_("%s: usage: %s OPTIONS TIMEZONE ...\n" + "Options include:\n" + " -c [L,]U Start at year L (default -500), end before year U (default 2500)\n" + " -t [L,]U Start at time L, end before time U (in seconds since 1970)\n" + " -i List transitions briefly (format is experimental)\n" \ + " -v List transitions verbosely\n" + " -V List transitions a bit less verbosely\n" + " --help Output this help\n" + " --version Output version info\n" + "\n" + "Report bugs to %s.\n"), + progname, progname, REPORT_BUGS_TO); + if (status == EXIT_SUCCESS) + close_file(stream); + exit(status); +} + +int +main(int argc, char *argv[]) +{ + /* These are static so that they're initially zero. 
*/ + static char * abbrev; + static ptrdiff_t abbrevsize; + + register int i; + register bool vflag; + register bool Vflag; + register char * cutarg; + register char * cuttimes; + register time_t cutlotime; + register time_t cuthitime; + time_t now; + bool iflag = false; + + cutlotime = absolute_min_time; + cuthitime = absolute_max_time; +#if HAVE_GETTEXT + setlocale(LC_ALL, ""); +# ifdef TZ_DOMAINDIR + bindtextdomain(TZ_DOMAIN, TZ_DOMAINDIR); +# endif /* defined TZ_DOMAINDIR */ + textdomain(TZ_DOMAIN); +#endif /* HAVE_GETTEXT */ + progname = argv[0] ? argv[0] : "zdump"; + for (i = 1; i < argc; ++i) + if (strcmp(argv[i], "--version") == 0) { + printf("zdump %s%s\n", PKGVERSION, TZVERSION); + return EXIT_SUCCESS; + } else if (strcmp(argv[i], "--help") == 0) { + usage(stdout, EXIT_SUCCESS); + } + vflag = Vflag = false; + cutarg = cuttimes = NULL; + for (;;) + switch (getopt(argc, argv, "c:it:vV")) { + case 'c': cutarg = optarg; break; + case 't': cuttimes = optarg; break; + case 'i': iflag = true; break; + case 'v': vflag = true; break; + case 'V': Vflag = true; break; + case -1: + if (!
(optind == argc - 1 && strcmp(argv[optind], "=") == 0)) + goto arg_processing_done; + ATTRIBUTE_FALLTHROUGH; + default: + usage(stderr, EXIT_FAILURE); + } + arg_processing_done:; + + if (iflag | vflag | Vflag) { + intmax_t lo; + intmax_t hi; + char *loend, *hiend; + register intmax_t cutloyear = ZDUMP_LO_YEAR; + register intmax_t cuthiyear = ZDUMP_HI_YEAR; + if (cutarg != NULL) { + lo = strtoimax(cutarg, &loend, 10); + if (cutarg != loend && !*loend) { + hi = lo; + cuthiyear = hi; + } else if (cutarg != loend && *loend == ',' + && (hi = strtoimax(loend + 1, &hiend, 10), + loend + 1 != hiend && !*hiend)) { + cutloyear = lo; + cuthiyear = hi; + } else { + fprintf(stderr, _("%s: wild -c argument %s\n"), + progname, cutarg); + return EXIT_FAILURE; + } + } + if (cutarg != NULL || cuttimes == NULL) { + cutlotime = yeartot(cutloyear); + cuthitime = yeartot(cuthiyear); + } + if (cuttimes != NULL) { + lo = strtoimax(cuttimes, &loend, 10); + if (cuttimes != loend && !*loend) { + hi = lo; + if (hi < cuthitime) { + if (hi < absolute_min_time + 1) + hi = absolute_min_time + 1; + cuthitime = hi; + } + } else if (cuttimes != loend && *loend == ',' + && (hi = strtoimax(loend + 1, &hiend, 10), + loend + 1 != hiend && !*hiend)) { + if (cutlotime < lo) { + if (absolute_max_time < lo) + lo = absolute_max_time; + cutlotime = lo; + } + if (hi < cuthitime) { + if (hi < absolute_min_time + 1) + hi = absolute_min_time + 1; + cuthitime = hi; + } + } else { + fprintf(stderr, + _("%s: wild -t argument %s\n"), + progname, cuttimes); + return EXIT_FAILURE; + } + } + } + gmtzinit(); + if (iflag | vflag | Vflag) + now = 0; + else { + now = time(NULL); + now |= !now; + } + longest = 0; + for (i = optind; i < argc; i++) { + size_t arglen = strlen(argv[i]); + if (longest < arglen) + longest = min(arglen, INT_MAX); + } + + for (i = optind; i < argc; ++i) { + timezone_t tz = tzalloc(argv[i]); + char const *ab; + time_t t; + struct tm tm, newtm; + bool tm_ok; + if (!tz) { + char const *e = 
strerror(errno); + fprintf(stderr, _("%s: unknown timezone '%s': %s\n"), + progname, argv[i], e); + return EXIT_FAILURE; + } + if (now) { + show(tz, argv[i], now, false); + tzfree(tz); + continue; + } + warned = false; + t = absolute_min_time; + if (! (iflag | Vflag)) { + show(tz, argv[i], t, true); + if (my_localtime_rz(tz, &t, &tm) == NULL + && t < cutlotime) { + time_t newt = cutlotime; + if (my_localtime_rz(tz, &newt, &newtm) != NULL) + showextrema(tz, argv[i], t, NULL, newt); + } + } + if (t + 1 < cutlotime) + t = cutlotime - 1; + tm_ok = my_localtime_rz(tz, &t, &tm) != NULL; + if (tm_ok) { + ab = saveabbr(&abbrev, &abbrevsize, &tm); + if (iflag) { + showtrans("\nTZ=%f", &tm, t, ab, argv[i]); + showtrans("-\t-\t%Q", &tm, t, ab, argv[i]); + } + } else + ab = NULL; + while (t < cuthitime - 1) { + time_t newt = ((t < absolute_max_time - SECSPERDAY / 2 + && t + SECSPERDAY / 2 < cuthitime - 1) + ? t + SECSPERDAY / 2 + : cuthitime - 1); + struct tm *newtmp = localtime_rz(tz, &newt, &newtm); + bool newtm_ok = newtmp != NULL; + if (tm_ok != newtm_ok + || (ab && (delta(&newtm, &tm) != newt - t + || newtm.tm_isdst != tm.tm_isdst + || strcmp(abbr(&newtm), ab) != 0))) { + newt = hunt(tz, t, newt, false); + newtmp = localtime_rz(tz, &newt, &newtm); + newtm_ok = newtmp != NULL; + if (iflag) + showtrans("%Y-%m-%d\t%L\t%Q", newtmp, newt, + newtm_ok ? abbr(&newtm) : NULL, argv[i]); + else { + show(tz, argv[i], newt - 1, true); + show(tz, argv[i], newt, true); + } + } + t = newt; + tm_ok = newtm_ok; + if (newtm_ok) { + ab = saveabbr(&abbrev, &abbrevsize, &newtm); + tm = newtm; + } + } + if (!
(iflag | Vflag)) { + time_t newt = absolute_max_time; + t = cuthitime; + if (t < newt) { + struct tm *tmp = my_localtime_rz(tz, &t, &tm); + if (tmp != NULL + && my_localtime_rz(tz, &newt, &newtm) == NULL) + showextrema(tz, argv[i], t, tmp, newt); + } + show(tz, argv[i], absolute_max_time, true); + } + tzfree(tz); + } + close_file(stdout); + if (errout && (ferror(stderr) || fclose(stderr) != 0)) + return EXIT_FAILURE; + return EXIT_SUCCESS; +} + +static time_t +yeartot(intmax_t y) +{ + register intmax_t myy, seconds, years; + register time_t t; + + myy = EPOCH_YEAR; + t = 0; + while (myy < y) { + if (SECSPER400YEARS_FITS && 400 <= y - myy) { + intmax_t diff400 = (y - myy) / 400; + if (INTMAX_MAX / SECSPER400YEARS < diff400) + return absolute_max_time; + seconds = diff400 * SECSPER400YEARS; + years = diff400 * 400; + } else { + seconds = isleap(myy) ? SECSPERLYEAR : SECSPERNYEAR; + years = 1; + } + myy += years; + if (t > absolute_max_time - seconds) + return absolute_max_time; + t += seconds; + } + while (y < myy) { + if (SECSPER400YEARS_FITS && y + 400 <= myy && myy < 0) { + intmax_t diff400 = (myy - y) / 400; + if (INTMAX_MAX / SECSPER400YEARS < diff400) + return absolute_min_time; + seconds = diff400 * SECSPER400YEARS; + years = diff400 * 400; + } else { + seconds = isleap(myy - 1) ? SECSPERLYEAR : SECSPERNYEAR; + years = 1; + } + myy -= years; + if (t < absolute_min_time + seconds) + return absolute_min_time; + t -= seconds; + } + return t; +} + +/* Search for a discontinuity in timezone TZ, in the + timestamps ranging from LOT through HIT. LOT and HIT disagree + about some aspect of timezone. If ONLY_OK, search only for + definedness changes, i.e., localtime succeeds on one side of the + transition but fails on the other side. Return the timestamp just + after the transition away from LOT's settings.
*/ + +static time_t +hunt(timezone_t tz, time_t lot, time_t hit, bool only_ok) +{ + static char * loab; + static ptrdiff_t loabsize; + struct tm lotm; + struct tm tm; + + /* Convert LOT into a broken-down time here, even though our + caller already did that. On platforms without TM_ZONE, + tzname may have been altered since our caller broke down + LOT, and tzname needs to be changed back. */ + bool lotm_ok = my_localtime_rz(tz, &lot, &lotm) != NULL; + bool tm_ok; + char const *ab = lotm_ok ? saveabbr(&loab, &loabsize, &lotm) : NULL; + + for ( ; ; ) { + /* T = average of LOT and HIT, rounding down. + Avoid overflow. */ + int rem_sum = lot % 2 + hit % 2; + time_t t = (rem_sum == 2) - (rem_sum < 0) + lot / 2 + hit / 2; + if (t == lot) + break; + tm_ok = my_localtime_rz(tz, &t, &tm) != NULL; + if (lotm_ok == tm_ok + && (only_ok + || (ab && tm.tm_isdst == lotm.tm_isdst + && delta(&tm, &lotm) == t - lot + && strcmp(abbr(&tm), ab) == 0))) { + lot = t; + if (tm_ok) + lotm = tm; + } else hit = t; + } + return hit; +} + +/* +** Thanks to Paul Eggert for logic used in delta_nonneg. +*/ + +static intmax_t +delta_nonneg(struct tm *newp, struct tm *oldp) +{ + intmax_t oldy = oldp->tm_year; + int cycles = (newp->tm_year - oldy) / YEARSPERREPEAT; + intmax_t sec = SECSPERREPEAT, result = cycles * sec; + int tmy = oldp->tm_year + cycles * YEARSPERREPEAT; + for ( ; tmy < newp->tm_year; ++tmy) + result += DAYSPERNYEAR + isleap_sum(tmy, TM_YEAR_BASE); + result += newp->tm_yday - oldp->tm_yday; + result *= HOURSPERDAY; + result += newp->tm_hour - oldp->tm_hour; + result *= MINSPERHOUR; + result += newp->tm_min - oldp->tm_min; + result *= SECSPERMIN; + result += newp->tm_sec - oldp->tm_sec; + return result; +} + +static intmax_t +delta(struct tm *newp, struct tm *oldp) +{ + return (newp->tm_year < oldp->tm_year + ? -delta_nonneg(oldp, newp) + : delta_nonneg(newp, oldp)); +} + +#ifndef TM_GMTOFF +/* Return A->tm_yday, adjusted to compare it fairly to B->tm_yday. 
+ Assume A and B differ by at most one year. */ +static int +adjusted_yday(struct tm const *a, struct tm const *b) +{ + int yday = a->tm_yday; + if (b->tm_year < a->tm_year) + yday += 365 + isleap_sum(b->tm_year, TM_YEAR_BASE); + return yday; +} +#endif + +/* If A is the broken-down local time and B the broken-down UT for + the same instant, return A's UT offset in seconds, where positive + offsets are east of Greenwich. On failure, return LONG_MIN. + + If T is nonnull, *T is the timestamp that corresponds to A; call + my_gmtime_r and use its result instead of B. Otherwise, B is the + possibly nonnull result of an earlier call to my_gmtime_r. */ +static long +gmtoff(struct tm const *a, ATTRIBUTE_MAYBE_UNUSED time_t *t, + ATTRIBUTE_MAYBE_UNUSED struct tm const *b) +{ +#ifdef TM_GMTOFF + return a->TM_GMTOFF; +#else + struct tm tm; + if (t) + b = my_gmtime_r(t, &tm); + if (! b) + return LONG_MIN; + else { + int ayday = adjusted_yday(a, b); + int byday = adjusted_yday(b, a); + int days = ayday - byday; + long hours = a->tm_hour - b->tm_hour + 24 * days; + long minutes = a->tm_min - b->tm_min + 60 * hours; + long seconds = a->tm_sec - b->tm_sec + 60 * minutes; + return seconds; + } +#endif +} + +static void +show(timezone_t tz, char *zone, time_t t, bool v) +{ + register struct tm * tmp; + register struct tm * gmtmp; + struct tm tm, gmtm; + + printf("%-*s ", longest, zone); + if (v) { + gmtmp = my_gmtime_r(&t, &gmtm); + if (gmtmp == NULL) { + printf(tformat(), t); + printf(_(" (gmtime failed)")); + } else { + dumptime(gmtmp); + printf(" UT"); + } + printf(" = "); + } + tmp = my_localtime_rz(tz, &t, &tm); + if (tmp == NULL) { + printf(tformat(), t); + printf(_(" (localtime failed)")); + } else { + dumptime(tmp); + if (*abbr(tmp) != '\0') + printf(" %s", abbr(tmp)); + if (v) { + long off = gmtoff(tmp, NULL, gmtmp); + printf(" isdst=%d", tmp->tm_isdst); + if (off != LONG_MIN) + printf(" gmtoff=%ld", off); + } + } + printf("\n"); + if (tmp != NULL && *abbr(tmp) != '\0') + 
abbrok(abbr(tmp), zone); +} + +/* Show timestamps just before and just after a transition between + defined and undefined (or vice versa) in either localtime or + gmtime. These transitions are for timezone TZ with name ZONE, in + the range from LO (with broken-down time LOTMP if that is nonnull) + through HI. LO and HI disagree on definedness. */ + +static void +showextrema(timezone_t tz, char *zone, time_t lo, struct tm *lotmp, time_t hi) +{ + struct tm localtm[2], gmtm[2]; + time_t t, boundary = hunt(tz, lo, hi, true); + bool old = false; + hi = (SECSPERDAY < hi - boundary + ? boundary + SECSPERDAY + : hi + (hi < TIME_T_MAX)); + if (SECSPERDAY < boundary - lo) { + lo = boundary - SECSPERDAY; + lotmp = my_localtime_rz(tz, &lo, &localtm[old]); + } + if (lotmp) + localtm[old] = *lotmp; + else + localtm[old].tm_sec = -1; + if (! my_gmtime_r(&lo, &gmtm[old])) + gmtm[old].tm_sec = -1; + + /* Search sequentially for definedness transitions. Although this + could be sped up by refining 'hunt' to search for either + localtime or gmtime definedness transitions, it hardly seems + worth the trouble. */ + for (t = lo + 1; t < hi; t++) { + bool new = !old; + if (! my_localtime_rz(tz, &t, &localtm[new])) + localtm[new].tm_sec = -1; + if (! my_gmtime_r(&t, &gmtm[new])) + gmtm[new].tm_sec = -1; + if (((localtm[old].tm_sec < 0) != (localtm[new].tm_sec < 0)) + | ((gmtm[old].tm_sec < 0) != (gmtm[new].tm_sec < 0))) { + show(tz, zone, t - 1, true); + show(tz, zone, t, true); + } + old = new; + } +} + +#if HAVE_SNPRINTF +# define my_snprintf snprintf +#else +# include + +/* A substitute for snprintf that is good enough for zdump. */ +ATTRIBUTE_FORMAT((printf, 3, 4)) static int +my_snprintf(char *s, size_t size, char const *format, ...) 
+{ + int n; + va_list args; + char const *arg; + size_t arglen, slen; + char buf[1024]; + va_start(args, format); + if (strcmp(format, "%s") == 0) { + arg = va_arg(args, char const *); + arglen = strlen(arg); + } else { + n = vsprintf(buf, format, args); + if (n < 0) { + va_end(args); + return n; + } + arg = buf; + arglen = n; + } + slen = arglen < size ? arglen : size - 1; + memcpy(s, arg, slen); + s[slen] = '\0'; + n = arglen <= INT_MAX ? arglen : -1; + va_end(args); + return n; +} +#endif + +/* Store into BUF, of size SIZE, a formatted local time taken from *TM. + Use ISO 8601 format +HH:MM:SS. Omit :SS if SS is zero, and omit + :MM too if MM is also zero. + + Return the length of the resulting string. If the string does not + fit, return the length that the string would have been if it had + fit; do not overrun the output buffer. */ +static int +format_local_time(char *buf, ptrdiff_t size, struct tm const *tm) +{ + int ss = tm->tm_sec, mm = tm->tm_min, hh = tm->tm_hour; + return (ss + ? my_snprintf(buf, size, "%02d:%02d:%02d", hh, mm, ss) + : mm + ? my_snprintf(buf, size, "%02d:%02d", hh, mm) + : my_snprintf(buf, size, "%02d", hh)); +} + +/* Store into BUF, of size SIZE, a formatted UT offset for the + localtime *TM corresponding to time T. Use ISO 8601 format + +HHMMSS, or -HHMMSS for timestamps west of Greenwich; use the + format -00 for unknown UT offsets. If the hour needs more than + two digits to represent, extend the length of HH as needed. + Otherwise, omit SS if SS is zero, and omit MM too if MM is also + zero. + + Return the length of the resulting string, or -1 if the result is + not representable as a string. If the string does not fit, return + the length that the string would have been if it had fit; do not + overrun the output buffer. 
*/ +static int +format_utc_offset(char *buf, ptrdiff_t size, struct tm const *tm, time_t t) +{ + long off = gmtoff(tm, &t, NULL); + char sign = ((off < 0 + || (off == 0 + && (*abbr(tm) == '-' || strcmp(abbr(tm), "zzz") == 0))) + ? '-' : '+'); + long hh; + int mm, ss; + if (off < 0) + { + if (off == LONG_MIN) + return -1; + off = -off; + } + ss = off % 60; + mm = off / 60 % 60; + hh = off / 60 / 60; + return (ss || 100 <= hh + ? my_snprintf(buf, size, "%c%02ld%02d%02d", sign, hh, mm, ss) + : mm + ? my_snprintf(buf, size, "%c%02ld%02d", sign, hh, mm) + : my_snprintf(buf, size, "%c%02ld", sign, hh)); +} + +/* Store into BUF (of size SIZE) a quoted string representation of P. + If the representation's length is less than SIZE, return the + length; the representation is not null terminated. Otherwise + return SIZE, to indicate that BUF is too small. */ +static ptrdiff_t +format_quoted_string(char *buf, ptrdiff_t size, char const *p) +{ + char *b = buf; + ptrdiff_t s = size; + if (!s) + return size; + *b++ = '"', s--; + for (;;) { + char c = *p++; + if (s <= 1) + return size; + switch (c) { + default: *b++ = c, s--; continue; + case '\0': *b++ = '"', s--; return size - s; + case '"': case '\\': break; + case ' ': c = 's'; break; + case '\f': c = 'f'; break; + case '\n': c = 'n'; break; + case '\r': c = 'r'; break; + case '\t': c = 't'; break; + case '\v': c = 'v'; break; + } + *b++ = '\\', *b++ = c, s -= 2; + } +} + +/* Store into BUF (of size SIZE) a timestamp formatted by TIME_FMT. + TM is the broken-down time, T the seconds count, AB the time zone + abbreviation, and ZONE_NAME the zone name. Return true if + successful, false if the output would require more than SIZE bytes. 
+ TIME_FMT uses the same format that strftime uses, with these + additions: + + %f zone name + %L local time as per format_local_time + %Q like "U\t%Z\tD" where U is the UT offset as for format_utc_offset + and D is the isdst flag; except omit D if it is zero, omit %Z if + it equals U, quote and escape %Z if it contains nonalphabetics, + and omit any trailing tabs. */ + +static bool +istrftime(char *buf, ptrdiff_t size, char const *time_fmt, + struct tm const *tm, time_t t, char const *ab, char const *zone_name) +{ + char *b = buf; + ptrdiff_t s = size; + char const *f = time_fmt, *p; + + for (p = f; ; p++) + if (*p == '%' && p[1] == '%') + p++; + else if (!*p + || (*p == '%' + && (p[1] == 'f' || p[1] == 'L' || p[1] == 'Q'))) { + ptrdiff_t formatted_len; + ptrdiff_t f_prefix_len = p - f; + ptrdiff_t f_prefix_copy_size = sumsize(f_prefix_len, 2); + char fbuf[100]; + bool oversized = sizeof fbuf <= f_prefix_copy_size; + char *f_prefix_copy = oversized ? xmalloc(f_prefix_copy_size) : fbuf; + memcpy(f_prefix_copy, f, f_prefix_len); + strcpy(f_prefix_copy + f_prefix_len, "X"); + formatted_len = strftime(b, s, f_prefix_copy, tm); + if (oversized) + free(f_prefix_copy); + if (formatted_len == 0) + return false; + formatted_len--; + b += formatted_len, s -= formatted_len; + if (!*p++) + break; + switch (*p) { + case 'f': + formatted_len = format_quoted_string(b, s, zone_name); + break; + case 'L': + formatted_len = format_local_time(b, s, tm); + break; + case 'Q': + { + bool show_abbr; + int offlen = format_utc_offset(b, s, tm, t); + if (! (0 <= offlen && offlen < s)) + return false; + show_abbr = strcmp(b, ab) != 0; + b += offlen, s -= offlen; + if (show_abbr) { + char const *abp; + ptrdiff_t len; + if (s <= 1) + return false; + *b++ = '\t', s--; + for (abp = ab; is_alpha(*abp); abp++) + continue; + len = (!*abp && *ab + ? 
my_snprintf(b, s, "%s", ab) + : format_quoted_string(b, s, ab)); + if (s <= len) + return false; + b += len, s -= len; + } + formatted_len + = (tm->tm_isdst + ? my_snprintf(b, s, &"\t\t%d"[show_abbr], tm->tm_isdst) + : 0); + } + break; + } + if (s <= formatted_len) + return false; + b += formatted_len, s -= formatted_len; + f = p + 1; + } + *b = '\0'; + return true; +} + +/* Show a time transition. */ +static void +showtrans(char const *time_fmt, struct tm const *tm, time_t t, char const *ab, + char const *zone_name) +{ + if (!tm) { + printf(tformat(), t); + putchar('\n'); + } else { + char stackbuf[1000]; + ptrdiff_t size = sizeof stackbuf; + char *buf = stackbuf; + char *bufalloc = NULL; + while (! istrftime(buf, size, time_fmt, tm, t, ab, zone_name)) { + size = sumsize(size, size); + free(bufalloc); + buf = bufalloc = xmalloc(size); + } + puts(buf); + free(bufalloc); + } +} + +static char const * +abbr(struct tm const *tmp) +{ +#ifdef TM_ZONE + return tmp->TM_ZONE; +#else +# if HAVE_TZNAME + if (0 <= tmp->tm_isdst && tzname[0 < tmp->tm_isdst]) + return tzname[0 < tmp->tm_isdst]; +# endif + return ""; +#endif +} + +/* +** The code below can fail on certain theoretical systems; +** it works on all known real-world systems as of 2022-01-25. +*/ + +static const char * +tformat(void) +{ +#if HAVE__GENERIC + /* C11-style _Generic is more likely to return the correct + format when distinct types have the same size. 
*/ + char const *fmt = + _Generic(+ (time_t) 0, + int: "%d", long: "%ld", long long: "%lld", + unsigned: "%u", unsigned long: "%lu", + unsigned long long: "%llu", + default: NULL); + if (fmt) + return fmt; + fmt = _Generic((time_t) 0, + intmax_t: "%"PRIdMAX, uintmax_t: "%"PRIuMAX, + default: NULL); + if (fmt) + return fmt; +#endif + if (0 > (time_t) -1) { /* signed */ + if (sizeof(time_t) == sizeof(intmax_t)) + return "%"PRIdMAX; + if (sizeof(time_t) > sizeof(long)) + return "%lld"; + if (sizeof(time_t) > sizeof(int)) + return "%ld"; + return "%d"; + } +#ifdef PRIuMAX + if (sizeof(time_t) == sizeof(uintmax_t)) + return "%"PRIuMAX; +#endif + if (sizeof(time_t) > sizeof(unsigned long)) + return "%llu"; + if (sizeof(time_t) > sizeof(unsigned int)) + return "%lu"; + return "%u"; +} + +static void +dumptime(register const struct tm *timeptr) +{ + static const char wday_name[][4] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" + }; + static const char mon_name[][4] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }; + register int lead; + register int trail; + int DIVISOR = 10; + + /* + ** The packaged localtime_rz and gmtime_r never put out-of-range + ** values in tm_wday or tm_mon, but since this code might be compiled + ** with other (perhaps experimental) versions, paranoia is in order. + */ + printf("%s %s%3d %.2d:%.2d:%.2d ", + ((0 <= timeptr->tm_wday + && timeptr->tm_wday < sizeof wday_name / sizeof wday_name[0]) + ? wday_name[timeptr->tm_wday] : "???"), + ((0 <= timeptr->tm_mon + && timeptr->tm_mon < sizeof mon_name / sizeof mon_name[0]) + ? 
mon_name[timeptr->tm_mon] : "???"), + timeptr->tm_mday, timeptr->tm_hour, + timeptr->tm_min, timeptr->tm_sec); + trail = timeptr->tm_year % DIVISOR + TM_YEAR_BASE % DIVISOR; + lead = timeptr->tm_year / DIVISOR + TM_YEAR_BASE / DIVISOR + + trail / DIVISOR; + trail %= DIVISOR; + if (trail < 0 && lead > 0) { + trail += DIVISOR; + --lead; + } else if (lead < 0 && trail > 0) { + trail -= DIVISOR; + ++lead; + } + if (lead == 0) + printf("%d", trail); + else printf("%d%d", lead, ((trail < 0) ? -trail : trail)); +} diff --git a/lib-tzcode/zic.8 b/lib-tzcode/zic.8 new file mode 100644 index 0000000..c467efe --- /dev/null +++ b/lib-tzcode/zic.8 @@ -0,0 +1,903 @@ +.\" This file is in the public domain, so clarified as of +.\" 2009-05-17 by Arthur David Olson. +.TH zic 8 "" "Time Zone Database" +.SH NAME +zic \- timezone compiler +.SH SYNOPSIS +.B zic +[ +.I option +\&... ] [ +.I filename +\&... ] +.SH DESCRIPTION +.ie '\(lq'' .ds lq \&"\" +.el .ds lq \(lq\" +.ie '\(rq'' .ds rq \&"\" +.el .ds rq \(rq\" +.de q +\\$3\*(lq\\$1\*(rq\\$2 +.. +.ie '\(la'' .ds < < +.el .ds < \(la +.ie '\(ra'' .ds > > +.el .ds > \(ra +.ie \n(.g \{\ +. ds : \: +. ds - \f(CR-\fP +.\} +.el \{\ +. ds : +. ds - \- +.\} +.ds d " degrees +.ds m " minutes +.ds s " seconds +.ds _ " \& +.if t \{\ +. if \n(.g .if c \(de .if c \(fm .if c \(sd \{\ +. ds d \(de +. ds m \(fm +. ds s \(sd +. ds _ \| +. \} +.\} +The +.B zic +program reads text from the file(s) named on the command line +and creates the timezone information format (TZif) files +specified in this input. +If a +.I filename +is +.q "\*-" , +standard input is read. +.SH OPTIONS +.TP +.B "\*-\*-version" +Output version information and exit. +.TP +.B \*-\*-help +Output short usage message and exit. +.TP +.BI "\*-b " bloat +Output backward-compatibility data as specified by +.IR bloat . 
+If +.I bloat +is +.BR fat , +generate additional data entries that work around potential bugs or +incompatibilities in older software, such as software that mishandles +the 64-bit generated data. +If +.I bloat +is +.BR slim , +keep the output files small; this can help check for the bugs +and incompatibilities. +The default is +.BR slim , +as software that mishandles 64-bit data typically +mishandles timestamps after the year 2038 anyway. +Also see the +.B \*-r +option for another way to alter output size. +.TP +.BI "\*-d " directory +Create time conversion information files in the named directory rather than +in the standard directory named below. +.TP +.BI "\*-l " timezone +Use +.I timezone +as local time. +.B zic +will act as if the input contained a link line of the form +.sp +.ti +.5i +.ta \w'Link\0\0'u +\w'\fItimezone\fP\0\0'u +Link \fItimezone\fP localtime +.sp +If +.I timezone +is +.BR \*- , +any already-existing link is removed. +.TP +.BI "\*-L " leapsecondfilename +Read leap second information from the file with the given name. +If this option is not used, +no leap second information appears in output files. +.TP +.BI "\*-p " timezone +Use +.IR timezone 's +rules when handling nonstandard +TZ strings like "EET\*-2EEST" that lack transition rules. +.B zic +will act as if the input contained a link line of the form +.sp +.ti +.5i +Link \fItimezone\fP posixrules +.sp +Unless +.IR timezone " is" +.q "\*-" , +this option is obsolete and poorly supported. +Among other things it should not be used for timestamps after the year 2037, +and it should not be combined with +.B "\*-b slim" +if +.IR timezone 's +transitions are at standard time or Universal Time (UT) instead of local time. +.sp +If +.I timezone +is +.BR \*- , +any already-existing link is removed. 
+.TP +.BR "\*-r " "[\fB@\fP\fIlo\fP][\fB/@\fP\fIhi\fP]" +Limit the applicability of output files +to timestamps in the range from +.I lo +(inclusive) to +.I hi +(exclusive), where +.I lo +and +.I hi +are possibly signed decimal counts of seconds since the Epoch +(1970-01-01 00:00:00 UTC). +Omitted counts default to extreme values. +The output files use UT offset 0 and abbreviation +.q "\*-00" +in place of the omitted timestamp data. +For example, +.q "zic \*-r @0" +omits data intended for negative timestamps (i.e., before the Epoch), and +.q "zic \*-r @0/@2147483648" +outputs data intended only for nonnegative timestamps that fit into +31-bit signed integers. +On platforms with GNU +.BR date , +.q "zic \*-r @$(date +%s)" +omits data intended for past timestamps. +Although this option typically reduces the output file's size, +the size can increase due to the need to represent the timestamp range +boundaries, particularly if +.I hi +causes a TZif file to contain explicit entries for +.RI pre- hi +transitions rather than concisely representing them +with an extended POSIX TZ string. +Also see the +.B "\*-b slim" +option for another way to shrink output size. +.TP +.BI "\*-R @" hi +Generate redundant trailing explicit transitions for timestamps +that occur less than +.I hi +seconds since the Epoch, even though the transitions could be +more concisely represented via the extended POSIX TZ string. +This option does not affect the represented timestamps. +Although it accommodates nonstandard TZif readers +that ignore the extended POSIX TZ string, +it increases the size of the altered output files. +.TP +.BI "\*-t " file +When creating local time information, put the configuration link in +the named file rather than in the standard location. +.TP +.B \*-v +Be more verbose, and complain about the following situations: +.RS +.PP +The input specifies a link to a link, +something not supported by some older parsers, including +.B zic +itself through release 2022e. 
+.PP +A year that appears in a data file is outside the range +of representable years. +.PP +A time of 24:00 or more appears in the input. +Pre-1998 versions of +.B zic +prohibit 24:00, and pre-2007 versions prohibit times greater than 24:00. +.PP +A rule goes past the start or end of the month. +Pre-2004 versions of +.B zic +prohibit this. +.PP +A time zone abbreviation uses a +.B %z +format. +Pre-2015 versions of +.B zic +do not support this. +.PP +A timestamp contains fractional seconds. +Pre-2018 versions of +.B zic +do not support this. +.PP +The input contains abbreviations that are mishandled by pre-2018 versions of +.B zic +due to a longstanding coding bug. +These abbreviations include +.q L +for +.q Link , +.q mi +for +.q min , +.q Sa +for +.q Sat , +and +.q Su +for +.q Sun . +.PP +The output file does not contain all the information about the +long-term future of a timezone, because the future cannot be summarized as +an extended POSIX TZ string. For example, as of 2023 this problem +occurs for Morocco's daylight-saving rules, as these rules are based +on predictions for when Ramadan will be observed, something that +an extended POSIX TZ string cannot represent. +.PP +The output contains data that may not be handled properly by client +code designed for older +.B zic +output formats. These compatibility issues affect only timestamps +before 1970 or after the start of 2038. +.PP +The output contains a truncated leap second table, +which can cause some older TZif readers to misbehave. +This can occur if the +.B "\*-L" +option is used, and either an Expires line is present or +the +.B "\*-r" +option is also used. +.PP +The output file contains more than 1200 transitions, +which may be mishandled by some clients. +The current reference client supports at most 2000 transitions; +pre-2014 versions of the reference client support at most 1200 +transitions. +.PP +A time zone abbreviation has fewer than 3 or more than 6 characters. 
+POSIX requires at least 3, and requires implementations to support +at least 6. +.PP +An output file name contains a byte that is not an ASCII letter, +.q "\*-" , +.q "/" , +or +.q "_" ; +or it contains a file name component that contains more than 14 bytes +or that starts with +.q "\*-" . +.RE +.SH FILES +Input files use the format described in this section; output files use +.BR tzfile (5) +format. +.PP +Input files should be text files, that is, they should be a series of +zero or more lines, each ending in a newline byte and containing at +most 2048 bytes counting the newline, and without any NUL bytes. +The input text's encoding +is typically UTF-8 or ASCII; it should have a unibyte representation +for the POSIX Portable Character Set (PPCS) +\*<https://pubs.opengroup.org/\*:onlinepubs/\*:9699919799/\*:basedefs/\*:V1_chap06.html\*> +and the encoding's non-unibyte characters should consist entirely of +non-PPCS bytes. Non-PPCS characters typically occur only in comments: +although output file names and time zone abbreviations can contain +nearly any character, other software will work better if these are +limited to the restricted syntax described under the +.B \*-v +option. +.PP +Input lines are made up of fields. +Fields are separated from one another by one or more white space characters. +The white space characters are space, form feed, carriage return, newline, +tab, and vertical tab. +Leading and trailing white space on input lines is ignored. +An unquoted sharp character (#) in the input introduces a comment which extends +to the end of the line the sharp character appears on. +White space characters and sharp characters may be enclosed in double quotes +(") if they're to be used as part of a field. +Any line that is blank (after comment stripping) is ignored. +Nonblank lines are expected to be of one of three types: +rule lines, zone lines, and link lines. +.PP +Names must be in English and are case insensitive. 
+They appear in several contexts, and include month and weekday names +and keywords such as +.BR "maximum" , +.BR "only" , +.BR "Rolling" , +and +.BR "Zone" . +A name can be abbreviated by omitting all but an initial prefix; any +abbreviation must be unambiguous in context. +.PP +A rule line has the form +.nf +.ti +.5i +.ta \w'Rule\0\0'u +\w'NAME\0\0'u +\w'FROM\0\0'u +\w'1973\0\0'u +\w'\*-\0\0'u +\w'Apr\0\0'u +\w'lastSun\0\0'u +\w'2:00w\0\0'u +\w'1:00d\0\0'u +.sp +Rule NAME FROM TO \*- IN ON AT SAVE LETTER/S +.sp +For example: +.ti +.5i +.sp +Rule US 1967 1973 \*- Apr lastSun 2:00w 1:00d D +.sp +.fi +The fields that make up a rule line are: +.TP "\w'LETTER/S'u" +.B NAME +Gives the name of the rule set that contains this line. +The name must start with a character that is neither +an ASCII digit nor +.q \*- +nor +.q + . +To allow for future extensions, +an unquoted name should not contain characters from the set +.ie \n(.g .q \f(CR!$%&\(aq()*,/:;<=>?@[\e]\(ha\(ga{|}\(ti\fP . +.el .ie t .q \f(CW!$%&'()*,/:;<=>?@[\e]^\(ga{|}~\fP . +.el .q !$%&'()*,/:;<=>?@[\e]^`{|}~ . +.TP +.B FROM +Gives the first year in which the rule applies. +Any signed integer year can be supplied; the proleptic Gregorian calendar +is assumed, with year 0 preceding year 1. +The word +.B minimum +(or an abbreviation) means the indefinite past. +The word +.B maximum +(or an abbreviation) means the indefinite future. +Rules can describe times that are not representable as time values, +with the unrepresentable times ignored; this allows rules to be portable +among hosts with differing time value types. +.TP +.B TO +Gives the final year in which the rule applies. +In addition to +.B minimum +and +.B maximum +(as above), +the word +.B only +(or an abbreviation) +may be used to repeat the value of the +.B FROM +field. +.TP +.B \*- +Is a reserved field and should always contain +.q \*- +for compatibility with older versions of +.BR zic . 
+It was previously known as the +.B TYPE +field, which could contain values to allow a +separate script to further restrict in which +.q types +of years the rule would apply. +.TP +.B IN +Names the month in which the rule takes effect. +Month names may be abbreviated. +.TP +.B ON +Gives the day on which the rule takes effect. +Recognized forms include: +.nf +.in +.5i +.sp +.ta \w'Sun<=25\0\0'u +5 the fifth of the month +lastSun the last Sunday in the month +lastMon the last Monday in the month +Sun>=8 first Sunday on or after the eighth +Sun<=25 last Sunday on or before the 25th +.fi +.in -.5i +.sp +A weekday name (e.g., +.BR "Sunday" ) +or a weekday name preceded by +.q "last" +(e.g., +.BR "lastSunday" ) +may be abbreviated or spelled out in full. +There must be no white space characters within the +.B ON +field. +The +.q <= +and +.q >= +constructs can result in a day in the neighboring month; +for example, the IN-ON combination +.q "Oct Sun>=31" +stands for the first Sunday on or after October 31, +even if that Sunday occurs in November. +.TP +.B AT +Gives the time of day at which the rule takes effect, +relative to 00:00, the start of a calendar day. +Recognized forms include: +.nf +.in +.5i +.sp +.ta \w'00:19:32.13\0\0'u +2 time in hours +2:00 time in hours and minutes +01:28:14 time in hours, minutes, and seconds +00:19:32.13 time with fractional seconds +12:00 midday, 12 hours after 00:00 +15:00 3 PM, 15 hours after 00:00 +24:00 end of day, 24 hours after 00:00 +260:00 260 hours after 00:00 +\*-2:30 2.5 hours before 00:00 +\*- equivalent to 0 +.fi +.in -.5i +.sp +Although +.B zic +rounds times to the nearest integer second +(breaking ties to the even integer), the fractions may be useful +to other applications requiring greater precision. +The source format does not specify any maximum precision. 
+Any of these forms may be followed by the letter +.B w +if the given time is local or +.q "wall clock" +time, +.B s +if the given time is standard time without any adjustment for daylight saving, +or +.B u +(or +.B g +or +.BR z ) +if the given time is universal time; +in the absence of an indicator, +local (wall clock) time is assumed. +These forms ignore leap seconds; for example, +if a leap second occurs at 00:59:60 local time, +.q "1:00" +stands for 3601 seconds after local midnight instead of the usual 3600 seconds. +The intent is that a rule line describes the instants when a +clock/calendar set to the type of time specified in the +.B AT +field would show the specified date and time of day. +.TP +.B SAVE +Gives the amount of time to be added to local standard time when the rule is in +effect, and whether the resulting time is standard or daylight saving. +This field has the same format as the +.B AT +field +except with a different set of suffix letters: +.B s +for standard time and +.B d +for daylight saving time. +The suffix letter is typically omitted, and defaults to +.B s +if the offset is zero and to +.B d +otherwise. +Negative offsets are allowed; in Ireland, for example, daylight saving +time is observed in winter and has a negative offset relative to +Irish Standard Time. +The offset is merely added to standard time; for example, +.B zic +does not distinguish a 10:30 standard time plus an 0:30 +.B SAVE +from a 10:00 standard time plus a 1:00 +.BR SAVE . +.TP +.B LETTER/S +Gives the +.q "variable part" +(for example, the +.q "S" +or +.q "D" +in +.q "EST" +or +.q "EDT" ) +of time zone abbreviations to be used when this rule is in effect. +If this field is +.q \*- , +the variable part is null. 
+.PP +A zone line has the form +.sp +.nf +.ti +.5i +.ta \w'Zone\0\0'u +\w'Asia/Amman\0\0'u +\w'STDOFF\0\0'u +\w'Jordan\0\0'u +\w'FORMAT\0\0'u +Zone NAME STDOFF RULES FORMAT [UNTIL] +.sp +For example: +.sp +.ti +.5i +Zone Asia/Amman 2:00 Jordan EE%sT 2017 Oct 27 01:00 +.sp +.fi +The fields that make up a zone line are: +.TP "\w'STDOFF'u" +.B NAME +The name of the timezone. +This is the name used in creating the time conversion information file for the +timezone. +It should not contain a file name component +.q ".\&" +or +.q ".." ; +a file name component is a maximal substring that does not contain +.q "/" . +.TP +.B STDOFF +The amount of time to add to UT to get standard time, +without any adjustment for daylight saving. +This field has the same format as the +.B AT +and +.B SAVE +fields of rule lines, except without suffix letters; +begin the field with a minus sign if time must be subtracted from UT. +.TP +.B RULES +The name of the rules that apply in the timezone or, +alternatively, a field in the same format as a rule-line SAVE column, +giving the amount of time to be added to local standard time +and whether the resulting time is standard or daylight saving. +If this field is +.B \*- +then standard time always applies. +When an amount of time is given, only the sum of standard time and +this amount matters. +.TP +.B FORMAT +The format for time zone abbreviations. +The pair of characters +.B %s +is used to show where the +.q "variable part" +of the time zone abbreviation goes. +Alternatively, a format can use the pair of characters +.B %z +to stand for the UT offset in the form +.RI \(+- hh , +.RI \(+- hhmm , +or +.RI \(+- hhmmss , +using the shortest form that does not lose information, where +.IR hh , +.IR mm , +and +.I ss +are the hours, minutes, and seconds east (+) or west (\-) of UT. +Alternatively, +a slash (/) +separates standard and daylight abbreviations. 
+To conform to POSIX, a time zone abbreviation should contain only +alphanumeric ASCII characters, +.q "+" +and +.q "\*-". +By convention, the time zone abbreviation +.q "\*-00" +is a placeholder that means local time is unspecified. +.TP +.B UNTIL +The time at which the UT offset or the rule(s) change for a location. +It takes the form of one to four fields YEAR [MONTH [DAY [TIME]]]. +If this is specified, +the time zone information is generated from the given UT offset +and rule change until the time specified, which is interpreted using +the rules in effect just before the transition. +The month, day, and time of day have the same format as the IN, ON, and AT +fields of a rule; trailing fields can be omitted, and default to the +earliest possible value for the missing fields. +.IP +The next line must be a +.q "continuation" +line; this has the same form as a zone line except that the +string +.q "Zone" +and the name are omitted, as the continuation line will +place information starting at the time specified as the +.q "until" +information in the previous line in the file used by the previous line. +Continuation lines may contain +.q "until" +information, just as zone lines do, indicating that the next line is a further +continuation. +.PP +If a zone changes at the same instant that a rule would otherwise take +effect in the earlier zone or continuation line, the rule is ignored. +A zone or continuation line +.I L +with a named rule set starts with standard time by default: +that is, any of +.IR L 's +timestamps preceding +.IR L 's +earliest rule use the rule in effect after +.IR L 's +first transition into standard time. +In a single zone it is an error if two rules take effect at the same +instant, or if two zone changes take effect at the same instant. 
+.PP +If a continuation line subtracts +.I N +seconds from the UT offset after a transition that would be +interpreted to be later if using the continuation line's UT offset and +rules, the +.q "until" +time of the previous zone or continuation line is interpreted +according to the continuation line's UT offset and rules, and any rule +that would otherwise take effect in the next +.I N +seconds is instead assumed to take effect simultaneously. +For example: +.br +.ne 7 +.nf +.in +2m +.ta \w'# Rule\0\0'u +\w'NAME\0\0'u +\w'FROM\0\0'u +\w'2006\0\0'u +\w'\*-\0\0'u +\w'Oct\0\0'u +\w'lastSun\0\0'u +\w'2:00\0\0'u +\w'SAVE\0\0'u +.sp +# Rule NAME FROM TO \*- IN ON AT SAVE LETTER/S +Rule US 1967 2006 - Oct lastSun 2:00 0 S +Rule US 1967 1973 - Apr lastSun 2:00 1:00 D +.ta \w'Zone\0\0America/Menominee\0\0'u +\w'STDOFF\0\0'u +\w'RULES\0\0'u +\w'FORMAT\0\0'u +# Zone\0\0NAME STDOFF RULES FORMAT [UNTIL] +Zone\0\0America/Menominee \*-5:00 \*- EST 1973 Apr 29 2:00 + \*-6:00 US C%sT +.sp +.in +.fi +Here, an incorrect reading would be there were two clock changes on 1973-04-29, +the first from 02:00 EST (\*-05) to 01:00 CST (\*-06), +and the second an hour later from 02:00 CST (\*-06) to 03:00 CDT (\*-05). +However, +.B zic +interprets this more sensibly as a single transition from 02:00 CST (\*-05) to +02:00 CDT (\*-05). +.PP +A link line has the form +.sp +.nf +.ti +.5i +.ta \w'Link\0\0'u +\w'Europe/Istanbul\0\0'u +Link TARGET LINK-NAME +.sp +For example: +.sp +.ti +.5i +Link Europe/Istanbul Asia/Istanbul +.sp +.fi +The +.B TARGET +field should appear as the +.B NAME +field in some zone line or as the +.B LINK-NAME +field in some link line. +The +.B LINK-NAME +field is used as an alternative name for that zone; +it has the same syntax as a zone line's +.B NAME +field. +Links can chain together, although the behavior is unspecified if a +chain of one or more links does not terminate in a Zone name. +A link line can appear before the line that defines the link target. 
+For example: +.sp +.ne 3 +.nf +.in +2m +.ta \w'Zone\0\0'u +\w'Greenwich\0\0'u +Link Greenwich G_M_T +Link Etc/GMT Greenwich +Zone Etc/GMT\0\00\0\0\*-\0\0GMT +.sp +.in +.fi +The two links are chained together, and G_M_T, Greenwich, and Etc/GMT +all name the same zone. +.PP +Except for continuation lines, +lines may appear in any order in the input. +However, the behavior is unspecified if multiple zone or link lines +define the same name. +.PP +The file that describes leap seconds can have leap lines and an +expiration line. +Leap lines have the following form: +.nf +.ti +.5i +.ta \w'Leap\0\0'u +\w'YEAR\0\0'u +\w'MONTH\0\0'u +\w'DAY\0\0'u +\w'HH:MM:SS\0\0'u +\w'CORR\0\0'u +.sp +Leap YEAR MONTH DAY HH:MM:SS CORR R/S +.sp +For example: +.ti +.5i +.sp +Leap 2016 Dec 31 23:59:60 + S +.sp +.fi +The +.BR YEAR , +.BR MONTH , +.BR DAY , +and +.B HH:MM:SS +fields tell when the leap second happened. +The +.B CORR +field +should be +.q "+" +if a second was added +or +.q "\*-" +if a second was skipped. +The +.B R/S +field +should be (an abbreviation of) +.q "Stationary" +if the leap second time given by the other fields should be interpreted as UTC +or +(an abbreviation of) +.q "Rolling" +if the leap second time given by the other fields should be interpreted as +local (wall clock) time. +.PP +Rolling leap seconds were implemented back when it was not +clear whether common practice was rolling or stationary, +with concerns that one would see +Times Square ball drops where there'd be a +.q "3... 2... 1... leap... Happy New Year" +countdown, placing the leap second at +midnight New York time rather than midnight UTC. +However, this countdown style does not seem to have caught on, +which means rolling leap seconds are not used in practice; +also, they are not supported if the +.B \*-r +option is used. 
+.PP +The expiration line, if present, has the form: +.nf +.ti +.5i +.ta \w'Expires\0\0'u +\w'YEAR\0\0'u +\w'MONTH\0\0'u +\w'DAY\0\0'u +.sp +Expires YEAR MONTH DAY HH:MM:SS +.sp +For example: +.ti +.5i +.sp +Expires 2020 Dec 28 00:00:00 +.sp +.fi +The +.BR YEAR , +.BR MONTH , +.BR DAY , +and +.B HH:MM:SS +fields give the expiration timestamp in UTC for the leap second table. +.br +.ne 22 +.SH "EXTENDED EXAMPLE" +Here is an extended example of +.B zic +input, intended to illustrate many of its features. +.nf +.in +2m +.ta \w'# Rule\0\0'u +\w'NAME\0\0'u +\w'FROM\0\0'u +\w'1973\0\0'u +\w'\*-\0\0'u +\w'Apr\0\0'u +\w'lastSun\0\0'u +\w'2:00\0\0'u +\w'SAVE\0\0'u +.sp +# Rule NAME FROM TO \*- IN ON AT SAVE LETTER/S +Rule Swiss 1941 1942 \*- May Mon>=1 1:00 1:00 S +Rule Swiss 1941 1942 \*- Oct Mon>=1 2:00 0 \*- +.sp .5 +Rule EU 1977 1980 \*- Apr Sun>=1 1:00u 1:00 S +Rule EU 1977 only \*- Sep lastSun 1:00u 0 \*- +Rule EU 1978 only \*- Oct 1 1:00u 0 \*- +Rule EU 1979 1995 \*- Sep lastSun 1:00u 0 \*- +Rule EU 1981 max \*- Mar lastSun 1:00u 1:00 S +Rule EU 1996 max \*- Oct lastSun 1:00u 0 \*- +.sp +.ta \w'# Zone\0\0'u +\w'Europe/Zurich\0\0'u +\w'0:29:45.50\0\0'u +\w'RULES\0\0'u +\w'FORMAT\0\0'u +# Zone NAME STDOFF RULES FORMAT [UNTIL] +Zone Europe/Zurich 0:34:08 \*- LMT 1853 Jul 16 + 0:29:45.50 \*- BMT 1894 Jun + 1:00 Swiss CE%sT 1981 + 1:00 EU CE%sT +.sp +Link Europe/Zurich Europe/Vaduz +.sp +.in +.fi +In this example, the EU rules are for the European Union +and for its predecessor organization, the European Communities. +The timezone is named Europe/Zurich and it has the alias Europe/Vaduz. +This example says that Zurich was 34 minutes and 8 +seconds east of UT until 1853-07-16 at 00:00, when the legal offset +was changed to +7\*d\*_26\*m\*_22.50\*s, +which works out to 0:29:45.50; +.B zic +treats this by rounding it to 0:29:46. 
+After 1894-06-01 at 00:00 the UT offset became one hour +and Swiss daylight saving rules (defined with lines beginning with +.q "Rule Swiss") +apply. From 1981 to the present, EU daylight saving rules have +applied, and the UTC offset has remained at one hour. +.PP +In 1941 and 1942, daylight saving time applied from the first Monday +in May at 01:00 to the first Monday in October at 02:00. +The pre-1981 EU daylight-saving rules have no effect +here, but are included for completeness. Since 1981, daylight +saving has begun on the last Sunday in March at 01:00 UTC. +Until 1995 it ended the last Sunday in September at 01:00 UTC, +but this changed to the last Sunday in October starting in 1996. +.PP +For purposes of display, +.q "LMT" +and +.q "BMT" +were initially used, respectively. Since +Swiss rules and later EU rules were applied, the time zone abbreviation +has been CET for standard time and CEST for daylight saving +time. +.SH FILES +.TP +.I /etc/localtime +Default local timezone file. +.TP +.I /usr/share/zoneinfo +Default timezone information directory. +.SH NOTES +For areas with more than two types of local time, +you may need to use local standard time in the +.B AT +field of the earliest transition time's rule to ensure that +the earliest transition time recorded in the compiled file is correct. +.PP +If, +for a particular timezone, +a clock advance caused by the start of daylight saving +coincides with and is equal to +a clock retreat caused by a change in UT offset, +.B zic +produces a single transition to daylight saving at the new UT offset +without any change in local (wall clock) time. +To get separate transitions +use multiple zone continuation lines +specifying transition instants using universal time. 
+.SH SEE ALSO +.BR tzfile (5), +.BR zdump (8) diff --git a/lib-tzcode/zic.8.txt b/lib-tzcode/zic.8.txt new file mode 100644 index 0000000..cf86f58 --- /dev/null +++ b/lib-tzcode/zic.8.txt @@ -0,0 +1,517 @@ +zic(8) System Manager's Manual zic(8) + +NAME + zic - timezone compiler + +SYNOPSIS + zic [ option ... ] [ filename ... ] + +DESCRIPTION + The zic program reads text from the file(s) named on the command line + and creates the timezone information format (TZif) files specified in + this input. If a filename is "-", standard input is read. + +OPTIONS + --version + Output version information and exit. + + --help Output short usage message and exit. + + -b bloat + Output backward-compatibility data as specified by bloat. If + bloat is fat, generate additional data entries that work around + potential bugs or incompatibilities in older software, such as + software that mishandles the 64-bit generated data. If bloat is + slim, keep the output files small; this can help check for the + bugs and incompatibilities. The default is slim, as software + that mishandles 64-bit data typically mishandles timestamps + after the year 2038 anyway. Also see the -r option for another + way to alter output size. + + -d directory + Create time conversion information files in the named directory + rather than in the standard directory named below. + + -l timezone + Use timezone as local time. zic will act as if the input + contained a link line of the form + + Link timezone localtime + + If timezone is -, any already-existing link is removed. + + -L leapsecondfilename + Read leap second information from the file with the given name. + If this option is not used, no leap second information appears + in output files. + + -p timezone + Use timezone's rules when handling nonstandard TZ strings like + "EET-2EEST" that lack transition rules. 
zic will act as if the + input contained a link line of the form + + Link timezone posixrules + + Unless timezone is "-", this option is obsolete and poorly + supported. Among other things it should not be used for + timestamps after the year 2037, and it should not be combined + with -b slim if timezone's transitions are at standard time or + Universal Time (UT) instead of local time. + + If timezone is -, any already-existing link is removed. + + -r [@lo][/@hi] + Limit the applicability of output files to timestamps in the + range from lo (inclusive) to hi (exclusive), where lo and hi are + possibly signed decimal counts of seconds since the Epoch + (1970-01-01 00:00:00 UTC). Omitted counts default to extreme + values. The output files use UT offset 0 and abbreviation "-00" + in place of the omitted timestamp data. For example, "zic -r + @0" omits data intended for negative timestamps (i.e., before + the Epoch), and "zic -r @0/@2147483648" outputs data intended + only for nonnegative timestamps that fit into 31-bit signed + integers. On platforms with GNU date, "zic -r @$(date +%s)" + omits data intended for past timestamps. Although this option + typically reduces the output file's size, the size can increase + due to the need to represent the timestamp range boundaries, + particularly if hi causes a TZif file to contain explicit + entries for pre-hi transitions rather than concisely + representing them with an extended POSIX TZ string. Also see + the -b slim option for another way to shrink output size. + + -R @hi Generate redundant trailing explicit transitions for timestamps + that occur less than hi seconds since the Epoch, even though the + transitions could be more concisely represented via the extended + POSIX TZ string. This option does not affect the represented + timestamps. Although it accommodates nonstandard TZif readers + that ignore the extended POSIX TZ string, it increases the size + of the altered output files. 
+ + -t file + When creating local time information, put the configuration link + in the named file rather than in the standard location. + + -v Be more verbose, and complain about the following situations: + + The input specifies a link to a link, something not supported by + some older parsers, including zic itself through release 2022e. + + A year that appears in a data file is outside the range of + representable years. + + A time of 24:00 or more appears in the input. Pre-1998 versions + of zic prohibit 24:00, and pre-2007 versions prohibit times + greater than 24:00. + + A rule goes past the start or end of the month. Pre-2004 + versions of zic prohibit this. + + A time zone abbreviation uses a %z format. Pre-2015 versions of + zic do not support this. + + A timestamp contains fractional seconds. Pre-2018 versions of + zic do not support this. + + The input contains abbreviations that are mishandled by pre-2018 + versions of zic due to a longstanding coding bug. These + abbreviations include "L" for "Link", "mi" for "min", "Sa" for + "Sat", and "Su" for "Sun". + + The output file does not contain all the information about the + long-term future of a timezone, because the future cannot be + summarized as an extended POSIX TZ string. For example, as of + 2023 this problem occurs for Morocco's daylight-saving rules, as + these rules are based on predictions for when Ramadan will be + observed, something that an extended POSIX TZ string cannot + represent. + + The output contains data that may not be handled properly by + client code designed for older zic output formats. These + compatibility issues affect only timestamps before 1970 or after + the start of 2038. + + The output contains a truncated leap second table, which can + cause some older TZif readers to misbehave. This can occur if + the -L option is used, and either an Expires line is present or + the -r option is also used. 
+ + The output file contains more than 1200 transitions, which may + be mishandled by some clients. The current reference client + supports at most 2000 transitions; pre-2014 versions of the + reference client support at most 1200 transitions. + + A time zone abbreviation has fewer than 3 or more than 6 + characters. POSIX requires at least 3, and requires + implementations to support at least 6. + + An output file name contains a byte that is not an ASCII letter, + "-", "/", or "_"; or it contains a file name component that + contains more than 14 bytes or that starts with "-". + +FILES + Input files use the format described in this section; output files use + tzfile(5) format. + + Input files should be text files, that is, they should be a series of + zero or more lines, each ending in a newline byte and containing at + most 2048 bytes counting the newline, and without any NUL bytes. The + input text's encoding is typically UTF-8 or ASCII; it should have a + unibyte representation for the POSIX Portable Character Set (PPCS) + and the encoding's non-unibyte characters should consist + entirely of non-PPCS bytes. Non-PPCS characters typically occur only + in comments: although output file names and time zone abbreviations can + contain nearly any character, other software will work better if these + are limited to the restricted syntax described under the -v option. + + Input lines are made up of fields. Fields are separated from one + another by one or more white space characters. The white space + characters are space, form feed, carriage return, newline, tab, and + vertical tab. Leading and trailing white space on input lines is + ignored. An unquoted sharp character (#) in the input introduces a + comment which extends to the end of the line the sharp character + appears on. White space characters and sharp characters may be + enclosed in double quotes (") if they're to be used as part of a field. + Any line that is blank (after comment stripping) is ignored. 
Nonblank + lines are expected to be of one of three types: rule lines, zone lines, + and link lines. + + Names must be in English and are case insensitive. They appear in + several contexts, and include month and weekday names and keywords such + as maximum, only, Rolling, and Zone. A name can be abbreviated by + omitting all but an initial prefix; any abbreviation must be + unambiguous in context. + + A rule line has the form + + Rule NAME FROM TO - IN ON AT SAVE LETTER/S + + For example: + + Rule US 1967 1973 - Apr lastSun 2:00w 1:00d D + + The fields that make up a rule line are: + + NAME Gives the name of the rule set that contains this line. The + name must start with a character that is neither an ASCII digit + nor "-" nor "+". To allow for future extensions, an unquoted + name should not contain characters from the set + "!$%&'()*,/:;<=>?@[\]^`{|}~". + + FROM Gives the first year in which the rule applies. Any signed + integer year can be supplied; the proleptic Gregorian calendar + is assumed, with year 0 preceding year 1. The word minimum (or + an abbreviation) means the indefinite past. The word maximum + (or an abbreviation) means the indefinite future. Rules can + describe times that are not representable as time values, with + the unrepresentable times ignored; this allows rules to be + portable among hosts with differing time value types. + + TO Gives the final year in which the rule applies. In addition to + minimum and maximum (as above), the word only (or an + abbreviation) may be used to repeat the value of the FROM + field. + + - Is a reserved field and should always contain "-" for + compatibility with older versions of zic. It was previously + known as the TYPE field, which could contain values to allow a + separate script to further restrict in which "types" of years + the rule would apply. + + IN Names the month in which the rule takes effect. Month names + may be abbreviated. + + ON Gives the day on which the rule takes effect. 
Recognized forms + include: + + 5 the fifth of the month + lastSun the last Sunday in the month + lastMon the last Monday in the month + Sun>=8 first Sunday on or after the eighth + Sun<=25 last Sunday on or before the 25th + + A weekday name (e.g., Sunday) or a weekday name preceded by + "last" (e.g., lastSunday) may be abbreviated or spelled out in + full. There must be no white space characters within the ON + field. The "<=" and ">=" constructs can result in a day in the + neighboring month; for example, the IN-ON combination "Oct + Sun>=31" stands for the first Sunday on or after October 31, + even if that Sunday occurs in November. + + AT Gives the time of day at which the rule takes effect, relative + to 00:00, the start of a calendar day. Recognized forms + include: + + 2 time in hours + 2:00 time in hours and minutes + 01:28:14 time in hours, minutes, and seconds + 00:19:32.13 time with fractional seconds + 12:00 midday, 12 hours after 00:00 + 15:00 3 PM, 15 hours after 00:00 + 24:00 end of day, 24 hours after 00:00 + 260:00 260 hours after 00:00 + -2:30 2.5 hours before 00:00 + - equivalent to 0 + + Although zic rounds times to the nearest integer second + (breaking ties to the even integer), the fractions may be + useful to other applications requiring greater precision. The + source format does not specify any maximum precision. Any of + these forms may be followed by the letter w if the given time + is local or "wall clock" time, s if the given time is standard + time without any adjustment for daylight saving, or u (or g or + z) if the given time is universal time; in the absence of an + indicator, local (wall clock) time is assumed. These forms + ignore leap seconds; for example, if a leap second occurs at + 00:59:60 local time, "1:00" stands for 3601 seconds after local + midnight instead of the usual 3600 seconds. 
The intent is that + a rule line describes the instants when a clock/calendar set to + the type of time specified in the AT field would show the + specified date and time of day. + + SAVE Gives the amount of time to be added to local standard time + when the rule is in effect, and whether the resulting time is + standard or daylight saving. This field has the same format as + the AT field except with a different set of suffix letters: s + for standard time and d for daylight saving time. The suffix + letter is typically omitted, and defaults to s if the offset is + zero and to d otherwise. Negative offsets are allowed; in + Ireland, for example, daylight saving time is observed in + winter and has a negative offset relative to Irish Standard + Time. The offset is merely added to standard time; for + example, zic does not distinguish a 10:30 standard time plus an + 0:30 SAVE from a 10:00 standard time plus a 1:00 SAVE. + + LETTER/S + Gives the "variable part" (for example, the "S" or "D" in "EST" + or "EDT") of time zone abbreviations to be used when this rule + is in effect. If this field is "-", the variable part is null. + + A zone line has the form + + Zone NAME STDOFF RULES FORMAT [UNTIL] + + For example: + + Zone Asia/Amman 2:00 Jordan EE%sT 2017 Oct 27 01:00 + + The fields that make up a zone line are: + + NAME The name of the timezone. This is the name used in creating the + time conversion information file for the timezone. It should not + contain a file name component "." or ".."; a file name component + is a maximal substring that does not contain "/". + + STDOFF + The amount of time to add to UT to get standard time, without any + adjustment for daylight saving. This field has the same format + as the AT and SAVE fields of rule lines, except without suffix + letters; begin the field with a minus sign if time must be + subtracted from UT. 
+ + RULES The name of the rules that apply in the timezone or, + alternatively, a field in the same format as a rule-line SAVE + column, giving the amount of time to be added to local standard + time and whether the resulting time is standard or daylight + saving. If this field is - then standard time always applies. + When an amount of time is given, only the sum of standard time + and this amount matters. + + FORMAT + The format for time zone abbreviations. The pair of characters + %s is used to show where the "variable part" of the time zone + abbreviation goes. Alternatively, a format can use the pair of + characters %z to stand for the UT offset in the form +-hh, + +-hhmm, or +-hhmmss, using the shortest form that does not lose + information, where hh, mm, and ss are the hours, minutes, and + seconds east (+) or west (-) of UT. Alternatively, a slash (/) + separates standard and daylight abbreviations. To conform to + POSIX, a time zone abbreviation should contain only alphanumeric + ASCII characters, "+" and "-". By convention, the time zone + abbreviation "-00" is a placeholder that means local time is + unspecified. + + UNTIL The time at which the UT offset or the rule(s) change for a + location. It takes the form of one to four fields YEAR [MONTH + [DAY [TIME]]]. If this is specified, the time zone information + is generated from the given UT offset and rule change until the + time specified, which is interpreted using the rules in effect + just before the transition. The month, day, and time of day have + the same format as the IN, ON, and AT fields of a rule; trailing + fields can be omitted, and default to the earliest possible value + for the missing fields. 
+ + The next line must be a "continuation" line; this has the same + form as a zone line except that the string "Zone" and the name + are omitted, as the continuation line will place information + starting at the time specified as the "until" information in the + previous line in the file used by the previous line. + Continuation lines may contain "until" information, just as zone + lines do, indicating that the next line is a further + continuation. + + If a zone changes at the same instant that a rule would otherwise take + effect in the earlier zone or continuation line, the rule is ignored. + A zone or continuation line L with a named rule set starts with + standard time by default: that is, any of L's timestamps preceding L's + earliest rule use the rule in effect after L's first transition into + standard time. In a single zone it is an error if two rules take + effect at the same instant, or if two zone changes take effect at the + same instant. + + If a continuation line subtracts N seconds from the UT offset after a + transition that would be interpreted to be later if using the + continuation line's UT offset and rules, the "until" time of the + previous zone or continuation line is interpreted according to the + continuation line's UT offset and rules, and any rule that would + otherwise take effect in the next N seconds is instead assumed to take + effect simultaneously. For example: + + # Rule NAME FROM TO - IN ON AT SAVE LETTER/S + Rule US 1967 2006 - Oct lastSun 2:00 0 S + Rule US 1967 1973 - Apr lastSun 2:00 1:00 D + # Zone NAME STDOFF RULES FORMAT [UNTIL] + Zone America/Menominee -5:00 - EST 1973 Apr 29 2:00 + -6:00 US C%sT + + Here, an incorrect reading would be there were two clock changes on + 1973-04-29, the first from 02:00 EST (-05) to 01:00 CST (-06), and the + second an hour later from 02:00 CST (-06) to 03:00 CDT (-05). However, + zic interprets this more sensibly as a single transition from 02:00 EST + (-05) to 02:00 CDT (-05). 
+ + A link line has the form + + Link TARGET LINK-NAME + + For example: + + Link Europe/Istanbul Asia/Istanbul + + The TARGET field should appear as the NAME field in some zone line or + as the LINK-NAME field in some link line. The LINK-NAME field is used + as an alternative name for that zone; it has the same syntax as a zone + line's NAME field. Links can chain together, although the behavior is + unspecified if a chain of one or more links does not terminate in a + Zone name. A link line can appear before the line that defines the + link target. For example: + + Link Greenwich G_M_T + Link Etc/GMT Greenwich + Zone Etc/GMT 0 - GMT + + The two links are chained together, and G_M_T, Greenwich, and Etc/GMT + all name the same zone. + + Except for continuation lines, lines may appear in any order in the + input. However, the behavior is unspecified if multiple zone or link + lines define the same name. + + The file that describes leap seconds can have leap lines and an + expiration line. Leap lines have the following form: + + Leap YEAR MONTH DAY HH:MM:SS CORR R/S + + For example: + + Leap 2016 Dec 31 23:59:60 + S + + The YEAR, MONTH, DAY, and HH:MM:SS fields tell when the leap second + happened. The CORR field should be "+" if a second was added or "-" if + a second was skipped. The R/S field should be (an abbreviation of) + "Stationary" if the leap second time given by the other fields should + be interpreted as UTC or (an abbreviation of) "Rolling" if the leap + second time given by the other fields should be interpreted as local + (wall clock) time. + + Rolling leap seconds were implemented back when it was not clear + whether common practice was rolling or stationary, with concerns that + one would see Times Square ball drops where there'd be a "3... 2... + 1... leap... Happy New Year" countdown, placing the leap second at + midnight New York time rather than midnight UTC. 
However, this + countdown style does not seem to have caught on, which means rolling + leap seconds are not used in practice; also, they are not supported if + the -r option is used. + + The expiration line, if present, has the form: + + Expires YEAR MONTH DAY HH:MM:SS + + For example: + + Expires 2020 Dec 28 00:00:00 + + The YEAR, MONTH, DAY, and HH:MM:SS fields give the expiration timestamp + in UTC for the leap second table. + +EXTENDED EXAMPLE + Here is an extended example of zic input, intended to illustrate many + of its features. + + # Rule NAME FROM TO - IN ON AT SAVE LETTER/S + Rule Swiss 1941 1942 - May Mon>=1 1:00 1:00 S + Rule Swiss 1941 1942 - Oct Mon>=1 2:00 0 - + Rule EU 1977 1980 - Apr Sun>=1 1:00u 1:00 S + Rule EU 1977 only - Sep lastSun 1:00u 0 - + Rule EU 1978 only - Oct 1 1:00u 0 - + Rule EU 1979 1995 - Sep lastSun 1:00u 0 - + Rule EU 1981 max - Mar lastSun 1:00u 1:00 S + Rule EU 1996 max - Oct lastSun 1:00u 0 - + + # Zone NAME STDOFF RULES FORMAT [UNTIL] + Zone Europe/Zurich 0:34:08 - LMT 1853 Jul 16 + 0:29:45.50 - BMT 1894 Jun + 1:00 Swiss CE%sT 1981 + 1:00 EU CE%sT + + Link Europe/Zurich Europe/Vaduz + + In this example, the EU rules are for the European Union and for its + predecessor organization, the European Communities. The timezone is + named Europe/Zurich and it has the alias Europe/Vaduz. This example + says that Zurich was 34 minutes and 8 seconds east of UT until + 1853-07-16 at 00:00, when the legal offset was changed to 7 degrees 26 + minutes 22.50 seconds, which works out to 0:29:45.50; zic treats this + by rounding it to 0:29:46. After 1894-06-01 at 00:00 the UT offset + became one hour and Swiss daylight saving rules (defined with lines + beginning with "Rule Swiss") apply. From 1981 to the present, EU + daylight saving rules have applied, and the UTC offset has remained at + one hour. + + In 1941 and 1942, daylight saving time applied from the first Monday in + May at 01:00 to the first Monday in October at 02:00. 
The pre-1981 EU + daylight-saving rules have no effect here, but are included for + completeness. Since 1981, daylight saving has begun on the last Sunday + in March at 01:00 UTC. Until 1995 it ended the last Sunday in + September at 01:00 UTC, but this changed to the last Sunday in October + starting in 1996. + + For purposes of display, "LMT" and "BMT" were initially used, + respectively. Since Swiss rules and later EU rules were applied, the + time zone abbreviation has been CET for standard time and CEST for + daylight saving time. + +FILES + /etc/localtime + Default local timezone file. + + /usr/share/zoneinfo + Default timezone information directory. + +NOTES + For areas with more than two types of local time, you may need to use + local standard time in the AT field of the earliest transition time's + rule to ensure that the earliest transition time recorded in the + compiled file is correct. + + If, for a particular timezone, a clock advance caused by the start of + daylight saving coincides with and is equal to a clock retreat caused + by a change in UT offset, zic produces a single transition to daylight + saving at the new UT offset without any change in local (wall clock) + time. To get separate transitions use multiple zone continuation lines + specifying transition instants using universal time. + +SEE ALSO + tzfile(5), zdump(8) + +Time Zone Database zic(8) diff --git a/lib-tzcode/zic.c b/lib-tzcode/zic.c new file mode 100644 index 0000000..5518051 --- /dev/null +++ b/lib-tzcode/zic.c @@ -0,0 +1,3931 @@ +/* Compile .zi time zone data into TZif binary files. */ + +/* +** This file is in the public domain, so clarified as of +** 2006-07-17 by Arthur David Olson. +*/ + +/* Use the system 'time' function, instead of any private replacement. + This avoids creating an unnecessary dependency on localtime.c. 
*/ +#undef EPOCH_LOCAL +#undef EPOCH_OFFSET +#undef RESERVE_STD_EXT_IDS +#undef time_tz + +#include "version.h" +#include "private.h" +#include "tzfile.h" + +#include <fcntl.h> +#include <locale.h> +#include <signal.h> +#include <stdarg.h> +#include <stdio.h> + +typedef int_fast64_t zic_t; +static zic_t const + ZIC_MIN = INT_FAST64_MIN, + ZIC_MAX = INT_FAST64_MAX, + ZIC32_MIN = -1 - (zic_t) 0x7fffffff, + ZIC32_MAX = 0x7fffffff; +#define SCNdZIC SCNdFAST64 + +#ifndef ZIC_MAX_ABBR_LEN_WO_WARN +# define ZIC_MAX_ABBR_LEN_WO_WARN 6 +#endif /* !defined ZIC_MAX_ABBR_LEN_WO_WARN */ + +/* An upper bound on how much a format might grow due to concatenation. */ +enum { FORMAT_LEN_GROWTH_BOUND = 5 }; + +#ifdef HAVE_DIRECT_H +# include <direct.h> +# include <io.h> +# undef mkdir +# define mkdir(name, mode) _mkdir(name) +#endif + +#ifndef HAVE_GETRANDOM +# ifdef __has_include +# if __has_include(<sys/random.h>) +# include <sys/random.h> +# endif +# elif 2 < __GLIBC__ + (25 <= __GLIBC_MINOR__) +# include <sys/random.h> +# endif +# define HAVE_GETRANDOM GRND_RANDOM +#elif HAVE_GETRANDOM +# include <sys/random.h> +#endif + +#if HAVE_SYS_STAT_H +# include <sys/stat.h> +#endif +#ifdef S_IRUSR +# define MKDIR_UMASK (S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) +#else +# define MKDIR_UMASK 0755 +#endif + +/* The minimum alignment of a type, for pre-C23 platforms. + The __SUNPRO_C test is because Oracle Developer Studio 12.6 lacks + <stdalign.h> even though __STDC_VERSION__ == 201112. */ +#if __STDC_VERSION__ < 201112 || defined __SUNPRO_C +# define alignof(type) offsetof(struct { char a; type b; }, b) +#elif __STDC_VERSION__ < 202311 +# include <stdalign.h> +#endif + +/* The maximum length of a text line, including the trailing newline. */ +#ifndef _POSIX2_LINE_MAX +# define _POSIX2_LINE_MAX 2048 +#endif + +/* The type for line numbers. Use PRIdMAX to format them; formerly + there was also "#define PRIdLINENO PRIdMAX" and formats used + PRIdLINENO, but xgettext cannot grok that. 
*/ +typedef intmax_t lineno; + +struct rule { + int r_filenum; + lineno r_linenum; + const char * r_name; + + zic_t r_loyear; /* for example, 1986 */ + zic_t r_hiyear; /* for example, 1986 */ + bool r_lowasnum; + bool r_hiwasnum; + + int r_month; /* 0..11 */ + + int r_dycode; /* see below */ + int r_dayofmonth; + int r_wday; + + zic_t r_tod; /* time from midnight */ + bool r_todisstd; /* is r_tod standard time? */ + bool r_todisut; /* is r_tod UT? */ + bool r_isdst; /* is this daylight saving time? */ + zic_t r_save; /* offset from standard time */ + const char * r_abbrvar; /* variable part of abbreviation */ + + bool r_todo; /* a rule to do (used in outzone) */ + zic_t r_temp; /* used in outzone */ +}; + +/* +** r_dycode r_dayofmonth r_wday +*/ +enum { + DC_DOM, /* 1..31 */ /* unused */ + DC_DOWGEQ, /* 1..31 */ /* 0..6 (Sun..Sat) */ + DC_DOWLEQ /* 1..31 */ /* 0..6 (Sun..Sat) */ +}; + +struct zone { + int z_filenum; + lineno z_linenum; + + const char * z_name; + zic_t z_stdoff; + char * z_rule; + const char * z_format; + char z_format_specifier; + + bool z_isdst; + zic_t z_save; + + struct rule * z_rules; + ptrdiff_t z_nrules; + + struct rule z_untilrule; + zic_t z_untiltime; +}; + +#if !HAVE_POSIX_DECLS /* declare getopt, link, optarg and optind ourselves */ +extern int getopt(int argc, char * const argv[], + const char * options); +extern int link(const char * target, const char * linkname); +extern char * optarg; +extern int optind; +#endif + +#if ! HAVE_SYMLINK /* no symlink support: stubs that always fail with ENOTSUP */ +static ssize_t +readlink(char const *restrict file, char *restrict buf, size_t size) +{ + errno = ENOTSUP; + return -1; +} +static int +symlink(char const *target, char const *linkname) +{ + errno = ENOTSUP; + return -1; +} +#endif +#ifndef AT_SYMLINK_FOLLOW /* linkat fallback: use link() unless the target is a symlink; always ENOTSUP without HAVE_LINK */ +# if HAVE_LINK +# define linkat(targetdir, target, linknamedir, linkname, flag) \ + (itssymlink(target) ? 
(errno = ENOTSUP, -1) : link(target, linkname)) +# else +# define linkat(targetdir, target, linknamedir, linkname, flag) \ + (errno = ENOTSUP, -1) +# endif +#endif +/* Forward declarations of this file's static functions. */ +static void addtt(zic_t starttime, int type); +static int addtype(zic_t, char const *, bool, bool, bool); +static void leapadd(zic_t, int, int); +static void adjleap(void); +static void associate(void); +static void dolink(const char *, const char *, bool); +static int getfields(char *, char **, int); +static zic_t gethms(const char * string, const char * errstring); +static zic_t getsave(char *, bool *); +static void inexpires(char **, int); +static void infile(int, char const *); +static void inleap(char ** fields, int nfields); +static void inlink(char ** fields, int nfields); +static void inrule(char ** fields, int nfields); +static bool inzcont(char ** fields, int nfields); +static bool inzone(char ** fields, int nfields); +static bool inzsub(char **, int, bool); +static bool itssymlink(char const *); +static bool is_alpha(char a); +static char lowerit(char); +static void mkdirs(char const *, bool); +static void newabbr(const char * abbr); +static zic_t oadd(zic_t t1, zic_t t2); +static void outzone(const struct zone * zp, ptrdiff_t ntzones); +static zic_t rpytime(const struct rule * rp, zic_t wantedy); +static bool rulesub(struct rule * rp, + const char * loyearp, const char * hiyearp, + const char * typep, const char * monthp, + const char * dayp, const char * timep); +static zic_t tadd(zic_t t1, zic_t t2); + +/* Bound on length of what %z can expand to. 
*/ +enum { PERCENT_Z_LEN_BOUND = sizeof "+995959" - 1 }; + +static int charcnt; +static bool errors; +static bool warnings; +static int filenum; +static int leapcnt; +static bool leapseen; +static zic_t leapminyear; +static zic_t leapmaxyear; +static lineno linenum; +static int max_abbrvar_len = PERCENT_Z_LEN_BOUND; +static int max_format_len; +static zic_t max_year; +static zic_t min_year; +static bool noise; +static int rfilenum; +static lineno rlinenum; +static const char * progname; +static char const * leapsec; +static char *const * main_argv; +static ptrdiff_t timecnt; +static ptrdiff_t timecnt_alloc; +static int typecnt; +static int unspecifiedtype; + +/* +** Line codes. +*/ + +enum { + LC_RULE, + LC_ZONE, + LC_LINK, + LC_LEAP, + LC_EXPIRES +}; + +/* +** Which fields are which on a Zone line. +*/ + +enum { + ZF_NAME = 1, + ZF_STDOFF, + ZF_RULE, + ZF_FORMAT, + ZF_TILYEAR, + ZF_TILMONTH, + ZF_TILDAY, + ZF_TILTIME, + ZONE_MAXFIELDS, + ZONE_MINFIELDS = ZF_TILYEAR +}; + +/* +** Which fields are which on a Zone continuation line. +*/ + +enum { + ZFC_STDOFF, + ZFC_RULE, + ZFC_FORMAT, + ZFC_TILYEAR, + ZFC_TILMONTH, + ZFC_TILDAY, + ZFC_TILTIME, + ZONEC_MAXFIELDS, + ZONEC_MINFIELDS = ZFC_TILYEAR +}; + +/* +** Which fields are which on a Rule line (RF_COMMAND indexes the reserved column, formerly TYPE). +*/ + +enum { + RF_NAME = 1, + RF_LOYEAR, + RF_HIYEAR, + RF_COMMAND, + RF_MONTH, + RF_DAY, + RF_TOD, + RF_SAVE, + RF_ABBRVAR, + RULE_FIELDS +}; + +/* +** Which fields are which on a Link line. +*/ + +enum { + LF_TARGET = 1, + LF_LINKNAME, + LINK_FIELDS +}; + +/* +** Which fields are which on a Leap line. +*/ + +enum { + LP_YEAR = 1, + LP_MONTH, + LP_DAY, + LP_TIME, + LP_CORR, + LP_ROLL, + LEAP_FIELDS, + + /* Expires lines are like Leap lines, except without CORR and ROLL fields. */ + EXPIRES_FIELDS = LP_TIME + 1 +}; + +/* The maximum number of fields on any of the above lines. + (The "+"s pacify gcc -Wenum-compare.) 
*/ +enum { + MAX_FIELDS = max(max(+RULE_FIELDS, +LINK_FIELDS), + max(+LEAP_FIELDS, +EXPIRES_FIELDS)) +}; + +/* +** Year synonyms. +*/ + +enum { + YR_MINIMUM, + YR_MAXIMUM, + YR_ONLY +}; + +static struct rule * rules; +static ptrdiff_t nrules; /* number of rules */ +static ptrdiff_t nrules_alloc; + +static struct zone * zones; +static ptrdiff_t nzones; /* number of zones */ +static ptrdiff_t nzones_alloc; + +struct link { + int l_filenum; + lineno l_linenum; + const char * l_target; + const char * l_linkname; +}; + +static struct link * links; +static ptrdiff_t nlinks; +static ptrdiff_t nlinks_alloc; +/* One keyword-to-value mapping; every table below ends with an entry whose l_word is NULL. */ +struct lookup { + const char * l_word; + const int l_value; +}; + +static struct lookup const * byword(const char * string, + const struct lookup * lp); + +static struct lookup const zi_line_codes[] = { + { "Rule", LC_RULE }, + { "Zone", LC_ZONE }, + { "Link", LC_LINK }, + { NULL, 0 } +}; +static struct lookup const leap_line_codes[] = { + { "Leap", LC_LEAP }, + { "Expires", LC_EXPIRES }, + { NULL, 0} +}; + +static struct lookup const mon_names[] = { + { "January", TM_JANUARY }, + { "February", TM_FEBRUARY }, + { "March", TM_MARCH }, + { "April", TM_APRIL }, + { "May", TM_MAY }, + { "June", TM_JUNE }, + { "July", TM_JULY }, + { "August", TM_AUGUST }, + { "September", TM_SEPTEMBER }, + { "October", TM_OCTOBER }, + { "November", TM_NOVEMBER }, + { "December", TM_DECEMBER }, + { NULL, 0 } +}; + +static struct lookup const wday_names[] = { + { "Sunday", TM_SUNDAY }, + { "Monday", TM_MONDAY }, + { "Tuesday", TM_TUESDAY }, + { "Wednesday", TM_WEDNESDAY }, + { "Thursday", TM_THURSDAY }, + { "Friday", TM_FRIDAY }, + { "Saturday", TM_SATURDAY }, + { NULL, 0 } +}; + +static struct lookup const lasts[] = { + { "last-Sunday", TM_SUNDAY }, + { "last-Monday", TM_MONDAY }, + { "last-Tuesday", TM_TUESDAY }, + { "last-Wednesday", TM_WEDNESDAY }, + { "last-Thursday", TM_THURSDAY }, + { "last-Friday", TM_FRIDAY }, + { "last-Saturday", TM_SATURDAY }, + { NULL, 0 } +}; + +static struct 
lookup const begin_years[] = { + { "minimum", YR_MINIMUM }, + { "maximum", YR_MAXIMUM }, + { NULL, 0 } +}; + +static struct lookup const end_years[] = { + { "minimum", YR_MINIMUM }, + { "maximum", YR_MAXIMUM }, + { "only", YR_ONLY }, + { NULL, 0 } +}; +/* l_value here is a boolean: true for Rolling, false for Stationary. */ +static struct lookup const leap_types[] = { + { "Rolling", true }, + { "Stationary", false }, + { NULL, 0 } +}; + +static const int len_months[2][MONSPERYEAR] = { + { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, /* 28-day February */ + { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 } /* 29-day February */ +}; + +static const int len_years[2] = { + DAYSPERNYEAR, DAYSPERLYEAR +}; + +static struct attype { + zic_t at; + bool dontmerge; + unsigned char type; +} * attypes; +static zic_t utoffs[TZ_MAX_TYPES]; +static char isdsts[TZ_MAX_TYPES]; +static unsigned char desigidx[TZ_MAX_TYPES]; +static bool ttisstds[TZ_MAX_TYPES]; +static bool ttisuts[TZ_MAX_TYPES]; +static char chars[TZ_MAX_CHARS]; +static zic_t trans[TZ_MAX_LEAPS]; +static zic_t corr[TZ_MAX_LEAPS]; +static char roll[TZ_MAX_LEAPS]; + +/* +** Memory allocation. 
+*/
+
+ATTRIBUTE_NORETURN static void
+memory_exhausted(const char *msg)	/* print "Memory exhausted: MSG" to stderr and exit */
+{
+	fprintf(stderr, _("%s: Memory exhausted: %s\n"), progname, msg);
+	exit(EXIT_FAILURE);
+}
+
+ATTRIBUTE_NORETURN static void
+size_overflow(void)	/* memory_exhausted() with a "size overflow" message */
+{
+	memory_exhausted(_("size overflow"));
+}
+
+ATTRIBUTE_REPRODUCIBLE static ptrdiff_t
+size_sum(size_t a, size_t b)	/* A + B, or exit if the sum would exceed INDEX_MAX */
+{
+#ifdef ckd_add
+	ptrdiff_t sum;
+	if (!ckd_add(&sum, a, b) && sum <= INDEX_MAX)
+		return sum;
+#else
+	if (a <= INDEX_MAX && b <= INDEX_MAX - a)
+		return a + b;
+#endif
+	size_overflow();
+}
+
+ATTRIBUTE_REPRODUCIBLE static ptrdiff_t
+size_product(ptrdiff_t nitems, ptrdiff_t itemsize)	/* NITEMS * ITEMSIZE, or exit if over INDEX_MAX */
+{
+#ifdef ckd_mul
+	ptrdiff_t product;
+	if (!ckd_mul(&product, nitems, itemsize) && product <= INDEX_MAX)
+		return product;
+#else
+	ptrdiff_t nitems_max = INDEX_MAX / itemsize;	/* NOTE(review): divides by ITEMSIZE; callers seen here pass nonzero */
+	if (nitems <= nitems_max)
+		return nitems * itemsize;
+#endif
+	size_overflow();
+}
+
+ATTRIBUTE_REPRODUCIBLE static ptrdiff_t
+align_to(ptrdiff_t size, ptrdiff_t alignment)	/* round SIZE up using mask ALIGNMENT - 1; presumably ALIGNMENT is a power of 2 */
+{
+	ptrdiff_t lo_bits = alignment - 1, sum = size_sum(size, lo_bits);
+	return sum & ~lo_bits;
+}
+
+#if !HAVE_STRDUP
+static char *
+strdup(char const *str)	/* replacement built on malloc and strcpy; returns NULL if malloc fails */
+{
+	char *result = malloc(strlen(str) + 1);
+	return result ? strcpy(result, str) : result;
+}
+#endif
+
+static void *
+memcheck(void *ptr)	/* return PTR, or exit via memory_exhausted() if it is null */
+{
+	if (ptr == NULL)
+		memory_exhausted(strerror(HAVE_MALLOC_ERRNO ? errno : ENOMEM));
+	return ptr;
+}
+
+ATTRIBUTE_MALLOC static void *
+emalloc(size_t size)	/* malloc that exits instead of returning NULL */
+{
+	return memcheck(malloc(size));
+}
+
+static void *
+erealloc(void *ptr, size_t size)	/* realloc that exits instead of returning NULL */
+{
+	return memcheck(realloc(ptr, size));
+}
+
+ATTRIBUTE_MALLOC static char *
+estrdup(char const *str)	/* strdup that exits instead of returning NULL */
+{
+	return memcheck(strdup(str));
+}
+
+static ptrdiff_t
+grow_nitems_alloc(ptrdiff_t *nitems_alloc, ptrdiff_t itemsize)	/* grow *NITEMS_ALLOC; return the new size in bytes */
+{
+	ptrdiff_t addend = (*nitems_alloc >> 1) + 1;	/* grow by half the current count, plus one */
+#if defined ckd_add && defined ckd_mul
+	ptrdiff_t product;
+	if (!ckd_add(nitems_alloc, *nitems_alloc, addend)
+	    && !ckd_mul(&product, *nitems_alloc, itemsize) && product <= INDEX_MAX)
+		return product;
+#else
+	if (*nitems_alloc <= ((INDEX_MAX - 1) / 3 * 2) / itemsize) {
+		*nitems_alloc += addend;
+		return *nitems_alloc * itemsize;
+	}
+#endif
+	memory_exhausted(_("integer overflow"));
+}
+
+static void *
+growalloc(void *ptr, ptrdiff_t itemsize, ptrdiff_t nitems,
+	  ptrdiff_t *nitems_alloc)	/* return PTR, reallocated larger if NITEMS has reached *NITEMS_ALLOC */
+{
+	return (nitems < *nitems_alloc
+		? ptr
+		: erealloc(ptr, grow_nitems_alloc(nitems_alloc, itemsize)));
+}
+
+/*
+** Error handling.
+*/
+
+/* In most of the code, an input file name is represented by its index
+   into the main argument vector, except that LEAPSEC_FILENUM stands
+   for leapsec and COMMAND_LINE_FILENUM stands for the command line.  */
+enum { LEAPSEC_FILENUM = -2, COMMAND_LINE_FILENUM = -1 };
+
+/* Return the name of the Ith input file, for diagnostics.
+   A file named "-" is reported as "standard input".  */
+static char const *
+filename(int i)
+{
+	if (i == COMMAND_LINE_FILENUM)
+		return _("command line");
+	else {
+		char const *fname = i == LEAPSEC_FILENUM ? leapsec : main_argv[i];
+		return strcmp(fname, "-") == 0 ? _("standard input") : fname;
+	}
+}
+
+static void
+eats(int fnum, lineno num, int rfnum, lineno rnum)	/* set the file/line and rule file/line used by verror() */
+{
+	filenum = fnum;
+	linenum = num;
+	rfilenum = rfnum;
+	rlinenum = rnum;
+}
+
+static void
+eat(int fnum, lineno num)	/* like eats(), with no rule location (rfilenum 0) */
+{
+	eats(fnum, num, 0, -1);
+}
+
+ATTRIBUTE_FORMAT((printf, 1, 0)) static void
+verror(const char *const string, va_list args)	/* print one diagnostic line to stderr */
+{
+	/*
+	** Match the format of "cc" to allow sh users to
+	**	zic ... 2>&1 | error -t "*" -v
+	** on BSD systems.
+	*/
+	if (filenum)	/* 0 means there is no input location to report */
+		fprintf(stderr, _("\"%s\", line %"PRIdMAX": "),
+			filename(filenum), linenum);
+	vfprintf(stderr, string, args);
+	if (rfilenum)
+		fprintf(stderr, _(" (rule from \"%s\", line %"PRIdMAX")"),
+			filename(rfilenum), rlinenum);
+	fprintf(stderr, "\n");
+}
+
+ATTRIBUTE_FORMAT((printf, 1, 2)) static void
+error(const char *const string, ...)	/* report via verror() and set the errors flag */
+{
+	va_list args;
+	va_start(args, string);
+	verror(string, args);
+	va_end(args);
+	errors = true;
+}
+
+ATTRIBUTE_FORMAT((printf, 1, 2)) static void
+warning(const char *const string, ...)	/* like error(), prefixed "warning: "; sets warnings instead */
+{
+	va_list args;
+	fprintf(stderr, _("warning: "));
+	va_start(args, string);
+	verror(string, args);
+	va_end(args);
+	warnings = true;
+}
+
+/* Close STREAM.  If it had an I/O error, report it against DIR/NAME,
+   remove TEMPNAME if nonnull, and then exit.  DIR and NAME may each be
+   null, in which case that part of the message is omitted.  */
+static void
+close_file(FILE *stream, char const *dir, char const *name,
+	   char const *tempname)
+{
+	char const *e = (ferror(stream) ? _("I/O error")
+			 : fclose(stream) != 0 ? strerror(errno) : NULL);
+	if (e) {
+		fprintf(stderr, "%s: %s%s%s%s%s\n", progname,
+			dir ? dir : "", dir ? "/" : "",
+			name ? name : "", name ? ": " : "",
+			e);
+		if (tempname)
+			remove(tempname);
+		exit(EXIT_FAILURE);
+	}
+}
+
+ATTRIBUTE_NORETURN static void
+usage(FILE *stream, int status)	/* print the usage message to STREAM and exit with STATUS */
+{
+	fprintf(stream,
+		_("%s: usage is %s [ --version ] [ --help ] [ -v ] \\\n"
+		  "\t[ -b {slim|fat} ] [ -d directory ] [ -l localtime ]"
+		  " [ -L leapseconds ] \\\n"
+		  "\t[ -p posixrules ] [ -r '[@lo][/@hi]' ] [ -R '@hi' ] \\\n"
+		  "\t[ -t localtime-link ] \\\n"
+		  "\t[ filename ... ]\n\n"
+		  "Report bugs to %s.\n"),
+		progname, progname, REPORT_BUGS_TO);
+	if (status == EXIT_SUCCESS)
+		close_file(stream, NULL, NULL, NULL);	/* exits with failure if the write failed */
+	exit(status);
+}
+
+/* Change the working directory to DIR, possibly creating DIR and its
+   ancestors.  After this is done, all files are accessed with names
+   relative to DIR.  Exit with a diagnostic if DIR cannot be entered.  */
+static void
+change_directory(char const *dir)
+{
+	if (chdir(dir) != 0) {
+		int chdir_errno = errno;
+		if (chdir_errno == ENOENT) {
+			mkdirs(dir, false);
+			chdir_errno = chdir(dir) == 0 ? 0 : errno;	/* retry once after mkdirs() */
+		}
+		if (chdir_errno != 0) {
+			fprintf(stderr, _("%s: Can't chdir to %s: %s\n"),
+				progname, dir, strerror(chdir_errno));
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
+/* Compare the two links A and B, for a stable sort by link name.  */
+static int
+qsort_linkcmp(void const *a, void const *b)
+{
+	struct link const *l = a;
+	struct link const *m = b;
+	int cmp = strcmp(l->l_linkname, m->l_linkname);
+	if (cmp)
+		return cmp;
+
+	/* The link names are the same.  Make the sort stable by comparing
+	   file numbers (where subtraction cannot overflow) and possibly
+	   line numbers (where it can).  */
+	cmp = l->l_filenum - m->l_filenum;
+	if (cmp)
+		return cmp;
+	return (l->l_linenum > m->l_linenum) - (l->l_linenum < m->l_linenum);
+}
+
+/* Compare the string KEY to the link B, for bsearch.  */
+static int
+bsearch_linkcmp(void const *key, void const *b)
+{
+	struct link const *m = b;
+	return strcmp(key, m->l_linkname);
+}
+
+/* Make the links specified by the Link lines.
*/
+static void
+make_links(void)
+{
+	ptrdiff_t i, j, nalinks, pass_size;
+	if (1 < nlinks)
+		qsort(links, nlinks, sizeof *links, qsort_linkcmp);
+
+	/* Ignore each link superseded by a later link with the same name.  */
+	j = 0;
+	for (i = 0; i < nlinks; i++) {
+		while (i + 1 < nlinks
+		       && strcmp(links[i].l_linkname, links[i + 1].l_linkname) == 0)
+			i++;	/* skip to the last entry of a run of equal names */
+		links[j++] = links[i];
+	}
+	nlinks = pass_size = j;
+
+	/* Walk through the link array making links.  However,
+	   if a link's target has not been made yet, append a copy to the
+	   end of the array.  The end of the array will gradually fill
+	   up with a small sorted subsequence of not-yet-made links.
+	   nalinks counts all the links in the array, including copies.
+	   When we reach the copied subsequence, it may still contain
+	   a link to a not-yet-made link, so the process repeats.
+	   At any given point in time, the link array consists of the
+	   following subregions, where 0 <= i <= j <= nalinks and
+	   0 <= nlinks <= nalinks:
+
+	     0 .. (i - 1):
+	       links that either have been made, or have been copied to a
+	       later point in the array (this later point can be in
+	       any of the three subregions)
+	     i .. (j - 1):
+	       not-yet-made links for this pass
+	     j .. (nalinks - 1):
+	       not-yet-made links that this pass has skipped because
+	       they were links to not-yet-made links
+
+	   The first subregion might not be sorted if nlinks < i;
+	   the other two subregions are sorted.  This algorithm does
+	   not alter entries 0 .. (nlinks - 1), which remain sorted.
+
+	   If there are L links, this algorithm is O(C*L*log(L)) where
+	   C is the length of the longest link chain.  Usually C is
+	   short (e.g., 3) though its worst-case value is L.  */
+
+	j = nalinks = nlinks;
+
+	for (i = 0; i < nalinks; i++) {
+		struct link *l;
+
+		eat(links[i].l_filenum, links[i].l_linenum);	/* diagnostics refer to this Link line */
+
+		/* If this pass examined all its links, start the next pass.  */
+		if (i == j) {
+			if (nalinks - i == pass_size) {	/* the pass deferred every link it examined */
+				error(_("\"Link %s %s\" is part of a link cycle"),
+				      links[i].l_target, links[i].l_linkname);
+				break;
+			}
+			j = nalinks;
+			pass_size = nalinks - i;
+		}
+
+		/* Diagnose self links, which the cycle detection algorithm would not
+		   otherwise catch.  */
+		if (strcmp(links[i].l_target, links[i].l_linkname) == 0) {
+			error(_("link %s targets itself"), links[i].l_target);
+			continue;
+		}
+
+		/* Make this link unless its target has not been made yet.
+		   The target is looked up first among the rest of this pass,
+		   then among the links already deferred.  */
+		l = bsearch(links[i].l_target, &links[i + 1], j - (i + 1),
+			    sizeof *links, bsearch_linkcmp);
+		if (!l)
+			l = bsearch(links[i].l_target, &links[j], nalinks - j,
+				    sizeof *links, bsearch_linkcmp);
+		if (!l)
+			dolink(links[i].l_target, links[i].l_linkname, false);
+		else {
+			/* The link target has not been made yet; copy the link to the end.  */
+			links = growalloc(links, sizeof *links, nalinks, &nlinks_alloc);
+			links[nalinks++] = links[i];
+		}
+
+		if (noise && i < nlinks) {	/* warn only for original entries, not appended copies */
+			if (l)
+				warning(_("link %s targeting link %s mishandled by pre-2023 zic"),
+					links[i].l_linkname, links[i].l_target);
+			else if (bsearch(links[i].l_target, links, nlinks, sizeof *links,
+					 bsearch_linkcmp))
+				warning(_("link %s targeting link %s"),
+					links[i].l_linkname, links[i].l_target);
+		}
+	}
+}
+
+/* Simple signal handling: just set a flag that is checked
+   periodically outside critical sections.  To set up the handler,
+   prefer sigaction if available to close a signal race.  */
+
+static sig_atomic_t got_signal;	/* nonzero: number of the last signal caught */
+
+static void
+signal_handler(int sig)	/* record SIG in got_signal for check_for_signal() */
+{
+#ifndef SA_SIGINFO
+	signal(sig, signal_handler);	/* re-install when catch_signals() used signal() rather than sigaction() */
+#endif
+	got_signal = sig;
+}
+
+/* Arrange for SIGINT etc. to be caught by the handler.
*/ +static void +catch_signals(void) +{ + static int const signals[] = { +#ifdef SIGHUP + SIGHUP, +#endif + SIGINT, +#ifdef SIGPIPE + SIGPIPE, +#endif + SIGTERM + }; + int i; + for (i = 0; i < sizeof signals / sizeof signals[0]; i++) { +#ifdef SA_SIGINFO + struct sigaction act0, act; + act.sa_handler = signal_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + if (sigaction(signals[i], &act, &act0) == 0 + && ! (act0.sa_flags & SA_SIGINFO) && act0.sa_handler == SIG_IGN) { + sigaction(signals[i], &act0, NULL); + got_signal = 0; + } +#else + if (signal(signals[i], signal_handler) == SIG_IGN) { + signal(signals[i], SIG_IGN); + got_signal = 0; + } +#endif + } +} + +/* If a signal has arrived, terminate zic with appropriate status. */ +static void +check_for_signal(void) +{ + int sig = got_signal; + if (sig) { + signal(sig, SIG_DFL); + raise(sig); + abort(); /* A bug in 'raise'. */ + } +} + +enum { TIME_T_BITS_IN_FILE = 64 }; + +/* The minimum and maximum values representable in a TZif file. */ +static zic_t const min_time = MINVAL(zic_t, TIME_T_BITS_IN_FILE); +static zic_t const max_time = MAXVAL(zic_t, TIME_T_BITS_IN_FILE); + +/* The minimum, and one less than the maximum, values specified by + the -r option. These default to MIN_TIME and MAX_TIME. */ +static zic_t lo_time = MINVAL(zic_t, TIME_T_BITS_IN_FILE); +static zic_t hi_time = MAXVAL(zic_t, TIME_T_BITS_IN_FILE); + +/* The time specified by the -R option, defaulting to MIN_TIME. */ +static zic_t redundant_time = MINVAL(zic_t, TIME_T_BITS_IN_FILE); + +/* The time specified by an Expires line, or negative if no such line. */ +static zic_t leapexpires = -1; + +/* Set the time range of the output to TIMERANGE. + Return true if successful. 
*/
+static bool
+timerange_option(char *timerange)
+{
+	intmax_t lo = min_time, hi = max_time;
+	char *lo_end = timerange, *hi_end;
+	if (*timerange == '@') {	/* optional "@lo" part */
+		errno = 0;
+		lo = strtoimax(timerange + 1, &lo_end, 10);
+		if (lo_end == timerange + 1 || (lo == INTMAX_MAX && errno == ERANGE))
+			return false;
+	}
+	hi_end = lo_end;
+	if (lo_end[0] == '/' && lo_end[1] == '@') {	/* optional "/@hi" part */
+		errno = 0;
+		hi = strtoimax(lo_end + 2, &hi_end, 10);
+		if (hi_end == lo_end + 2 || hi == INTMAX_MIN)
+			return false;
+		hi -= ! (hi == INTMAX_MAX && errno == ERANGE);	/* store hi - 1, unless strtoimax overflowed */
+	}
+	if (*hi_end || hi < lo || max_time < lo || hi < min_time)	/* trailing junk, or an empty or out-of-range interval */
+		return false;
+	lo_time = max(lo, min_time);
+	hi_time = min(hi, max_time);
+	return true;
+}
+
+/* Generate redundant time stamps up to OPT.  Return true if successful.
+   OPT must have the form "@" followed by a decimal integer.  */
+static bool
+redundant_time_option(char *opt)
+{
+	if (*opt == '@') {
+		intmax_t redundant;
+		char *opt_end;
+		redundant = strtoimax(opt + 1, &opt_end, 10);
+		if (opt_end != opt + 1 && !*opt_end) {
+			redundant_time = max(redundant_time, redundant);	/* repeated -R keeps the largest value */
+			return true;
+		}
+	}
+	return false;
+}
+
+static const char *	psxrules;	/* argument of -p */
+static const char *	lcltime;	/* argument of -l */
+static const char *	directory;	/* argument of -d; main() defaults it to TZDIR */
+static const char *	tzdefault;	/* argument of -t; main() defaults it to TZDEFAULT */
+
+/* -1 if the TZif output file should be slim, 0 if default, 1 if the
+   output should be fat for backward compatibility.  ZIC_BLOAT_DEFAULT
+   determines the default.  */
+static int bloat;
+
+static bool
+want_bloat(void)	/* true unless bloat is negative (slim) */
+{
+	return 0 <= bloat;
+}
+
+#ifndef ZIC_BLOAT_DEFAULT
+# define ZIC_BLOAT_DEFAULT "slim"
+#endif
+
+int
+main(int argc, char **argv)	/* parse options, read the input files, then write zones and links */
+{
+	register int c, k;
+	register ptrdiff_t i, j;
+	bool timerange_given = false;
+
+#ifdef S_IWGRP
+	umask(umask(S_IWGRP | S_IWOTH) | (S_IWGRP | S_IWOTH));	/* add group/other write bits to the existing mask */
+#endif
+#if HAVE_GETTEXT
+	setlocale(LC_ALL, "");
+# ifdef TZ_DOMAINDIR
+	bindtextdomain(TZ_DOMAIN, TZ_DOMAINDIR);
+# endif /* defined TZ_DOMAINDIR */
+	textdomain(TZ_DOMAIN);
+#endif /* HAVE_GETTEXT */
+	main_argv = argv;
+	progname = argv[0] ? argv[0] : "zic";
+	if (TYPE_BIT(zic_t) < 64) {
+		fprintf(stderr, "%s: %s\n", progname,
+			_("wild compilation-time specification of zic_t"));
+		return EXIT_FAILURE;
+	}
+	for (k = 1; k < argc; k++)	/* --version and --help are handled before getopt */
+		if (strcmp(argv[k], "--version") == 0) {
+			printf("zic %s%s\n", PKGVERSION, TZVERSION);
+			close_file(stdout, NULL, NULL, NULL);
+			return EXIT_SUCCESS;
+		} else if (strcmp(argv[k], "--help") == 0) {
+			usage(stdout, EXIT_SUCCESS);
+		}
+	while ((c = getopt(argc, argv, "b:d:l:L:p:r:R:st:vy:")) != EOF
+	       && c != -1)
+		switch (c) {
+			default:
+				usage(stderr, EXIT_FAILURE);	/* does not return */
+			case 'b':
+				if (strcmp(optarg, "slim") == 0) {
+					if (0 < bloat)
+						error(_("incompatible -b options"));
+					bloat = -1;
+				} else if (strcmp(optarg, "fat") == 0) {
+					if (bloat < 0)
+						error(_("incompatible -b options"));
+					bloat = 1;
+				} else
+					error(_("invalid option: -b '%s'"), optarg);
+				break;
+			case 'd':
+				if (directory == NULL)
+					directory = optarg;
+				else {
+					fprintf(stderr,
+_("%s: More than one -d option specified\n"),
+						progname);
+					return EXIT_FAILURE;
+				}
+				break;
+			case 'l':
+				if (lcltime == NULL)
+					lcltime = optarg;
+				else {
+					fprintf(stderr,
+_("%s: More than one -l option specified\n"),
+						progname);
+					return EXIT_FAILURE;
+				}
+				break;
+			case 'p':
+				if (psxrules == NULL)
+					psxrules = optarg;
+				else {
+					fprintf(stderr,
+_("%s: More than one -p option specified\n"),
+						progname);
+					return EXIT_FAILURE;
+				}
+				break;
+			case 't':
+				if (tzdefault != NULL) {
+					fprintf(stderr,
+						_("%s: More than one -t option"
+						  " specified\n"),
+						progname);
+					return EXIT_FAILURE;
+				}
+				tzdefault = optarg;
+				break;
+			case 'y':
+				warning(_("-y ignored"));
+				break;
+			case 'L':
+				if (leapsec == NULL)
+					leapsec = optarg;
+				else {
+					fprintf(stderr,
+_("%s: More than one -L option specified\n"),
+						progname);
+					return EXIT_FAILURE;
+				}
+				break;
+			case 'v':
+				noise = true;
+				break;
+			case 'r':
+				if (timerange_given) {
+					fprintf(stderr,
+_("%s: More than one -r option specified\n"),
+						progname);
+					return EXIT_FAILURE;
+				}
+				if (! timerange_option(optarg)) {
+					fprintf(stderr,
+_("%s: invalid time range: %s\n"),
+						progname, optarg);
+					return EXIT_FAILURE;
+				}
+				timerange_given = true;
+				break;
+			case 'R':
+				if (! redundant_time_option(optarg)) {
+					fprintf(stderr, _("%s: invalid time: %s\n"),
+						progname, optarg);
+					return EXIT_FAILURE;
+				}
+				break;
+			case 's':
+				warning(_("-s ignored"));
+				break;
+		}
+	if (optind == argc - 1 && strcmp(argv[optind], "=") == 0)
+		usage(stderr, EXIT_FAILURE);	/* usage message by request */
+	if (hi_time + (hi_time < ZIC_MAX) < redundant_time) {
+		fprintf(stderr, _("%s: -R time exceeds -r cutoff\n"), progname);
+		return EXIT_FAILURE;
+	}
+	if (bloat == 0) {	/* no -b option: apply ZIC_BLOAT_DEFAULT */
+		static char const bloat_default[] = ZIC_BLOAT_DEFAULT;
+		if (strcmp(bloat_default, "slim") == 0)
+			bloat = -1;
+		else if (strcmp(bloat_default, "fat") == 0)
+			bloat = 1;
+		else
+			abort(); /* Configuration error.  */
+	}
+	if (directory == NULL)
+		directory = TZDIR;
+	if (tzdefault == NULL)
+		tzdefault = TZDEFAULT;
+
+	if (optind < argc && leapsec != NULL) {	/* the leap second file is read first */
+		infile(LEAPSEC_FILENUM, leapsec);
+		adjleap();
+	}
+
+	for (k = optind; k < argc; k++)
+		infile(k, argv[k]);
+	if (errors)
+		return EXIT_FAILURE;
+	associate();
+	change_directory(directory);
+	catch_signals();
+	for (i = 0; i < nzones; i = j) {
+		/*
+		** Find the next non-continuation zone entry.
+		*/
+		for (j = i + 1; j < nzones && zones[j].z_name == NULL; ++j)
+			continue;
+		outzone(&zones[i], j - i);
+	}
+	make_links();
+	if (lcltime != NULL) {
+		eat(COMMAND_LINE_FILENUM, 1);
+		dolink(lcltime, tzdefault, true);
+	}
+	if (psxrules != NULL) {
+		eat(COMMAND_LINE_FILENUM, 1);
+		dolink(psxrules, TZDEFRULES, true);
+	}
+	if (warnings && (ferror(stderr) || fclose(stderr) != 0))	/* fail if the warnings could not be written */
+		return EXIT_FAILURE;
+	return errors ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+static bool
+componentcheck(char const *name, char const *component,
+	       char const *component_end)	/* check one '/'-delimited component of NAME; false after an error */
+{
+	enum { component_len_max = 14 };
+	ptrdiff_t component_len = component_end - component;
+	if (component_len == 0) {
+		if (!*name)
+			error(_("empty file name"));
+		else
+			error(_(component == name
+				? "file name '%s' begins with '/'"
+				: *component_end
+				? "file name '%s' contains '//'"
+				: "file name '%s' ends with '/'"),
+			      name);
+		return false;
+	}
+	if (0 < component_len && component_len <= 2
+	    && component[0] == '.' && component_end[-1] == '.') {	/* the component is "." or ".." */
+		int len = component_len;
+		error(_("file name '%s' contains '%.*s' component"),
+		      name, len, component);
+		return false;
+	}
+	if (noise) {	/* the remaining checks only warn, and only under -v */
+		if (0 < component_len && component[0] == '-')
+			warning(_("file name '%s' component contains leading '-'"),
+				name);
+		if (component_len_max < component_len)
+			warning(_("file name '%s' contains overlength component"
+				  " '%.*s...'"),
+				name, component_len_max, component);
+	}
+	return true;
+}
+
+static bool
+namecheck(const char *name)	/* check every component of NAME; false if any check reports an error */
+{
+	register char const *cp;
+
+	/* Benign characters in a portable file name.  */
+	static char const benign[] =
+	  "-/_"
+	  "abcdefghijklmnopqrstuvwxyz"
+	  "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+	/* Non-control chars in the POSIX portable character set,
+	   excluding the benign characters.  */
+	static char const printable_and_not_benign[] =
+	  " !\"#$%&'()*+,.0123456789:;<=>?@[\\]^`{|}~";
+
+	register char const *component = name;
+	for (cp = name; *cp; cp++) {
+		unsigned char c = *cp;
+		if (noise && !strchr(benign, c)) {
+			warning((strchr(printable_and_not_benign, c)
+				 ? _("file name '%s' contains byte '%c'")
+				 : _("file name '%s' contains byte '\\%o'")),
+				name, c);
+		}
+		if (c == '/') {
+			if (!componentcheck(name, component, cp))
+				return false;
+			component = cp + 1;
+		}
+	}
+	return componentcheck(name, component, cp);	/* the last component ends at the NUL */
+}
+
+/* Return a random uint_fast64_t.
*/
+static uint_fast64_t
+get_rand_u64(void)
+{
+#if HAVE_GETRANDOM
+	static uint_fast64_t entropy_buffer[max(1, 256 / sizeof(uint_fast64_t))];
+	static int nwords;	/* unread words left in entropy_buffer; -1 once getrandom has failed */
+	if (!nwords) {
+		ssize_t s;
+		do
+			s = getrandom(entropy_buffer, sizeof entropy_buffer, 0);
+		while (s < 0 && errno == EINTR);	/* retry only when interrupted */
+
+		nwords = s < 0 ? -1 : s / sizeof *entropy_buffer;
+	}
+	if (0 < nwords)
+		return entropy_buffer[--nwords];	/* words are consumed from the end of the buffer */
+#endif
+
+	/* getrandom didn't work, so fall back on portable code that is
+	   not the best because the seed isn't cryptographically random and
+	   'rand' might not be cryptographically secure.  */
+	{
+		static bool initialized;
+		if (!initialized) {
+			srand(time(NULL));
+			initialized = true;
+		}
+	}
+
+	/* Return a random number if rand() yields a random number and in
+	   the typical case where RAND_MAX is one less than a power of two.
+	   In other cases this code yields a sort-of-random number.
+	   R accumulates rand() results as base-NRAND digits, while RMAX
+	   tracks the largest value R could hold so far; the loop stops
+	   once RMAX has reached UINT_FAST64_MAX.  */
+	{
+		uint_fast64_t rand_max = RAND_MAX,
+		  nrand = rand_max < UINT_FAST64_MAX ? rand_max + 1 : 0,
+		  rmod = INT_MAX < UINT_FAST64_MAX ? 0 : UINT_FAST64_MAX / nrand + 1,
+		  r = 0, rmax = 0;
+
+		do {
+			uint_fast64_t rmax1 = rmax;
+			if (rmod) {
+				/* Avoid signed integer overflow on theoretical platforms
+				   where uint_fast64_t promotes to int.  */
+				rmax1 %= rmod;
+				r %= rmod;
+			}
+			rmax1 = nrand * rmax1 + rand_max;
+			r = nrand * r + rand();
+			rmax = rmax < rmax1 ? rmax1 : UINT_FAST64_MAX;	/* saturate if RMAX1 did not grow */
+		} while (rmax < UINT_FAST64_MAX);
+
+		return r;
+	}
+}
+
+/* Generate a randomish name in the same directory as *NAME.  If
+   *NAMEALLOC, put the name into *NAMEALLOC which is assumed to be
+   that returned by a previous call and is thus already almost set up
+   and equal to *NAME; otherwise, allocate a new name and put its
+   address into both *NAMEALLOC and *NAME.
*/
+static void
+random_dirent(char const **name, char **namealloc)
+{
+	char const *src = *name;
+	char *dst = *namealloc;
+	static char const prefix[] = ".zic";
+	static char const alphabet[] =
+	  "abcdefghijklmnopqrstuvwxyz"
+	  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+	  "0123456789";
+	enum { prefixlen = sizeof prefix - 1, alphabetlen = sizeof alphabet - 1 };
+	int suffixlen = 6;
+	char const *lastslash = strrchr(src, '/');
+	/* Length of the directory part of SRC, including its final '/'.  */
+	ptrdiff_t dirlen = lastslash ? lastslash + 1 - src : 0;
+	int i;
+	uint_fast64_t r;
+	uint_fast64_t base = alphabetlen;
+
+	/* BASE**6 */
+	uint_fast64_t base__6 = base * base * base * base * base * base;
+
+	/* The largest uint_fast64_t that is a multiple of BASE**6.  Any
+	   random uint_fast64_t value that is this value or greater, yields
+	   a biased remainder when divided by BASE**6.  UNFAIR_MIN equals the
+	   mathematical value of
+	   ((UINT_FAST64_MAX + 1) - (UINT_FAST64_MAX + 1) % BASE**6)
+	   computed without overflow.  The bound must be derived from
+	   UINT_FAST64_MAX, the range of R below, not from UINTMAX_MAX:
+	   where uintmax_t is wider than uint_fast64_t the latter would
+	   yield the wrong threshold once truncated.  */
+	uint_fast64_t unfair_min = - ((UINT_FAST64_MAX % base__6 + 1) % base__6);
+
+	if (!dst) {
+		/* First call: allocate DIR + PREFIX + SUFFIXLEN bytes + NUL,
+		   and fill in everything but the random suffix.  */
+		dst = emalloc(size_sum(dirlen, prefixlen + suffixlen + 1));
+		memcpy(dst, src, dirlen);
+		memcpy(dst + dirlen, prefix, prefixlen);
+		dst[dirlen + prefixlen + suffixlen] = '\0';
+		*name = *namealloc = dst;
+	}
+
+	/* Reject values in the biased tail so each suffix is equally likely.  */
+	do
+		r = get_rand_u64();
+	while (unfair_min <= r);
+
+	/* Write R's low-order base-ALPHABETLEN digits as the suffix.  */
+	for (i = 0; i < suffixlen; i++) {
+		dst[dirlen + prefixlen + i] = alphabet[r % alphabetlen];
+		r /= alphabetlen;
+	}
+}
+
+/* Prepare to write to the file *OUTNAME, using *TEMPNAME to store the
+   name of the temporary file that will eventually be renamed to
+   *OUTNAME.  Assign the temporary file's name to both *OUTNAME and
+   *TEMPNAME.  If *TEMPNAME is null, allocate the name of any such
+   temporary file; otherwise, reuse *TEMPNAME's storage, which is
+   already set up and only needs its trailing suffix updated.
*/ +static FILE * +open_outfile(char const **outname, char **tempname) +{ +#if __STDC_VERSION__ < 201112 + static char const fopen_mode[] = "wb"; +#else + static char const fopen_mode[] = "wbx"; +#endif + + FILE *fp; + bool dirs_made = false; + if (!*tempname) + random_dirent(outname, tempname); + + while (! (fp = fopen(*outname, fopen_mode))) { + int fopen_errno = errno; + if (fopen_errno == ENOENT && !dirs_made) { + mkdirs(*outname, true); + dirs_made = true; + } else if (fopen_errno == EEXIST) + random_dirent(outname, tempname); + else { + fprintf(stderr, _("%s: Can't create %s/%s: %s\n"), + progname, directory, *outname, strerror(fopen_errno)); + exit(EXIT_FAILURE); + } + } + + return fp; +} + +/* If TEMPNAME, the result is in the temporary file TEMPNAME even + though the user wanted it in NAME, so rename TEMPNAME to NAME. + Report an error and exit if there is trouble. Also, free TEMPNAME. */ +static void +rename_dest(char *tempname, char const *name) +{ + if (tempname) { + if (rename(tempname, name) != 0) { + int rename_errno = errno; + remove(tempname); + fprintf(stderr, _("%s: rename to %s/%s: %s\n"), + progname, directory, name, strerror(rename_errno)); + exit(EXIT_FAILURE); + } + free(tempname); + } +} + +/* Create symlink contents suitable for symlinking FROM to TO, as a + freshly allocated string. FROM should be a relative file name, and + is relative to the global variable DIRECTORY. TO can be either + relative or absolute. */ +static char * +relname(char const *target, char const *linkname) +{ + size_t i, taillen, dir_len = 0, dotdots = 0; + ptrdiff_t dotdotetcsize, linksize = INDEX_MAX; + char const *f = target; + char *result = NULL; + if (*linkname == '/') { + /* Make F absolute too. 
*/ + size_t len = strlen(directory); + size_t lenslash = len + (len && directory[len - 1] != '/'); + size_t targetsize = strlen(target) + 1; + linksize = size_sum(lenslash, targetsize); + f = result = emalloc(linksize); + memcpy(result, directory, len); + result[len] = '/'; + memcpy(result + lenslash, target, targetsize); + } + for (i = 0; f[i] && f[i] == linkname[i]; i++) + if (f[i] == '/') + dir_len = i + 1; + for (; linkname[i]; i++) + dotdots += linkname[i] == '/' && linkname[i - 1] != '/'; + taillen = strlen(f + dir_len); + dotdotetcsize = size_sum(size_product(dotdots, 3), taillen + 1); + if (dotdotetcsize <= linksize) { + if (!result) + result = emalloc(dotdotetcsize); + for (i = 0; i < dotdots; i++) + memcpy(result + 3 * i, "../", 3); + memmove(result + 3 * dotdots, f + dir_len, taillen + 1); + } + return result; +} + +static void +dolink(char const *target, char const *linkname, bool staysymlink) +{ + bool linkdirs_made = false; + int link_errno; + char *tempname = NULL; + char const *outname = linkname; + + check_for_signal(); + + if (strcmp(target, "-") == 0) { + if (remove(linkname) == 0 || errno == ENOENT || errno == ENOTDIR) + return; + else { + char const *e = strerror(errno); + fprintf(stderr, _("%s: Can't remove %s/%s: %s\n"), + progname, directory, linkname, e); + exit(EXIT_FAILURE); + } + } + + while (true) { + if (linkat(AT_FDCWD, target, AT_FDCWD, outname, AT_SYMLINK_FOLLOW) + == 0) { + link_errno = 0; + break; + } + link_errno = errno; + if (link_errno == EXDEV || link_errno == ENOTSUP) + break; + + if (link_errno == EEXIST) { + staysymlink &= !tempname; + random_dirent(&outname, &tempname); + if (staysymlink && itssymlink(linkname)) + break; + } else if (link_errno == ENOENT && !linkdirs_made) { + mkdirs(linkname, true); + linkdirs_made = true; + } else { + fprintf(stderr, _("%s: Can't link %s/%s to %s/%s: %s\n"), + progname, directory, target, directory, outname, + strerror(link_errno)); + exit(EXIT_FAILURE); + } + } + if (link_errno != 0) { 
+ bool absolute = *target == '/'; + char *linkalloc = absolute ? NULL : relname(target, linkname); + char const *contents = absolute ? target : linkalloc; + int symlink_errno; + + while (true) { + if (symlink(contents, outname) == 0) { + symlink_errno = 0; + break; + } + symlink_errno = errno; + if (symlink_errno == EEXIST) + random_dirent(&outname, &tempname); + else if (symlink_errno == ENOENT && !linkdirs_made) { + mkdirs(linkname, true); + linkdirs_made = true; + } else + break; + } + free(linkalloc); + if (symlink_errno == 0) { + if (link_errno != ENOTSUP && link_errno != EEXIST) + warning(_("symbolic link used because hard link failed: %s"), + strerror(link_errno)); + } else { + FILE *fp, *tp; + int c; + fp = fopen(target, "rb"); + if (!fp) { + char const *e = strerror(errno); + fprintf(stderr, _("%s: Can't read %s/%s: %s\n"), + progname, directory, target, e); + exit(EXIT_FAILURE); + } + tp = open_outfile(&outname, &tempname); + while ((c = getc(fp)) != EOF) + putc(c, tp); + close_file(tp, directory, linkname, tempname); + close_file(fp, directory, target, NULL); + if (link_errno != ENOTSUP) + warning(_("copy used because hard link failed: %s"), + strerror(link_errno)); + else if (symlink_errno != ENOTSUP) + warning(_("copy used because symbolic link failed: %s"), + strerror(symlink_errno)); + } + } + rename_dest(tempname, linkname); +} + +/* Return true if NAME is a symbolic link. */ +static bool +itssymlink(char const *name) +{ + char c; + return 0 <= readlink(name, &c, 1); +} + +/* +** Associate sets of rules with zones. +*/ + +/* +** Sort by rule name. 
+*/
+
+static int
+rcomp(const void *cp1, const void *cp2)	/* qsort comparison: order struct rule entries by r_name */
+{
+	struct rule const *r1 = cp1, *r2 = cp2;
+	return strcmp(r1->r_name, r2->r_name);
+}
+
+static void
+associate(void)	/* point each zone at its rules; exit if any error has been reported */
+{
+	register struct zone *	zp;
+	register struct rule *	rp;
+	register ptrdiff_t i, j, base, out;
+
+	if (1 < nrules) {
+		qsort(rules, nrules, sizeof *rules, rcomp);	/* equal names become adjacent */
+		for (i = 0; i < nrules - 1; ++i) {
+			if (strcmp(rules[i].r_name,
+				rules[i + 1].r_name) != 0)
+					continue;
+			if (rules[i].r_filenum == rules[i + 1].r_filenum)
+					continue;
+			eat(rules[i].r_filenum, rules[i].r_linenum);
+			warning(_("same rule name in multiple files"));
+			eat(rules[i + 1].r_filenum, rules[i + 1].r_linenum);
+			warning(_("same rule name in multiple files"));
+			for (j = i + 2; j < nrules; ++j) {	/* skip same-named rules from the two files just reported */
+				if (strcmp(rules[i].r_name,
+					rules[j].r_name) != 0)
+						break;
+				if (rules[i].r_filenum == rules[j].r_filenum)
+						continue;
+				if (rules[i + 1].r_filenum
+				    == rules[j].r_filenum)
+						continue;
+				break;
+			}
+			i = j - 1;	/* the outer ++i resumes at rules[j] */
+		}
+	}
+	for (i = 0; i < nzones; ++i) {
+		zp = &zones[i];
+		zp->z_rules = NULL;
+		zp->z_nrules = 0;
+	}
+	for (base = 0; base < nrules; base = out) {	/* rules[base .. out-1] share one name */
+		rp = &rules[base];
+		for (out = base + 1; out < nrules; ++out)
+			if (strcmp(rp->r_name, rules[out].r_name) != 0)
+				break;
+		for (i = 0; i < nzones; ++i) {
+			zp = &zones[i];
+			if (strcmp(zp->z_rule, rp->r_name) != 0)
+				continue;
+			zp->z_rules = rp;
+			zp->z_nrules = out - base;
+		}
+	}
+	for (i = 0; i < nzones; ++i) {
+		zp = &zones[i];
+		if (zp->z_nrules == 0) {
+			/*
+			** Maybe we have a local standard time offset.
+			*/
+			eat(zp->z_filenum, zp->z_linenum);
+			zp->z_save = getsave(zp->z_rule, &zp->z_isdst);
+			/*
+			** Note, though, that if there's no rule,
+			** a '%s' in the format is a bad thing.
+			*/
+			if (zp->z_format_specifier == 's')
+				error("%s", _("%s in ruleless zone"));
+		}
+	}
+	if (errors)
+		exit(EXIT_FAILURE);
+}
+
+/* Read a text line from FP into BUF, which is of size BUFSIZE.
+   Terminate it with a NUL byte instead of a newline.
+ Return true if successful, false if EOF. + On error, report the error and exit. */ +static bool +inputline(FILE *fp, char *buf, ptrdiff_t bufsize) +{ + ptrdiff_t linelen = 0, ch; + while ((ch = getc(fp)) != '\n') { + if (ch < 0) { + if (ferror(fp)) { + error(_("input error")); + exit(EXIT_FAILURE); + } + if (linelen == 0) + return false; + error(_("unterminated line")); + exit(EXIT_FAILURE); + } + if (!ch) { + error(_("NUL input byte")); + exit(EXIT_FAILURE); + } + buf[linelen++] = ch; + if (linelen == bufsize) { + error(_("line too long")); + exit(EXIT_FAILURE); + } + } + buf[linelen] = '\0'; + return true; +} + +static void +infile(int fnum, char const *name) +{ + register FILE * fp; + register const struct lookup * lp; + register bool wantcont; + register lineno num; + + if (strcmp(name, "-") == 0) { + fp = stdin; + } else if ((fp = fopen(name, "r")) == NULL) { + const char *e = strerror(errno); + + fprintf(stderr, _("%s: Can't open %s: %s\n"), + progname, name, e); + exit(EXIT_FAILURE); + } + wantcont = false; + for (num = 1; ; ++num) { + enum { bufsize_bound + = (min(INT_MAX, INDEX_MAX) / FORMAT_LEN_GROWTH_BOUND) }; + char buf[min(_POSIX2_LINE_MAX, bufsize_bound)]; + int nfields; + char *fields[MAX_FIELDS]; + eat(fnum, num); + if (!inputline(fp, buf, sizeof buf)) + break; + nfields = getfields(buf, fields, + sizeof fields / sizeof *fields); + if (nfields == 0) { + /* nothing to do */ + } else if (wantcont) { + wantcont = inzcont(fields, nfields); + } else { + struct lookup const *line_codes + = fnum < 0 ? 
leap_line_codes : zi_line_codes;
+			lp = byword(fields[0], line_codes);
+			if (lp == NULL)
+				error(_("input line of unknown type"));
+			else switch (lp->l_value) {
+				case LC_RULE:
+					inrule(fields, nfields);
+					wantcont = false;
+					break;
+				case LC_ZONE:
+					wantcont = inzone(fields, nfields);
+					break;
+				case LC_LINK:
+					inlink(fields, nfields);
+					wantcont = false;
+					break;
+				case LC_LEAP:
+					inleap(fields, nfields);
+					wantcont = false;
+					break;
+				case LC_EXPIRES:
+					inexpires(fields, nfields);
+					wantcont = false;
+					break;
+				default: unreachable();
+			}
+		}
+	}
+	close_file(fp, NULL, filename(fnum), NULL);
+	if (wantcont)
+		error(_("expected continuation line not found"));
+}
+
+/*
+** Convert a string of one of the forms
+**	h	-h	hh:mm	-hh:mm	hh:mm:ss	-hh:mm:ss
+** into a number of seconds.
+** A null string maps to zero.
+** Call error with errstring and return zero on errors.
+** The sscanf pattern below also accepts a '.' and fraction digits
+** after the seconds; the fraction is rounded into the seconds.
+*/
+
+static zic_t
+gethms(char const *string, char const *errstring)
+{
+	zic_t	hh;
+	int sign, mm = 0, ss = 0;
+	char hhx, mmx, ssx, xr = '0', xs;
+	int tenths = 0;
+	bool ok = true;
+
+	if (string == NULL || *string == '\0')
+		return 0;
+	if (*string == '-') {
+		sign = -1;
+		++string;
+	} else	sign = 1;
+	/* The number of items sscanf converts shows how much of the pattern matched. */
+	switch (sscanf(string,
+		       "%"SCNdZIC"%c%d%c%d%c%1d%*[0]%c%*[0123456789]%c",
+		       &hh, &hhx, &mm, &mmx, &ss, &ssx, &tenths, &xr, &xs)) {
+	  default: ok = false; break;
+	  case 8:
+	    ok = '0' <= xr && xr <= '9';
+	    ATTRIBUTE_FALLTHROUGH;
+	  case 7:
+	    ok &= ssx == '.';
+	    if (ok && noise)
+	      warning(_("fractional seconds rejected by"
+			" pre-2018 versions of zic"));
+	    ATTRIBUTE_FALLTHROUGH;
+	  case 5: ok &= mmx == ':'; ATTRIBUTE_FALLTHROUGH;
+	  case 3: ok &= hhx == ':'; ATTRIBUTE_FALLTHROUGH;
+	  case 1: break;
+	}
+	if (!ok) {
+			error("%s", errstring);
+			return 0;
+	}
+	if (hh < 0 ||
+		mm < 0 || mm >= MINSPERHOUR ||
+		ss < 0 || ss > SECSPERMIN) {
+			error("%s", errstring);
+			return 0;
+	}
+	if (ZIC_MAX / SECSPERHOUR < hh) {
+		error(_("time overflow"));
+		return 0;
+	}
+	ss += 5 + ((ss ^ 1) & (xr == '0')) <= tenths;
/* Round to even. */
+	if (noise && (hh > HOURSPERDAY ||
+		(hh == HOURSPERDAY && (mm != 0 || ss != 0))))
+warning(_("values over 24 hours not handled by pre-2007 versions of zic"));
+	return oadd(sign * hh * SECSPERHOUR,
+		    sign * (mm * SECSPERMIN + ss));
+}
+/*
+** Parse FIELD as a save amount and return the value from gethms.
+** A trailing 'd' or 's' is removed from FIELD in place and forces
+** *ISDST to true or false respectively; without such a suffix *ISDST
+** is true exactly when the parsed amount is nonzero.
+*/
+static zic_t
+getsave(char *field, bool *isdst)
+{
+	int dst = -1;	/* -1 until a 'd' (1) or 's' (0) suffix is seen */
+	zic_t save;
+	ptrdiff_t fieldlen = strlen(field);
+	if (fieldlen != 0) {
+		char *ep = field + fieldlen - 1;
+		switch (*ep) {
+			case 'd': dst = 1; *ep = '\0'; break;
+			case 's': dst = 0; *ep = '\0'; break;
+		}
+	}
+	save = gethms(field, _("invalid saved time"));
+	*isdst = dst < 0 ? save != 0 : dst;
+	return save;
+}
+/*
+** Process the fields of a Rule line: check the field count and the
+** first character of the rule name, fill in a struct rule using
+** getsave and rulesub, and append it to the rules array.
+** Reports an error and returns without adding a rule on bad input.
+*/
+static void
+inrule(char **fields, int nfields)
+{
+	struct rule r;
+
+	if (nfields != RULE_FIELDS) {
+		error(_("wrong number of fields on Rule line"));
+		return;
+	}
+	switch (*fields[RF_NAME]) {
+	  case '\0':
+	  case ' ': case '\f': case '\n': case '\r': case '\t': case '\v':
+	  case '+': case '-':
+	  case '0': case '1': case '2': case '3': case '4':
+	  case '5': case '6': case '7': case '8': case '9':
+		error(_("Invalid rule name \"%s\""), fields[RF_NAME]);
+		return;
+	}
+	r.r_filenum = filenum;
+	r.r_linenum = linenum;
+	r.r_save = getsave(fields[RF_SAVE], &r.r_isdst);
+	if (!rulesub(&r, fields[RF_LOYEAR], fields[RF_HIYEAR],
+		     fields[RF_COMMAND], fields[RF_MONTH], fields[RF_DAY],
+		     fields[RF_TOD]))
+	  return;
+	r.r_name = estrdup(fields[RF_NAME]);
+	r.r_abbrvar = estrdup(fields[RF_ABBRVAR]);
+	if (max_abbrvar_len < strlen(r.r_abbrvar))
+		max_abbrvar_len = strlen(r.r_abbrvar);
+	rules = growalloc(rules, sizeof *rules, nrules, &nrules_alloc);
+	rules[nrules++] = r;
+}
+/*
+** Process the fields of a Zone line.  Rejects a wrong field count, a
+** name that clashes with the -l or -p option, and a name already used
+** by an earlier zone; otherwise hands the line to inzsub.
+** Returns inzsub's result, or false after reporting an error.
+*/
+static bool
+inzone(char **fields, int nfields)
+{
+	register ptrdiff_t i;
+
+	if (nfields < ZONE_MINFIELDS || nfields > ZONE_MAXFIELDS) {
+		error(_("wrong number of fields on Zone line"));
+		return false;
+	}
+	if (lcltime != NULL && strcmp(fields[ZF_NAME], tzdefault) == 0) {
+		error(
+_("\"Zone %s\" line and -l option are mutually exclusive"),
+			tzdefault);
return false;
+	}
+	if (strcmp(fields[ZF_NAME], TZDEFRULES) == 0 && psxrules != NULL) {
+		error(
+_("\"Zone %s\" line and -p option are mutually exclusive"),
+			TZDEFRULES);
+		return false;
+	}
+	for (i = 0; i < nzones; ++i)
+		if (zones[i].z_name != NULL &&	/* continuation entries have a NULL z_name */
+			strcmp(zones[i].z_name, fields[ZF_NAME]) == 0) {
+				error(_("duplicate zone name %s"
+					" (file \"%s\", line %"PRIdMAX")"),
+				      fields[ZF_NAME],
+				      filename(zones[i].z_filenum),
+				      zones[i].z_linenum);
+				return false;
+		}
+	return inzsub(fields, nfields, false);
+}
+/*
+** Process the fields of a Zone continuation line: check the field
+** count, then hand the line to inzsub with ISCONT true.
+*/
+static bool
+inzcont(char **fields, int nfields)
+{
+	if (nfields < ZONEC_MINFIELDS || nfields > ZONEC_MAXFIELDS) {
+		error(_("wrong number of fields on Zone continuation line"));
+		return false;
+	}
+	return inzsub(fields, nfields, true);
+}
+/*
+** Common work for Zone lines and Zone continuation lines.  ISCONT
+** selects the ZFC_* field indexes instead of the ZF_* ones.  Builds a
+** struct zone from FIELDS and appends it to the zones array.
+** Returns true if the line had an UNTIL field, false otherwise or
+** after reporting an error.
+*/
+static bool
+inzsub(char **fields, int nfields, bool iscont)
+{
+	register char * cp;
+	char * cp1;
+	struct zone z;
+	int format_len;
+	register int i_stdoff, i_rule, i_format;
+	register int i_untilyear, i_untilmonth;
+	register int i_untilday, i_untiltime;
+	register bool hasuntil;
+
+	if (iscont) {
+		i_stdoff = ZFC_STDOFF;
+		i_rule = ZFC_RULE;
+		i_format = ZFC_FORMAT;
+		i_untilyear = ZFC_TILYEAR;
+		i_untilmonth = ZFC_TILMONTH;
+		i_untilday = ZFC_TILDAY;
+		i_untiltime = ZFC_TILTIME;
+	} else if (!namecheck(fields[ZF_NAME]))
+		return false;
+	else {
+		i_stdoff = ZF_STDOFF;
+		i_rule = ZF_RULE;
+		i_format = ZF_FORMAT;
+		i_untilyear = ZF_TILYEAR;
+		i_untilmonth = ZF_TILMONTH;
+		i_untilday = ZF_TILDAY;
+		i_untiltime = ZF_TILTIME;
+	}
+	z.z_filenum = filenum;
+	z.z_linenum = linenum;
+	z.z_stdoff = gethms(fields[i_stdoff], _("invalid UT offset"));
+	/* A '%' in the format must be followed by 's' or 'z', must occur only once, and excludes '/'. */
+	if ((cp = strchr(fields[i_format], '%')) != 0) {
+		if ((*++cp != 's' && *cp != 'z') || strchr(cp, '%')
+		    || strchr(fields[i_format], '/')) {
+			error(_("invalid abbreviation format"));
+			return false;
+		}
+	}
+	z.z_format_specifier = cp ?
*cp : '\0';
+	format_len = strlen(fields[i_format]);
+	if (max_format_len < format_len)
+	  max_format_len = format_len;
+	hasuntil = nfields > i_untilyear;
+	if (hasuntil) {
+		z.z_untilrule.r_filenum = filenum;
+		z.z_untilrule.r_linenum = linenum;
+		/* Missing UNTIL month, day and time fields default to "Jan", "1" and "0". */
+		if (!rulesub(
+			&z.z_untilrule,
+			fields[i_untilyear],
+			"only",
+			"",
+			(nfields > i_untilmonth) ?
+			fields[i_untilmonth] : "Jan",
+			(nfields > i_untilday) ? fields[i_untilday] : "1",
+			(nfields > i_untiltime) ? fields[i_untiltime] : "0"))
+		  return false;
+		z.z_untiltime = rpytime(&z.z_untilrule,
+			z.z_untilrule.r_loyear);
+		if (iscont && nzones > 0 &&
+			z.z_untiltime > min_time &&
+			z.z_untiltime < max_time &&
+			zones[nzones - 1].z_untiltime > min_time &&
+			zones[nzones - 1].z_untiltime < max_time &&
+			zones[nzones - 1].z_untiltime >= z.z_untiltime) {
+				error(_(
+"Zone continuation line end time is not after end time of previous line"
+					));
+				return false;
+		}
+	}
+	z.z_name = iscont ? NULL : estrdup(fields[ZF_NAME]);
+	z.z_rule = estrdup(fields[i_rule]);
+	z.z_format = cp1 = estrdup(fields[i_format]);
+	if (z.z_format_specifier == 'z') {
+	  cp1[cp - fields[i_format]] = 's';	/* the stored copy of the format gets "%s" in place of "%z" */
+	  if (noise)
+	    warning(_("format '%s' not handled by pre-2015 versions of zic"),
+		    fields[i_format]);
+	}
+	zones = growalloc(zones, sizeof *zones, nzones, &nzones_alloc);
+	zones[nzones++] = z;
+	/*
+	** If there was an UNTIL field on this line,
+	** there's more information about the zone on the next line.
+	*/
+	return hasuntil;
+}
+/*
+** Convert the year, month, day and time-of-day fields of FIELDS
+** (indexed by LP_YEAR, LP_MONTH, LP_DAY and LP_TIME) into a timestamp.
+** Unless EXPIRE_LINE is true, also widen leapminyear..leapmaxyear to
+** include the year.  Returns -1 after reporting an error for an
+** unparsable or out-of-range date.
+*/
+static zic_t
+getleapdatetime(char **fields, bool expire_line)
+{
+	register const char * cp;
+	register const struct lookup * lp;
+	register zic_t i, j;
+	zic_t year;
+	int month, day;
+	zic_t dayoff, tod;
+	zic_t t;
+	char xs;
+
+	dayoff = 0;
+	cp = fields[LP_YEAR];
+	if (sscanf(cp, "%"SCNdZIC"%c", &year, &xs) != 1) {
+		/*
+		** Leapin' Lizards!
+		*/
+		error(_("invalid leaping year"));
+		return -1;
+	}
+	if (!expire_line) {
+	    if (!leapseen || leapmaxyear < year)
+		leapmaxyear = year;
+	    if (!leapseen || leapminyear > year)
+		leapminyear = year;
+	    leapseen = true;
+	}
+	/* Accumulate in DAYOFF the signed number of days from EPOCH_YEAR to YEAR. */
+	j = EPOCH_YEAR;
+	while (j != year) {
+		if (year > j) {
+			i = len_years[isleap(j)];
+			++j;
+		} else {
+			--j;
+			i = -len_years[isleap(j)];
+		}
+		dayoff = oadd(dayoff, i);
+	}
+	if ((lp = byword(fields[LP_MONTH], mon_names)) == NULL) {
+		error(_("invalid month name"));
+		return -1;
+	}
+	month = lp->l_value;
+	/* Add the lengths of the months before MONTH in YEAR. */
+	j = TM_JANUARY;
+	while (j != month) {
+		i = len_months[isleap(year)][j];
+		dayoff = oadd(dayoff, i);
+		++j;
+	}
+	cp = fields[LP_DAY];
+	if (sscanf(cp, "%d%c", &day, &xs) != 1 ||
+		day <= 0 || day > len_months[isleap(year)][month]) {
+			error(_("invalid day of month"));
+			return -1;
+	}
+	dayoff = oadd(dayoff, day - 1);
+	if (dayoff < min_time / SECSPERDAY) {
+		error(_("time too small"));
+		return -1;
+	}
+	if (dayoff > max_time / SECSPERDAY) {
+		error(_("time too large"));
+		return -1;
+	}
+	t = dayoff * SECSPERDAY;
+	tod = gethms(fields[LP_TIME], _("invalid time of day"));
+	t = tadd(t, tod);
+	if (t < 0)	/* reported, but the negative value is still returned */
+	  error(_("leap second precedes Epoch"));
+	return t;
+}
+/*
+** Process the fields of a Leap line: convert its date and time with
+** getleapdatetime, look up the Rolling/Stationary field, and call
+** leapadd with a correction of +1 for "+" or -1 for an empty
+** CORRECTION field.  Reports an error for anything else.
+*/
+static void
+inleap(char **fields, int nfields)
+{
+  if (nfields != LEAP_FIELDS)
+    error(_("wrong number of fields on Leap line"));
+  else {
+    zic_t t = getleapdatetime(fields, false);
+    if (0 <= t) {
+      struct lookup const *lp = byword(fields[LP_ROLL], leap_types);
+      if (!lp)
+	error(_("invalid Rolling/Stationary field on Leap line"));
+      else {
+	int correction = 0;
+	if (!fields[LP_CORR][0]) /* infile() turns "-" into "".
*/
+	  correction = -1;
+	else if (strcmp(fields[LP_CORR], "+") == 0)
+	  correction = 1;
+	else
+	  error(_("invalid CORRECTION field on Leap line"));
+	if (correction)	/* still 0 when the CORRECTION field was invalid */
+	  leapadd(t, correction, lp->l_value);
+      }
+    }
+  }
+}
+/*
+** Process the fields of an Expires line: set leapexpires from
+** getleapdatetime.  Reports an error for a wrong field count or when
+** leapexpires is already nonnegative.
+*/
+static void
+inexpires(char **fields, int nfields)
+{
+  if (nfields != EXPIRES_FIELDS)
+    error(_("wrong number of fields on Expires line"));
+  else if (0 <= leapexpires)
+    error(_("multiple Expires lines"));
+  else
+    leapexpires = getleapdatetime(fields, true);
+}
+/*
+** Process the fields of a Link line: check the field count, reject an
+** empty TARGET, check the link name with namecheck, then append a
+** struct link holding copies of both names to the links array.
+*/
+static void
+inlink(char **fields, int nfields)
+{
+	struct link l;
+
+	if (nfields != LINK_FIELDS) {
+		error(_("wrong number of fields on Link line"));
+		return;
+	}
+	if (*fields[LF_TARGET] == '\0') {
+		error(_("blank TARGET field on Link line"));
+		return;
+	}
+	if (! namecheck(fields[LF_LINKNAME]))
+	  return;
+	l.l_filenum = filenum;
+	l.l_linenum = linenum;
+	l.l_target = estrdup(fields[LF_TARGET]);
+	l.l_linkname = estrdup(fields[LF_LINKNAME]);
+	links = growalloc(links, sizeof *links, nlinks, &nlinks_alloc);
+	links[nlinks++] = l;
+}
+/*
+** Fill in the month, time-of-day, year-range and day members of *RP
+** from their textual forms.  TYPEP must be the empty string.
+** Returns true on success, false after reporting an error.
+*/
+static bool
+rulesub(struct rule *rp, const char *loyearp, const char *hiyearp,
+	const char *typep, const char *monthp, const char *dayp,
+	const char *timep)
+{
+	register const struct lookup * lp;
+	register const char * cp;
+	register char * dp;
+	register char * ep;
+	char xs;
+
+	if ((lp = byword(monthp, mon_names)) == NULL) {
+		error(_("invalid month name"));
+		return false;
+	}
+	rp->r_month = lp->l_value;
+	rp->r_todisstd = false;
+	rp->r_todisut = false;
+	dp = estrdup(timep);	/* private copy, so a suffix letter can be cut off */
+	if (*dp != '\0') {
+		ep = dp + strlen(dp) - 1;
+		switch (lowerit(*ep)) {
+			case 's':	/* Standard */
+				rp->r_todisstd = true;
+				rp->r_todisut = false;
+				*ep = '\0';
+				break;
+			case 'w':	/* Wall */
+				rp->r_todisstd = false;
+				rp->r_todisut = false;
+				*ep = '\0';
+				break;
+			case 'g':	/* Greenwich */
+			case 'u':	/* Universal */
+			case 'z':	/* Zulu */
+				rp->r_todisstd = true;
+				rp->r_todisut = true;
+				*ep = '\0';
+				break;
+		}
+	}
+	rp->r_tod = gethms(dp, _("invalid
time of day"));
+	free(dp);
+	/*
+	** Year work.
+	** A starting or ending year is either a keyword found by byword
+	** or a number read by sscanf; r_lowasnum and r_hiwasnum record
+	** that no keyword matched.
+	*/
+	cp = loyearp;
+	lp = byword(cp, begin_years);
+	rp->r_lowasnum = lp == NULL;
+	if (!rp->r_lowasnum) switch (lp->l_value) {
+		case YR_MINIMUM:
+			rp->r_loyear = ZIC_MIN;
+			break;
+		case YR_MAXIMUM:
+			rp->r_loyear = ZIC_MAX;
+			break;
+		default: unreachable();
+	} else if (sscanf(cp, "%"SCNdZIC"%c", &rp->r_loyear, &xs) != 1) {
+		error(_("invalid starting year"));
+		return false;
+	}
+	cp = hiyearp;
+	lp = byword(cp, end_years);
+	rp->r_hiwasnum = lp == NULL;
+	if (!rp->r_hiwasnum) switch (lp->l_value) {
+		case YR_MINIMUM:
+			rp->r_hiyear = ZIC_MIN;
+			break;
+		case YR_MAXIMUM:
+			rp->r_hiyear = ZIC_MAX;
+			break;
+		case YR_ONLY:
+			rp->r_hiyear = rp->r_loyear;
+			break;
+		default: unreachable();
+	} else if (sscanf(cp, "%"SCNdZIC"%c", &rp->r_hiyear, &xs) != 1) {
+		error(_("invalid ending year"));
+		return false;
+	}
+	if (rp->r_loyear > rp->r_hiyear) {
+		error(_("starting year greater than ending year"));
+		return false;
+	}
+	if (*typep != '\0') {
+		error(_("year type \"%s\" is unsupported; use \"-\" instead"),
+			typep);
+		return false;
+	}
+	/*
+	** Day work.
+	** Accept things such as:
+	**	1
+	**	lastSunday
+	**	last-Sunday (undocumented; warn about this)
+	**	Sun<=20
+	**	Sun>=7
+	*/
+	dp = estrdup(dayp);
+	if ((lp = byword(dp, lasts)) != NULL) {
+		/* A "last" weekday is stored as weekday <= last day of the month. */
+		rp->r_dycode = DC_DOWLEQ;
+		rp->r_wday = lp->l_value;
+		rp->r_dayofmonth = len_months[1][rp->r_month];
+	} else {
+		if ((ep = strchr(dp, '<')) != 0)
+			rp->r_dycode = DC_DOWLEQ;
+		else if ((ep = strchr(dp, '>')) != 0)
+			rp->r_dycode = DC_DOWGEQ;
+		else {
+			ep = dp;
+			rp->r_dycode = DC_DOM;
+		}
+		if (rp->r_dycode != DC_DOM) {
+			*ep++ = 0;	/* end the weekday name at the '<' or '>' */
+			if (*ep++ != '=') {
+				error(_("invalid day of month"));
+				free(dp);
+				return false;
+			}
+			if ((lp = byword(dp, wday_names)) == NULL) {
+				error(_("invalid weekday name"));
+				free(dp);
+				return false;
+			}
+			rp->r_wday = lp->l_value;
+		}
+		if (sscanf(ep, "%d%c", &rp->r_dayofmonth, &xs) != 1 ||
+			rp->r_dayofmonth <= 0 ||
+			(rp->r_dayofmonth > len_months[1][rp->r_month])) {
+				error(_("invalid day of month"));
+				free(dp);
+				return false;
+		}
+	}
+	free(dp);
+	return true;
+}
+/*
+** Store the low-order 32 bits of VAL into BUF[0] through BUF[3],
+** most significant byte first.
+*/
+static void
+convert(uint_fast32_t val, char *buf)
+{
+	register int i;
+	register int shift;
+	unsigned char *const b = (unsigned char *) buf;
+
+	for (i = 0, shift = 24; i < 4; ++i, shift -= 8)
+	  b[i] = (val >> shift) & 0xff;
+}
+/*
+** Store the low-order 64 bits of VAL into BUF[0] through BUF[7],
+** most significant byte first.
+*/
+static void
+convert64(uint_fast64_t val, char *buf)
+{
+	register int i;
+	register int shift;
+	unsigned char *const b = (unsigned char *) buf;
+
+	for (i = 0, shift = 56; i < 8; ++i, shift -= 8)
+	  b[i] = (val >> shift) & 0xff;
+}
+/*
+** Write VAL to FP as the four bytes produced by convert.
+*/
+static void
+puttzcode(zic_t val, FILE *fp)
+{
+	char buf[4];
+
+	convert(val, buf);
+	fwrite(buf, sizeof buf, 1, fp);
+}
+/*
+** Write VAL to FP: as four bytes via puttzcode when PASS is 1,
+** otherwise as the eight bytes produced by convert64.
+*/
+static void
+puttzcodepass(zic_t val, FILE *fp, int pass)
+{
+  if (pass == 1)
+    puttzcode(val, fp);
+  else {
+	char buf[8];
+
+	convert64(val, buf);
+	fwrite(buf, sizeof buf, 1, fp);
+    }
+}
+/*
+** qsort comparison function: order two struct attype objects by
+** their `at' members.
+*/
+static int
+atcomp(const void *avp, const void *bvp)
+{
+  struct attype const *ap = avp, *bp = bvp;
+  zic_t a = ap->at, b = bp->at;
+  return a < b ?
-1 : a > b;
+}
+/*
+** A window onto the transition arrays and the leap second arrays:
+** `count' transitions starting at index `base', and `leapcount' leap
+** seconds starting at index `leapbase'.  limitrange fills in the
+** other members.
+*/
+struct timerange {
+  int defaulttype;
+  ptrdiff_t base, count;
+  int leapbase, leapcount;
+  bool leapexpiry;
+};
+/*
+** Return a copy of R narrowed to the bounds LO and HI, using the
+** transition times ATS and their types TYPES, as the comments in the
+** body describe.
+*/
+static struct timerange
+limitrange(struct timerange r, zic_t lo, zic_t hi,
+	   zic_t const *ats, unsigned char const *types)
+{
+  /* Omit ordinary transitions < LO. */
+  while (0 < r.count && ats[r.base] < lo) {
+    r.defaulttype = types[r.base];	/* type of the most recently omitted transition */
+    r.count--;
+    r.base++;
+  }
+
+  /* Omit as many initial leap seconds as possible, such that the
+     first leap second in the truncated list is <= LO, and is a
+     positive leap second if and only if it has a positive correction.
+     This supports common TZif readers that assume that the first leap
+     second is positive if and only if its correction is positive. */
+  while (1 < r.leapcount && trans[r.leapbase + 1] <= lo) {
+    r.leapcount--;
+    r.leapbase++;
+  }
+  while (0 < r.leapbase
+	 && ((corr[r.leapbase - 1] < corr[r.leapbase])
+	     != (0 < corr[r.leapbase]))) {
+    r.leapcount++;
+    r.leapbase--;
+  }
+
+
+  /* Omit ordinary and leap second transitions greater than HI + 1. */
+  if (hi < max_time) {
+    while (0 < r.count && hi + 1 < ats[r.base + r.count - 1])
+      r.count--;
+    while (0 < r.leapcount && hi + 1 < trans[r.leapbase + r.leapcount - 1])
+      r.leapcount--;
+  }
+
+  /* Determine whether to append an expiration to the leap second table. */
+  r.leapexpiry = 0 <= leapexpires && leapexpires - 1 <= hi;
+
+  return r;
+}
+/*
+** Write the output file for zone NAME.  The file holds two data
+** blocks, one per pass of the main loop below (4-byte and then 8-byte
+** times), followed by STRING on a line of its own.  VERSION is stored
+** in the header and is raised to '4' here when needed.  DEFAULTTYPE
+** becomes the default type of the full time range.
+*/
+static void
+writezone(const char *const name, const char *const string, char version,
+	int defaulttype)
+{
+	register FILE * fp;
+	register ptrdiff_t i, j;
+	register int pass;
+	char *tempname = NULL;
+	char const *outname = name;
+
+	/* Allocate the ATS and TYPES arrays via a single malloc,
+	   as this is a bit faster. Do not malloc(0) if !timecnt,
+	   as that might return NULL even on success.
*/
+	zic_t *ats = emalloc(align_to(size_product(timecnt + !timecnt,
+						   sizeof *ats + 1),
+				      alignof(zic_t)));
+	void *typesptr = ats + timecnt;	/* the TYPES bytes follow the TIMECNT ATS entries */
+	unsigned char *types = typesptr;
+	struct timerange rangeall = {0}, range32, range64;
+
+	/*
+	** Sort.
+	*/
+	if (timecnt > 1)
+		qsort(attypes, timecnt, sizeof *attypes, atcomp);
+	/*
+	** Optimize.
+	** Compact attypes in place: FROMI reads, TOI writes.
+	*/
+	{
+		ptrdiff_t fromi, toi;
+
+		toi = 0;
+		fromi = 0;
+		for ( ; fromi < timecnt; ++fromi) {
+			if (toi != 0
+			    && ((attypes[fromi].at
+				 + utoffs[attypes[toi - 1].type])
+				<= (attypes[toi - 1].at
+				    + utoffs[toi == 1 ? 0
+					     : attypes[toi - 2].type]))) {
+					/* Fold this entry into the previously kept one. */
+					attypes[toi - 1].type =
+						attypes[fromi].type;
+					continue;
+			}
+			/* Keep the entry unless it repeats the previous entry's offset, DST flag and abbreviation. */
+			if (toi == 0
+			    || attypes[fromi].dontmerge
+			    || (utoffs[attypes[toi - 1].type]
+				!= utoffs[attypes[fromi].type])
+			    || (isdsts[attypes[toi - 1].type]
+				!= isdsts[attypes[fromi].type])
+			    || (desigidx[attypes[toi - 1].type]
+				!= desigidx[attypes[fromi].type]))
+					attypes[toi++] = attypes[fromi];
+		}
+		timecnt = toi;
+	}
+
+	if (noise && timecnt > 1200) {
+	  if (timecnt > TZ_MAX_TIMES)
+		warning(_("reference clients mishandle"
+			  " more than %d transition times"),
+			TZ_MAX_TIMES);
+	  else
+		warning(_("pre-2014 clients may mishandle"
+			  " more than 1200 transition times"));
+	}
+	/*
+	** Transfer.
+	*/
+	for (i = 0; i < timecnt; ++i) {
+		ats[i] = attypes[i].at;
+		types[i] = attypes[i].type;
+	}
+
+	/*
+	** Correct for leap seconds.
+	*/
+	for (i = 0; i < timecnt; ++i) {
+		j = leapcnt;
+		/* Scan the leap table from its last entry and apply the first match. */
+		while (--j >= 0)
+			if (ats[i] > trans[j] - corr[j]) {
+				ats[i] = tadd(ats[i], corr[j]);
+				break;
+			}
+	}
+
+	rangeall.defaulttype = defaulttype;
+	rangeall.count = timecnt;
+	rangeall.leapcount = leapcnt;
+	range64 = limitrange(rangeall, lo_time,
+			     max(hi_time,
+				 redundant_time - (ZIC_MIN < redundant_time)),
+			     ats, types);
+	range32 = limitrange(range64, ZIC32_MIN, ZIC32_MAX, ats, types);
+
+	/* TZif version 4 is needed if a no-op transition is appended to
+	   indicate the expiration of the leap second table, or if the first
+	   leap second transition is not to a +1 or -1 correction. */
+	for (pass = 1; pass <= 2; pass++) {
+	  struct timerange const *r = pass == 1 ? &range32 : &range64;
+	  if (pass == 1 && !want_bloat())
+	    continue;
+	  if (r->leapexpiry) {
+	    if (noise)
+	      warning(_("%s: pre-2021b clients may mishandle"
+			" leap second expiry"),
+		      name);
+	    version = '4';
+	  }
+	  if (0 < r->leapcount
+	      && corr[r->leapbase] != 1 && corr[r->leapbase] != -1) {
+	    if (noise)
+	      warning(_("%s: pre-2021b clients may mishandle"
+			" leap second table truncation"),
+		      name);
+	    version = '4';
+	  }
+	  if (version == '4')
+	    break;
+	}
+
+	fp = open_outfile(&outname, &tempname);
+
+	/* Pass 1 writes the range32 data block, pass 2 the range64 one. */
+	for (pass = 1; pass <= 2; ++pass) {
+		register ptrdiff_t thistimei, thistimecnt, thistimelim;
+		register int thisleapi, thisleapcnt, thisleaplim;
+		struct tzhead tzh;
+		int pretranstype = -1, thisdefaulttype;
+		bool locut, hicut, thisleapexpiry;
+		zic_t lo, thismin, thismax;
+		int old0;
+		char omittype[TZ_MAX_TYPES];
+		int typemap[TZ_MAX_TYPES];
+		int thistypecnt, stdcnt, utcnt;
+		char thischars[TZ_MAX_CHARS];
+		int thischarcnt;
+		bool toomanytimes;
+		int indmap[TZ_MAX_CHARS];
+
+		if (pass == 1) {
+			thisdefaulttype = range32.defaulttype;
+			thistimei = range32.base;
+			thistimecnt = range32.count;
+			toomanytimes = thistimecnt >> 31 >> 1 != 0;
+			thisleapi = range32.leapbase;
+			thisleapcnt = range32.leapcount;
+			thisleapexpiry = range32.leapexpiry;
+			thismin =
ZIC32_MIN;
+			thismax = ZIC32_MAX;
+		} else {
+			thisdefaulttype = range64.defaulttype;
+			thistimei = range64.base;
+			thistimecnt = range64.count;
+			toomanytimes = thistimecnt >> 31 >> 31 >> 2 != 0;
+			thisleapi = range64.leapbase;
+			thisleapcnt = range64.leapcount;
+			thisleapexpiry = range64.leapexpiry;
+			thismin = min_time;
+			thismax = max_time;
+		}
+		if (toomanytimes)
+		  error(_("too many transition times"));
+
+		/* LOCUT and HICUT: lo_time or hi_time falls inside this pass's range. */
+		locut = thismin < lo_time && lo_time <= thismax;
+		hicut = thismin <= hi_time && hi_time < thismax;
+		thistimelim = thistimei + thistimecnt;
+		memset(omittype, true, typecnt);	/* omit every type until it is found to be used */
+
+		/* Determine whether to output a transition before the first
+		   transition in range. This is needed when the output is
+		   truncated at the start, and is also useful when catering to
+		   buggy 32-bit clients that do not use time type 0 for
+		   timestamps before the first transition. */
+		if ((locut || (pass == 1 && thistimei))
+		    && ! (thistimecnt && ats[thistimei] == lo_time)) {
+		  pretranstype = thisdefaulttype;
+		  omittype[pretranstype] = false;
+		}
+
+		/* Arguably the default time type in the 32-bit data
+		   should be range32.defaulttype, which is suited for
+		   timestamps just before ZIC32_MIN. However, zic
+		   traditionally used the time type of the indefinite
+		   past instead. Internet RFC 8532 says readers should
+		   ignore 32-bit data, so this discrepancy matters only
+		   to obsolete readers where the traditional type might
+		   be more appropriate even if it's "wrong". So, use
+		   the historical zic value, unless -r specifies a low
+		   cutoff that excludes some 32-bit timestamps. */
+		if (pass == 1 && lo_time <= thismin)
+		  thisdefaulttype = range64.defaulttype;
+
+		if (locut)
+		  thisdefaulttype = unspecifiedtype;
+		omittype[thisdefaulttype] = false;
+		for (i = thistimei; i < thistimelim; i++)
+		  omittype[types[i]] = false;
+		if (hicut)
+		  omittype[unspecifiedtype] = false;
+
+		/* Reorder types to make THISDEFAULTTYPE type 0.
+		   Use TYPEMAP to swap OLD0 and THISDEFAULTTYPE so that
+		   THISDEFAULTTYPE appears as type 0 in the output instead
+		   of OLD0. TYPEMAP also omits unused types. */
+		old0 = strlen(omittype);	/* index of the first zero byte in OMITTYPE */
+
+#ifndef LEAVE_SOME_PRE_2011_SYSTEMS_IN_THE_LURCH
+		/*
+		** For some pre-2011 systems: if the last-to-be-written
+		** standard (or daylight) type has an offset different from the
+		** most recently used offset,
+		** append an (unused) copy of the most recently used type
+		** (to help get global "altzone" and "timezone" variables
+		** set correctly).
+		*/
+		if (want_bloat()) {
+			register int mrudst, mrustd, hidst, histd, type;
+
+			hidst = histd = mrudst = mrustd = -1;
+			if (0 <= pretranstype) {
+			  if (isdsts[pretranstype])
+			    mrudst = pretranstype;
+			  else
+			    mrustd = pretranstype;
+			}
+			for (i = thistimei; i < thistimelim; i++)
+				if (isdsts[types[i]])
+					mrudst = types[i];
+				else	mrustd = types[i];
+			for (i = old0; i < typecnt; i++) {
+			  int h = (i == old0 ? thisdefaulttype
+				   : i == thisdefaulttype ? old0 : i);
+			  if (!omittype[h]) {
+			    if (isdsts[h])
+			      hidst = i;
+			    else
+			      histd = i;
+			  }
+			}
+			if (hidst >= 0 && mrudst >= 0 && hidst != mrudst &&
+				utoffs[hidst] != utoffs[mrudst]) {
+					/* isdsts[mrudst] is set to -1 only for the duration of the addtype call. */
+					isdsts[mrudst] = -1;
+					type = addtype(utoffs[mrudst],
+						&chars[desigidx[mrudst]],
+						true,
+						ttisstds[mrudst],
+						ttisuts[mrudst]);
+					isdsts[mrudst] = 1;
+					omittype[type] = false;
+			}
+			if (histd >= 0 && mrustd >= 0 && histd != mrustd &&
+				utoffs[histd] != utoffs[mrustd]) {
+					isdsts[mrustd] = -1;
+					type = addtype(utoffs[mrustd],
+						&chars[desigidx[mrustd]],
+						false,
+						ttisstds[mrustd],
+						ttisuts[mrustd]);
+					isdsts[mrustd] = 0;
+					omittype[type] = false;
+			}
+		}
+#endif /* !defined LEAVE_SOME_PRE_2011_SYSTEMS_IN_THE_LURCH */
+		thistypecnt = 0;
+		for (i = old0; i < typecnt; i++)
+		  if (!omittype[i])
+		    typemap[i == old0 ? thisdefaulttype
+			    : i == thisdefaulttype ?
old0 : i]
+		      = thistypecnt++;
+
+		for (i = 0; i < sizeof indmap / sizeof indmap[0]; ++i)
+			indmap[i] = -1;
+		thischarcnt = stdcnt = utcnt = 0;
+		/* Gather the abbreviations of the kept types into THISCHARS; INDMAP maps old indexes to new. */
+		for (i = old0; i < typecnt; i++) {
+			register char * thisabbr;
+
+			if (omittype[i])
+				continue;
+			if (ttisstds[i])
+			  stdcnt = thistypecnt;
+			if (ttisuts[i])
+			  utcnt = thistypecnt;
+			if (indmap[desigidx[i]] >= 0)
+				continue;
+			thisabbr = &chars[desigidx[i]];
+			for (j = 0; j < thischarcnt; ++j)
+				if (strcmp(&thischars[j], thisabbr) == 0)
+					break;
+			if (j == thischarcnt) {
+				strcpy(&thischars[thischarcnt], thisabbr);
+				thischarcnt += strlen(thisabbr) + 1;
+			}
+			indmap[desigidx[i]] = j;
+		}
+		if (pass == 1 && !want_bloat()) {
+		  hicut = thisleapexpiry = false;
+		  pretranstype = -1;
+		  thistimecnt = thisleapcnt = 0;
+		  thistypecnt = thischarcnt = 1;
+		}
+#define DO(field)	fwrite(tzh.field, sizeof tzh.field, 1, fp)
+		memset(&tzh, 0, sizeof tzh);
+		memcpy(tzh.tzh_magic, TZ_MAGIC, sizeof tzh.tzh_magic);
+		tzh.tzh_version[0] = version;
+		convert(utcnt, tzh.tzh_ttisutcnt);
+		convert(stdcnt, tzh.tzh_ttisstdcnt);
+		convert(thisleapcnt + thisleapexpiry, tzh.tzh_leapcnt);
+		convert((0 <= pretranstype) + thistimecnt + hicut,
+			tzh.tzh_timecnt);
+		convert(thistypecnt, tzh.tzh_typecnt);
+		convert(thischarcnt, tzh.tzh_charcnt);
+		DO(tzh_magic);
+		DO(tzh_version);
+		DO(tzh_reserved);
+		DO(tzh_ttisutcnt);
+		DO(tzh_ttisstdcnt);
+		DO(tzh_leapcnt);
+		DO(tzh_timecnt);
+		DO(tzh_typecnt);
+		DO(tzh_charcnt);
+#undef DO
+		if (pass == 1 && !want_bloat()) {
+		  /* Output a minimal data block with just one time type. */
+		  puttzcode(0, fp);	/* utoff */
+		  putc(0, fp);		/* dst */
+		  putc(0, fp);		/* index of abbreviation */
+		  putc(0, fp);		/* empty-string abbreviation */
+		  continue;
+		}
+
+		/* Output a LO_TIME transition if needed; see limitrange.
+		   But do not go below the minimum representable value
+		   for this pass. */
+		lo = pass == 1 && lo_time < ZIC32_MIN ?
ZIC32_MIN : lo_time;
+
+		/* Transition times: an optional leading one, those in range, an optional trailing one. */
+		if (0 <= pretranstype)
+		  puttzcodepass(lo, fp, pass);
+		for (i = thistimei; i < thistimelim; ++i) {
+		  puttzcodepass(ats[i], fp, pass);
+		}
+		if (hicut)
+		  puttzcodepass(hi_time + 1, fp, pass);
+		/* One type byte for each transition time written above. */
+		if (0 <= pretranstype)
+		  putc(typemap[pretranstype], fp);
+		for (i = thistimei; i < thistimelim; i++)
+		  putc(typemap[types[i]], fp);
+		if (hicut)
+		  putc(typemap[unspecifiedtype], fp);
+
+		/* One record (offset, DST flag, abbreviation index) for each kept type. */
+		for (i = old0; i < typecnt; i++) {
+		  int h = (i == old0 ? thisdefaulttype
+			   : i == thisdefaulttype ? old0 : i);
+		  if (!omittype[h]) {
+		    puttzcode(utoffs[h], fp);
+		    putc(isdsts[h], fp);
+		    putc(indmap[desigidx[h]], fp);
+		  }
+		}
+		if (thischarcnt != 0)
+			fwrite(thischars, sizeof thischars[0],
+				      thischarcnt, fp);
+		thisleaplim = thisleapi + thisleapcnt;
+		for (i = thisleapi; i < thisleaplim; ++i) {
+			register zic_t todo;
+
+			if (roll[i]) {
+				/* Pick in J the type whose utoffs entry adjusts this leap second. */
+				if (timecnt == 0 || trans[i] < ats[0]) {
+					j = 0;
+					while (isdsts[j])
+						if (++j >= typecnt) {
+							j = 0;
+							break;
+						}
+				} else {
+					j = 1;
+					while (j < timecnt &&
+						trans[i] >= ats[j])
+							++j;
+					j = types[j - 1];
+				}
+				todo = tadd(trans[i], -utoffs[j]);
+			} else	todo = trans[i];
+			puttzcodepass(todo, fp, pass);
+			puttzcode(corr[i], fp);
+		}
+		if (thisleapexpiry) {
+		  /* Append a no-op leap correction indicating when the leap
+		     second table expires. Although this does not conform to
+		     Internet RFC 8536, most clients seem to accept this and
+		     the plan is to amend the RFC to allow this in version 4
+		     TZif files. */
+		  puttzcodepass(leapexpires, fp, pass);
+		  puttzcode(thisleaplim ?
corr[thisleaplim - 1] : 0, fp);
+		}
+		/* One ttisstds byte and then one ttisuts byte per kept type, each list only if its count is nonzero. */
+		if (stdcnt != 0)
+		  for (i = old0; i < typecnt; i++)
+			if (!omittype[i])
+				putc(ttisstds[i], fp);
+		if (utcnt != 0)
+		  for (i = old0; i < typecnt; i++)
+			if (!omittype[i])
+				putc(ttisuts[i], fp);
+	}
+	fprintf(fp, "\n%s\n", string);
+	close_file(fp, directory, name, tempname);
+	rename_dest(tempname, name);
+	free(ats);
+}
+/*
+** Format OFFSET (in seconds) into BUF as a sign followed by two hour
+** digits, then two minute digits if the minutes or seconds are
+** nonzero, then two second digits if the seconds are nonzero.
+** Returns BUF.  If the hours are 100 or more, reports an error and
+** returns the literal "%z" instead.
+*/
+static char const *
+abbroffset(char *buf, zic_t offset)
+{
+  char sign = '+';
+  int seconds, minutes;
+
+  if (offset < 0) {
+    offset = -offset;
+    sign = '-';
+  }
+
+  seconds = offset % SECSPERMIN;
+  offset /= SECSPERMIN;
+  minutes = offset % MINSPERHOUR;
+  offset /= MINSPERHOUR;
+  if (100 <= offset) {
+    error(_("%%z UT offset magnitude exceeds 99:59:59"));
+    return "%z";
+  } else {
+    char *p = buf;
+    *p++ = sign;
+    *p++ = '0' + offset / 10;
+    *p++ = '0' + offset % 10;
+    if (minutes | seconds) {
+      *p++ = '0' + minutes / 10;
+      *p++ = '0' + minutes % 10;
+      if (seconds) {
+	*p++ = '0' + seconds / 10;
+	*p++ = '0' + seconds % 10;
+      }
+    }
+    *p = '\0';
+    return buf;
+  }
+}
+/* doabbr compares its LETTERS argument against the address of this array. */
+static char const disable_percent_s[] = "";
+/*
+** Build an abbreviation in ABBR from ZP's format and return its
+** length.  If the format has no '/', it is used as a sprintf format
+** with LETTERS as the argument (the result of abbroffset when the
+** format specifier is 'z', "%s" when LETTERS is null); LETTERS equal
+** to disable_percent_s returns 0 without writing ABBR.  If the format
+** has a '/', the part after it is used when ISDST is true and the
+** part before it otherwise.  With DOQUOTES, a result that is empty or
+** has a character failing is_alpha is wrapped in '<' and '>'.
+*/
+static ptrdiff_t
+doabbr(char *abbr, struct zone const *zp, char const *letters,
+       bool isdst, zic_t save, bool doquotes)
+{
+	register char * cp;
+	register char * slashp;
+	ptrdiff_t len;
+	char const *format = zp->z_format;
+
+	slashp = strchr(format, '/');
+	if (slashp == NULL) {
+	  char letterbuf[PERCENT_Z_LEN_BOUND + 1];
+	  if (zp->z_format_specifier == 'z')
+	    letters = abbroffset(letterbuf, zp->z_stdoff + save);
+	  else if (!letters)
+	    letters = "%s";
+	  else if (letters == disable_percent_s)
+	    return 0;
+	  sprintf(abbr, format, letters);
+	} else if (isdst) {
+		strcpy(abbr, slashp + 1);
+	} else {
+		memcpy(abbr, format, slashp - format);
+		abbr[slashp - format] = '\0';
+	}
+	len = strlen(abbr);
+	if (!doquotes)
+		return len;
+	for (cp = abbr; is_alpha(*cp); cp++)
+		continue;
+	if (len > 0 && *cp == '\0')	/* nonempty, and every character passed is_alpha */
+		return len;
+	abbr[len + 2] = '\0';
+	abbr[len + 1] = '>';
+	memmove(abbr + 1, abbr,
len);
+	abbr[0] = '<';
+	return len + 2;
+}
+/*
+** Widen the range min_year..max_year so that it includes X.
+*/
+static void
+updateminmax(const zic_t x)
+{
+	if (min_year > x)
+		min_year = x;
+	if (max_year < x)
+		max_year = x;
+}
+/*
+** Write OFFSET (in seconds) into RESULT as an optional '-', the hours,
+** then ":mm" if the minutes or seconds are nonzero, then ":ss" if the
+** seconds are nonzero.  Returns the number of characters written, or
+** 0 with RESULT set to the empty string if the hours are at least
+** HOURSPERDAY * DAYSPERWEEK.
+*/
+static int
+stringoffset(char *result, zic_t offset)
+{
+	register int hours;
+	register int minutes;
+	register int seconds;
+	bool negative = offset < 0;
+	int len = negative;	/* 1 if a leading '-' is stored, else 0 */
+
+	if (negative) {
+		offset = -offset;
+		result[0] = '-';
+	}
+	seconds = offset % SECSPERMIN;
+	offset /= SECSPERMIN;
+	minutes = offset % MINSPERHOUR;
+	offset /= MINSPERHOUR;
+	hours = offset;
+	if (hours >= HOURSPERDAY * DAYSPERWEEK) {
+		result[0] = '\0';
+		return 0;
+	}
+	len += sprintf(result + len, "%d", hours);
+	if (minutes != 0 || seconds != 0) {
+		len += sprintf(result + len, ":%02d", minutes);
+		if (seconds != 0)
+			len += sprintf(result + len, ":%02d", seconds);
+	}
+	return len;
+}
+/*
+** Write into RESULT the text for rule RP: a day number with or
+** without a leading 'J' for a day-of-month rule, or "Mm.w.d" for a
+** weekday rule, followed by '/' and a time when the adjusted time of
+** day is not 2 hours.  SAVE and STDOFF adjust the time of day.
+** Returns -1 if the rule cannot be written this way, otherwise a
+** compatibility value of 0, 1994 or 2013.
+*/
+static int
+stringrule(char *result, struct rule *const rp, zic_t save, zic_t stdoff)
+{
+	register zic_t tod = rp->r_tod;
+	register int compat = 0;
+
+	if (rp->r_dycode == DC_DOM) {
+		register int month, total;
+
+		if (rp->r_dayofmonth == 29 && rp->r_month == TM_FEBRUARY)
+			return -1;
+		total = 0;
+		for (month = 0; month < rp->r_month; ++month)
+			total += len_months[0][month];
+		/* Omit the "J" in Jan and Feb, as that's shorter.
*/
+		if (rp->r_month <= 1)
+		  result += sprintf(result, "%d", total + rp->r_dayofmonth - 1);
+		else
+		  result += sprintf(result, "J%d", total + rp->r_dayofmonth);
+	} else {
+		register int week;
+		register int wday = rp->r_wday;
+		register int wdayoff;
+
+		if (rp->r_dycode == DC_DOWGEQ) {
+			/* A day of month that is not 1 more than a multiple of DAYSPERWEEK shifts the weekday and time of day. */
+			wdayoff = (rp->r_dayofmonth - 1) % DAYSPERWEEK;
+			if (wdayoff)
+				compat = 2013;
+			wday -= wdayoff;
+			tod += wdayoff * SECSPERDAY;
+			week = 1 + (rp->r_dayofmonth - 1) / DAYSPERWEEK;
+		} else if (rp->r_dycode == DC_DOWLEQ) {
+			if (rp->r_dayofmonth == len_months[1][rp->r_month])
+				week = 5;
+			else {
+				wdayoff = rp->r_dayofmonth % DAYSPERWEEK;
+				if (wdayoff)
+					compat = 2013;
+				wday -= wdayoff;
+				tod += wdayoff * SECSPERDAY;
+				week = rp->r_dayofmonth / DAYSPERWEEK;
+			}
+		} else	return -1;	/* "cannot happen" */
+		if (wday < 0)
+			wday += DAYSPERWEEK;
+		result += sprintf(result, "M%d.%d.%d",
+				  rp->r_month + 1, week, wday);
+	}
+	if (rp->r_todisut)
+	  tod += stdoff;
+	if (rp->r_todisstd && !rp->r_isdst)
+	  tod += save;
+	if (tod != 2 * SECSPERMIN * MINSPERHOUR) {	/* a time of exactly 2 hours is left out */
+		*result++ = '/';
+		if (! stringoffset(result, tod))
+			return -1;
+		if (tod < 0) {
+			if (compat < 2013)
+				compat = 2013;
+		} else if (SECSPERDAY <= tod) {
+			if (compat < 1994)
+				compat = 1994;
+		}
+	}
+	return compat;
+}
+/*
+** Compare rules A and B, either of which may be null.  A null rule
+** orders before a non-null one and two null rules compare equal.
+** Otherwise order by r_hiyear; rules whose r_hiyear are both ZIC_MAX
+** compare equal; remaining ties are ordered by r_month and then by
+** r_dayofmonth.
+*/
+static int
+rule_cmp(struct rule const *a, struct rule const *b)
+{
+	if (!a)
+		return -!!b;
+	if (!b)
+		return 1;
+	if (a->r_hiyear != b->r_hiyear)
+		return a->r_hiyear < b->r_hiyear ?
-1 : 1;
+	if (a->r_hiyear == ZIC_MAX)
+		return 0;
+	if (a->r_month - b->r_month != 0)
+		return a->r_month - b->r_month;
+	return a->r_dayofmonth - b->r_dayofmonth;
+}
+/*
+** Write into RESULT a TZ string for the last of the ZONECOUNT zone
+** entries starting at ZPFIRST.  Returns the largest compatibility
+** value reported by stringrule (0 if none), or -1 when no string is
+** produced; RESULT is empty on every -1 return except a stringrule
+** failure on the standard-time rule.
+*/
+static int
+stringzone(char *result, struct zone const *zpfirst, ptrdiff_t zonecount)
+{
+	register const struct zone * zp;
+	register struct rule * rp;
+	register struct rule * stdrp;
+	register struct rule * dstrp;
+	register ptrdiff_t i;
+	register int compat = 0;
+	register int c;
+	int offsetlen;
+	struct rule stdr, dstr;
+	ptrdiff_t len;
+	int dstcmp;
+	struct rule *lastrp[2] = { NULL, NULL };
+	struct zone zstr[2];
+	struct zone const *stdzp;
+	struct zone const *dstzp;
+
+	result[0] = '\0';
+
+	/* Internet RFC 8536 section 5.1 says to use an empty TZ string if
+	   future timestamps are truncated. */
+	if (hi_time < max_time)
+	  return -1;
+
+	zp = zpfirst + zonecount - 1;
+	/* Find in LASTRP the greatest rule under rule_cmp for r_isdst false and for true. */
+	for (i = 0; i < zp->z_nrules; ++i) {
+		struct rule **last;
+		int cmp;
+		rp = &zp->z_rules[i];
+		last = &lastrp[rp->r_isdst];
+		cmp = rule_cmp(*last, rp);
+		if (cmp < 0)
+		  *last = rp;
+		else if (cmp == 0)
+		  return -1;
+	}
+	stdrp = lastrp[false];
+	dstrp = lastrp[true];
+	dstcmp = zp->z_nrules ? rule_cmp(dstrp, stdrp) : zp->z_isdst ? 1 : -1;
+	stdzp = dstzp = zp;
+
+	if (dstcmp < 0) {
+	  /* Standard time all year. */
+	  dstrp = NULL;
+	} else if (0 < dstcmp) {
+	  /* DST all year. Use an abbreviation like
+	     "XXX3EDT4,0/0,J365/23" for EDT (-04) all year. */
+	  zic_t save = dstrp ? dstrp->r_save : zp->z_save;
+	  if (0 <= save)
+	    {
+	      /* Positive DST, the typical case for all-year DST.
+		 Fake a timezone with negative DST. */
+	      stdzp = &zstr[0];
+	      dstzp = &zstr[1];
+	      zstr[0].z_stdoff = zp->z_stdoff + 2 * save;
+	      zstr[0].z_format = "XXX";  /* Any 3 letters will do.
*/ + zstr[0].z_format_specifier = 0; + zstr[1].z_stdoff = zstr[0].z_stdoff; + zstr[1].z_format = zp->z_format; + zstr[1].z_format_specifier = zp->z_format_specifier; + } + dstr.r_month = TM_JANUARY; + dstr.r_dycode = DC_DOM; + dstr.r_dayofmonth = 1; + dstr.r_tod = 0; + dstr.r_todisstd = dstr.r_todisut = false; + dstr.r_isdst = true; + dstr.r_save = save < 0 ? save : -save; + dstr.r_abbrvar = dstrp ? dstrp->r_abbrvar : NULL; + stdr.r_month = TM_DECEMBER; + stdr.r_dycode = DC_DOM; + stdr.r_dayofmonth = 31; + stdr.r_tod = SECSPERDAY + dstr.r_save; + stdr.r_todisstd = stdr.r_todisut = false; + stdr.r_isdst = false; + stdr.r_save = 0; + stdr.r_abbrvar = save < 0 && stdrp ? stdrp->r_abbrvar : NULL; + dstrp = &dstr; + stdrp = &stdr; + } + len = doabbr(result, stdzp, stdrp ? stdrp->r_abbrvar : NULL, + false, 0, true); + offsetlen = stringoffset(result + len, - stdzp->z_stdoff); + if (! offsetlen) { + result[0] = '\0'; + return -1; + } + len += offsetlen; + if (dstrp == NULL) + return compat; + len += doabbr(result + len, dstzp, dstrp->r_abbrvar, + dstrp->r_isdst, dstrp->r_save, true); + if (dstrp->r_save != SECSPERMIN * MINSPERHOUR) { + offsetlen = stringoffset(result + len, + - (dstzp->z_stdoff + dstrp->r_save)); + if (! 
offsetlen) { + result[0] = '\0'; + return -1; + } + len += offsetlen; + } + result[len++] = ','; + c = stringrule(result + len, dstrp, dstrp->r_save, stdzp->z_stdoff); + if (c < 0) { + result[0] = '\0'; + return -1; + } + if (compat < c) + compat = c; + len += strlen(result + len); + result[len++] = ','; + c = stringrule(result + len, stdrp, dstrp->r_save, stdzp->z_stdoff); + if (c < 0) { + result[0] = '\0'; + return -1; + } + if (compat < c) + compat = c; + return compat; +} +/* Compute the transitions and local time types for the zone whose ZONECOUNT zone lines start at ZPFIRST, then pass them to writezone together with the TZ string built by stringzone. Resets timecnt, typecnt and charcnt before starting. */ +static void +outzone(const struct zone *zpfirst, ptrdiff_t zonecount) +{ + register ptrdiff_t i, j; + register zic_t starttime, untiltime; + register bool startttisstd; + register bool startttisut; + register char * startbuf; + register char * ab; + register char * envvar; + register int max_abbr_len; + register int max_envvar_len; + register bool prodstic; /* all rules are min to max */ + register int compat; + register bool do_extend; + register char version; + ptrdiff_t lastatmax = -1; + zic_t max_year0; + int defaulttype = -1; + + check_for_signal(); + + /* This cannot overflow; see FORMAT_LEN_GROWTH_BOUND. */ + max_abbr_len = 2 + max_format_len + max_abbrvar_len; + max_envvar_len = 2 * max_abbr_len + 5 * 9; + + startbuf = emalloc(max_abbr_len + 1); + ab = emalloc(max_abbr_len + 1); + envvar = emalloc(max_envvar_len + 1); + INITIALIZE(untiltime); + INITIALIZE(starttime); + /* + ** Now. . .finally. . .generate some useful data! + */ + timecnt = 0; + typecnt = 0; + charcnt = 0; + prodstic = zonecount == 1; + /* + ** Thanks to Earl Chew + ** for noting the need to unconditionally initialize startttisstd. 
+ */ + startttisstd = false; + startttisut = false; + min_year = max_year = EPOCH_YEAR; + if (leapseen) { + updateminmax(leapminyear); + updateminmax(leapmaxyear + (leapmaxyear < ZIC_MAX)); + } + for (i = 0; i < zonecount; ++i) { + struct zone const *zp = &zpfirst[i]; + if (i < zonecount - 1) + updateminmax(zp->z_untilrule.r_loyear); + for (j = 0; j < zp->z_nrules; ++j) { + struct rule *rp = &zp->z_rules[j]; + if (rp->r_lowasnum) + updateminmax(rp->r_loyear); + if (rp->r_hiwasnum) + updateminmax(rp->r_hiyear); + if (rp->r_lowasnum || rp->r_hiwasnum) + prodstic = false; + } + } + /* + ** Generate lots of data if a rule can't cover all future times. + */ + compat = stringzone(envvar, zpfirst, zonecount); + version = compat < 2013 ? '2' : '3'; /* a negative compat (no usable TZ string) also yields '2' */+ do_extend = compat < 0; + if (noise) { + if (!*envvar) + warning("%s %s", + _("no POSIX environment variable for zone"), + zpfirst->z_name); + else if (compat != 0) { + /* Circa-COMPAT clients, and earlier clients, might + not work for this zone when given dates before + 1970 or after 2038. */ + warning(_("%s: pre-%d clients may mishandle" + " distant timestamps"), + zpfirst->z_name, compat); + } + } + if (do_extend) { + /* + ** Search through a couple of extra years past the obvious + ** 400, to avoid edge cases. For example, suppose a non-POSIX + ** rule applies from 2012 onwards and has transitions in March + ** and September, plus some one-off transitions in November + ** 2013. If zic looked only at the last 400 years, it would + ** set max_year=2413, with the intent that the 400 years 2014 + ** through 2413 will be repeated. The last transition listed + ** in the tzfile would be in 2413-09, less than 400 years + ** after the last one-off transition in 2013-11. Two years + ** might be overkill, but with the kind of edge cases + ** available we're not sure that one year would suffice. 
+ */ + enum { years_of_observations = YEARSPERREPEAT + 2 }; + + if (min_year >= ZIC_MIN + years_of_observations) + min_year -= years_of_observations; + else min_year = ZIC_MIN; + if (max_year <= ZIC_MAX - years_of_observations) + max_year += years_of_observations; + else max_year = ZIC_MAX; + /* + ** Regardless of any of the above, + ** for a "proDSTic" zone which specifies that its rules + ** always have and always will be in effect, + ** we only need one cycle to define the zone. + */ + if (prodstic) { + min_year = 1900; + max_year = min_year + years_of_observations; + } + } + max_year = max(max_year, (redundant_time / (SECSPERDAY * DAYSPERNYEAR) + + EPOCH_YEAR + 1)); + max_year0 = max_year; /* value before want_bloat() can raise max_year to 2038 */+ if (want_bloat()) { + /* For the benefit of older systems, + generate data from 1900 through 2038. */ + if (min_year > 1900) + min_year = 1900; + if (max_year < 2038) + max_year = 2038; + } + + if (min_time < lo_time || hi_time < max_time) + unspecifiedtype = addtype(0, "-00", false, false, false); + + for (i = 0; i < zonecount; ++i) { + struct rule *prevrp = NULL; + /* + ** A guess that may well be corrected later. 
+ */ + zic_t save = 0; + struct zone const *zp = &zpfirst[i]; + bool usestart = i > 0 && (zp - 1)->z_untiltime > min_time; + bool useuntil = i < (zonecount - 1); + zic_t stdoff = zp->z_stdoff; + zic_t startoff = stdoff; + zic_t prevktime; + INITIALIZE(prevktime); + if (useuntil && zp->z_untiltime <= min_time) + continue; + eat(zp->z_filenum, zp->z_linenum); + *startbuf = '\0'; + if (zp->z_nrules == 0) { + int type; + save = zp->z_save; + doabbr(startbuf, zp, NULL, zp->z_isdst, save, false); + type = addtype(oadd(zp->z_stdoff, save), + startbuf, zp->z_isdst, startttisstd, + startttisut); + if (usestart) { + addtt(starttime, type); + usestart = false; + } else + defaulttype = type; + } else { + zic_t year; + for (year = min_year; year <= max_year; ++year) { + if (useuntil && year > zp->z_untilrule.r_hiyear) + break; + /* + ** Mark which rules to do in the current year. + ** For those to do, calculate rpytime(rp, year); + ** The former TYPE field was also considered here. + */ + for (j = 0; j < zp->z_nrules; ++j) { + zic_t one = 1; + zic_t y2038_boundary = one << 31; /* 2 to the 31st power, computed as a zic_t */+ struct rule *rp = &zp->z_rules[j]; + eats(zp->z_filenum, zp->z_linenum, + rp->r_filenum, rp->r_linenum); + rp->r_todo = year >= rp->r_loyear && + year <= rp->r_hiyear; + if (rp->r_todo) { + rp->r_temp = rpytime(rp, year); + rp->r_todo + = (rp->r_temp < y2038_boundary + || year <= max_year0); + } + } + for ( ; ; ) { + register ptrdiff_t k; + register zic_t jtime, ktime; + register zic_t offset; + struct rule *rp; + int type; + + INITIALIZE(ktime); + if (useuntil) { + /* + ** Turn untiltime into UT + ** assuming the current stdoff and + ** save values. + */ + untiltime = zp->z_untiltime; + if (!zp->z_untilrule.r_todisut) + untiltime = tadd(untiltime, + -stdoff); + if (!zp->z_untilrule.r_todisstd) + untiltime = tadd(untiltime, + -save); + } + /* + ** Find the rule (of those to do, if any) + ** that takes effect earliest in the year. 
+ */ + k = -1; + for (j = 0; j < zp->z_nrules; ++j) { + struct rule *r = &zp->z_rules[j]; + if (!r->r_todo) + continue; + eats(zp->z_filenum, zp->z_linenum, + r->r_filenum, r->r_linenum); + offset = r->r_todisut ? 0 : stdoff; + if (!r->r_todisstd) + offset = oadd(offset, save); + jtime = r->r_temp; + if (jtime == min_time || + jtime == max_time) + continue; + jtime = tadd(jtime, -offset); + if (k < 0 || jtime < ktime) { + k = j; + ktime = jtime; + } else if (jtime == ktime) { + char const *dup_rules_msg = + _("two rules for same instant"); + eats(zp->z_filenum, zp->z_linenum, + r->r_filenum, r->r_linenum); + warning("%s", dup_rules_msg); + r = &zp->z_rules[k]; + eats(zp->z_filenum, zp->z_linenum, + r->r_filenum, r->r_linenum); + error("%s", dup_rules_msg); + } + } + if (k < 0) + break; /* go on to next year */ + rp = &zp->z_rules[k]; + rp->r_todo = false; /* rule K will not be chosen again for this year */+ if (useuntil && ktime >= untiltime) { + if (!*startbuf + && (oadd(zp->z_stdoff, rp->r_save) + == startoff)) + doabbr(startbuf, zp, rp->r_abbrvar, + rp->r_isdst, rp->r_save, + false); + break; + } + save = rp->r_save; + if (usestart && ktime == starttime) + usestart = false; + if (usestart) { + if (ktime < starttime) { + startoff = oadd(zp->z_stdoff, + save); + doabbr(startbuf, zp, + rp->r_abbrvar, + rp->r_isdst, + rp->r_save, + false); + continue; + } + if (*startbuf == '\0' + && startoff == oadd(zp->z_stdoff, + save)) { + doabbr(startbuf, + zp, + rp->r_abbrvar, + rp->r_isdst, + rp->r_save, + false); + } + } + eats(zp->z_filenum, zp->z_linenum, + rp->r_filenum, rp->r_linenum); + doabbr(ab, zp, rp->r_abbrvar, + rp->r_isdst, rp->r_save, false); + offset = oadd(zp->z_stdoff, rp->r_save); + if (!want_bloat() && !useuntil && !do_extend + && prevrp && lo_time <= prevktime + && redundant_time <= ktime + && rp->r_hiyear == ZIC_MAX + && prevrp->r_hiyear == ZIC_MAX) + break; + type = addtype(offset, ab, rp->r_isdst, + rp->r_todisstd, rp->r_todisut); + if (defaulttype < 0 && 
!rp->r_isdst) + defaulttype = type; + if 
(rp->r_hiyear == ZIC_MAX + && ! (0 <= lastatmax + && ktime < attypes[lastatmax].at)) + lastatmax = timecnt; + addtt(ktime, type); + prevrp = rp; + prevktime = ktime; + } + } + } + if (usestart) { + bool isdst = startoff != zp->z_stdoff; /* DST is inferred from the starting offset differing from the zone's standard offset */+ if (*startbuf == '\0' && zp->z_format) + doabbr(startbuf, zp, disable_percent_s, + isdst, save, false); + eat(zp->z_filenum, zp->z_linenum); + if (*startbuf == '\0') +error(_("can't determine time zone abbreviation to use just after until time")); + else { + int type = addtype(startoff, startbuf, isdst, + startttisstd, startttisut); + if (defaulttype < 0 && !isdst) + defaulttype = type; + addtt(starttime, type); + } + } + /* + ** Now we may get to set starttime for the next zone line. + */ + if (useuntil) { + startttisstd = zp->z_untilrule.r_todisstd; + startttisut = zp->z_untilrule.r_todisut; + starttime = zp->z_untiltime; + if (!startttisstd) + starttime = tadd(starttime, -save); + if (!startttisut) + starttime = tadd(starttime, -stdoff); + } + } + if (defaulttype < 0) + defaulttype = 0; + if (0 <= lastatmax) + attypes[lastatmax].dontmerge = true; + if (do_extend) { + /* + ** If we're extending the explicitly listed observations + ** for 400 years because we can't fill the POSIX-TZ field, + ** check whether we actually ended up explicitly listing + ** observations through that period. If there aren't any + ** near the end of the 400-year period, add a redundant + ** one at the end of the final year, to make it clear + ** that we are claiming to have definite knowledge of + ** the lack of transitions up to that point. 
+ */ + struct rule xr; + struct attype *lastat; + xr.r_month = TM_JANUARY; + xr.r_dycode = DC_DOM; + xr.r_dayofmonth = 1; + xr.r_tod = 0; + for (lastat = attypes, i = 1; i < timecnt; i++) + if (attypes[i].at > lastat->at) + lastat = &attypes[i]; + if (!lastat || lastat->at < rpytime(&xr, max_year - 1)) { + addtt(rpytime(&xr, max_year + 1), + lastat ? 
lastat->type : defaulttype); + attypes[timecnt - 1].dontmerge = true; + } + } + writezone(zpfirst->z_name, envvar, version, defaulttype); + free(startbuf); + free(ab); + free(envvar); +} +/* Append a transition at STARTTIME with local time type TYPE to attypes, growing the array through growalloc as needed. The new entry's dontmerge flag starts out false. */ +static void +addtt(zic_t starttime, int type) +{ + attypes = growalloc(attypes, sizeof *attypes, timecnt, &timecnt_alloc); + attypes[timecnt].at = starttime; + attypes[timecnt].dontmerge = false; + attypes[timecnt].type = type; + ++timecnt; +} +/* Return the index of the local time type with UT offset UTOFF, abbreviation ABBR, DST flag ISDST and indicators TTISSTD and TTISUT, adding a new type (and, via newabbr, a new abbreviation) when no existing one matches. The two indicators are forced to false unless want_bloat(). Exit with failure if UTOFF is outside the signed 32-bit range or TZ_MAX_TYPES types already exist. */ +static int +addtype(zic_t utoff, char const *abbr, bool isdst, bool ttisstd, bool ttisut) +{ + register int i, j; + + if (! (-1L - 2147483647L <= utoff && utoff <= 2147483647L)) { + error(_("UT offset out of range")); + exit(EXIT_FAILURE); + } + if (!want_bloat()) + ttisstd = ttisut = false; + + for (j = 0; j < charcnt; ++j) + if (strcmp(&chars[j], abbr) == 0) + break; + if (j == charcnt) + newabbr(abbr); + else { + /* If there's already an entry, return its index. */ + for (i = 0; i < typecnt; i++) + if (utoff == utoffs[i] && isdst == isdsts[i] && j == desigidx[i] + && ttisstd == ttisstds[i] && ttisut == ttisuts[i]) + return i; + } + /* + ** There isn't one; add a new one, unless there are already too + ** many. 
+ */ + if (typecnt >= TZ_MAX_TYPES) { + error(_("too many local time types")); + exit(EXIT_FAILURE); + } + i = typecnt++; + utoffs[i] = utoff; + isdsts[i] = isdst; + ttisstds[i] = ttisstd; + ttisuts[i] = ttisut; + desigidx[i] = j; + return i; +} +/* Record a leap second at time T with the given CORRECTION and ROLLING flag, inserting it into trans, corr and roll before the first existing entry whose time is not less than T. Exit with failure if TZ_MAX_LEAPS entries already exist, or if ROLLING is set while lo_time or hi_time differs from min_time or max_time. */ +static void +leapadd(zic_t t, int correction, int rolling) +{ + register int i; + + if (TZ_MAX_LEAPS <= leapcnt) { + error(_("too many leap seconds")); + exit(EXIT_FAILURE); + } + if (rolling && (lo_time != min_time || hi_time != max_time)) { + error(_("Rolling leap seconds not supported with -r")); + exit(EXIT_FAILURE); + } + for (i = 0; i < leapcnt; ++i) + if (t <= trans[i]) + break; + memmove(&trans[i + 1], &trans[i], (leapcnt - i) * sizeof *trans); + memmove(&corr[i + 1], &corr[i], (leapcnt - i) * sizeof *corr); + memmove(&roll[i + 1], &roll[i], (leapcnt - i) * sizeof *roll); + trans[i] = t; + corr[i] = correction; + roll[i] = rolling; + ++leapcnt; +} +/* Shift each leap transition time by the corrections accumulated before it and make corr cumulative. Exit with failure if a leap second is fewer than 28 days after the previous one (or after time 0 for the first), or if a nonnegative leapexpires, once adjusted by the total correction, does not follow the last leap time. */ +static void +adjleap(void) +{ + register int i; + register zic_t last = 0; + register zic_t prevtrans = 0; + + /* + ** propagate leap seconds forward + */ + for (i = 0; i < leapcnt; ++i) { + if (trans[i] - prevtrans < 28 * SECSPERDAY) { + error(_("Leap seconds too close together")); + exit(EXIT_FAILURE); + } + prevtrans = trans[i]; + trans[i] = tadd(trans[i], last); + last = corr[i] += last; + } + + if (0 <= leapexpires) { + leapexpires = oadd(leapexpires, last); + if (! (leapcnt == 0 || (trans[leapcnt - 1] < leapexpires))) { + error(_("last Leap time does not precede Expires time")); + exit(EXIT_FAILURE); + } + } +} + +/* Is A a space character in the C locale? */ +static bool +is_space(char a) +{ + switch (a) { + default: + return false; + case ' ': case '\f': case '\n': case '\r': case '\t': case '\v': + return true; + } +} + +/* Is A an alphabetic character in the C locale? 
*/ +static bool +is_alpha(char a) +{ + switch (a) { + default: + return false; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + return true; + } +} + +/* If A is an uppercase character in the C locale, return its lowercase + counterpart. Otherwise, return A. */ +static char +lowerit(char a) +{ + switch (a) { + default: return a; + case 'A': return 'a'; case 'B': return 'b'; case 'C': return 'c'; + case 'D': return 'd'; case 'E': return 'e'; case 'F': return 'f'; + case 'G': return 'g'; case 'H': return 'h'; case 'I': return 'i'; + case 'J': return 'j'; case 'K': return 'k'; case 'L': return 'l'; + case 'M': return 'm'; case 'N': return 'n'; case 'O': return 'o'; + case 'P': return 'p'; case 'Q': return 'q'; case 'R': return 'r'; + case 'S': return 's'; case 'T': return 't'; case 'U': return 'u'; + case 'V': return 'v'; case 'W': return 'w'; case 'X': return 'x'; + case 'Y': return 'y'; case 'Z': return 'z'; + } +} + +/* case-insensitive equality */ +ATTRIBUTE_REPRODUCIBLE static bool +ciequal(register const char *ap, register const char *bp) +{ + while (lowerit(*ap) == lowerit(*bp++)) + if (*ap++ == '\0') + return true; + return false; +} +/* Return true if ABBR loosely abbreviates WORD, ignoring ASCII case: their first characters agree, and each remaining character of ABBR occurs, in order, among the later characters of WORD. byword uses this only to warn about words that pre-2017c zic would have found ambiguous. */ +ATTRIBUTE_REPRODUCIBLE static bool +itsabbr(register const char *abbr, register const char *word) +{ + if (lowerit(*abbr) != lowerit(*word)) + return false; + ++word; + while (*++abbr != '\0') + do { + if (*word == '\0') + return false; + } while (lowerit(*word++) != lowerit(*abbr)); + return true; +} + +/* Return true if ABBR is an initial prefix 
of WORD, ignoring ASCII case. */ + +ATTRIBUTE_REPRODUCIBLE static bool +ciprefix(char const *abbr, char const *word) +{ + do + if (!*abbr) + return true; + while (lowerit(*abbr++) == lowerit(*word++)); + + return false; +} +/* Look WORD up in TABLE, whose end is marked by an entry with a null l_word. An exact match ignoring ASCII case wins; failing that, return the single entry that WORD is a prefix of. Return NULL if WORD or TABLE is null, if nothing matches, or if several entries match inexactly. */ +static const struct lookup * +byword(const char *word, const struct lookup *table) +{ + register const struct lookup * foundlp; + register const struct lookup * lp; + + if (word == NULL || table == NULL) + return NULL; + + /* If TABLE is LASTS and the word starts with "last" followed + by a non-'-', skip the "last" and look in WDAY_NAMES instead. + Warn about any usage of the undocumented prefix "last-". */ + if (table == lasts && ciprefix("last", word) && word[4]) { + if (word[4] == '-') + warning(_("\"%s\" is undocumented; use \"last%s\" instead"), + word, word + 5); + else { + word += 4; + table = wday_names; + } + } + + /* + ** Look for exact match. + */ + for (lp = table; lp->l_word != NULL; ++lp) + if (ciequal(word, lp->l_word)) + return lp; + /* + ** Look for inexact match. + */ + foundlp = NULL; + for (lp = table; lp->l_word != NULL; ++lp) + if (ciprefix(word, lp->l_word)) { + if (foundlp == NULL) + foundlp = lp; + else return NULL; /* multiple inexact matches */ + } + + if (foundlp && noise) { + /* Warn about any backward-compatibility issue with pre-2017c zic. 
*/ + bool pre_2017c_match = false; + for (lp = table; lp->l_word; lp++) + if (itsabbr(word, lp->l_word)) { + if (pre_2017c_match) { + warning(_("\"%s\" is ambiguous in pre-2017c zic"), word); + break; + } + pre_2017c_match = true; + } + } + + return foundlp; +} +/* Split the line at CP, in place, into fields separated by white space, storing a pointer to each field in ARRAY and returning the number of fields. A '#' outside double quotes ends the line. Double quotes are removed and the characters they enclose are kept as part of the field. A field consisting only of "-" is stored as the empty string. Exit with failure on an unterminated quote or on more than ARRAYELTS fields. */ +static int +getfields(char *cp, char **array, int arrayelts) +{ + register char * dp; + register int nsubs; + + nsubs = 0; + for ( ; ; ) { + char *dstart; + while (is_space(*cp)) + ++cp; + if (*cp == '\0' || *cp == '#') + break; + dstart = dp = cp; + do { + if ((*dp = *cp++) != '"') + ++dp; + else while ((*dp = *cp++) != '"') + if (*dp != '\0') + ++dp; + else { + error(_("Odd number of quotation marks")); + exit(EXIT_FAILURE); + } + } while (*cp && *cp != '#' && !is_space(*cp)); + if (is_space(*cp)) + ++cp; + *dp = '\0'; + if (nsubs == arrayelts) { + error(_("Too many input fields")); + exit(EXIT_FAILURE); + } + array[nsubs++] = dstart + (*dstart == '-' && dp == dstart + 1); + } + return nsubs; +} +/* Report a time overflow error and exit with failure. */ +ATTRIBUTE_NORETURN static void +time_overflow(void) +{ + error(_("time overflow")); + exit(EXIT_FAILURE); +} +/* Return T1 + T2, or call time_overflow if the sum cannot be represented as a zic_t. */ +ATTRIBUTE_REPRODUCIBLE static zic_t +oadd(zic_t t1, zic_t t2) +{ +#ifdef ckd_add + zic_t sum; + if (!ckd_add(&sum, t1, t2)) + return sum; +#else + if (t1 < 0 ? ZIC_MIN - t1 <= t2 : t2 <= ZIC_MAX - t1) + return t1 + t2; +#endif + time_overflow(); +} +/* Return T1 + T2 if the sum lies within min_time through max_time. Otherwise return T1 unchanged when T1 is itself min_time or max_time, and call time_overflow in all other cases. */ +ATTRIBUTE_REPRODUCIBLE static zic_t +tadd(zic_t t1, zic_t t2) +{ +#ifdef ckd_add + zic_t sum; + if (!ckd_add(&sum, t1, t2) && min_time <= sum && sum <= max_time) + return sum; +#else + if (t1 < 0 ? min_time - t1 <= t2 : t2 <= max_time - t1) + return t1 + t2; +#endif + if (t1 == min_time || t1 == max_time) + return t1; + time_overflow(); +} + +/* +** Given a rule, and a year, compute the date (in seconds since January 1, +** 1970, 00:00 LOCAL time) in that year that the rule refers to. +*/ + +static zic_t +rpytime(const struct rule *rp, zic_t wantedy) +{ + register int m, i; + register zic_t dayoff; /* with a nod to Margaret O. 
*/ + register zic_t t, y; + int yrem; + + if (wantedy == ZIC_MIN) + return min_time; + if (wantedy == ZIC_MAX) + return max_time; + m = TM_JANUARY; + y = EPOCH_YEAR; + + /* dayoff = floor((wantedy - y) / YEARSPERREPEAT) * DAYSPERREPEAT, + sans overflow. */ + yrem = wantedy % YEARSPERREPEAT - y % YEARSPERREPEAT; + dayoff = ((wantedy / YEARSPERREPEAT - y / YEARSPERREPEAT + + yrem / YEARSPERREPEAT - (yrem % YEARSPERREPEAT < 0)) + * DAYSPERREPEAT); + /* wantedy = y + ((wantedy - y) mod YEARSPERREPEAT), sans overflow. */ + wantedy = y + (yrem + 2 * YEARSPERREPEAT) % YEARSPERREPEAT; + + while (wantedy != y) { + i = len_years[isleap(y)]; + dayoff = oadd(dayoff, i); + y++; + } + while (m != rp->r_month) { + i = len_months[isleap(y)][m]; + dayoff = oadd(dayoff, i); + ++m; + } + i = rp->r_dayofmonth; + if (m == TM_FEBRUARY && i == 29 && !isleap(y)) { + if (rp->r_dycode == DC_DOWLEQ) + --i; + else { + error(_("use of 2/29 in non leap-year")); + exit(EXIT_FAILURE); + } + } + --i; /* turn the 1-based day of month into a 0-based offset within the month */+ dayoff = oadd(dayoff, i); + if (rp->r_dycode == DC_DOWGEQ || rp->r_dycode == DC_DOWLEQ) { + /* + ** Don't trust mod of negative numbers. 
+ */ + zic_t wday = ((EPOCH_WDAY + dayoff % DAYSPERWEEK + DAYSPERWEEK) + % DAYSPERWEEK); + while (wday != rp->r_wday) + if (rp->r_dycode == DC_DOWGEQ) { + dayoff = oadd(dayoff, 1); + if (++wday >= DAYSPERWEEK) + wday = 0; + ++i; + } else { + dayoff = oadd(dayoff, -1); + if (--wday < 0) + wday = DAYSPERWEEK - 1; + --i; + } + if (i < 0 || i >= len_months[isleap(y)][m]) { + if (noise) + warning(_("rule goes past start/end of month; \ +will not work with pre-2004 versions of zic")); + } + } + if (dayoff < min_time / SECSPERDAY) + return min_time; + if (dayoff > max_time / SECSPERDAY) + return max_time; + t = (zic_t) dayoff * SECSPERDAY; + return tadd(t, rp->r_tod); +} +/* Append the abbreviation STRING, including its terminating NUL, to chars, exiting with failure if that would exceed TZ_MAX_CHARS. Unless STRING equals GRANDPARENTED, first warn if its leading run of ASCII letters, digits, '-' and '+' is shorter than 3 characters (only when noise is set), is longer than ZIC_MAX_ABBR_LEN_WO_WARN, or does not span the whole string; when several apply, only the last of these is reported. */ +static void +newabbr(const char *string) +{ + register int i; + + if (strcmp(string, GRANDPARENTED) != 0) { + register const char * cp; + const char * mp; + + cp = string; + mp = NULL; + while (is_alpha(*cp) || ('0' <= *cp && *cp <= '9') + || *cp == '-' || *cp == '+') + ++cp; + if (noise && cp - string < 3) + mp = _("time zone abbreviation has fewer than 3 characters"); + if (cp - string > ZIC_MAX_ABBR_LEN_WO_WARN) + mp = _("time zone abbreviation has too many characters"); + if (*cp != '\0') +mp = _("time zone abbreviation differs from POSIX standard"); + if (mp != NULL) + warning("%s (%s)", mp, string); + } + i = strlen(string) + 1; + if (charcnt + i > TZ_MAX_CHARS) { + error(_("too many, or too long, time zone abbreviations")); + exit(EXIT_FAILURE); + } + strcpy(&chars[charcnt], string); + charcnt += i; +} + +/* Ensure that the directories of ARGNAME exist, by making any missing + ones. If ANCESTORS, do this only for ARGNAME's ancestors; otherwise, + do it for ARGNAME too. Exit with failure if there is trouble. + Do not consider an existing file to be trouble. */ +static void +mkdirs(char const *argname, bool ancestors) +{ + char *name = estrdup(argname); + char *cp = name; + + /* On MS-Windows systems, do not worry about drive letters or + backslashes, as this should suffice in practice. 
Time zone + names do not use drive letters and backslashes. If the -d + option of zic does not name an already-existing directory, + it can use slashes to separate the already-existing + ancestor prefix from the to-be-created subdirectories. */ + + /* Do not mkdir a root directory, as it must exist. */ + while (*cp == '/') + cp++; + + while (cp && ((cp = strchr(cp, '/')) || !ancestors)) { + if (cp) + *cp = '\0'; + /* + ** Try to create it. It's OK if creation fails because + ** the directory already exists, perhaps because some + ** other process just created it. For simplicity do + ** not check first whether it already exists, as that + ** is checked anyway if the mkdir fails. + */ + if (mkdir(name, MKDIR_UMASK) != 0) { + /* Do not report an error if err == EEXIST, because + some other process might have made the directory + in the meantime. Likewise for ENOSYS, because + Solaris 10 mkdir fails with ENOSYS if the + directory is an automounted mount point. + Likewise for EACCES, since mkdir can fail + with EACCES merely because the parent directory + is unwritable. Likewise for most other error + numbers. */ + int err = errno; + if (err == ELOOP || err == ENAMETOOLONG + || err == ENOENT || err == ENOTDIR) { + error(_("%s: Can't create directory %s: %s"), + progname, name, strerror(err)); + exit(EXIT_FAILURE); + } + } + if (cp) + *cp++ = '/'; /* put the separator back and continue after it; a null cp ends the loop */+ } + free(name); +} diff --git a/scripts/dev-setup.sh b/scripts/dev-setup.sh new file mode 100644 index 0000000..cefcbee --- /dev/null +++ b/scripts/dev-setup.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +apt-get install libbsd-dev libc6-dev libjemalloc-dev libxxhash-dev