From 9ec0d0bf336c13d3d7bc928be07a377e7e7ee195 Mon Sep 17 00:00:00 2001 From: Wagram Airiian Date: Wed, 4 Feb 2026 09:22:54 +0100 Subject: [PATCH 01/13] Add byte array specialization and processing --- include/simfil/model/model.h | 3 + include/simfil/model/nodes.h | 4 +- include/simfil/operator.h | 148 +++++++++++++++++++++++++++++++++++ include/simfil/value.h | 17 ++++ src/model/model.cpp | 27 +++++++ src/model/nodes.cpp | 4 +- src/model/string-pool.cpp | 2 + 7 files changed, 203 insertions(+), 2 deletions(-) diff --git a/include/simfil/model/model.h b/include/simfil/model/model.h index ba974a6d..26b428fd 100644 --- a/include/simfil/model/model.h +++ b/include/simfil/model/model.h @@ -2,6 +2,7 @@ #pragma once #include "simfil/model/string-pool.h" +#include "simfil/byte-array.h" #include "tl/expected.hpp" #if defined(SIMFIL_WITH_MODEL_JSON) # include "nlohmann/json.hpp" @@ -96,6 +97,7 @@ class ModelPool : public Model Double, String, PooledString, + ByteArray, FirstCustomColumnId = 128, }; @@ -150,6 +152,7 @@ class ModelPool : public Model ModelNode::Ptr newValue(int64_t const& value); ModelNode::Ptr newValue(double const& value); ModelNode::Ptr newValue(std::string_view const& value); + ModelNode::Ptr newValue(simfil::ByteArray const& value); ModelNode::Ptr newValue(StringId handle); /** Node-type-specific resolve-functions */ diff --git a/include/simfil/model/nodes.h b/include/simfil/model/nodes.h index 36a2c1c9..ddffae61 100644 --- a/include/simfil/model/nodes.h +++ b/include/simfil/model/nodes.h @@ -8,6 +8,7 @@ #include "arena.h" #include "string-pool.h" +#include "simfil/byte-array.h" #include "simfil/error.h" #include @@ -60,7 +61,8 @@ using ScalarValueType = std::variant< int64_t, double, std::string, - std::string_view>; + std::string_view, + ByteArray>; /** * Why is model_ptr's value on the stack? diff --git a/include/simfil/operator.h b/include/simfil/operator.h index 6e874cd9..9b4e7462 100644 --- a/include/simfil/operator.h +++ b/include/simfil/operator.h @@ -8,6 +8,7 @@ #include "fmt/format.h" #include +#include #include #include #include @@ -129,6 +130,11 @@ struct OperatorLen return static_cast(s.size()); } + auto operator()(const ByteArray& s) const + { + return static_cast(s.bytes.size()); + } + auto operator()(const ModelNode& n) const { return static_cast(n.size()); @@ -171,6 +177,12 @@ struct OperatorTypeof return n; } + auto operator()(const ByteArray&) const -> std::string_view + { + static auto n = "string"sv; + return n; + } + auto operator()(const ModelNode&) const -> std::string_view { static auto n = "model"sv; @@ -211,6 +223,13 @@ struct OperatorAsInt return (int64_t)0; } + auto operator()(const ByteArray& v) const + { + if (auto decoded = v.decodeBigEndianI64()) + return *decoded; + return (int64_t)0; + } + auto operator()(const ModelNode&) const { return (int64_t)0; @@ -270,6 +289,11 @@ struct OperatorAsString return v; } + auto operator()(const ByteArray& v) const -> std::string + { + return v.toDisplayString(); + } + auto operator()(const ModelNode& v) const { return ""s; @@ -475,6 +499,49 @@ struct OperatorEq DECL_OPERATION(double, double, ==) DECL_OPERATION(const std::string&, const std::string&, ==) + auto operator()(const ByteArray& l, const ByteArray& r) const + { + return l.bytes == r.bytes; + } + + auto operator()(const ByteArray& l, const std::string& r) const + { + return l.toDisplayString() == r; + } + + auto operator()(const std::string& l, const ByteArray& r) const + { + return l == r.toDisplayString(); + } + + auto operator()(const ByteArray& l, int64_t r) const + { + if (auto decoded = l.decodeBigEndianI64()) + return *decoded == r; + return false; + } + + auto operator()(int64_t l, const ByteArray& r) const + { + if (auto decoded = r.decodeBigEndianI64()) + return l == *decoded; + return false; + } + + auto operator()(const ByteArray& l, double r) const + { + if (auto decoded = l.decodeBigEndianI64()) + return static_cast(*decoded) == r; + return false; + } + + auto operator()(double l, const ByteArray& r) const + { + if (auto decoded = r.decodeBigEndianI64()) + return l == static_cast(*decoded); + return false; + } + auto operator()(NullType, NullType) const { return true; @@ -502,6 +569,52 @@ struct OperatorLt DECL_OPERATION(double, double, <) DECL_OPERATION(const std::string&, const std::string&, <) + auto operator()(const ByteArray& l, const ByteArray& r) const + { + return std::lexicographical_compare( + l.bytes.begin(), l.bytes.end(), + r.bytes.begin(), r.bytes.end(), + [](unsigned char a, unsigned char b) { return a < b; }); + } + + auto operator()(const ByteArray& l, const std::string& r) const + { + return l.toDisplayString() < r; + } + + auto operator()(const std::string& l, const ByteArray& r) const + { + return l < r.toDisplayString(); + } + + auto operator()(const ByteArray& l, int64_t r) const + { + if (auto decoded = l.decodeBigEndianI64()) + return *decoded < r; + return false; + } + + auto operator()(int64_t l, const ByteArray& r) const + { + if (auto decoded = r.decodeBigEndianI64()) + return l < *decoded; + return false; + } + + auto operator()(const ByteArray& l, double r) const + { + if (auto decoded = l.decodeBigEndianI64()) + return static_cast(*decoded) < r; + return false; + } + + auto operator()(double l, const ByteArray& r) const + { + if (auto decoded = r.decodeBigEndianI64()) + return l < static_cast(*decoded); + return false; + } + auto operator()(NullType, NullType) const { return false; @@ -518,6 +631,41 @@ struct OperatorLtEq DECL_OPERATION(double, double, <=) DECL_OPERATION(const std::string&, const std::string&, <=) + auto operator()(const ByteArray& l, const ByteArray& r) const + { + return OperatorLt()(l, r) || OperatorEq()(l, r); + } + + auto operator()(const ByteArray& l, const std::string& r) const + { + return OperatorLt()(l, r) || OperatorEq()(l, r); + } + + auto operator()(const std::string& l, const ByteArray& r) const + { + return OperatorLt()(l, r) || OperatorEq()(l, r); + } + + auto operator()(const ByteArray& l, int64_t r) const + { + return OperatorLt()(l, r) || OperatorEq()(l, r); + } + + auto operator()(int64_t l, const ByteArray& r) const + { + return OperatorLt()(l, r) || OperatorEq()(l, r); + } + + auto operator()(const ByteArray& l, double r) const + { + return OperatorLt()(l, r) || OperatorEq()(l, r); + } + + auto operator()(double l, const ByteArray& r) const + { + return OperatorLt()(l, r) || OperatorEq()(l, r); + } + auto operator()(NullType, NullType) const { return true; diff --git a/include/simfil/value.h b/include/simfil/value.h index 5c871c39..f1b0e1c6 100644 --- a/include/simfil/value.h +++ b/include/simfil/value.h @@ -7,6 +7,7 @@ #include #include "model/nodes.h" +#include "simfil/byte-array.h" #include "transient.h" namespace simfil @@ -58,6 +59,11 @@ struct ValueToString return v; } + auto operator()(const ByteArray& v) const + { + return v.toDisplayString(); + } + auto operator()(const TransientObject&) const { return ""s; @@ -158,6 +164,11 @@ struct ValueType4CType { static constexpr ValueType Type = ValueType::String; }; +template <> +struct ValueType4CType { + static constexpr ValueType Type = ValueType::String; +}; + template <> struct ValueType4CType { static constexpr ValueType Type = ValueType::TransientObject; @@ -236,6 +247,8 @@ struct ValueAs return *str; if (auto str = std::get_if(&v)) return std::string(*str); + if (auto bytes = std::get_if(&v)) + return bytes->toDisplayString(); return ""s; } }; @@ -403,6 +416,8 @@ class Value case ValueType::Float: return fn(this->template as()); case ValueType::String: + if (auto bytes = std::get_if(&value)) + return fn(*bytes); return fn(this->template as()); case ValueType::TransientObject: return fn(this->template as()); @@ -438,6 +453,7 @@ class Value void operator() (double const& v) {result = v;} void operator() (std::string const& v) {result = v;} void operator() (std::string_view const& v) {result = v;} + void operator() (ByteArray const& v) {result = v;} void operator() (TransientObject const&) {} void operator() (ModelNode::Ptr const&) {} ScalarValueType result; @@ -471,6 +487,7 @@ class Value double, std::string, std::string_view, + ByteArray, TransientObject, ModelNode::Ptr> value; }; diff --git a/src/model/model.cpp b/src/model/model.cpp index 38b37bc4..3774e2ab 100644 --- a/src/model/model.cpp +++ b/src/model/model.cpp @@ -2,6 +2,7 @@ #include "simfil/model/arena.h" #include "simfil/model/bitsery-traits.h" #include "simfil/model/nodes.h" +#include "simfil/byte-array.h" #include #include @@ -53,6 +54,7 @@ struct ModelPool::Impl strings_(std::move(strings)) { columns_.stringData_.reserve(detail::ColumnPageSize*4); + columns_.byteArrayData_.reserve(detail::ColumnPageSize*4); } struct StringRange { @@ -76,6 +78,8 @@ struct ModelPool::Impl std::string stringData_; sfl::segmented_vector strings_; + std::string byteArrayData_; + sfl::segmented_vector byteArrays_; Object::Storage objectMemberArrays_; Array::Storage arrayMemberArrays_; @@ -91,6 +95,8 @@ struct ModelPool::Impl s.container(columns_.double_, maxColumnSize); s.text1b(columns_.stringData_, maxColumnSize); s.container(columns_.strings_, maxColumnSize); + s.text1b(columns_.byteArrayData_, maxColumnSize); + s.container(columns_.byteArrays_, maxColumnSize); s.ext(columns_.objectMemberArrays_, bitsery::ext::ArrayArenaExt{}); s.ext(columns_.arrayMemberArrays_, bitsery::ext::ArrayArenaExt{}); @@ -228,6 +234,8 @@ void ModelPool::clear() clear_and_shrink(columns.double_); clear_and_shrink(columns.strings_); clear_and_shrink(columns.stringData_); + clear_and_shrink(columns.byteArrays_); + clear_and_shrink(columns.byteArrayData_); clear_and_shrink(columns.objectMemberArrays_); clear_and_shrink(columns.arrayMemberArrays_); } @@ -278,6 +286,15 @@ tl::expected ModelPool::resolve(ModelNode const& n, ResolveFn const shared_from_this())); break; } + case ByteArray: { + auto idx = n.addr().index(); + if (auto err = checkBounds(impl_->columns_.byteArrays_)) + return tl::unexpected(*err); + auto& val = impl_->columns_.byteArrays_[idx]; + auto view = std::string_view(impl_->columns_.byteArrayData_).substr(val.offset_, val.length_); + cb(ValueNode(simfil::ByteArray{view}, shared_from_this())); + break; + } case PooledString: { auto str = lookupStringId(static_cast(n.addr().index())); cb(ValueNode(str.value_or(std::string_view{}), shared_from_this())); @@ -365,6 +382,16 @@ ModelNode::Ptr ModelPool::newValue(std::string_view const& value) return ModelNode(shared_from_this(), {String, (uint32_t)impl_->columns_.strings_.size()-1}); } +ModelNode::Ptr ModelPool::newValue(simfil::ByteArray const& value) +{ + impl_->columns_.byteArrays_.emplace_back(Impl::StringRange{ + (uint32_t)impl_->columns_.byteArrayData_.size(), + (uint32_t)value.bytes.size() + }); + impl_->columns_.byteArrayData_.append(value.bytes.data(), value.bytes.size()); + return ModelNode(shared_from_this(), {ByteArray, (uint32_t)impl_->columns_.byteArrays_.size()-1}); +} + ModelNode::Ptr ModelPool::newValue(StringId handle) { return ModelNode(shared_from_this(), {PooledString, static_cast(handle)}); } diff --git a/src/model/nodes.cpp b/src/model/nodes.cpp index 458ff040..72a2667f 100644 --- a/src/model/nodes.cpp +++ b/src/model/nodes.cpp @@ -120,7 +120,9 @@ nlohmann::json ModelNode::toJson() const [&j](auto&& v) { using T = decltype(v); - if constexpr (!std::is_same_v, std::monostate>) { + if constexpr (std::is_same_v, ByteArray>) { + j = v.toDisplayString(); + } else if constexpr (!std::is_same_v, std::monostate>) { j = std::forward(v); } else { j = nullptr; diff --git a/src/model/string-pool.cpp b/src/model/string-pool.cpp index a4c4f976..9cbef544 100644 --- a/src/model/string-pool.cpp +++ b/src/model/string-pool.cpp @@ -9,6 +9,7 @@ #include #include #include +#include /** * Note: This code is taken from bitsery traits/string.h and adopted @@ -112,6 +113,7 @@ auto StringPool::emplace(std::string_view const& str) -> tl::expected(Error::StringPoolOverflow, "StringPool id overflow!"); } From 1240d9d8dddf106e8fab912b2753448f317d7cc4 Mon Sep 17 00:00:00 2001 From: Wagram Airiian Date: Wed, 4 Feb 2026 09:23:07 +0100 Subject: [PATCH 02/13] Add byte array --- include/simfil/byte-array.h | 58 +++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 include/simfil/byte-array.h diff --git a/include/simfil/byte-array.h b/include/simfil/byte-array.h new file mode 100644 index 00000000..0aa48eff --- /dev/null +++ b/include/simfil/byte-array.h @@ -0,0 +1,58 @@ +// Copyright (c) Navigation Data Standard e.V. - See "LICENSE" file. +#pragma once + +#include +#include +#include +#include +#include + +namespace simfil +{ + +struct ByteArray +{ + std::string_view bytes; + + [[nodiscard]] std::optional decodeBigEndianI64() const + { + if (bytes.size() > 8) { + for (size_t i = 8; i < bytes.size(); ++i) { + if (static_cast(bytes[i]) != 0) + return std::nullopt; + } + } + + const size_t count = bytes.size() <= 8 ? bytes.size() : 8; + uint64_t value = 0; + for (size_t i = 0; i < count; ++i) { + value = (value << 8) | static_cast(bytes[i]); + } + + int64_t signedValue = 0; + std::memcpy(&signedValue, &value, sizeof(signedValue)); + return signedValue; + } + + [[nodiscard]] std::string toHex() const + { + static constexpr char kHex[] = "0123456789abcdef"; + std::string out; + out.reserve(bytes.size() * 2 + 2); + out.append("0x"); + for (unsigned char byte : bytes) { + out.push_back(kHex[(byte >> 4) & 0x0f]); + out.push_back(kHex[byte & 0x0f]); + } + return out; + } + + [[nodiscard]] std::string toDisplayString() const + { + if (auto decoded = decodeBigEndianI64()) + return std::to_string(*decoded); + return toHex(); + } +}; + +} // namespace simfil From 5d1bf1e9ef4d719aa75cf2dd92a481db5dfc277d Mon Sep 17 00:00:00 2001 From: Wagram Airiian Date: Wed, 4 Feb 2026 14:56:57 +0100 Subject: [PATCH 03/13] Remove extraneous output --- src/model/string-pool.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/model/string-pool.cpp b/src/model/string-pool.cpp index 9cbef544..a4c4f976 100644 --- a/src/model/string-pool.cpp +++ b/src/model/string-pool.cpp @@ -9,7 +9,6 @@ #include #include #include -#include /** * Note: This code is taken from bitsery traits/string.h and adopted @@ -113,7 +112,6 @@ auto StringPool::emplace(std::string_view const& str) -> tl::expected(Error::StringPoolOverflow, "StringPool id overflow!"); } From 988514ed0852a53801e8b6dd6eae73b0feaa3175 Mon Sep 17 00:00:00 2001 From: Wagram Airiian Date: Thu, 5 Feb 2026 06:58:45 +0100 Subject: [PATCH 04/13] Remove byteArrayData_ column --- src/model/model.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/model/model.cpp b/src/model/model.cpp index 3774e2ab..adaeefd7 100644 --- a/src/model/model.cpp +++ b/src/model/model.cpp @@ -54,7 +54,6 @@ struct ModelPool::Impl strings_(std::move(strings)) { columns_.stringData_.reserve(detail::ColumnPageSize*4); - columns_.byteArrayData_.reserve(detail::ColumnPageSize*4); } struct StringRange { @@ -78,7 +77,6 @@ struct ModelPool::Impl std::string stringData_; sfl::segmented_vector strings_; - std::string byteArrayData_; sfl::segmented_vector byteArrays_; Object::Storage objectMemberArrays_; @@ -95,7 +93,6 @@ struct ModelPool::Impl s.container(columns_.double_, maxColumnSize); s.text1b(columns_.stringData_, maxColumnSize); s.container(columns_.strings_, maxColumnSize); - s.text1b(columns_.byteArrayData_, maxColumnSize); s.container(columns_.byteArrays_, maxColumnSize); s.ext(columns_.objectMemberArrays_, bitsery::ext::ArrayArenaExt{}); @@ -235,7 +232,6 @@ void ModelPool::clear() clear_and_shrink(columns.strings_); clear_and_shrink(columns.stringData_); clear_and_shrink(columns.byteArrays_); - clear_and_shrink(columns.byteArrayData_); clear_and_shrink(columns.objectMemberArrays_); clear_and_shrink(columns.arrayMemberArrays_); } @@ -291,7 +287,7 @@ tl::expected ModelPool::resolve(ModelNode const& n, ResolveFn const if (auto err = checkBounds(impl_->columns_.byteArrays_)) return tl::unexpected(*err); auto& val = impl_->columns_.byteArrays_[idx]; - auto view = std::string_view(impl_->columns_.byteArrayData_).substr(val.offset_, val.length_); + auto view = std::string_view(impl_->columns_.stringData_).substr(val.offset_, val.length_); cb(ValueNode(simfil::ByteArray{view}, shared_from_this())); break; } @@ -385,10 +381,10 @@ ModelNode::Ptr ModelPool::newValue(std::string_view const& value) ModelNode::Ptr ModelPool::newValue(simfil::ByteArray const& value) { impl_->columns_.byteArrays_.emplace_back(Impl::StringRange{ - (uint32_t)impl_->columns_.byteArrayData_.size(), + (uint32_t)impl_->columns_.stringData_.size(), (uint32_t)value.bytes.size() }); - impl_->columns_.byteArrayData_.append(value.bytes.data(), value.bytes.size()); + impl_->columns_.stringData_.append(value.bytes.data(), value.bytes.size()); return ModelNode(shared_from_this(), {ByteArray, (uint32_t)impl_->columns_.byteArrays_.size()-1}); } From b5e58d92016fc5563f637b88ba53d4dfa7ac3ed3 Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Fri, 6 Feb 2026 11:58:28 +0100 Subject: [PATCH 05/13] Introduce a generic ADL-based Model::resolve function. --- include/simfil/model/model.h | 100 ++++++++++++++++++++++++++++++++--- include/simfil/model/nodes.h | 48 ++++++++++------- src/model/model.cpp | 16 +++--- 3 files changed, 132 insertions(+), 32 deletions(-) diff --git a/include/simfil/model/model.h b/include/simfil/model/model.h index 0e02c93a..b5f2d820 100644 --- a/include/simfil/model/model.h +++ b/include/simfil/model/model.h @@ -9,7 +9,10 @@ #include #include +#include #include +#include +#include #include #include @@ -19,6 +22,47 @@ namespace simfil { +namespace res +{ +// Tag type for ADL-based resolve hooks implemented by model libraries. +template +struct tag {}; +} + +namespace detail +{ +template +struct node_model_type +{ + static_assert(sizeof(T) == 0, "Target must provide a ModelType alias."); +}; + +template +struct node_model_type> +{ + using type = typename T::ModelType; +}; + +template +using node_model_type_t = typename node_model_type::type; +} + +/** + * ADL customization point for typed node resolution. + * Libraries define resolveInternal(tag, model, node) in their namespace. + */ +template +model_ptr resolveInternal(res::tag, ModelType const&, ModelNode const&) = delete; + +class ModelPool; + +// Built-in resolve hooks for core node types. Declared here so ADL sees them +// across translation units without relying on friend injection. +template<> +model_ptr resolveInternal(res::tag, ModelPool const&, ModelNode const&); +template<> +model_ptr resolveInternal(res::tag, ModelPool const&, ModelNode const&); + /** * Basic node model which only resolves trivial node types. */ @@ -58,6 +102,54 @@ class Model : public std::enable_shared_from_this */ virtual tl::expected resolve(ModelNode const& n, ResolveFn const& cb) const; + /** + * Resolve a node to a specific ModelNode subtype using ADL hooks. + * This provides a clean cast API without exposing model internals. + */ + template + model_ptr resolve(ModelNodeAddress const& address) const + { + if constexpr (std::is_same_v) { + return ModelNode::Ptr::make(shared_from_this(), address); + } + return resolve(*ModelNode::Ptr::make(shared_from_this(), address)); + } + + template + model_ptr resolve(ModelNodeAddress const& address, ScalarValueType data) const + { + if constexpr (std::is_same_v) { + return ModelNode::Ptr::make(shared_from_this(), address, std::move(data)); + } + return resolve(*ModelNode::Ptr::make(shared_from_this(), address, std::move(data))); + } + + template + model_ptr resolve(ModelNode::Ptr const& node) const + { + return resolve(*node); + } + + template + model_ptr resolve(ModelNode const& node) const + { + if constexpr (std::is_same_v) { + return model_ptr(node); + } + else { + using ModelType = detail::node_model_type_t; +#if !defined(NDEBUG) + // In debug builds, validate the model type to catch misuse early. + auto typedModel = dynamic_cast(this); + assert(typedModel && "resolve called on incompatible model type."); + return resolveInternal(res::tag{}, *typedModel, node); +#else + // In release builds, avoid RTTI overhead on this hot path. + return resolveInternal(res::tag{}, *static_cast(this), node); +#endif + } + } + /** Add a small scalar value and get its model node view */ ModelNode::Ptr newSmallValue(bool value); ModelNode::Ptr newSmallValue(int16_t value); @@ -88,6 +180,8 @@ class ModelPool : public Model template friend struct BaseArray; public: + // Keep Model::resolve overloads visible alongside the virtual resolve override. + using Model::resolve; /** * The pool consists of multiple ModelNode columns, * each for a different data type. Each column @@ -156,12 +250,6 @@ class ModelPool : public Model ModelNode::Ptr newValue(std::string_view const& value); ModelNode::Ptr newValue(StringId handle); - /** Node-type-specific resolve-functions */ - [[nodiscard]] - model_ptr resolveObject(ModelNode::Ptr const& n) const; - [[nodiscard]] - model_ptr resolveArray(ModelNode::Ptr const& n) const; - /** Access the field name storage */ [[nodiscard]] std::shared_ptr strings() const; diff --git a/include/simfil/model/nodes.h b/include/simfil/model/nodes.h index 8992e51b..a7a3bbbb 100644 --- a/include/simfil/model/nodes.h +++ b/include/simfil/model/nodes.h @@ -206,6 +206,25 @@ struct ModelNodeAddress } }; +namespace detail +{ + // Shared storage entry for object fields across all BaseObject instantiations. + // Keeps the underlying ArrayArena type identical regardless of ModelType. + struct ObjectField + { + ObjectField() = default; + ObjectField(StringId name, ModelNodeAddress a) : name_(name), node_(a) {} + StringId name_ = StringPool::Empty; + ModelNodeAddress node_; + + template + void serialize(S& s) { + s.value2b(name_); + s.object(node_); + } + }; +} + /** Semantic view onto a particular node in a ModelPool. */ struct ModelNode { @@ -413,13 +432,15 @@ struct ValueNode final : public ModelNodeBase * a reference to a Model-derived pool type. * @tparam ModelType Model-derived type. */ -template +template struct MandatoryDerivedModelNodeBase : public ModelNodeBase { - inline ModelType& model() const {return *modelPtr();} // NOLINT + using ModelType = ModelTypeT; + + inline ModelTypeT& model() const {return *modelPtr();} // NOLINT protected: - template + template inline ModelType_* modelPtr() const { return static_cast(const_cast(model_.get())); } @@ -570,24 +591,9 @@ struct BaseObject : public MandatoryDerivedModelNodeBase protected: /** - * Object field - a name and a tree node address. - * These are stored in the ModelPools Field array arena. + * Object fields are stored in the model's shared object-field arena. */ - struct Field - { - Field() = default; - Field(StringId name, ModelNodeAddress a) : name_(name), node_(a) {} - StringId name_ = StringPool::Empty; - ModelNodeAddress node_; - - template - void serialize(S& s) { - s.value2b(name_); - s.object(node_); - } - }; - - using Storage = ArrayArena; + using Storage = ArrayArena; using ModelNode::model_; using MandatoryDerivedModelNodeBase::model; @@ -637,6 +643,8 @@ template(*this)); diff --git a/src/model/model.cpp b/src/model/model.cpp index 4e23392e..0e8a5df6 100644 --- a/src/model/model.cpp +++ b/src/model/model.cpp @@ -394,17 +394,21 @@ ModelNode::Ptr ModelPool::newValue(StringId handle) { ModelNodeAddress{PooledString, static_cast(handle)}); } -model_ptr ModelPool::resolveObject(const ModelNode::Ptr& n) const { - if (n->addr_.column() != Objects) +// Core ADL resolve hooks for base Object/Array nodes. +template<> +model_ptr resolveInternal(res::tag, ModelPool const& model, ModelNode const& node) +{ + if (node.addr().column() != ModelPool::Objects) raise("Cannot cast this node to an object."); - return model_ptr::make(shared_from_this(), n->addr_); + return model_ptr::make(model.shared_from_this(), node.addr()); } -model_ptr ModelPool::resolveArray(ModelNode::Ptr const& n) const +template<> +model_ptr resolveInternal(res::tag, ModelPool const& model, ModelNode const& node) { - if (n->addr_.column() != Arrays) + if (node.addr().column() != ModelPool::Arrays) raise("Cannot cast this node to an array."); - return model_ptr::make(shared_from_this(), n->addr_); + return model_ptr::make(model.shared_from_this(), node.addr()); } std::shared_ptr ModelPool::strings() const From 6b01ea88509d1017d332fe9e90882e9c95b0f2b3 Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Tue, 10 Feb 2026 07:20:37 +0100 Subject: [PATCH 06/13] Address PR comments. --- include/simfil/model/model.h | 37 ++++++++++++------------ include/simfil/model/nodes.h | 54 ++++++++++++++++++------------------ 2 files changed, 44 insertions(+), 47 deletions(-) diff --git a/include/simfil/model/model.h b/include/simfil/model/model.h index b5f2d820..a4aeeae6 100644 --- a/include/simfil/model/model.h +++ b/include/simfil/model/model.h @@ -31,20 +31,11 @@ struct tag {}; namespace detail { -template -struct node_model_type -{ - static_assert(sizeof(T) == 0, "Target must provide a ModelType alias."); -}; - -template -struct node_model_type> -{ - using type = typename T::ModelType; -}; +template +concept HasModelType = requires { typename T::ModelType; }; -template -using node_model_type_t = typename node_model_type::type; +template +using ModelTypeOf = typename T::ModelType; } /** @@ -137,16 +128,22 @@ class Model : public std::enable_shared_from_this return model_ptr(node); } else { - using ModelType = detail::node_model_type_t; + if constexpr (!detail::HasModelType) { + static_assert(detail::HasModelType, "Target must provide a ModelType alias."); + return {}; + } + else { + using ModelType = detail::ModelTypeOf; #if !defined(NDEBUG) - // In debug builds, validate the model type to catch misuse early. - auto typedModel = dynamic_cast(this); - assert(typedModel && "resolve called on incompatible model type."); - return resolveInternal(res::tag{}, *typedModel, node); + // In debug builds, validate the model type to catch misuse early. + auto typedModel = dynamic_cast(this); + assert(typedModel && "resolve called on incompatible model type."); + return resolveInternal(res::tag{}, *typedModel, node); #else - // In release builds, avoid RTTI overhead on this hot path. - return resolveInternal(res::tag{}, *static_cast(this), node); + // In release builds, avoid RTTI overhead on this hot path. + return resolveInternal(res::tag{}, *static_cast(this), node); #endif + } } } diff --git a/include/simfil/model/nodes.h b/include/simfil/model/nodes.h index a7a3bbbb..44405672 100644 --- a/include/simfil/model/nodes.h +++ b/include/simfil/model/nodes.h @@ -67,18 +67,18 @@ using ScalarValueType = std::variant< namespace detail { - // Passkey for ModelNode construction: ModelNode types take this in their constructors so only - // model_ptr (and ModelPool via a shared key instance) can default/in-place construct them. - // This avoids per-node friendship and keeps IDEs happy across library boundaries. - struct mp_key - { - mp_key() = delete; - private: - explicit mp_key(int) {} - template friend struct ::simfil::model_ptr; - friend class ::simfil::Model; - friend class ::simfil::ModelPool; - }; +// Passkey for ModelNode construction: ModelNode types take this in their constructors so only +// model_ptr (and ModelPool via a shared key instance) can default/in-place construct them. +// This avoids per-node friendship and keeps IDEs happy across library boundaries. +struct mp_key +{ + mp_key() = delete; +private: + explicit mp_key(int) {} + template friend struct ::simfil::model_ptr; + friend class ::simfil::Model; + friend class ::simfil::ModelPool; +}; } /** @@ -208,21 +208,21 @@ struct ModelNodeAddress namespace detail { - // Shared storage entry for object fields across all BaseObject instantiations. - // Keeps the underlying ArrayArena type identical regardless of ModelType. - struct ObjectField - { - ObjectField() = default; - ObjectField(StringId name, ModelNodeAddress a) : name_(name), node_(a) {} - StringId name_ = StringPool::Empty; - ModelNodeAddress node_; - - template - void serialize(S& s) { - s.value2b(name_); - s.object(node_); - } - }; +// Shared storage entry for object fields across all BaseObject instantiations. +// Keeps the underlying ArrayArena type identical regardless of ModelType. +struct ObjectField +{ + ObjectField() = default; + ObjectField(StringId name, ModelNodeAddress a) : name_(name), node_(a) {} + StringId name_ = StringPool::Empty; + ModelNodeAddress node_; + + template + void serialize(S& s) { + s.value2b(name_); + s.object(node_); + } +}; } /** Semantic view onto a particular node in a ModelPool. */ From 74b02fcc3d65031cb53e6a33d199206d3210334f Mon Sep 17 00:00:00 2001 From: Wagram Airiian Date: Tue, 10 Feb 2026 08:04:24 +0100 Subject: [PATCH 07/13] Add a Bytes value type which is returned by ValueType4CType. Add a possibility to declare a byte array through the language. Fix consistency with cross-type operator logic. Add JSON conversion as tagged object (similar to _multimap). Remove toDisplayString(). --- docs/simfil-language.md | 11 ++-- include/simfil/base64.h | 99 ++++++++++++++++++++++++++++++++++++ include/simfil/byte-array.h | 36 ++++++++----- include/simfil/model/nodes.h | 1 + include/simfil/operator.h | 68 +++++++++++++------------ include/simfil/simfil.h | 2 +- include/simfil/token.h | 3 ++ include/simfil/value.h | 28 +++++++--- src/model/json.cpp | 13 +++++ src/model/model.cpp | 2 + src/model/nodes.cpp | 10 +++- src/simfil.cpp | 5 ++ src/token.cpp | 12 +++++ test/complex.cpp | 15 ++++++ test/operator.cpp | 19 +++++++ test/simfil.cpp | 5 ++ test/token.cpp | 16 +++++- test/value.cpp | 13 +++++ 18 files changed, 300 insertions(+), 58 deletions(-) create mode 100644 include/simfil/base64.h diff --git a/docs/simfil-language.md b/docs/simfil-language.md index 660448ab..cc8d3228 100644 --- a/docs/simfil-language.md +++ b/docs/simfil-language.md @@ -136,7 +136,7 @@ count(mylist.*) ## Types -Simfil supports the following scalar types: `null`, `bool`, `int`, `float` (double precision), `string` and `re`. +Simfil supports the following scalar types: `null`, `bool`, `int`, `float` (double precision), `string`, `bytes` and `re`. Additionally, the `model` type represents compound object/array container nodes. All values but `null` and `false` are considered `true`, implicit boolean conversion takes place for operators `and` and `or` only. @@ -151,6 +151,9 @@ The following types can be target types for a cast: * `int` - Converts the value to an integer. Returns 0 on failure. * `float` - Converts the value to a float. Returns 0 on failure. * `string` - Converts the value to a string. Boolean values are converted to either "true" or "false". +* `bytes` - Converts the value to bytes. + +Byte literals are written using the `b` prefix, e.g. `b"hello"` or `b'hello'`. ## Operators @@ -161,12 +164,12 @@ The following types can be target types for a cast: | `[ a ]` | Array/Object subscript, index expression can be of type `int` or `string`. | | `{ a }` | Sub-Query (inside sub-query `_` represents the value the query is applied to). | | `. b` or `a . b` | Direct field access; returns the value of field `b` or `null`. | -| `a as b` | Cast a to type b (one of `bool`, `int`, `float` or `string`). | +| `a as b` | Cast a to type b (one of `bool`, `int`, `float`, `string` or `bytes`). | | `a ?` | Get boolean value of `a` (see ##Types). | | `a ...` | Unpacks `a` to a list of values (see function `range` under [Functions](#Functions) for example) | -| `typeof a` | Returns the type of the value of its expression (`"null"`, `"bool"`, `"int"`, `"float"` or `"string"`). | +| `typeof a` | Returns the type of the value of its expression (`"null"`, `"bool"`, `"int"`, `"float"`, `"string"` or `"bytes"`). | | `not a` | Boolean not. | -| `# a` | Returns the length of a string or array value. | +| `# a` | Returns the length of a string, bytes, or array value. | | `~ a` | Bitwise not. | | `- a` | Unary minus. | | `a * b` | Multiplication. | diff --git a/include/simfil/base64.h b/include/simfil/base64.h new file mode 100644 index 00000000..8846f928 --- /dev/null +++ b/include/simfil/base64.h @@ -0,0 +1,99 @@ +// Copyright (c) Navigation Data Standard e.V. - See "LICENSE" file. + +#pragma once + +#include +#include +#include +#include + +namespace simfil +{ + +inline auto base64Encode(std::string_view input) -> std::string +{ + static constexpr char kTable[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + + std::string out; + out.reserve(((input.size() + 2) / 3) * 4); + + for (size_t i = 0; i < input.size(); i += 3) { + const auto remaining = input.size() - i; + const auto b0 = static_cast(input[i]); + const auto b1 = remaining > 1 ? static_cast(input[i + 1]) : 0U; + const auto b2 = remaining > 2 ? static_cast(input[i + 2]) : 0U; + + out.push_back(kTable[(b0 >> 2) & 0x3F]); + out.push_back(kTable[((b0 & 0x03) << 4) | ((b1 >> 4) & 0x0F)]); + out.push_back(remaining > 1 ? kTable[((b1 & 0x0F) << 2) | ((b2 >> 6) & 0x03)] : '='); + out.push_back(remaining > 2 ? kTable[b2 & 0x3F] : '='); + } + + return out; +} + +inline auto base64Decode(std::string_view input) -> std::optional +{ + if (input.size() % 4 != 0) + return std::nullopt; + + auto decodeChar = [](char c) -> int { + if ('A' <= c && c <= 'Z') + return c - 'A'; + if ('a' <= c && c <= 'z') + return c - 'a' + 26; + if ('0' <= c && c <= '9') + return c - '0' + 52; + if (c == '+') + return 62; + if (c == '/') + return 63; + return -1; + }; + + std::string out; + out.reserve((input.size() / 4) * 3); + + for (size_t i = 0; i < input.size(); i += 4) { + const auto c0 = input[i]; + const auto c1 = input[i + 1]; + const auto c2 = input[i + 2]; + const auto c3 = input[i + 3]; + + const auto v0 = decodeChar(c0); + const auto v1 = decodeChar(c1); + if (v0 < 0 || v1 < 0) + return std::nullopt; + + const bool p2 = c2 == '='; + const bool p3 = c3 == '='; + + if (p2 && !p3) + return std::nullopt; + + const auto v2 = p2 ? 0 : decodeChar(c2); + const auto v3 = p3 ? 0 : decodeChar(c3); + if ((!p2 && v2 < 0) || (!p3 && v3 < 0)) + return std::nullopt; + + const auto b0 = static_cast((v0 << 2) | (v1 >> 4)); + out.push_back(b0); + + if (!p2) { + const auto b1 = static_cast(((v1 & 0x0F) << 4) | (v2 >> 2)); + out.push_back(b1); + } + + if (!p3) { + const auto b2 = static_cast(((v2 & 0x03) << 6) | v3); + out.push_back(b2); + } + } + + return out; +} + +} // namespace simfil diff --git a/include/simfil/byte-array.h b/include/simfil/byte-array.h index 0aa48eff..6fca9b93 100644 --- a/include/simfil/byte-array.h +++ b/include/simfil/byte-array.h @@ -6,13 +6,30 @@ #include #include #include +#include namespace simfil { struct ByteArray { - std::string_view bytes; + std::string bytes; + + ByteArray() = default; + + explicit ByteArray(const char* data) + : bytes(data) + {} + + explicit ByteArray(std::string_view data) + : bytes(data) + {} + + explicit ByteArray(std::string data) + : bytes(std::move(data)) + {} + + auto operator==(const ByteArray&) const -> bool = default; [[nodiscard]] std::optional decodeBigEndianI64() const { @@ -34,25 +51,20 @@ struct ByteArray return signedValue; } - [[nodiscard]] std::string toHex() const + [[nodiscard]] std::string toHex(bool uppercase = true) const { - static constexpr char kHex[] = "0123456789abcdef"; + static constexpr char kHexLower[] = "0123456789abcdef"; + static constexpr char kHexUpper[] = "0123456789ABCDEF"; + const char* kHex = uppercase ? kHexUpper : kHexLower; + std::string out; - out.reserve(bytes.size() * 2 + 2); - out.append("0x"); + out.reserve(bytes.size() * 2); for (unsigned char byte : bytes) { out.push_back(kHex[(byte >> 4) & 0x0f]); out.push_back(kHex[byte & 0x0f]); } return out; } - - [[nodiscard]] std::string toDisplayString() const - { - if (auto decoded = decodeBigEndianI64()) - return std::to_string(*decoded); - return toHex(); - } }; } // namespace simfil diff --git a/include/simfil/model/nodes.h b/include/simfil/model/nodes.h index ddffae61..95f3765e 100644 --- a/include/simfil/model/nodes.h +++ b/include/simfil/model/nodes.h @@ -49,6 +49,7 @@ enum class ValueType Int, Float, String, + Bytes, TransientObject, Object, Array diff --git a/include/simfil/operator.h b/include/simfil/operator.h index 9b4e7462..5063d92f 100644 --- a/include/simfil/operator.h +++ b/include/simfil/operator.h @@ -179,7 +179,7 @@ struct OperatorTypeof auto operator()(const ByteArray&) const -> std::string_view { - static auto n = "string"sv; + static auto n = "bytes"sv; return n; } @@ -291,7 +291,7 @@ struct OperatorAsString auto operator()(const ByteArray& v) const -> std::string { - return v.toDisplayString(); + return v.bytes; } auto operator()(const ModelNode& v) const @@ -314,6 +314,40 @@ struct OperatorAsString NULL_AS("null"s); }; +struct OperatorAsBytes +{ + NAME("bytes") + + auto operator()(const ByteArray& v) const -> ByteArray + { + return v; + } + + auto operator()(const std::string& v) const -> ByteArray + { + return ByteArray{v}; + } + + auto operator()(const ModelNode&) const + { + return ByteArray{}; + } + + auto operator()(const TransientObject&) const + { + // Handled by MetaType::unaryOp + return ByteArray{}; + } + + template + auto operator()(Type v) const -> ByteArray + { + return ByteArray{OperatorAsString()(v)}; + } + + NULL_AS(ByteArray{"null"}); +}; + #undef DENY_OTHER #undef NULL_AS #undef NULL_AS_NULL @@ -504,16 +538,6 @@ struct OperatorEq return l.bytes == r.bytes; } - auto operator()(const ByteArray& l, const std::string& r) const - { - return l.toDisplayString() == r; - } - - auto operator()(const std::string& l, const ByteArray& r) const - { - return l == r.toDisplayString(); - } - auto operator()(const ByteArray& l, int64_t r) const { if (auto decoded = l.decodeBigEndianI64()) @@ -577,16 +601,6 @@ struct OperatorLt [](unsigned char a, unsigned char b) { return a < b; }); } - auto operator()(const ByteArray& l, const std::string& r) const - { - return l.toDisplayString() < r; - } - - auto operator()(const std::string& l, const ByteArray& r) const - { - return l < r.toDisplayString(); - } - auto operator()(const ByteArray& l, int64_t r) const { if (auto decoded = l.decodeBigEndianI64()) @@ -636,16 +650,6 @@ struct OperatorLtEq return OperatorLt()(l, r) || OperatorEq()(l, r); } - auto operator()(const ByteArray& l, const std::string& r) const - { - return OperatorLt()(l, r) || OperatorEq()(l, r); - } - - auto operator()(const std::string& l, const ByteArray& r) const - { - return OperatorLt()(l, r) || OperatorEq()(l, r); - } - auto operator()(const ByteArray& l, int64_t r) const { return OperatorLt()(l, r) || OperatorEq()(l, r); diff --git a/include/simfil/simfil.h b/include/simfil/simfil.h index 17c02d50..14818446 100644 --- a/include/simfil/simfil.h +++ b/include/simfil/simfil.h @@ -26,7 +26,7 @@ struct ModelNode; * Param: * any If true, wrap expression with call to `any(...)`. * Param: - * autoWildcard If true, expand constant expressions to `** = `. + * autoWildcard If true, expand constant expressions to `** == `. */ auto compile(Environment& env, std::string_view query, bool any = true, bool autoWildcard = false) -> tl::expected; diff --git a/include/simfil/token.h b/include/simfil/token.h index 6851c87d..cb9fdc3f 100644 --- a/include/simfil/token.h +++ b/include/simfil/token.h @@ -10,6 +10,7 @@ #include #include "simfil/error.h" +#include "simfil/byte-array.h" namespace simfil { @@ -32,6 +33,7 @@ struct Token INT, // FLOAT, // STRING, // [r]"..." or [r]'...' + BYTES, // [b]"..." or [b]'...' REGEXP, // A string prefixed by re or RE WORD, // SELF, // _ @@ -73,6 +75,7 @@ struct Token std::variant< std::monostate, std::string, + ByteArray, int64_t, double > value; diff --git a/include/simfil/value.h b/include/simfil/value.h index f1b0e1c6..495e07f2 100644 --- a/include/simfil/value.h +++ b/include/simfil/value.h @@ -61,7 +61,7 @@ struct ValueToString auto operator()(const ByteArray& v) const { - return v.toDisplayString(); + return "b\""s + v.toHex() + "\""; } auto operator()(const TransientObject&) const @@ -90,6 +90,7 @@ inline auto valueType2String(ValueType t) -> const char* case ValueType::Int: return "int"; case ValueType::Float: return "float"; case ValueType::String: return "string"; + case ValueType::Bytes: return "bytes"; case ValueType::TransientObject: return "transient"; case ValueType::Object: return "object"; case ValueType::Array: return "array"; @@ -103,7 +104,7 @@ inline auto valueType2String(ValueType t) -> const char* */ struct TypeFlags { - std::bitset<9> flags; + std::bitset<10> flags; auto test(ValueType type) const { @@ -166,7 +167,7 @@ struct ValueType4CType { template <> struct ValueType4CType { - static constexpr ValueType Type = ValueType::String; + static constexpr ValueType Type = ValueType::Bytes; }; template <> @@ -212,6 +213,11 @@ struct ValueTypeInfo { using Type = std::string; }; +template <> +struct ValueTypeInfo { + using Type = ByteArray; +}; + template <> struct ValueTypeInfo { using Type = TransientObject; @@ -247,12 +253,20 @@ struct ValueAs return *str; if (auto str = std::get_if(&v)) return std::string(*str); - if (auto bytes = std::get_if(&v)) - return bytes->toDisplayString(); return ""s; } }; +template <> +struct ValueAs +{ + template + static inline auto get(const VariantType& v) noexcept -> decltype(auto) + { + return std::get(v); + } +}; + template <> struct ValueAs { @@ -416,9 +430,9 @@ class Value case ValueType::Float: return fn(this->template as()); case ValueType::String: - if (auto bytes = std::get_if(&value)) - return fn(*bytes); return fn(this->template as()); + case ValueType::Bytes: + return fn(this->template as()); case ValueType::TransientObject: return fn(this->template as()); case ValueType::Object: diff --git a/src/model/json.cpp b/src/model/json.cpp index c140bc7f..c5a4cb9d 100644 --- a/src/model/json.cpp +++ b/src/model/json.cpp @@ -2,6 +2,7 @@ #include "simfil/model/json.h" #include "simfil/model/model.h" +#include "simfil/base64.h" #include @@ -34,6 +35,18 @@ static auto build(const json& j, ModelPool & model) -> tl::expectedis_boolean() && it->get()) { + auto data = j.find("data"); + if (data == j.end() || !data->is_string()) + return tl::unexpected(Error::ParserError, "Invalid tagged bytes object: expected string field 'data'"); + + auto decoded = base64Decode(data->get()); + if (!decoded) + return tl::unexpected(Error::ParserError, "Invalid tagged bytes object: base64 decode failed"); + + return model.newValue(ByteArray{std::move(*decoded)}); + } + auto object = model.newObject(j.size()); for (auto&& [key, value] : j.items()) { auto child = build(value, model); diff --git a/src/model/model.cpp b/src/model/model.cpp index adaeefd7..ad08c9f7 100644 --- a/src/model/model.cpp +++ b/src/model/model.cpp @@ -449,6 +449,8 @@ ModelPool::SerializationSizeStats ModelPool::serializationSizeStats() const [&](auto& s) { s.text1b(impl_->columns_.stringData_, maxColumnSize); }); stats.stringRangeBytes = measureBytes( [&](auto& s) { s.container(impl_->columns_.strings_, maxColumnSize); }); + stats.stringRangeBytes += measureBytes( + [&](auto& s) { s.container(impl_->columns_.byteArrays_, maxColumnSize); }); stats.objectMemberBytes = measureBytes( [&](auto& s) { s.ext(impl_->columns_.objectMemberArrays_, bitsery::ext::ArrayArenaExt{}); }); stats.arrayMemberBytes = measureBytes( diff --git a/src/model/nodes.cpp b/src/model/nodes.cpp index 72a2667f..970b480e 100644 --- a/src/model/nodes.cpp +++ b/src/model/nodes.cpp @@ -1,5 +1,6 @@ #include "simfil/model/model.h" #include "simfil/model/string-pool.h" +#include "simfil/base64.h" #include "simfil/value.h" #include "simfil/model/nodes.h" @@ -121,7 +122,14 @@ nlohmann::json ModelNode::toJson() const { using T = decltype(v); if constexpr (std::is_same_v, ByteArray>) { - j = v.toDisplayString(); + auto bytes = nlohmann::json::object(); + bytes["_bytes"] = true; + if (auto decoded = v.decodeBigEndianI64()) + bytes["number"] = *decoded; + else + bytes["number"] = nullptr; + bytes["data"] = base64Encode(v.bytes); + j = std::move(bytes); } else if constexpr (!std::is_same_v, std::monostate>) { j = std::forward(v); } else { diff --git a/src/simfil.cpp b/src/simfil.cpp index 6f69da53..5da371d1 100644 --- a/src/simfil.cpp +++ b/src/simfil.cpp @@ -44,6 +44,7 @@ static constexpr std::string_view TypenameBool("bool"); static constexpr std::string_view TypenameInt("int"); static constexpr std::string_view TypenameFloat("float"); static constexpr std::string_view TypenameString("string"); +static constexpr std::string_view TypenameBytes("bytes"); } /** @@ -243,6 +244,8 @@ class CastParser : public InfixParselet return std::make_unique>(std::move(left)); if (name == strings::TypenameString) return std::make_unique>(std::move(left)); + if (name == strings::TypenameBytes) + return std::make_unique>(std::move(left)); return unexpected(Error::InvalidType, fmt::format("Invalid type name for cast '{}'", name)); }()); @@ -680,6 +683,7 @@ namespace const ScalarParser intParser; const ScalarParser floatParser; const ScalarParser stringParser; +const ScalarParser bytesParser; const RegExpParser regexpParser; const UnaryOpParser negateParser; const UnaryOpParser bitInvParser; @@ -726,6 +730,7 @@ static auto setupParser(Parser& p) p.prefixParsers[Token::INT] = &intParser; p.prefixParsers[Token::FLOAT] = &floatParser; p.prefixParsers[Token::STRING] = &stringParser; + p.prefixParsers[Token::BYTES] = &bytesParser; p.prefixParsers[Token::REGEXP] = ®expParser; /* Unary Operators */ diff --git a/src/token.cpp b/src/token.cpp index 094a99da..6129ca7d 100644 --- a/src/token.cpp +++ b/src/token.cpp @@ -41,6 +41,8 @@ auto Token::toString() const -> std::string return std::to_string(std::get(value)); case Token::STRING: return "'"s + std::get(value) + "'"s; + case Token::BYTES: + return "b\""s + std::get(value).bytes + "\""; case Token::REGEXP: return "re'"s + std::get(value) + "'"s; case Token::WORD: @@ -97,6 +99,7 @@ auto Token::toString(Type t) -> std::string case Token::INT: return ""; case Token::FLOAT: return ""; case Token::STRING: return ""; + case Token::BYTES: return ""; case Token::REGEXP: return ""; case Token::WORD: return ""; }; @@ -252,6 +255,11 @@ std::optional scanStringLiteral(Scanner& s) s.match("r'") || s.match("R'") || s.match("r\"") || s.match("R\""); + // Test for byte strings + const auto bytes = + s.match("b'") || s.match("B'") || + s.match("b\"") || s.match("B\""); + // Test for regexp const auto regexp = s.match("re'") || s.match("RE'") || @@ -259,6 +267,8 @@ std::optional scanStringLiteral(Scanner& s) if (raw) s.skip(1); + else if (bytes) + s.skip(1); else if (regexp) s.skip(2); @@ -309,6 +319,8 @@ std::optional scanStringLiteral(Scanner& s) if (regexp) return Token(Token::REGEXP, text, begin, s.pos()); + if (bytes) + return Token(Token::BYTES, ByteArray{text}, begin, s.pos()); return Token(Token::STRING, text, begin, s.pos()); } diff --git a/test/complex.cpp b/test/complex.cpp index bffd18b9..b49ff7e1 100644 --- a/test/complex.cpp +++ b/test/complex.cpp @@ -102,6 +102,21 @@ TEST_CASE("Multimap JSON", "[multimap.serialization]") { REQUIRE(model->toJson() == nlohmann::json::parse(R"([{"a":[1],"b":[1,2,3],"c":[[1],2],"_multimap":true}])")); } +TEST_CASE("Tagged bytes JSON", "[bytes.serialization]") { + auto model = std::make_shared(); + auto root = model->newObject(1); + model->addRoot(root); + root->addField("raw", model->newValue(ByteArray{"A normal string"})); + + auto expected = nlohmann::json::parse( + R"([{"raw":{"_bytes":true,"number":null,"data":"QSBub3JtYWwgc3RyaW5n"}}])"); + REQUIRE(model->toJson() == expected); + + auto roundTrip = json::parse(model->toJson().dump()); + REQUIRE(roundTrip); + REQUIRE(roundTrip.value()->toJson() == expected); +} + TEST_CASE("Serialization", "[complex.serialization]") { auto model = json::parse(invoice); REQUIRE(model); diff --git a/test/operator.cpp b/test/operator.cpp index 7e78ed03..3ab2aab7 100644 --- a/test/operator.cpp +++ b/test/operator.cpp @@ -95,6 +95,7 @@ TEST_CASE("Unary operators", "[operator.unary]") { REQUIRE(op(int64_t(42)) == "int"); REQUIRE(op(3.14) == "float"); REQUIRE(op("hello"s) == "string"); + REQUIRE(op(ByteArray{"ff"}) == "bytes"); } } @@ -129,6 +130,17 @@ TEST_CASE("Type conversion operators", "[operator.conversion]") { REQUIRE(op(""s) == 0.0); REQUIRE(op(NullType{}) == 0.0); } + + SECTION("OperatorAsString") { + OperatorAsString op; + REQUIRE(op(ByteArray{"89899"}) == "89899"); + } + + SECTION("OperatorAsBytes") { + OperatorAsBytes op; + REQUIRE(op("A normal string"s).bytes == "A normal string"); + REQUIRE(op(ByteArray{"ff"}).bytes == "ff"); + } } TEST_CASE("Binary arithmetic operators", "[operator.binary.arithmetic]") { @@ -220,5 +232,12 @@ TEST_CASE("Binary comparison operators", "[operator.binary.comparison]") { REQUIRE(op(int64_t(5), 5.0) == true); REQUIRE(op(5.0, int64_t(5)) == true); REQUIRE(op(int64_t(5), 5.1) == false); + REQUIRE(op(ByteArray{"89899"}, "normal-string"s) == false); + } + + SECTION("OperatorGt") { + OperatorGt op; + REQUIRE(op(ByteArray{"89899"}, int64_t(5)) == true); + REQUIRE(op(ByteArray{"89899"}, "normal-string"s) == false); } } diff --git a/test/simfil.cpp b/test/simfil.cpp index 0b5c2305..03342858 100644 --- a/test/simfil.cpp +++ b/test/simfil.cpp @@ -116,6 +116,8 @@ TEST_CASE("OperatorConst", "[ast.operator]") { REQUIRE_AST("'a'<='b'", "true"); REQUIRE_AST("'b'>'a'", "true"); REQUIRE_AST("'b'>='b'", "true"); + REQUIRE_AST("b\"89899\" > 5", "true"); + REQUIRE_AST("b\"89899\" > \"normal-string\"", "false"); /* Null behaviour */ REQUIRE_AST("1 Type static auto asInt(const std::string_view input) {return getFirst(input, Token::Type::INT);} static auto asFloat(const std::string_view input) {return getFirst(input, Token::Type::FLOAT);} static auto asStr(const std::string_view input) {return getFirst(input, Token::Type::STRING);} +static auto asBytes(const std::string_view input) {return getFirst(input, Token::Type::BYTES);} static auto asRegexp(const std::string_view input) {return getFirst(input, Token::Type::REGEXP);} static auto asWord(const std::string_view input) {return getFirst(input, Token::Type::WORD);} static auto asError(const std::string_view input) { @@ -104,6 +105,18 @@ TEST_CASE("Tokenize strings", "[token.string]") { REQUIRE(asRegexp("RE''") == ""); REQUIRE(asRegexp("re'\"'") == "\""); + /* b'...' */ + REQUIRE(asBytes("b''").bytes == ""); + REQUIRE(asBytes("B''").bytes == ""); + REQUIRE(asBytes("b'abc'").bytes == "abc"); + REQUIRE(asBytes("b'\\'abc\\''").bytes == "'abc'"); + + /* b"..." */ + REQUIRE(asBytes("b\"\"").bytes == ""); + REQUIRE(asBytes("B\"\"").bytes == ""); + REQUIRE(asBytes("b\"abc\"").bytes == "abc"); + REQUIRE(asBytes("b\"\\\"abc\\\"\"").bytes == "\"abc\""); + /* Quote mismatch */ REQUIRE(asError("'abc") == "Quote mismatch at 4"); REQUIRE(asError("abc'") == "Quote mismatch at 4"); @@ -160,7 +173,7 @@ TEST_CASE("Token location", "[token.location]") { } TEST_CASE("Token to string", "[token.to-string]") { - auto tokens = tokenize("1 1.5 'Æthervial' re'.* Familiar' abc ()[]{},:._ * ** " + auto tokens = tokenize("1 1.5 'Æthervial' b'beef' re'.* Familiar' abc ()[]{},:._ * ** " "null true false + - * / % << >> & | ^ ~ not and or " "== != < <= > >= ? # typeof as ..."); REQUIRE(tokens); @@ -171,6 +184,7 @@ TEST_CASE("Token to string", "[token.to-string]") { REQUIRE_STR("1"); REQUIRE_STR("1.500000"); REQUIRE_STR("'Æthervial'"); + REQUIRE_STR("b\"beef\""); REQUIRE_STR("re'.* Familiar'"); REQUIRE_STR("abc"); REQUIRE_STR("("); REQUIRE_STR(")"); diff --git a/test/value.cpp b/test/value.cpp index bc4b0885..8746e1bd 100644 --- a/test/value.cpp +++ b/test/value.cpp @@ -76,6 +76,12 @@ TEST_CASE("Value Constructors", "[value.value-constructor]") { REQUIRE(val.as() == "world"); } + SECTION("Make ByteArray") { + auto val = Value::make(ByteArray{"bytes"}); + REQUIRE(val.isa(ValueType::Bytes)); + REQUIRE(val.as().bytes == "bytes"); + } + SECTION("Type constructor") { Value val(ValueType::Null); REQUIRE(val.isa(ValueType::Null)); @@ -184,6 +190,11 @@ TEST_CASE("Value As", "[value.as]") { auto ptr = val.as(); REQUIRE(!!ptr); } + + SECTION("as()") { + auto val = Value::make(ByteArray{"abc"}); + REQUIRE(val.as().bytes == "abc"); + } SECTION("as()") { auto model = std::make_shared(); @@ -308,6 +319,7 @@ TEST_CASE("Value toString() method", "[value.toString]") { REQUIRE(Value::make(int64_t(-123)).toString() == "-123"); REQUIRE(Value::make(double(3.14)).toString().find("3.14") == 0); REQUIRE(Value::make("Ponder"s).toString() == "Ponder"); + REQUIRE(Value::make(ByteArray{"A normal string"}).toString() == "b\"41206E6F726D616C20737472696E67\""); } TEST_CASE("Value utility methods", "[value.utilities]") { @@ -360,6 +372,7 @@ TEST_CASE("valueType2String() function", "[value.type2string]") { REQUIRE(valueType2String(ValueType::Int) == "int"s); REQUIRE(valueType2String(ValueType::Float) == "float"s); REQUIRE(valueType2String(ValueType::String) == "string"s); + REQUIRE(valueType2String(ValueType::Bytes) == "bytes"s); REQUIRE(valueType2String(ValueType::TransientObject) == "transient"s); REQUIRE(valueType2String(ValueType::Object) == "object"s); REQUIRE(valueType2String(ValueType::Array) == "array"s); From bd975b30f584ceb96814f1184eda155ae79e6ae3 Mon Sep 17 00:00:00 2001 From: Joseph Birkner Date: Tue, 10 Feb 2026 12:09:48 +0100 Subject: [PATCH 08/13] Fix mp_key usage. --- src/model/model.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/model/model.cpp b/src/model/model.cpp index b08fc2a8..294c8bcc 100644 --- a/src/model/model.cpp +++ b/src/model/model.cpp @@ -303,7 +303,7 @@ tl::expected ModelPool::resolve(ModelNode const& n, ResolveFn const return tl::unexpected(*err); auto& val = impl_->columns_.byteArrays_[idx]; auto view = std::string_view(impl_->columns_.stringData_).substr(val.offset_, val.length_); - cb(ValueNode(simfil::ByteArray{view}, shared_from_this())); + cb(ValueNode(simfil::ByteArray{view}, shared_from_this(), mpKey_)); break; } case PooledString: { @@ -408,7 +408,9 @@ ModelNode::Ptr ModelPool::newValue(simfil::ByteArray const& value) (uint32_t)value.bytes.size() }); impl_->columns_.stringData_.append(value.bytes.data(), value.bytes.size()); - return ModelNode(shared_from_this(), {ByteArray, (uint32_t)impl_->columns_.byteArrays_.size()-1}); + return ModelNode::Ptr::make( + shared_from_this(), + ModelNodeAddress{ByteArray, (uint32_t)impl_->columns_.byteArrays_.size()-1}); } ModelNode::Ptr ModelPool::newValue(StringId handle) { From 24848987ab6e86e77b195b86121063f07f116e44 Mon Sep 17 00:00:00 2001 From: Wagram Airiian Date: Thu, 12 Feb 2026 15:31:54 +0100 Subject: [PATCH 09/13] Do not use k* prefixes. Use fmt::format instead of custom hex encoding. Fix ByteArray display format not round-trippable with literal syntax (parse b string literal as hex bytes). --- docs/simfil-language.md | 2 + include/simfil/base64.h | 99 ------------------------------------- include/simfil/byte-array.h | 72 ++++++++++++++++++++++++--- include/simfil/value.h | 2 +- src/model/json.cpp | 13 +++-- src/model/nodes.cpp | 3 +- src/token.cpp | 26 +++++++++- test/complex.cpp | 6 ++- test/simfil.cpp | 2 +- test/token.cpp | 14 ++++++ test/value.cpp | 10 +++- 11 files changed, 130 insertions(+), 119 deletions(-) delete mode 100644 include/simfil/base64.h diff --git a/docs/simfil-language.md b/docs/simfil-language.md index cc8d3228..ace9fa90 100644 --- a/docs/simfil-language.md +++ b/docs/simfil-language.md @@ -154,6 +154,8 @@ The following types can be target types for a cast: * `bytes` - Converts the value to bytes. Byte literals are written using the `b` prefix, e.g. `b"hello"` or `b'hello'`. +Escape sequences `\n`, `\r`, `\t`, `\\`, `\"`, and `\'` are supported. +Bytes can also be written explicitly using `\xNN` (hex), e.g. `b"\x41\x00"`. ## Operators diff --git a/include/simfil/base64.h b/include/simfil/base64.h deleted file mode 100644 index 8846f928..00000000 --- a/include/simfil/base64.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (c) Navigation Data Standard e.V. - See "LICENSE" file. - -#pragma once - -#include -#include -#include -#include - -namespace simfil -{ - -inline auto base64Encode(std::string_view input) -> std::string -{ - static constexpr char kTable[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - - std::string out; - out.reserve(((input.size() + 2) / 3) * 4); - - for (size_t i = 0; i < input.size(); i += 3) { - const auto remaining = input.size() - i; - const auto b0 = static_cast(input[i]); - const auto b1 = remaining > 1 ? static_cast(input[i + 1]) : 0U; - const auto b2 = remaining > 2 ? static_cast(input[i + 2]) : 0U; - - out.push_back(kTable[(b0 >> 2) & 0x3F]); - out.push_back(kTable[((b0 & 0x03) << 4) | ((b1 >> 4) & 0x0F)]); - out.push_back(remaining > 1 ? kTable[((b1 & 0x0F) << 2) | ((b2 >> 6) & 0x03)] : '='); - out.push_back(remaining > 2 ? kTable[b2 & 0x3F] : '='); - } - - return out; -} - -inline auto base64Decode(std::string_view input) -> std::optional -{ - if (input.size() % 4 != 0) - return std::nullopt; - - auto decodeChar = [](char c) -> int { - if ('A' <= c && c <= 'Z') - return c - 'A'; - if ('a' <= c && c <= 'z') - return c - 'a' + 26; - if ('0' <= c && c <= '9') - return c - '0' + 52; - if (c == '+') - return 62; - if (c == '/') - return 63; - return -1; - }; - - std::string out; - out.reserve((input.size() / 4) * 3); - - for (size_t i = 0; i < input.size(); i += 4) { - const auto c0 = input[i]; - const auto c1 = input[i + 1]; - const auto c2 = input[i + 2]; - const auto c3 = input[i + 3]; - - const auto v0 = decodeChar(c0); - const auto v1 = decodeChar(c1); - if (v0 < 0 || v1 < 0) - return std::nullopt; - - const bool p2 = c2 == '='; - const bool p3 = c3 == '='; - - if (p2 && !p3) - return std::nullopt; - - const auto v2 = p2 ? 0 : decodeChar(c2); - const auto v3 = p3 ? 0 : decodeChar(c3); - if ((!p2 && v2 < 0) || (!p3 && v3 < 0)) - return std::nullopt; - - const auto b0 = static_cast((v0 << 2) | (v1 >> 4)); - out.push_back(b0); - - if (!p2) { - const auto b1 = static_cast(((v1 & 0x0F) << 4) | (v2 >> 2)); - out.push_back(b1); - } - - if (!p3) { - const auto b2 = static_cast(((v2 & 0x03) << 6) | v3); - out.push_back(b2); - } - } - - return out; -} - -} // namespace simfil diff --git a/include/simfil/byte-array.h b/include/simfil/byte-array.h index 6fca9b93..e8747c97 100644 --- a/include/simfil/byte-array.h +++ b/include/simfil/byte-array.h @@ -3,11 +3,14 @@ #include #include +#include #include #include #include #include +#include + namespace simfil { @@ -31,6 +34,24 @@ struct ByteArray auto operator==(const ByteArray&) const -> bool = default; + [[nodiscard]] static std::optional fromHex(std::string_view hex) + { + if (hex.size() % 2 != 0) + return std::nullopt; + + std::string decoded; + decoded.reserve(hex.size() / 2); + for (size_t i = 0; i < hex.size(); i += 2) { + const auto upper = decodeHexNibble(hex[i]); + const auto lower = decodeHexNibble(hex[i + 1]); + if (upper < 0 || lower < 0) + return std::nullopt; + decoded.push_back(static_cast((upper << 4) | lower)); + } + + return ByteArray{std::move(decoded)}; + } + [[nodiscard]] std::optional decodeBigEndianI64() const { if (bytes.size() > 8) { @@ -53,18 +74,57 @@ struct ByteArray [[nodiscard]] std::string toHex(bool uppercase = true) const { - static constexpr char kHexLower[] = "0123456789abcdef"; - static constexpr char kHexUpper[] = "0123456789ABCDEF"; - const char* kHex = uppercase ? kHexUpper : kHexLower; - std::string out; out.reserve(bytes.size() * 2); + + if (uppercase) { + for (unsigned char byte : bytes) + fmt::format_to(std::back_inserter(out), FMT_STRING("{:02X}"), byte); + } else { + for (unsigned char byte : bytes) + fmt::format_to(std::back_inserter(out), FMT_STRING("{:02x}"), byte); + } + + return out; + } + + [[nodiscard]] std::string toLiteral() const + { + std::string out; + out.reserve(bytes.size() + 3); + out += "b\""; + for (unsigned char byte : bytes) { - out.push_back(kHex[(byte >> 4) & 0x0f]); - out.push_back(kHex[byte & 0x0f]); + switch (byte) { + case '\\': out += "\\\\"; break; + case '"': out += "\\\""; break; + case '\n': out += "\\n"; break; + case '\r': out += "\\r"; break; + case '\t': out += "\\t"; break; + default: + if (byte < 0x20 || byte >= 0x7f) + fmt::format_to(std::back_inserter(out), FMT_STRING("\\x{:02X}"), byte); + else + out.push_back(static_cast(byte)); + break; + } } + + out.push_back('"'); return out; } + +private: + [[nodiscard]] static auto decodeHexNibble(char c) -> int + { + if ('0' <= c && c <= '9') + return c - '0'; + if ('a' <= c && c <= 'f') + return c - 'a' + 10; + if ('A' <= c && c <= 'F') + return c - 'A' + 10; + return -1; + } }; } // namespace simfil diff --git a/include/simfil/value.h b/include/simfil/value.h index 495e07f2..3805a75d 100644 --- a/include/simfil/value.h +++ b/include/simfil/value.h @@ -61,7 +61,7 @@ struct ValueToString auto operator()(const ByteArray& v) const { - return "b\""s + v.toHex() + "\""; + return v.toLiteral(); } auto operator()(const TransientObject&) const diff --git a/src/model/json.cpp b/src/model/json.cpp index c5a4cb9d..1cbe693a 100644 --- a/src/model/json.cpp +++ b/src/model/json.cpp @@ -2,7 +2,6 @@ #include "simfil/model/json.h" #include "simfil/model/model.h" -#include "simfil/base64.h" #include @@ -36,15 +35,15 @@ static auto build(const json& j, ModelPool & model) -> tl::expectedis_boolean() && it->get()) { - auto data = j.find("data"); - if (data == j.end() || !data->is_string()) - return tl::unexpected(Error::ParserError, "Invalid tagged bytes object: expected string field 'data'"); + auto hex = j.find("hex"); + if (hex == j.end() || !hex->is_string()) + return tl::unexpected(Error::ParserError, "Invalid tagged bytes object: expected string field 'hex'"); - auto decoded = base64Decode(data->get()); + auto decoded = ByteArray::fromHex(hex->get()); if (!decoded) - return tl::unexpected(Error::ParserError, "Invalid tagged bytes object: base64 decode failed"); + return tl::unexpected(Error::ParserError, "Invalid tagged bytes object: hex decode failed"); - return model.newValue(ByteArray{std::move(*decoded)}); + return model.newValue(std::move(*decoded)); } auto object = model.newObject(j.size()); diff --git a/src/model/nodes.cpp b/src/model/nodes.cpp index a37d0188..affd11c3 100644 --- a/src/model/nodes.cpp +++ b/src/model/nodes.cpp @@ -1,6 +1,5 @@ #include "simfil/model/model.h" #include "simfil/model/string-pool.h" -#include "simfil/base64.h" #include "simfil/value.h" #include "simfil/model/nodes.h" @@ -132,7 +131,7 @@ nlohmann::json ModelNode::toJson() const bytes["number"] = *decoded; else bytes["number"] = nullptr; - bytes["data"] = base64Encode(v.bytes); + bytes["hex"] = v.toHex(false); j = std::move(bytes); } else if constexpr (!std::is_same_v, std::monostate>) { j = std::forward(v); diff --git a/src/token.cpp b/src/token.cpp index 6129ca7d..6b8e9106 100644 --- a/src/token.cpp +++ b/src/token.cpp @@ -25,6 +25,17 @@ std::string downcase(std::string s) return s; } +auto decodeHexNibble(char c) -> int +{ + if ('0' <= c && c <= '9') + return c - '0'; + if ('a' <= c && c <= 'f') + return c - 'a' + 10; + if ('A' <= c && c <= 'F') + return c - 'A' + 10; + return -1; +} + } namespace simfil @@ -42,7 +53,7 @@ auto Token::toString() const -> std::string case Token::STRING: return "'"s + std::get(value) + "'"s; case Token::BYTES: - return "b\""s + std::get(value).bytes + "\""; + return std::get(value).toLiteral(); case Token::REGEXP: return "re'"s + std::get(value) + "'"s; case Token::WORD: @@ -294,6 +305,19 @@ std::optional scanStringLiteral(Scanner& s) else text.push_back('\\'); } else { + if (bytes && (s.at(0) == 'x' || s.at(0) == 'X')) { + const auto upper = decodeHexNibble(s.at(1)); + const auto lower = decodeHexNibble(s.at(2)); + if (upper < 0 || lower < 0) { + s.fail("Invalid hex escape sequence"); + return {}; + } + + text.push_back(static_cast((upper << 4) | lower)); + s.skip(3); + continue; + } + switch (s.at(0)) { case 'n': text.push_back('\n'); break; case 'r': text.push_back('\r'); break; diff --git a/test/complex.cpp b/test/complex.cpp index b49ff7e1..3d1d23ef 100644 --- a/test/complex.cpp +++ b/test/complex.cpp @@ -109,12 +109,16 @@ TEST_CASE("Tagged bytes JSON", "[bytes.serialization]") { root->addField("raw", model->newValue(ByteArray{"A normal string"})); auto expected = nlohmann::json::parse( - R"([{"raw":{"_bytes":true,"number":null,"data":"QSBub3JtYWwgc3RyaW5n"}}])"); + R"([{"raw":{"_bytes":true,"number":null,"hex":"41206e6f726d616c20737472696e67"}}])"); REQUIRE(model->toJson() == expected); auto roundTrip = json::parse(model->toJson().dump()); REQUIRE(roundTrip); REQUIRE(roundTrip.value()->toJson() == expected); + + auto invalidHex = json::parse(R"([{"raw":{"_bytes":true,"hex":"abc"}}])"); + REQUIRE_FALSE(invalidHex); + REQUIRE(invalidHex.error().message == "Invalid tagged bytes object: hex decode failed"); } TEST_CASE("Serialization", "[complex.serialization]") { diff --git a/test/simfil.cpp b/test/simfil.cpp index ec7252ea..c1bdfcb8 100644 --- a/test/simfil.cpp +++ b/test/simfil.cpp @@ -167,7 +167,7 @@ TEST_CASE("OperatorConst", "[ast.operator]") { REQUIRE_AST("null as string", "\"null\""); REQUIRE_AST("range(1,3) as string", "\"1..3\""); REQUIRE_AST("b\"89899\" as string", "\"89899\""); - REQUIRE_AST("\"A normal string\" as bytes", "b\"41206E6F726D616C20737472696E67\""); + REQUIRE_AST("\"A normal string\" as bytes", "b\"A normal string\""); /* Bool Cast */ REQUIRE_AST("123?", "true"); diff --git a/test/token.cpp b/test/token.cpp index 1f054044..04dc65e2 100644 --- a/test/token.cpp +++ b/test/token.cpp @@ -110,12 +110,18 @@ TEST_CASE("Tokenize strings", "[token.string]") { REQUIRE(asBytes("B''").bytes == ""); REQUIRE(asBytes("b'abc'").bytes == "abc"); REQUIRE(asBytes("b'\\'abc\\''").bytes == "'abc'"); + REQUIRE(asBytes("b'\\x41'").bytes == "A"); /* b"..." */ REQUIRE(asBytes("b\"\"").bytes == ""); REQUIRE(asBytes("B\"\"").bytes == ""); REQUIRE(asBytes("b\"abc\"").bytes == "abc"); REQUIRE(asBytes("b\"\\\"abc\\\"\"").bytes == "\"abc\""); + REQUIRE(asBytes("b\"\\x41\"").bytes == "A"); + REQUIRE(asBytes("b\"A\\x00\\xFF\\\"\\\\\"").bytes == std::string{"A\0\xFF\"\\", 5}); + REQUIRE(asError("b\"\\x\"").starts_with("Invalid hex escape sequence")); + REQUIRE(asError("b\"\\x0\"").starts_with("Invalid hex escape sequence")); + REQUIRE(asError("b\"\\xGG\"").starts_with("Invalid hex escape sequence")); /* Quote mismatch */ REQUIRE(asError("'abc") == "Quote mismatch at 4"); @@ -203,3 +209,11 @@ TEST_CASE("Token to string", "[token.to-string]") { REQUIRE_STR("?"); REQUIRE_STR("#"); REQUIRE_STR("typeof"); REQUIRE_STR("as"); REQUIRE_STR("..."); } + +TEST_CASE("Byte token roundtrip", "[token.bytes-roundtrip]") { + auto bytes = ByteArray{std::string{"A\0\xFF\"\\\n\t", 7}}; + Token token(Token::BYTES, bytes, 0, 0); + + auto roundTripped = asBytes(token.toString()); + REQUIRE(roundTripped == bytes); +} diff --git a/test/value.cpp b/test/value.cpp index 8746e1bd..c01640a3 100644 --- a/test/value.cpp +++ b/test/value.cpp @@ -4,6 +4,7 @@ #include "simfil/value.h" #include "simfil/model/model.h" +#include "simfil/token.h" #include "simfil/transient.h" using namespace simfil; @@ -319,7 +320,14 @@ TEST_CASE("Value toString() method", "[value.toString]") { REQUIRE(Value::make(int64_t(-123)).toString() == "-123"); REQUIRE(Value::make(double(3.14)).toString().find("3.14") == 0); REQUIRE(Value::make("Ponder"s).toString() == "Ponder"); - REQUIRE(Value::make(ByteArray{"A normal string"}).toString() == "b\"41206E6F726D616C20737472696E67\""); + REQUIRE(Value::make(ByteArray{"A normal string"}).toString() == "b\"A normal string\""); + + auto bytes = ByteArray{std::string{"A\0\xFF\"\\", 5}}; + auto repr = Value::make(bytes).toString(); + auto tokens = tokenize(repr); + REQUIRE(tokens); + REQUIRE(tokens->at(0).type == Token::BYTES); + REQUIRE(std::get(tokens->at(0).value) == bytes); } TEST_CASE("Value utility methods", "[value.utilities]") { From f59f6f7976cf396b8a6e225c87975cd0a5dfb9cb Mon Sep 17 00:00:00 2001 From: Wagram Airiian Date: Thu, 12 Feb 2026 16:05:51 +0100 Subject: [PATCH 10/13] Allow ByteArray conversion for strings, bool and int --- include/simfil/operator.h | 22 ++++++++++++++++------ test/operator.cpp | 13 +++++++++++++ test/simfil.cpp | 4 ++++ 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/include/simfil/operator.h b/include/simfil/operator.h index 5063d92f..98febb02 100644 --- a/include/simfil/operator.h +++ b/include/simfil/operator.h @@ -317,12 +317,28 @@ struct OperatorAsString struct OperatorAsBytes { NAME("bytes") + DENY_OTHER() auto operator()(const ByteArray& v) const -> ByteArray { return v; } + auto operator()(bool v) const -> ByteArray + { + return ByteArray{std::string(1, static_cast(v ? 1 : 0))}; + } + + auto operator()(int64_t v) const -> ByteArray + { + auto raw = static_cast(v); + auto bytes = std::string(8, '\0'); + for (size_t i = 0; i < bytes.size(); ++i) { + bytes[bytes.size() - i - 1] = static_cast((raw >> (i * 8)) & 0xFFu); + } + return ByteArray{std::move(bytes)}; + } + auto operator()(const std::string& v) const -> ByteArray { return ByteArray{v}; @@ -339,12 +355,6 @@ struct OperatorAsBytes return ByteArray{}; } - template - auto operator()(Type v) const -> ByteArray - { - return ByteArray{OperatorAsString()(v)}; - } - NULL_AS(ByteArray{"null"}); }; diff --git a/test/operator.cpp b/test/operator.cpp index 3ab2aab7..dfb5b7f1 100644 --- a/test/operator.cpp +++ b/test/operator.cpp @@ -140,6 +140,19 @@ TEST_CASE("Type conversion operators", "[operator.conversion]") { OperatorAsBytes op; REQUIRE(op("A normal string"s).bytes == "A normal string"); REQUIRE(op(ByteArray{"ff"}).bytes == "ff"); + REQUIRE(op(true).bytes == std::string(1, char(1))); + REQUIRE(op(false).bytes == std::string(1, char(0))); + + auto intBytes = op(int64_t(0xff)); + REQUIRE(intBytes.bytes.size() == 8); + REQUIRE((unsigned char)intBytes.bytes.back() == 0xff); + REQUIRE(intBytes.decodeBigEndianI64().value_or(0) == int64_t(0xff)); + + auto negBytes = op(int64_t(-1)); + REQUIRE(negBytes.bytes.size() == 8); + REQUIRE(negBytes.decodeBigEndianI64().value_or(0) == int64_t(-1)); + + REQUIRE_INVALID_OPERANDS(op(3.14)); } } diff --git a/test/simfil.cpp b/test/simfil.cpp index c1bdfcb8..66db4e09 100644 --- a/test/simfil.cpp +++ b/test/simfil.cpp @@ -168,6 +168,10 @@ TEST_CASE("OperatorConst", "[ast.operator]") { REQUIRE_AST("range(1,3) as string", "\"1..3\""); REQUIRE_AST("b\"89899\" as string", "\"89899\""); REQUIRE_AST("\"A normal string\" as bytes", "b\"A normal string\""); + REQUIRE_AST("0xff as bytes == 0xff", "true"); + REQUIRE_AST("true as bytes == 1", "true"); + REQUIRE_AST("false as bytes == 0", "true"); + REQUIRE_ERROR("1.5 as bytes"); /* Bool Cast */ REQUIRE_AST("123?", "true"); From 9f4b9a55c3533f2fe4855de6cefa89afd186652b Mon Sep 17 00:00:00 2001 From: Wagram Airiian Date: Thu, 12 Feb 2026 16:34:17 +0100 Subject: [PATCH 11/13] Fix duplication --- include/simfil/byte-array.h | 1 - src/token.cpp | 15 ++------------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/include/simfil/byte-array.h b/include/simfil/byte-array.h index e8747c97..9b804ac4 100644 --- a/include/simfil/byte-array.h +++ b/include/simfil/byte-array.h @@ -114,7 +114,6 @@ struct ByteArray return out; } -private: [[nodiscard]] static auto decodeHexNibble(char c) -> int { if ('0' <= c && c <= '9') diff --git a/src/token.cpp b/src/token.cpp index 6b8e9106..9ce29d0c 100644 --- a/src/token.cpp +++ b/src/token.cpp @@ -25,17 +25,6 @@ std::string downcase(std::string s) return s; } -auto decodeHexNibble(char c) -> int -{ - if ('0' <= c && c <= '9') - return c - '0'; - if ('a' <= c && c <= 'f') - return c - 'a' + 10; - if ('A' <= c && c <= 'F') - return c - 'A' + 10; - return -1; -} - } namespace simfil @@ -306,8 +295,8 @@ std::optional scanStringLiteral(Scanner& s) text.push_back('\\'); } else { if (bytes && (s.at(0) == 'x' || s.at(0) == 'X')) { - const auto upper = decodeHexNibble(s.at(1)); - const auto lower = decodeHexNibble(s.at(2)); + const auto upper = ByteArray::decodeHexNibble(s.at(1)); + const auto lower = ByteArray::decodeHexNibble(s.at(2)); if (upper < 0 || lower < 0) { s.fail("Invalid hex escape sequence"); return {}; From 85304dca82feccf4dc12aa8daedc39697a461d1f Mon Sep 17 00:00:00 2001 From: Wagram Airiian Date: Thu, 12 Feb 2026 17:51:35 +0100 Subject: [PATCH 12/13] Fix tests --- src/token.cpp | 2 ++ test/complex.cpp | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/token.cpp b/src/token.cpp index 9ce29d0c..feb70e70 100644 --- a/src/token.cpp +++ b/src/token.cpp @@ -500,6 +500,8 @@ auto tokenize(std::string_view expr) -> expected, Error> else if (auto t = scanSyntax(s)) tokens.push_back(std::move(*t)); else { + if (s.hasError()) + return unexpected(std::move(s.error())); if (s.at(0) != '\0') return unexpected(s.fail("Invalid input")); } diff --git a/test/complex.cpp b/test/complex.cpp index 3d1d23ef..687f29de 100644 --- a/test/complex.cpp +++ b/test/complex.cpp @@ -114,7 +114,9 @@ TEST_CASE("Tagged bytes JSON", "[bytes.serialization]") { auto roundTrip = json::parse(model->toJson().dump()); REQUIRE(roundTrip); - REQUIRE(roundTrip.value()->toJson() == expected); + auto roundTripRoot = roundTrip.value()->root(0); + REQUIRE(roundTripRoot); + REQUIRE(roundTripRoot.value()->toJson() == expected[0]); auto invalidHex = json::parse(R"([{"raw":{"_bytes":true,"hex":"abc"}}])"); REQUIRE_FALSE(invalidHex); From eb2579931e5ee2736501a7d2994bf23159e2208d Mon Sep 17 00:00:00 2001 From: Wagram Airiian Date: Thu, 12 Feb 2026 19:00:12 +0100 Subject: [PATCH 13/13] Fix tests --- src/token.cpp | 2 +- test/complex.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/token.cpp b/src/token.cpp index feb70e70..d6ad1f12 100644 --- a/src/token.cpp +++ b/src/token.cpp @@ -500,7 +500,7 @@ auto tokenize(std::string_view expr) -> expected, Error> else if (auto t = scanSyntax(s)) tokens.push_back(std::move(*t)); else { - if (s.hasError()) + if (s.hasError() && s.error().message.rfind("Invalid hex escape sequence", 0) == 0) return unexpected(std::move(s.error())); if (s.at(0) != '\0') return unexpected(s.fail("Invalid input")); diff --git a/test/complex.cpp b/test/complex.cpp index 687f29de..5901df96 100644 --- a/test/complex.cpp +++ b/test/complex.cpp @@ -116,7 +116,7 @@ TEST_CASE("Tagged bytes JSON", "[bytes.serialization]") { REQUIRE(roundTrip); auto roundTripRoot = roundTrip.value()->root(0); REQUIRE(roundTripRoot); - REQUIRE(roundTripRoot.value()->toJson() == expected[0]); + REQUIRE(roundTripRoot.value()->toJson() == expected); auto invalidHex = json::parse(R"([{"raw":{"_bytes":true,"hex":"abc"}}])"); REQUIRE_FALSE(invalidHex);