From 0a43d92472ec20dd6d4dc892cc33fc7b7ed5b074 Mon Sep 17 00:00:00 2001 From: Dima Korolev Date: Fri, 17 Oct 2025 21:32:28 -0700 Subject: [PATCH] Rust schema generation. --- typesystem/evolution/type_evolution.h | 2 +- typesystem/primitive_types.dsl.h | 32 ++-- typesystem/reflection/reflection.h | 8 +- typesystem/reflection/types.h | 2 +- typesystem/schema/schema.h | 170 ++++++++++++++++++++- typesystem/schema/test.cc | 4 +- typesystem/serialization/json/primitives.h | 2 +- typesystem/typename.h | 2 +- 8 files changed, 194 insertions(+), 28 deletions(-) diff --git a/typesystem/evolution/type_evolution.h b/typesystem/evolution/type_evolution.h index a7e944ee..18ca87aa 100644 --- a/typesystem/evolution/type_evolution.h +++ b/typesystem/evolution/type_evolution.h @@ -60,7 +60,7 @@ template \ struct Evolve { \ template \ diff --git a/typesystem/primitive_types.dsl.h b/typesystem/primitive_types.dsl.h index ffc73ac5..f746098b 100644 --- a/typesystem/primitive_types.dsl.h +++ b/typesystem/primitive_types.dsl.h @@ -26,7 +26,7 @@ SOFTWARE. // This file is used for mass registering of primitives type handlers in reflection and serialization routines. // Typical usecase: -// #define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, fs_type, md_type, typescript_type) ... +// #define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, crnt_type, rust_type, fs_type, md_type, ts_type) ... // #include "primitive_types.dsl.h" // #undef CURRENT_DECLARE_PRIMITIVE_TYPE @@ -34,28 +34,28 @@ SOFTWARE. 
// clang-format off -CURRENT_DECLARE_PRIMITIVE_TYPE(11, bool, Bool, "bool", "`true` or `false`", "boolean") +CURRENT_DECLARE_PRIMITIVE_TYPE(11, bool, Bool, bool, "bool", "`true` or `false`", "boolean") -CURRENT_DECLARE_PRIMITIVE_TYPE(21, uint8_t, UInt8, "byte", "Integer (8-bit unsigned)", "number") -CURRENT_DECLARE_PRIMITIVE_TYPE(22, uint16_t, UInt16, "uint16", "Integer (16-bit unsigned)", "number") -CURRENT_DECLARE_PRIMITIVE_TYPE(23, uint32_t, UInt32, "uint32", "Integer (32-bit unsigned)", "number") -CURRENT_DECLARE_PRIMITIVE_TYPE(24, uint64_t, UInt64, "uint64", "Integer (64-bit unsigned)", "number") +CURRENT_DECLARE_PRIMITIVE_TYPE(21, uint8_t, UInt8, u8, "byte", "Integer (8-bit unsigned)", "number") +CURRENT_DECLARE_PRIMITIVE_TYPE(22, uint16_t, UInt16, u16, "uint16", "Integer (16-bit unsigned)", "number") +CURRENT_DECLARE_PRIMITIVE_TYPE(23, uint32_t, UInt32, u32, "uint32", "Integer (32-bit unsigned)", "number") +CURRENT_DECLARE_PRIMITIVE_TYPE(24, uint64_t, UInt64, u64, "uint64", "Integer (64-bit unsigned)", "number") -CURRENT_DECLARE_PRIMITIVE_TYPE(31, int8_t, Int8, "sbyte", "Integer (8-bit signed)", "number") -CURRENT_DECLARE_PRIMITIVE_TYPE(32, int16_t, Int16, "int16", "Integer (16-bit signed)", "number") -CURRENT_DECLARE_PRIMITIVE_TYPE(33, int32_t, Int32, "int32", "Integer (32-bit signed)", "number") -CURRENT_DECLARE_PRIMITIVE_TYPE(34, int64_t, Int64, "int64", "Integer (64-bit signed)", "number") +CURRENT_DECLARE_PRIMITIVE_TYPE(31, int8_t, Int8, i8, "sbyte", "Integer (8-bit signed)", "number") +CURRENT_DECLARE_PRIMITIVE_TYPE(32, int16_t, Int16, i16, "int16", "Integer (16-bit signed)", "number") +CURRENT_DECLARE_PRIMITIVE_TYPE(33, int32_t, Int32, i32,"int32", "Integer (32-bit signed)", "number") +CURRENT_DECLARE_PRIMITIVE_TYPE(34, int64_t, Int64, i64, "int64", "Integer (64-bit signed)", "number") -CURRENT_DECLARE_PRIMITIVE_TYPE(41, char, Char, "char", "Character", "number") // NOTE(dkorolev): Although F# chars are Unicode. 
-CURRENT_DECLARE_PRIMITIVE_TYPE(42, std::string, String, "string", "String", "string") +CURRENT_DECLARE_PRIMITIVE_TYPE(41, char, Char, char, "char", "Character", "number") // NOTE(dkorolev): Although F# chars are Unicode. +CURRENT_DECLARE_PRIMITIVE_TYPE(42, std::string, String, String, "string", "String", "string") -CURRENT_DECLARE_PRIMITIVE_TYPE(51, float, Float, "float", "Number (floating point, single precision)", "number") -CURRENT_DECLARE_PRIMITIVE_TYPE(52, double, Double, "double", "Number (floating point, double precision)", "number") +CURRENT_DECLARE_PRIMITIVE_TYPE(51, float, Float, f32, "float", "Number (floating point, single precision)", "number") +CURRENT_DECLARE_PRIMITIVE_TYPE(52, double, Double, f64, "double", "Number (floating point, double precision)", "number") CURRENT_DECLARE_PRIMITIVE_TYPE( - 61, std::chrono::microseconds, Microseconds, "int64 // microseconds.", "Time (microseconds since epoch)", "number") + 61, std::chrono::microseconds, Microseconds, i64, "int64 // microseconds.", "Time (microseconds since epoch)", "number") CURRENT_DECLARE_PRIMITIVE_TYPE( - 62, std::chrono::milliseconds, Milliseconds, "int64 // milliseconds.", "Time (milliseconds since epoch)", "number") + 62, std::chrono::milliseconds, Milliseconds, i64, "int64 // milliseconds.", "Time (milliseconds since epoch)", "number") // clang-format on diff --git a/typesystem/reflection/reflection.h b/typesystem/reflection/reflection.h index 4ba4793d..b03dfbd3 100644 --- a/typesystem/reflection/reflection.h +++ b/typesystem/reflection/reflection.h @@ -132,7 +132,7 @@ struct RecursiveTypeTraverser { fields_list_t& fields_; }; -#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, fs_type, md_type, typescript_type) \ +#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, rs_type, fs_type, md_type, typescript_type) \ TypeID operator()(TypeSelector) { return TypeID::current_type; } #include "../primitive_types.dsl.h" #undef 
CURRENT_DECLARE_PRIMITIVE_TYPE @@ -371,9 +371,9 @@ struct ReflectorImpl { size_t KnownTypesCountForUnitTest() const { return map_.size(); } -#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, fs_type, md_type, typescript_type) \ - ReflectedType operator()(TypeSelector) { \ - return ReflectedType(ReflectedType_Primitive(TypeID::current_type)); \ +#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, rs_type, fs_type, md_type, ts_type) \ + ReflectedType operator()(TypeSelector) { \ + return ReflectedType(ReflectedType_Primitive(TypeID::current_type)); \ } #include "../primitive_types.dsl.h" #undef CURRENT_DECLARE_PRIMITIVE_TYPE diff --git a/typesystem/reflection/types.h b/typesystem/reflection/types.h index c7bdcdc9..dd1b2ccb 100644 --- a/typesystem/reflection/types.h +++ b/typesystem/reflection/types.h @@ -90,7 +90,7 @@ constexpr uint64_t TYPEID_CYCLIC_DEPENDENCY_TYPE = TYPEID_TYPE_RANGE * TYPEID_CY // clang-format off CURRENT_ENUM(TypeID, uint64_t) { -#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, fs_type, md_type, typescript_type) \ +#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, rs_type, fs_type, md_type, ts_type) \ current_type = TYPEID_BASIC_TYPE + typeid_index, #include "../primitive_types.dsl.h" #undef CURRENT_DECLARE_PRIMITIVE_TYPE diff --git a/typesystem/schema/schema.h b/typesystem/schema/schema.h index e0402d39..981c5d3e 100644 --- a/typesystem/schema/schema.h +++ b/typesystem/schema/schema.h @@ -66,15 +66,17 @@ CURRENT_STRUCT(NamespaceToExpose) { // TODO(dkorolev): Refactor `PrimitiveTypesList` to avoid copy-pasting of `operator()(const *_Primitive& p)`. 
struct PrimitiveTypesListImpl final { std::map cpp_name; + std::map rust_name; std::map fsharp_name; std::map markdown_name; std::map typescript_name; PrimitiveTypesListImpl() { -#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, fs_type, md_type, typescript_type) \ +#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, rstype, fs_type, md_type, ts_type) \ cpp_name[static_cast(TYPEID_BASIC_TYPE + typeid_index)] = #cpp_type; \ + rust_name[static_cast(TYPEID_BASIC_TYPE + typeid_index)] = #rstype; \ fsharp_name[static_cast(TYPEID_BASIC_TYPE + typeid_index)] = fs_type; \ markdown_name[static_cast(TYPEID_BASIC_TYPE + typeid_index)] = md_type; \ - typescript_name[static_cast(TYPEID_BASIC_TYPE + typeid_index)] = typescript_type; + typescript_name[static_cast(TYPEID_BASIC_TYPE + typeid_index)] = ts_type; #include "../primitive_types.dsl.h" #undef CURRENT_DECLARE_PRIMITIVE_TYPE } @@ -95,6 +97,7 @@ enum class Language : int { Current, // C++, `CURRENT_STRUCT`-s. CPP, // C++, native `struct`-s. FSharp, // F#. + Rust, // Rust. Markdown, // [GitHub] Markdown. JSON, // A compact JSON we use to describe schema to third parties. TypeScript, // TypeScript. @@ -853,6 +856,167 @@ struct LanguageSyntaxImpl final { // LCOV_EXCL_STOP }; +template <> +struct LanguageSyntaxImpl final { + static std::string Header(const std::string&) { + return "#![allow(unused_imports)]\n" + "use serde::{Deserialize, Serialize};\n" + "use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};\n"; + } + + static std::string Footer(const std::string&) { return ""; } + + static std::string SanitizeRustSymbol(const std::string& unsanitized_name) { + // TODO(dkorolev): Definitely not a complete list. + static std::set fsharp_reserved_symbols{"type","pub","in"}; + return fsharp_reserved_symbols.count(unsanitized_name) ? 
"r#" + unsanitized_name : unsanitized_name; + } + + struct FullSchemaPrinter final { + const std::map& types_; + std::ostream& os_; + mutable std::unordered_set> + empty_structs_; // To not print the type of a DU case for empty structs. + + std::string TypeName(TypeID type_id) const { + const auto cit = types_.find(type_id); + if (cit == types_.end()) { + return "UNKNOWN_TYPE_" + current::ToString(type_id); // LCOV_EXCL_LINE + } else { + struct RustTypeNamePrinter final { + const FullSchemaPrinter& self_; + std::ostringstream& oss_; + + RustTypeNamePrinter(const FullSchemaPrinter& self, std::ostringstream& oss) : self_(self), oss_(oss) {} + + // `operator()(...)`-s of this block print the Rust type name only, without the expansion. + // They assume the declaration order is respected, and any dependencies have already been listed. + void operator()(const ReflectedType_Primitive& p) const { + const auto& globals = PrimitiveTypesList(); + if (globals.rust_name.count(p.type_id)) { + oss_ << globals.rust_name.at(p.type_id); + } else { + oss_ << "UNKNOWN_BASIC_TYPE_" + current::ToString(p.type_id); // LCOV_EXCL_LINE + } + } + void operator()(const ReflectedType_Enum& e) const { oss_ << SanitizeRustSymbol(e.name); } + void operator()(const ReflectedType_Array& a) const { + oss_ << "Vec<" << SanitizeRustSymbol(self_.TypeName(a.element_type)) << '>'; + } + void operator()(const ReflectedType_Vector& v) const { + oss_ << "Vec<" << SanitizeRustSymbol(self_.TypeName(v.element_type)) << '>'; + } + void operator()(const ReflectedType_Map& m) const { + // The map is emitted as Rust's `BTreeMap`, which keeps its keys sorted.
+ oss_ << "BTreeMap<" << SanitizeRustSymbol(self_.TypeName(m.key_type)) << ", " + << self_.TypeName(m.value_type) << '>'; + } + void operator()(const ReflectedType_UnorderedMap& m) const { + oss_ << "HashMap<" << SanitizeRustSymbol(self_.TypeName(m.key_type)) << ", " + << self_.TypeName(m.value_type) << '>'; + } + void operator()(const ReflectedType_Set& s) const { + // TODO(dkorolev): Wrong! + oss_ << "BTreeSet<" << self_.TypeName(s.value_type) << '>'; + } + void operator()(const ReflectedType_UnorderedSet& s) const { + oss_ << "HashSet<" << self_.TypeName(s.value_type) << '>'; + } + void operator()(const ReflectedType_Pair& p) const { + oss_ << '(' << SanitizeRustSymbol(self_.TypeName(p.first_type)) << ", " + << SanitizeRustSymbol(self_.TypeName(p.second_type)) << ')'; + } + void operator()(const ReflectedType_Optional& o) const { + oss_ << "Option<" << SanitizeRustSymbol(self_.TypeName(o.optional_type)) << '>'; + } + void operator()(const ReflectedType_Variant& v) const { oss_ << SanitizeRustSymbol(v.name); } + void operator()(const ReflectedType_Struct& s) const { + oss_ << SanitizeRustSymbol(s.TemplateInnerTypeExpandedName()); + } + }; + + std::ostringstream oss; + cit->second.Call(RustTypeNamePrinter(*this, oss)); + return oss.str(); + } + } + + FullSchemaPrinter(const std::map& types, + std::ostream& os, + const std::string&, + const Optional&) + : types_(types), os_(os) {} + + // `operator()`-s of this block print complete declarations of Rust types. + // The types that get complete declarations in Rust are structs (`pub struct`) and variants (`pub enum`).
+ void operator()(const ReflectedType_Primitive&) const {} + void operator()(const ReflectedType_Enum& e) const { + os_ << "\nTODO(dkorolev): type " << SanitizeRustSymbol(e.name) << " = " << TypeName(e.underlying_type) << '\n'; + } + void operator()(const ReflectedType_Array&) const {} + void operator()(const ReflectedType_Vector&) const {} + void operator()(const ReflectedType_Pair&) const {} + void operator()(const ReflectedType_Map&) const {} + void operator()(const ReflectedType_UnorderedMap&) const {} + void operator()(const ReflectedType_Set&) const {} + void operator()(const ReflectedType_UnorderedSet&) const {} + void operator()(const ReflectedType_Optional&) const {} + void operator()(const ReflectedType_Variant& v) const { + os_ << "\n" + << "#[derive(Debug, Serialize, Deserialize)]\n" + << "pub enum " << v.name << " {\n"; + for (TypeID c : v.cases) { + const auto name = TypeName(c); + const auto& t = types_.at(c); + CURRENT_ASSERT(Exists(t) || Exists(t)); // Must be one of. + if (!empty_structs_.count(Value(t).type_id)) { + os_ << " " << name << '(' << name << "),\n"; + } + } + os_ << "}\n"; + } + + // When dumping a `CURRENT_STRUCT` as a Rust struct, since Rust structs have no inheritance, + // all base class fields are hoisted to the top of the struct.
+ void RecursivelyListStructFieldsForRust(std::ostringstream& os, const ReflectedType_Struct& s) const { + if (Exists(s.super_id)) { + RecursivelyListStructFieldsForRust(os, Value(types_.at(Value(s.super_id)))); + } + for (const auto& f : s.fields) { + if (Exists(f.description)) { + AppendAsMultilineCommentIndentedTwoSpaces(os, Value(f.description)); + } + const auto& t = types_.at(f.type_id); + if (Exists(t) || Exists(t)) { + os << " pub " << SanitizeRustSymbol(f.name) << ": Box<" << TypeName(f.type_id) << ">,\n"; + } else { + os << " pub " << SanitizeRustSymbol(f.name) << ": " << TypeName(f.type_id) << ",\n"; + } + } + } + void operator()(const ReflectedType_Struct& s) const { + std::ostringstream os; + RecursivelyListStructFieldsForRust(os, s); + const std::string fields = os.str(); + if (!fields.empty()) { + os_ << "\n" + << "#[derive(Debug, Serialize, Deserialize)]\n" + << "pub struct " << s.TemplateInnerTypeExpandedName() << " {\n" + << fields + << "}\n"; + } else { + empty_structs_.insert(s.type_id); + } + } + }; // struct LanguageSyntax::FullSchemaPrinter + + // LCOV_EXCL_START + static std::string ErrorMessageWithTypeId(TypeID type_id, FullSchemaPrinter&) { + return "#error \"Unknown struct with `type_id` = " + current::ToString(type_id) + "\"\n"; + } + // LCOV_EXCL_STOP +}; + template <> struct LanguageSyntaxImpl final { static std::string Header(const std::string&) { return "# Data Dictionary\n"; } @@ -1641,6 +1805,8 @@ struct ToStringImpl final { return "cpp"; case reflection::Language::FSharp: return "fs"; + case reflection::Language::Rust: + return "rs"; case reflection::Language::Markdown: return "md"; case reflection::Language::JSON: diff --git a/typesystem/schema/test.cc b/typesystem/schema/test.cc index 06850192..07e19f77 100644 --- a/typesystem/schema/test.cc +++ b/typesystem/schema/test.cc @@ -425,7 +425,7 @@ TEST(Schema, LanguageEnumIteration) { for (auto l = Language::begin; l != Language::end; ++l) { s.push_back(current::ToString(l)); } - 
EXPECT_EQ("internal_json h cpp fs md json ts", current::strings::Join(s, ' ')); + EXPECT_EQ("internal_json h cpp fs rs md json ts", current::strings::Join(s, ' ')); } namespace schema_test { @@ -442,7 +442,7 @@ TEST(Schema, LanguageEnumCompileTimeForEach) { auto it = schema_test::LanguagesIterator(); EXPECT_EQ("", current::strings::Join(it.s, ' ')); current::reflection::ForEachLanguage(it); - EXPECT_EQ("internal_json h cpp fs md json ts", current::strings::Join(it.s, ' ')); + EXPECT_EQ("internal_json h cpp fs rs md json ts", current::strings::Join(it.s, ' ')); } #define SMOKE_TEST_TEMPLATES_NAMESPACE smoke_test_templates_namespace_native diff --git a/typesystem/serialization/json/primitives.h b/typesystem/serialization/json/primitives.h index 8e760c53..934e85a8 100644 --- a/typesystem/serialization/json/primitives.h +++ b/typesystem/serialization/json/primitives.h @@ -58,7 +58,7 @@ struct JSONValueAssignerImpl { }; } // namespace json -#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, fs_type, md_type, typescript_type) \ +#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, currnt_type, rs_type, fs_type, md_type, ts_type) \ template \ struct SerializeImpl, cpp_type> { \ static void DoSerialize(json::JSONStringifier& json_stringifier, copy_free value) { \ diff --git a/typesystem/typename.h b/typesystem/typename.h index 0b6178b6..8b0d2738 100644 --- a/typesystem/typename.h +++ b/typesystem/typename.h @@ -155,7 +155,7 @@ struct CurrentTypeNameImpl { static std::string GetCurrentTypeName() { return reflection::EnumName(); } }; -#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, fs_type, md_type, typescript_type) \ +#define CURRENT_DECLARE_PRIMITIVE_TYPE(typeid_index, cpp_type, current_type, rstype, fs_type, md_type, ts_type) \ template \ struct CurrentTypeNameImpl { \ static const char* GetCurrentTypeName() { return #cpp_type; } \