From e7145bc3edc3ff9e257117046c0614df2bdcba14 Mon Sep 17 00:00:00 2001 From: Alex Pyattaev Date: Thu, 29 May 2025 18:01:43 +0300 Subject: [PATCH 1/2] spec for TLV Extensions --- gossip/gossip-protocol-spec.md | 59 ++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/gossip/gossip-protocol-spec.md b/gossip/gossip-protocol-spec.md index 4d3a87d..b9e2494 100644 --- a/gossip/gossip-protocol-spec.md +++ b/gossip/gossip-protocol-spec.md @@ -28,7 +28,7 @@ Each message contains data specific to its type, such as shared values, filters, ## Message format -Each message is sent in a binary form with a maximum size of 1232 bytes (1280 is a minimum `IPv6 TPU`, 40 bytes is the size of `IPv6` header and 8 bytes is the size of the fragment header). +Each message is sent in a binary form with a maximum size of 1232 bytes (1280 bytes is the minimum `IPv6` MTU, 40 bytes is the size of the `IPv6` header and 8 bytes is the size of the fragment header). Data sent in each message is serialized from a `Protocol` type, which can be one of: @@ -83,14 +83,14 @@ Fields described in the tables below have their types specified using Rust notat * `u32` - 32-bit unsigned integer, and so on... 
* `[u8]` - dynamic size array of 1-byte elements * `[u8; 32]` - fixed size array of 32 elements, with each element being 1 byte -* `[[u8; 64]]` - a two-dimensional array containing arrays of 64 1-byte elements +* `[[u8; 64]]` - a two-dimensional array containing arrays of 64 1-byte elements * `b[u8]` - a bit vector containing 1-byte elements * `u32 | None` - an option type, meaning an element is either `u32` (in this case) or `None` * `(u32, [u8, 16])` - a tuple that contains two elements - one is a 32-bit integer, the second one is a 16-element array of bytes -* `MyStruct` - a complex type (either defined as a struct or a Rust enum), consisting of +* `MyStruct` - a complex type (either defined as a struct or a Rust enum), consisting of many elements of different basic types -The **Size** column in the tables below contains the size of data in bytes. The size of dynamic arrays contains an additional _plus_ (`+`) sign, e.g. `32+`, which means the array has at least 32 bytes. Empty dynamic arrays always have 8 bytes which is the size of the array header containing array length. +The **Size** column in the tables below contains the size of data in bytes. The size of dynamic arrays contains an additional _plus_ (`+`) sign, e.g. `32+`, which means the array has at least 32 bytes. Empty dynamic arrays always have 8 bytes which is the size of the array header containing array length. In case the size of a particular complex data is unknown it is marked with `?`. The limit, however, is always 1232 bytes for the whole data packet (payload within the UDP packet). #### Data serialization @@ -124,7 +124,7 @@ struct SomeType { y: u16, } ``` -In the first case, the serialized object of the `CompressionType` enum will only contain a 4-byte header with the discriminant value set to the selected variant (`0 = GZip`, `1 = Bzip2`). 
In the latter case, apart from the header, the serialized data will contain additional bytes according to which variant was selected: +In the first case, the serialized object of the `CompressionType` enum will only contain a 4-byte header with the discriminant value set to the selected variant (`0 = GZip`, `1 = Bzip2`). In the latter case, apart from the header, the serialized data will contain additional bytes according to which variant was selected: * `Variant1`: 8 bytes * `Variant2`: 6 bytes (the sum of `x` and `y` fields of `SomeType` struct) @@ -414,7 +414,7 @@ enum CrdsData { #### LegacyContactInfo (Deprecated) -Basic info about the node. Nodes send this message to introduce themselves to the cluster and provide all addresses and ports that their peers can use to communicate with them. +Basic info about the node. Nodes send this message to introduce themselves to the cluster and provide all addresses and ports that their peers can use to communicate with them. | Data | Type | Size | Description | |------|:----:|:----:|-------------| @@ -502,7 +502,7 @@ A validator's vote on a fork. 
Contains a one-byte index from the vote tower (ran | Data | Type | Size | Description | |------|:----:|:----:|-------------| -| `index` | `u8` | 1 | vote tower index | +| `index` | `u8` | 1 | vote tower index | | `from` | `[u8; 32]` | 32 | public key of the origin | | `transaction` | [`Transaction`](#transaction) | 59+ | a vote transaction, an atomically-committed sequence of instructions | | `wallclock` | `u64` | 8 | wallclock of the node that generated the message | @@ -597,7 +597,7 @@ The first available slot in the Solana [blockstore][blockstore] that contains an | Data | Type | Size | Description | |------|:----:|:----:|-------------| -| `index` | `u8` | 1 | _**The only valid value is `0u8`** since this is now a deprecated field_ | +| `index` | `u8` | 1 | _**The only valid value is `0u8`** since this is now a deprecated field_ | | `from` | `[u8; 32]`| 32 | public key of the origin | | `root` | `u64` | 8 | _deprecated_ | | `lowest` | `u64` | 8 | the lowest slot | @@ -609,18 +609,18 @@ The first available slot in the Solana [blockstore][blockstore] that contains an | Data | Type | Size | Description | |------|:----:|:----:|-------------| -| `first` | `u64` | 8 | first slot number | -| `compression` | [`CompressionType`](#compressiontype) | 4 | compression type | -| `compressed_list` | `[u8]` | 8+ | compressed slots list | +| `first` | `u64` | 8 | first slot number | +| `compression` | [`CompressionType`](#compressiontype) | 4 | compression type | +| `compressed_list` | `[u8]` | 8+ | compressed slots list | ##### CompressionType Compression type enum. | Enum ID | Data | Description | |:-------:|------|-------------| -| 0 | `Uncompressed` | uncompressed | -| 1 | `GZip` | gzip | -| 2 | `BZip2`| bzip2 | +| 0 | `Uncompressed` | uncompressed | +| 1 | `GZip` | gzip | +| 2 | `BZip2`| bzip2 |
Solana client Rust implementation @@ -688,7 +688,7 @@ Contains a one-byte index and list of all slots from an epoch (epoch consists of | Data | Type | Size | Description | |------|:----:|:----:|-------------| -| `index` | `u8` | 1 | index | +| `index` | `u8` | 1 | index | | `from` | `[u8, 32]` | 32 | public key of the origin | | `slots` | [`[CompressedSlots]`](#compressedslots) | 8+ | list of slots | | `wallclock` | `u64` | 8 | wallclock of the node that generated the message | @@ -944,7 +944,7 @@ Basic info about the node. Nodes send this message to introduce themselves to th | `version` | [`Version`](#version-1) | 13+ | Solana client version | | `addrs` | [`[IpAddr]`](#ipaddr) | 8+ | list of unique IP addresses | | `sockets` | [`[SocketEntry]`](#socketentry) | 8+ | list of unique sockets | -| `extensions` | [`[Extension]`](#extension) | 8+ | future additions to `ContactInfo` will be added to `Extensions` instead of modifying `ContactInfo`, currently unused | +| `extensions` | [`[TlvRecord]`](#extension) | 8+ | allows adding fields in `ContactInfo` that are not supported by all validators in the cluster. Planned use for multihoming and multicast support. | ##### Version | Data | Type | Size | Description | @@ -995,8 +995,23 @@ The list of `key` identifiers is shown in the table below: | `tvu_quic` | 11 | `tvu` over QUIC | | `tpu_vote_quic` | 12 | `tpu_vote` over QUIC | -##### Extension -_Currently empty (unused)_ +##### TlvRecord + +| Data | Type | Size | Description | +|------|:----:|:----:|-------------| +| type |`u8` | 1 | Type of the TLV | +| length |`u8` | 1 | Length of the data in bytes | +| value | `[u8]` | `length` | Actual contents of the TLV record| + + +The following TLV types are currently reserved: + +| Type | Name | Description | +|:----:|:----:|-------------| +| 1 | Multihoming | Multihoming support configuration | +| 2 | Multicast | Multicast support configuration | + +Currently, the clients do not have to support parsing any of the TLV records.
Solana client Rust implementation @@ -1010,10 +1025,14 @@ struct ContactInfo { version: Version, addrs: Vec<IpAddr>, sockets: Vec<SocketEntry>, - extensions: Vec<Extension>, + extensions: Vec<TlvRecord>, } -enum Extension {} +struct TlvRecord { + typ: u8, // type + #[serde(with = "short_vec")] + bytes: Vec<u8>, // length and value +} enum IpAddr { V4(Ipv4Addr), From bbd69eb39bf191b025f448876b5113cd8f8071e5 Mon Sep 17 00:00:00 2001 From: Alex Pyattaev Date: Thu, 11 Sep 2025 15:58:13 +0000 Subject: [PATCH 2/2] add notes about ShortVec where appropriate --- gossip/gossip-protocol-spec.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/gossip/gossip-protocol-spec.md b/gossip/gossip-protocol-spec.md index b9e2494..c1dc469 100644 --- a/gossip/gossip-protocol-spec.md +++ b/gossip/gossip-protocol-spec.md @@ -97,7 +97,7 @@ In case the size of a particular complex data is unknown it is marked with `?`. In the Rust implementation of the Solana node, the data is serialized into a binary form using the [`bincode` crate][bincode] as follows: * basic types, e.g. `u8`, `u16`, `u64`, etc. - are serialized as they are present in the memory, e.g. `u8` type is serialized as 1 byte, `u16` as 2 bytes, and so on, * array elements are serialized as above, e.g. `[u8; 32]` array is serialized as 32 bytes, `[u16; 32]` will be serialized as 32 16-bit elements which are equal to 64 bytes, -* dynamically sized arrays always include an 8-byte header that specifies the array length, followed by the data bytes. Therefore, an empty array occupies 8 bytes, +* dynamically sized arrays always include a header that specifies the array length, followed by the data bytes. The header is 1 byte for arrays of type `ShortVec` (these cannot hold more than 127 elements); arrays of type `Vec` have an 8-byte header, 
* bit vectors are serialized similar to dynamic arrays - their header contains 1-byte which tells whether there is any data in the vector, followed by an 8-byte array length and the data, * [enum types](#enum-types) contain a header with a 4-byte discriminant (tells which enum variant is selected) + additional data, * option types are serialized using a 1-byte discriminant followed by the bytes of data. If a value is `None` discriminant is set to 0 and the data part is empty, otherwise it is set to 1 with data serialized according to its type, @@ -266,7 +266,7 @@ enum Protocol { struct PruneData { pubkey: Pubkey, - prunes: Vec, signature: Signature, destination: Pubkey, wallclock: u64, @@ -944,7 +944,7 @@ Basic info about the node. Nodes send this message to introduce themselves to th | `version` | [`Version`](#version-1) | 13+ | Solana client version | | `addrs` | [`[IpAddr]`](#ipaddr) | 8+ | list of unique IP addresses | | `sockets` | [`[SocketEntry]`](#socketentry) | 8+ | list of unique sockets | -| `extensions` | [`[TlvRecord]`](#extension) | 8+ | allows adding fields in `ContactInfo` that are not supported by all validators in the cluster. Planned use for multihoming and multicast support. | +| `extensions` | [`[TlvRecord]`](#tlvrecord) | 1+ | allows adding fields in `ContactInfo` that are not supported by all validators in the cluster. Planned use for multihoming and multicast support. | ##### Version | Data | Type | Size | Description | @@ -1000,9 +1000,11 @@ The list of `key` identifiers is shown in the table below: | Data | Type | Size | Description | |------|:----:|:----:|-------------| | type |`u8` | 1 | Type of the TLV | -| length |`u8` | 1 | Length of the data in bytes | +| length | `ShortU16` | 1 | Length of the data in bytes | | value | `[u8]` | `length` | Actual contents of the TLV record| +The maximum size of a TLV record in gossip is restricted to 127 bytes. 
+Because of this, the `ShortU16` length always fits in a single byte in gossip, making it binary-compatible with `u8`. The following TLV types are currently reserved: @@ -1023,15 +1025,14 @@ struct ContactInfo { outset: u64, shred_version: u16, version: Version, - addrs: Vec<IpAddr>, - sockets: Vec<SocketEntry>, - extensions: Vec<TlvRecord>, + addrs: ShortVec<IpAddr>, + sockets: ShortVec<SocketEntry>, + extensions: ShortVec<TlvRecord>, } struct TlvRecord { typ: u8, // type - #[serde(with = "short_vec")] - bytes: Vec<u8>, // length and value + bytes: ShortVec<u8>, // length and value } enum IpAddr {