diff --git a/.claude/skills/lading-optimize-hunt/README.md b/.claude/skills/lading-optimize-hunt/README.md
index 154a1e983..2332a7445 100644
--- a/.claude/skills/lading-optimize-hunt/README.md
+++ b/.claude/skills/lading-optimize-hunt/README.md
@@ -17,7 +17,7 @@ This document explains the reasoning behind each optimization pattern in the lad
 
 ## Preallocation Patterns
 
-### `Vec::with_capacity(n)` / `String::with_capacity(n)` / `HashMap::with_capacity(n)`
+### `Vec::with_capacity(n)` / `String::with_capacity(n)` / `FxHashMap::with_capacity(n)`
 
 **Pattern**: `Vec::new()` + repeated push operations
 
@@ -53,46 +53,6 @@ for i in 0..1000 {
 
 ---
 
-## Hash Function Optimization
-
-### `FxHashMap` from rustc-hash
-
-**Pattern**: Using default `HashMap` with integer, pointer, or short string keys
-
-**Why it matters**: The default HashMap uses SipHash, a cryptographically secure hash function designed to prevent DoS attacks. For internal data structures where DoS isn't a concern, faster non-cryptographic hashes significantly improve performance.
-
-**How it works**:
-- SipHash: Secure but slow (~13 CPU cycles per byte)
-- FxHash: Fast but not DoS-resistant (~0.5 CPU cycles per byte)
-- FxHash works on 8 bytes at a time, making it much faster for common key types
-
-**Performance impact**:
-- rustc benchmarks showed 4-84% slowdowns when switching FROM FxHash back to default HashMap
-- Consistently outperforms FNV hash in rustc itself
-- Best for: integer keys, pointer keys, small fixed-size keys
-
-**Example**:
-```rust
-// Slow for internal data structures
-use std::collections::HashMap;
-let mut map: HashMap = HashMap::new();
-
-// Fast for internal data structures
-use rustc_hash::FxHashMap;
-let mut map: FxHashMap = FxHashMap::default();
-```
-
-**Bug risk**: Hash collision DoS attacks (only relevant if keys come from untrusted sources)
-
-**When NOT to use**: User-facing APIs, networked services where keys come from untrusted sources
-
-**References**:
-- [Hashing - The Rust Performance Book](https://nnethercote.github.io/perf-book/hashing.html)
-- [rustc-hash GitHub](https://github.com/rust-lang/rustc-hash)
-- [Using rustc-hash in Rust: A Guide](https://dlcoder.medium.com/using-rustc-hash-in-rust-a-guide-to-faster-and-safer-hashing-1f3dacfbf6de)
-
----
-
 ## String Building Optimization
 
 ### `write!()` to reused buffer instead of `format!()`
 
@@ -260,7 +220,7 @@ for _ in 0..1_000_000 {
 
 ### Bounded buffers for unbounded growth
 
-**Pattern**: Collections that grow indefinitely without limits (unbounded `Vec`, `String`, `HashMap`)
+**Pattern**: Collections that grow indefinitely without limits (unbounded `Vec`, `String`, `FxHashMap`)
 
 **Why it matters**: Unbounded growth can lead to memory exhaustion, unpredictable performance, and OOM crashes. In long-running services or load generators, this is a critical reliability concern.
 
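The README now folds map preallocation into the `with_capacity` pattern. A minimal sketch of the combined pattern, assuming rustc-hash 2.x (where `FxBuildHasher` is exported at the crate root); `count_codes` is an invented helper, not code from this repository:

```rust
use rustc_hash::{FxBuildHasher, FxHashMap};

fn count_codes(codes: &[u16]) -> FxHashMap<u16, u64> {
    // Reserve up front: one allocation instead of repeated
    // rehash-and-copy cycles as the map grows.
    let mut counts: FxHashMap<u16, u64> =
        FxHashMap::with_capacity_and_hasher(codes.len(), FxBuildHasher::default());
    for &code in codes {
        *counts.entry(code).or_insert(0) += 1;
    }
    counts
}
```
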
diff --git a/.claude/skills/lading-optimize-hunt/SKILL.md b/.claude/skills/lading-optimize-hunt/SKILL.md
index 4892b71ff..883fa4e77 100644
--- a/.claude/skills/lading-optimize-hunt/SKILL.md
+++ b/.claude/skills/lading-optimize-hunt/SKILL.md
@@ -71,8 +71,7 @@ Otherwise, check pending hunt issues or pick from hot subsystems:
 |---------|-----------|----------|
 | `Vec::new()` + repeated push | `Vec::with_capacity(n)` | None |
 | `String::new()` + repeated push | `String::with_capacity(n)` | None |
-| `HashMap::new()` hot insert | `HashMap::with_capacity(n)` | None |
-| Default `HashMap` with int keys | `FxHashMap` (rustc-hash) | Hash collision DoS |
+| `FxHashMap::default()` hot insert | `FxHashMap::with_capacity(n)` | None |
 | `format!()` in hot loop | `write!()` to reused buffer | Format errors, buffer sizing |
 | `&Vec` or `&String` parameter | `&[T]` or `&str` slice | None |
 | Allocation in hot loop | Move outside loop | Lifetime issues |
diff --git a/Cargo.lock b/Cargo.lock
index 5afdef675..82cdfb4ed 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -956,6 +956,7 @@ dependencies = [
  "hyper",
  "hyper-util",
  "once_cell",
+ "rustc-hash",
  "shared",
  "tokio",
  "tokio-stream",
diff --git a/Cargo.toml b/Cargo.toml
index 634792332..e2bfd82c3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -86,6 +86,7 @@ large_stack_arrays = "deny"
 float_cmp = "deny"
 manual_memcpy = "deny"
 unnecessary_to_owned = "deny"
+disallowed_types = "deny"
 
 [workspace.lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)', 'cfg(loom)'] }
diff --git a/clippy.toml b/clippy.toml
index 0141e080d..b28fdde90 100644
--- a/clippy.toml
+++ b/clippy.toml
@@ -1,2 +1,7 @@
 # https://rust-lang.github.io/rust-clippy/master/index.html#doc_markdown
 doc-valid-idents = ["OpenTelemetry", "gRPC", ".."]
+
+# https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_types
+disallowed-types = [
+    { path = "std::collections::HashMap", reason = "Use rustc_hash::FxHashMap for better performance." },
+]
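For reference, this is how the new `disallowed_types` deny surfaces in practice. `Cache` and `external_boundary` are hypothetical illustrations, not code from this repository:

```rust
// With the clippy.toml entry above, any mention of the disallowed path
// is rejected at the `use` site and at each type position, and clippy
// echoes the configured `reason` string in its diagnostic.
use std::collections::HashMap; // denied: use of a disallowed type

struct Cache {
    entries: HashMap<String, u64>, // denied here too
}

// The escape hatch for boundaries that genuinely need std's HashMap,
// mirroring what container.rs does below for bollard's API:
#[allow(clippy::disallowed_types)]
fn external_boundary() -> std::collections::HashMap<String, String> {
    std::collections::HashMap::new()
}
```
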
diff --git a/integration/ducks/Cargo.toml b/integration/ducks/Cargo.toml
index 2210bd822..6d7f89efe 100644
--- a/integration/ducks/Cargo.toml
+++ b/integration/ducks/Cargo.toml
@@ -15,6 +15,7 @@ http-body-util = { workspace = true }
 hyper = { workspace = true, features = ["server"] }
 hyper-util = { workspace = true }
 once_cell = { workspace = true }
+rustc-hash = { workspace = true }
 shared = { path = "../shared" }
 ddsketch-agent = { workspace = true }
 tokio = { workspace = true, features = [
diff --git a/integration/ducks/src/main.rs b/integration/ducks/src/main.rs
index 2d8bb7dcc..bec08bf63 100644
--- a/integration/ducks/src/main.rs
+++ b/integration/ducks/src/main.rs
@@ -31,7 +31,9 @@ use shared::{
         integration_target_server::{IntegrationTarget, IntegrationTargetServer},
     },
 };
-use std::{collections::HashMap, net::SocketAddr, pin::Pin, sync::Arc, time::Duration};
+use std::{net::SocketAddr, pin::Pin, sync::Arc, time::Duration};
+
+use rustc_hash::FxHashMap;
 use tokio::task::JoinSet;
 use tokio::{
     io::AsyncReadExt,
@@ -54,7 +56,7 @@ struct HttpCounters {
     entropy: DDSketch,
     request_count: u64,
     total_bytes: u64,
-    methods: HashMap,
+    methods: FxHashMap,
 }
 
 impl From<&HttpCounters> for HttpMetrics {
diff --git a/lading/src/bin/captool/analyze/jsonl.rs b/lading/src/bin/captool/analyze/jsonl.rs
index 668c7cdef..67bda2c7a 100644
--- a/lading/src/bin/captool/analyze/jsonl.rs
+++ b/lading/src/bin/captool/analyze/jsonl.rs
@@ -2,10 +2,12 @@
 //!
 //! This module provides analysis operations on JSONL (row-based) capture files.
 
-use std::collections::{BTreeSet, HashMap, hash_map::RandomState};
+use std::collections::BTreeSet;
+use std::collections::hash_map::RandomState;
 use std::hash::{BuildHasher, Hasher};
 
 use lading_capture::line::{Line, MetricKind};
+use rustc_hash::FxHashMap;
 
 use super::{MetricInfo, SeriesStats};
 
@@ -39,9 +41,9 @@ pub(crate) fn list_metrics(lines: &[Line]) -> Vec {
 pub(crate) fn analyze_metric(
     lines: &[Line],
     metric_name: &str,
-) -> HashMap, SeriesStats> {
-    let mut context_map: HashMap, Vec)> = HashMap::new();
-    let mut fetch_indices: HashMap> = HashMap::new();
+) -> FxHashMap, SeriesStats> {
+    let mut context_map: FxHashMap, Vec)> = FxHashMap::default();
+    let mut fetch_indices: FxHashMap> = FxHashMap::default();
     let hash_builder = RandomState::new();
 
     // Filter to matching metric and group by label set
@@ -72,7 +74,7 @@ pub(crate) fn analyze_metric(
     }
 
     // Compute statistics
-    let mut results = HashMap::new();
+    let mut results = FxHashMap::default();
     for (key, (labels, values)) in context_map {
         if values.is_empty() {
             continue;
diff --git a/lading/src/bin/captool/analyze/parquet.rs b/lading/src/bin/captool/analyze/parquet.rs
index f047e1066..619d9ab7b 100644
--- a/lading/src/bin/captool/analyze/parquet.rs
+++ b/lading/src/bin/captool/analyze/parquet.rs
@@ -3,13 +3,14 @@
 //! This module provides analysis operations on parquet capture files using
 //! Apache Arrow, staying in columnar format for efficiency.
 
-use std::collections::{BTreeSet, HashMap};
+use std::collections::BTreeSet;
 use std::fs::File;
 use std::path::Path;
 
 use arrow_array::{Array, Float64Array, MapArray, StringArray, UInt64Array};
 use lading_capture_schema::columns;
 use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+use rustc_hash::FxHashMap;
 
 use super::{MetricInfo, SeriesStats};
 
@@ -93,12 +94,12 @@ pub(crate) fn list_metrics>(path: P) -> Result, E
 pub(crate) fn analyze_metric>(
     path: P,
     metric_name: &str,
-) -> Result, SeriesStats>, Error> {
+) -> Result, SeriesStats>, Error> {
     let file = File::open(path)?;
     let reader_builder = ParquetRecordBatchReaderBuilder::try_new(file)?;
     let reader = reader_builder.build()?;
 
-    let mut series_map: HashMap, Vec<(u64, f64)>> = HashMap::new();
+    let mut series_map: FxHashMap, Vec<(u64, f64)>> = FxHashMap::default();
 
     for batch_result in reader {
         let batch = batch_result?;
@@ -194,7 +195,7 @@ pub(crate) fn analyze_metric>(
     }
 
     // Compute statistics for each series
-    let mut results = HashMap::new();
+    let mut results = FxHashMap::default();
     for (labels, mut values) in series_map {
         if values.is_empty() {
             continue;
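Both analyze modules share the same shape: accumulate points per series in a map, then fold each group into stats. A condensed sketch of that grouping step; the `Labels` alias and `group_series` helper are stand-ins for the repo's actual key types, which don't survive in this patch:

```rust
use rustc_hash::FxHashMap;

type Labels = Vec<(String, String)>;

// Group (labels, time, value) points into per-series vectors, keyed by
// the full label set. FxHashMap avoids SipHash overhead on every lookup
// in what is effectively the hot loop of the analysis.
fn group_series(
    points: impl Iterator<Item = (Labels, u64, f64)>,
) -> FxHashMap<Labels, Vec<(u64, f64)>> {
    let mut series_map: FxHashMap<Labels, Vec<(u64, f64)>> = FxHashMap::default();
    for (labels, time, value) in points {
        series_map.entry(labels).or_default().push((time, value));
    }
    series_map
}
```
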
#[serde(default = "default_network_disabled")] @@ -162,7 +164,7 @@ pub struct Container { throttle: Option, shutdown: lading_signal::Watcher, metric_labels: Vec<(String, String)>, - containers: HashMap, + containers: FxHashMap, } impl Container { @@ -209,7 +211,7 @@ impl Container { throttle, shutdown, metric_labels, - containers: HashMap::new(), + containers: FxHashMap::default(), }) } @@ -682,6 +684,8 @@ impl Config { // Docker API requires exposed ports as {"/": {}} // Bollard represents the empty object as HashMap<(), ()> #[allow(clippy::zero_sized_map_values)] + // A lot of the properties of `ContainerCreateBody` require `HashMap` and as such this lint must be disabled here. + #[allow(clippy::disallowed_types)] let exposed_ports = if self.exposed_ports.is_empty() { None } else { @@ -694,7 +698,10 @@ impl Config { } else { format!("{port}/tcp") }; - (port_with_protocol, HashMap::<(), ()>::new()) + ( + port_with_protocol, + std::collections::HashMap::<(), ()>::new(), + ) }) .collect(), ) @@ -705,7 +712,10 @@ impl Config { tty: Some(true), cmd: self.args.clone(), env: self.env.clone(), - labels: self.labels.clone(), + labels: self + .labels + .as_ref() + .map(|labels| labels.iter().map(|(k, v)| (k.clone(), v.clone())).collect()), network_disabled: Some(self.network_disabled), exposed_ports, ..Default::default() diff --git a/lading/src/generator/file_gen/logrotate_fs.rs b/lading/src/generator/file_gen/logrotate_fs.rs index f9d203050..c8c616d0a 100644 --- a/lading/src/generator/file_gen/logrotate_fs.rs +++ b/lading/src/generator/file_gen/logrotate_fs.rs @@ -16,7 +16,6 @@ use nix::libc::{self, ENOENT}; use rand::{SeedableRng, rngs::SmallRng}; use serde::{Deserialize, Serialize}; use std::{ - collections::HashMap, ffi::OsStr, fs, num::NonZeroU32, @@ -27,6 +26,8 @@ use std::{ use tokio::task::{self, JoinError}; use tracing::{debug, error, info, warn}; +use rustc_hash::FxHashMap; + mod model; const TTL: Duration = Duration::from_secs(1); // Attribute cache timeout @@ -222,7 +223,7 @@ impl Server { // Initialize the FUSE filesystem let fs = LogrotateFS { state: Arc::new(Mutex::new(state)), - open_files: Arc::new(Mutex::new(HashMap::new())), + open_files: Arc::new(Mutex::new(FxHashMap::default())), start_time, start_time_system, }; @@ -263,7 +264,7 @@ impl Server { #[derive(Debug)] struct LogrotateFS { state: Arc>, - open_files: Arc>>, + open_files: Arc>>, start_time: Instant, start_time_system: SystemTime, diff --git a/lading_capture/src/validate/jsonl.rs b/lading_capture/src/validate/jsonl.rs index b2f79a204..7a2d0ee31 100644 --- a/lading_capture/src/validate/jsonl.rs +++ b/lading_capture/src/validate/jsonl.rs @@ -3,10 +3,12 @@ //! This module provides validation for JSONL (JSON Lines) format capture files. //! It operates on already-deserialized `Line` objects. 
diff --git a/lading_capture/src/validate/jsonl.rs b/lading_capture/src/validate/jsonl.rs
index b2f79a204..7a2d0ee31 100644
--- a/lading_capture/src/validate/jsonl.rs
+++ b/lading_capture/src/validate/jsonl.rs
@@ -3,10 +3,12 @@
 //! This module provides validation for JSONL (JSON Lines) format capture files.
 //! It operates on already-deserialized `Line` objects.
 
+use std::collections::BTreeSet;
 use std::collections::hash_map::RandomState;
-use std::collections::{BTreeSet, HashMap};
 use std::hash::{BuildHasher, Hasher};
 
+use rustc_hash::FxHashMap;
+
 use crate::line::{Line, MetricKind};
 use crate::validate::ValidationResult;
 
@@ -32,9 +34,9 @@ use crate::validate::ValidationResult;
 #[allow(clippy::too_many_lines)]
 pub fn validate_lines(lines: &[Line], min_seconds: Option) -> ValidationResult {
     // Phase 1: Streaming assertions
-    let mut fetch_index_to_time: HashMap = HashMap::new();
+    let mut fetch_index_to_time: FxHashMap = FxHashMap::default();
     // Collect (fetch_index, time) pairs for each series
-    let mut series_data: HashMap, String)> = HashMap::new();
+    let mut series_data: FxHashMap, String)> = FxHashMap::default();
     let hash_builder = RandomState::new();
     let mut fetch_index_errors = 0u128;
     let mut first_error: Option<(u128, String, String)> = None;
diff --git a/lading_capture/src/validate/parquet.rs b/lading_capture/src/validate/parquet.rs
index a2eb49aae..9321282f4 100644
--- a/lading_capture/src/validate/parquet.rs
+++ b/lading_capture/src/validate/parquet.rs
@@ -4,8 +4,10 @@
 //! Apache Arrow compute kernels. Unlike the row-based validation in `jsonl`,
 //! this leverages columnar operations for better performance and memory efficiency.
 
+use std::collections::BTreeSet;
 use std::collections::hash_map::RandomState;
-use std::collections::{BTreeSet, HashMap};
+
+use rustc_hash::FxHashMap;
 use std::fs::File;
 use std::hash::{BuildHasher, Hasher};
 use std::path::Path;
 
@@ -78,8 +80,8 @@ pub fn validate_parquet>(
     //
     // We also collect (fetch_index, time) pairs for each series to validate
     // per-series invariants in Phase 2.
-    let mut fetch_index_to_time: HashMap = HashMap::new();
-    let mut series_data: HashMap, String)> = HashMap::new();
+    let mut fetch_index_to_time: FxHashMap = FxHashMap::default();
+    let mut series_data: FxHashMap, String)> = FxHashMap::default();
     let hash_builder = RandomState::new();
     let mut line_count = 0u128;
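Note that both validators keep their `RandomState` imports even after the map swap: the `BuildHasher` is used to derive compact series keys, and only the storage map changes to `FxHashMap`. A sketch of that split under those assumptions, with invented names and key types:

```rust
use std::collections::hash_map::RandomState;
use std::hash::{BuildHasher, Hash, Hasher};

use rustc_hash::FxHashMap;

// Collapse a series identity (name plus label pairs) into one u64 via a
// RandomState-built hasher; the u64 keys then index an FxHashMap cheaply.
fn series_key(hash_builder: &RandomState, name: &str, labels: &[(String, String)]) -> u64 {
    let mut hasher = hash_builder.build_hasher();
    name.hash(&mut hasher);
    labels.hash(&mut hasher);
    hasher.finish()
}

fn demo() {
    let hash_builder = RandomState::new();
    let mut series_data: FxHashMap<u64, Vec<(u64, u128)>> = FxHashMap::default();
    let key = series_key(&hash_builder, "cpu.usage", &[("host".into(), "a".into())]);
    series_data.entry(key).or_default().push((0, 0));
}
```
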