From 61f078bf406173dd489655f380491752c135b9c2 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 19 Feb 2025 15:11:30 +0100 Subject: [PATCH 1/4] feat: Support configuring the scaler reconcile interval --- .../trino-lb/configs/trino-lb-config.yaml | 13 ++++++----- deploy/helm/trino-lb/templates/trinos.yaml | 4 ++-- trino-lb-core/src/config.rs | 18 ++++++++++++++- trino-lb/src/main.rs | 5 +++- trino-lb/src/scaling/mod.rs | 23 +++++++++++++++---- 5 files changed, 48 insertions(+), 15 deletions(-) diff --git a/deploy/helm/trino-lb/configs/trino-lb-config.yaml b/deploy/helm/trino-lb/configs/trino-lb-config.yaml index ac0f056..74b98bd 100644 --- a/deploy/helm/trino-lb/configs/trino-lb-config.yaml +++ b/deploy/helm/trino-lb/configs/trino-lb-config.yaml @@ -55,12 +55,12 @@ trinoClusterGroups: - name: trino-m-2 endpoint: https://trino-m-2-coordinator-default.default.svc.cluster.local:8443 credentials: *common-credentials - oidc: - maxRunningQueries: 3 - trinoClusters: - - name: trino-oidc-1 - endpoint: https://5.250.182.203:8443 - credentials: *common-credentials + # oidc: + # maxRunningQueries: 3 + # trinoClusters: + # - name: trino-oidc-1 + # endpoint: https://5.250.182.203:8443 + # credentials: *common-credentials trinoClusterGroupsIgnoreCert: true routers: @@ -100,6 +100,7 @@ routers: routingFallback: m clusterAutoscaler: + reconcileInterval: 1s stackable: clusters: trino-s-1: diff --git a/deploy/helm/trino-lb/templates/trinos.yaml b/deploy/helm/trino-lb/templates/trinos.yaml index d835453..f4391e0 100644 --- a/deploy/helm/trino-lb/templates/trinos.yaml +++ b/deploy/helm/trino-lb/templates/trinos.yaml @@ -8,7 +8,7 @@ metadata: namespace: default spec: image: - productVersion: "451" + productVersion: "455" clusterConfig: catalogLabelSelector: matchLabels: @@ -40,7 +40,7 @@ metadata: namespace: default spec: image: - productVersion: "451" + productVersion: "455" clusterConfig: catalogLabelSelector: matchLabels: diff --git a/trino-lb-core/src/config.rs b/trino-lb-core/src/config.rs index 738028e..b61cf11 100644 --- a/trino-lb-core/src/config.rs +++ b/trino-lb-core/src/config.rs @@ -298,7 +298,23 @@ pub enum TagMatchingStrategy { #[derive(Clone, Debug, Deserialize)] #[serde(deny_unknown_fields, rename_all = "camelCase")] -pub enum ScalerConfig { +pub struct ScalerConfig { + #[serde( + with = "humantime_serde", + default = "default_scaler_reconcile_interval" + )] + pub reconcile_interval: Duration, + #[serde(flatten)] + pub implementation: ScalerConfigImplementation, +} + +fn default_scaler_reconcile_interval() -> Duration { + Duration::from_secs(5) +} + +#[derive(Clone, Debug, Deserialize)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +pub enum ScalerConfigImplementation { Stackable(StackableScalerConfig), } diff --git a/trino-lb/src/main.rs b/trino-lb/src/main.rs index 4a45ed4..6ff6216 100644 --- a/trino-lb/src/main.rs +++ b/trino-lb/src/main.rs @@ -62,6 +62,9 @@ pub enum Error { #[snafu(display("Failed to create scaler"))] CreateScaler { source: scaling::Error }, + #[snafu(display("Failed to start scaler"))] + StartScaler { source: scaling::Error }, + #[snafu(display("Failed to start HTTP server"))] StartHttpServer { source: http_server::Error }, } @@ -157,7 +160,7 @@ async fn start() -> Result<(), MainError> { let scaler = Scaler::new(&config, Arc::clone(&persistence)) .await .context(CreateScalerSnafu)?; - scaler.start_loop(); + scaler.start_loop().context(StartScalerSnafu)?; let query_count_fetcher = QueryCountFetcher::new( Arc::clone(&persistence), diff --git a/trino-lb/src/scaling/mod.rs b/trino-lb/src/scaling/mod.rs index 89b587e..16dd914 100644 --- a/trino-lb/src/scaling/mod.rs +++ b/trino-lb/src/scaling/mod.rs @@ -16,7 +16,7 @@ use tokio::{ }; use tracing::{debug, error, info, instrument, Instrument, Span}; use trino_lb_core::{ - config::{Config, ScalerConfig}, + config::{Config, ScalerConfig, ScalerConfigImplementation}, trino_cluster::ClusterState, TrinoClusterName, }; @@ -110,6 +110,9 @@ pub enum Error { #[snafu(display("The variable \"scaler\" is None. This should never happen, as we only run the reconciliation when a scaler is configured!"))] ScalerVariableIsNone {}, + + #[snafu(display("The scaler config is missing. This is a bug in trino-lb, as it should exist at this particular code path"))] + ScalerConfigMissing {}, } /// The scaler periodically @@ -118,6 +121,8 @@ pub enum Error { /// 2. In case scaling is enabled for a cluster group it supervises the load and scaled the number of clusters /// accordingly pub struct Scaler { + /// The original config passed by the user + config: Option, /// In case this is [`None`], no scaling at all is configured. scaler: Option, persistence: Arc, @@ -153,8 +158,8 @@ impl Scaler { // Cluster groups that don't need scaling are missing from the `scaling_config`. } - Some(match scaler { - ScalerConfig::Stackable(scaler_config) => { + Some(match &scaler.implementation { + ScalerConfigImplementation::Stackable(scaler_config) => { StackableScaler::new(scaler_config, &config.trino_cluster_groups) .await .context(CreateStackableAutoscalerSnafu)? @@ -192,13 +197,19 @@ impl Scaler { persistence, groups, scaling_config, + config: config.cluster_autoscaler.clone(), }) } - pub fn start_loop(self) { + pub fn start_loop(self) -> Result<(), Error> { if self.scaler.is_some() { // As there is a scaler configured, let's start it normally. - let mut interval = time::interval(Duration::from_secs(10)); + let interval = self + .config + .as_ref() + .context(ScalerConfigMissingSnafu)? + .reconcile_interval; + let mut interval = time::interval(interval); interval.set_missed_tick_behavior(time::MissedTickBehavior::Delay); let me = Arc::new(self); @@ -230,6 +241,8 @@ impl Scaler { } }); } + + Ok(()) } #[instrument(name = "Scaler::reconcile", skip(self))] From 3f770f495ce5587e033f43ba447ae34e568a1bd4 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 19 Feb 2025 15:13:20 +0100 Subject: [PATCH 2/4] changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index be1d354..1c27ac6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- Support configuring the scaler reconcile interval ([#61]). + ### Changed - Set defaults to oci ([#57]). @@ -13,6 +17,7 @@ All notable changes to this project will be documented in this file. - Reduce max poll delay from 10s to 3s to have better client responsiveness [#57]: https://github.com/stackabletech/trino-lb/pull/57 +[#61]: https://github.com/stackabletech/trino-lb/pull/61 ## [0.3.2] - 2024-08-20 From 768b8d935aeea3c44a82e8e874fc6a762f6d8c54 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 20 Feb 2025 14:38:55 +0100 Subject: [PATCH 3/4] Remove uneeded braces --- trino-lb-core/src/config.rs | 2 +- trino-lb/src/http_server/mod.rs | 2 +- trino-lb/src/http_server/ui/query.rs | 2 +- trino-lb/src/main.rs | 2 +- trino-lb/src/scaling/config.rs | 2 +- trino-lb/src/scaling/mod.rs | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/trino-lb-core/src/config.rs b/trino-lb-core/src/config.rs index b61cf11..767a9a5 100644 --- a/trino-lb-core/src/config.rs +++ b/trino-lb-core/src/config.rs @@ -137,7 +137,7 @@ impl Default for TrinoLbPortsConfig { #[derive(Clone, Debug, Deserialize)] #[serde(deny_unknown_fields, rename_all = "camelCase")] pub enum PersistenceConfig { - InMemory {}, + InMemory, Redis(RedisConfig), Postgres(PostgresConfig), } diff --git a/trino-lb/src/http_server/mod.rs b/trino-lb/src/http_server/mod.rs index 3d6067a..e08df3f 100644 --- a/trino-lb/src/http_server/mod.rs +++ b/trino-lb/src/http_server/mod.rs @@ -43,7 +43,7 @@ pub enum Error { #[snafu(display( "In case https is used the `tls.certPemFile` and `tls.keyPemFile` options must be set" ))] - CertsMissing {}, + CertsMissing, } pub struct AppState { diff --git a/trino-lb/src/http_server/ui/query.rs b/trino-lb/src/http_server/ui/query.rs index 5602fca..03f532a 100644 --- a/trino-lb/src/http_server/ui/query.rs +++ b/trino-lb/src/http_server/ui/query.rs @@ -16,7 +16,7 @@ use crate::http_server::AppState; #[derive(Snafu, Debug)] pub enum Error { #[snafu(display("Query ID missing. It needs to be specified as query parameter such as https://127.0.0.1:8443/ui/query.html?trino_lb_20231227_122313_2JzDa3bT"))] - QueryIdMissing {}, + QueryIdMissing, #[snafu(display("Query with ID {query_id:?} not found. Maybe the query is not queued any more but was handed over to a Trino cluster."))] QueryIdNotFound { diff --git a/trino-lb/src/main.rs b/trino-lb/src/main.rs index 6ff6216..0b3b8f4 100644 --- a/trino-lb/src/main.rs +++ b/trino-lb/src/main.rs @@ -34,7 +34,7 @@ mod trino_client; #[derive(Snafu, Debug)] pub enum Error { #[snafu(display("Failed to install rustls crypto provider"))] - InstallRustlsCryptoProvider {}, + InstallRustlsCryptoProvider, #[snafu(display("Failed to set up tracing"))] SetUpTracing { source: tracing::Error }, diff --git a/trino-lb/src/scaling/config.rs b/trino-lb/src/scaling/config.rs index bf3b462..ad5cceb 100644 --- a/trino-lb/src/scaling/config.rs +++ b/trino-lb/src/scaling/config.rs @@ -16,7 +16,7 @@ pub enum Error { InvalidTimeRange { time_range: String }, #[snafu(display("Any weekdays other tha \"Mon - Son\" are not supported yet"))] - WeekdaysNotSupportedYet {}, + WeekdaysNotSupportedYet, #[snafu(display( "Please configure a drainIdleDurationBeforeShutdown of at least {min_duration:?}" diff --git a/trino-lb/src/scaling/mod.rs b/trino-lb/src/scaling/mod.rs index 16dd914..6024f95 100644 --- a/trino-lb/src/scaling/mod.rs +++ b/trino-lb/src/scaling/mod.rs @@ -109,10 +109,10 @@ pub enum Error { JoinGetCurrentClusterStateTask { source: JoinError }, #[snafu(display("The variable \"scaler\" is None. This should never happen, as we only run the reconciliation when a scaler is configured!"))] - ScalerVariableIsNone {}, + ScalerVariableIsNone, #[snafu(display("The scaler config is missing. This is a bug in trino-lb, as it should exist at this particular code path"))] - ScalerConfigMissing {}, + ScalerConfigMissing, } /// The scaler periodically From 63e76fdbd7720848fb1c87f60915ae2c28aa5e8f Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 20 Feb 2025 14:40:55 +0100 Subject: [PATCH 4/4] Move into let --- trino-lb/src/scaling/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/trino-lb/src/scaling/mod.rs b/trino-lb/src/scaling/mod.rs index 6024f95..2d338b4 100644 --- a/trino-lb/src/scaling/mod.rs +++ b/trino-lb/src/scaling/mod.rs @@ -158,14 +158,15 @@ impl Scaler { // Cluster groups that don't need scaling are missing from the `scaling_config`. } - Some(match &scaler.implementation { + let scaler = match &scaler.implementation { ScalerConfigImplementation::Stackable(scaler_config) => { StackableScaler::new(scaler_config, &config.trino_cluster_groups) .await .context(CreateStackableAutoscalerSnafu)? .into() } - }) + }; + Some(scaler) } };