From 080a7607a82f14a687ca80d724c27b7aac0d0531 Mon Sep 17 00:00:00 2001 From: Vikrant Garg Date: Thu, 13 Nov 2025 13:00:45 +0530 Subject: [PATCH 01/12] TPM v2 changes --- operator/control-plane/Cargo.lock | 833 +++++++++++++++++++++++---- operator/control-plane/Cargo.toml | 4 +- operator/control-plane/src/aws.rs | 389 ++++++++++--- operator/control-plane/src/market.rs | 15 - 4 files changed, 1013 insertions(+), 228 deletions(-) diff --git a/operator/control-plane/Cargo.lock b/operator/control-plane/Cargo.lock index 413f8f706..a9799cab5 100644 --- a/operator/control-plane/Cargo.lock +++ b/operator/control-plane/Cargo.lock @@ -124,6 +124,38 @@ version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +[[package]] +name = "argh" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34ff18325c8a36b82f992e533ece1ec9f9a9db446bd1c14d4f936bac88fcd240" +dependencies = [ + "argh_derive", + "argh_shared", + "rust-fuzzy-search", +] + +[[package]] +name = "argh_derive" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb7b2b83a50d329d5d8ccc620f5c7064028828538bdf5646acd60dc1f767803" +dependencies = [ + "argh_shared", + "proc-macro2", + "quote", + "syn 2.0.110", +] + +[[package]] +name = "argh_shared" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a464143cc82dedcdc3928737445362466b7674b5db4e2eb8e869846d6d84f4f6" +dependencies = [ + "serde", +] + [[package]] name = "ark-ff" version = "0.3.0" @@ -254,6 +286,17 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.110", +] + [[package]] name = "atoi" version = "2.0.0" @@ -277,7 +320,7 @@ checksum = "ffdcb70bdbc4d478427380519163274ac86e52916e10f0a8889adf0f96d3fee7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -288,9 +331,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-config" -version = "1.5.12" +version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "649316840239f4e58df0b7f620c428f5fababbbca2d504488c641534050bd141" +checksum = "02a18fd934af6ae7ca52410d4548b98eb895aab0f1ea417d168d85db1434a141" dependencies = [ "aws-credential-types", "aws-runtime", @@ -307,7 +350,7 @@ dependencies = [ "bytes", "fastrand", "hex", - "http 0.2.12", + "http 1.2.0", "ring", "time", "tokio", @@ -318,9 +361,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.1" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60e8f6b615cb5fc60a98132268508ad104310f0cfb25a1c22eee76efdf9154da" +checksum = "86590e57ea40121d47d3f2e131bfd873dea15d78dc2f4604f4734537ad9e56c4" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -328,11 +371,34 @@ dependencies = [ "zeroize", ] +[[package]] +name = "aws-lc-rs" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5932a7d9d28b0d2ea34c6b3779d35e3dd6f6345317c34e73438c4f1f29144151" 
+dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1826f2e4cfc2cd19ee53c42fbf68e2f81ec21108e0b7ecf6a71cf062137360fc" +dependencies = [ + "bindgen", + "cc", + "cmake", + "dunce", + "fs_extra", +] + [[package]] name = "aws-runtime" -version = "1.5.2" +version = "1.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f6f1124d6e19ab6daf7f2e615644305dc6cb2d706892a8a8c0b98db35de020" +checksum = "8fe0fd441565b0b318c76e7206c8d1d0b0166b3e986cf30e890b61feb6192045" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -346,18 +412,40 @@ dependencies = [ "fastrand", "http 0.2.12", "http-body 0.4.6", - "once_cell", "percent-encoding", "pin-project-lite", "tracing", "uuid", ] +[[package]] +name = "aws-sdk-ebs" +version = "1.84.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9f6742e316dd503b95ddcc98defb01f5c0c77728c357281108e55dcac4249f1" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] + [[package]] name = "aws-sdk-ec2" -version = "1.99.0" +version = "1.117.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6f5ac33e9893f1c1e090a1f8a3c1796d4882cf0777eaf1d488158ed5533487b" +checksum = "90decc9e656577c7f61085483936209be42a167064ef93a98801b527cf1851f0" dependencies = [ "aws-credential-types", "aws-runtime", @@ -379,9 +467,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.52.0" +version = "1.89.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb25f7129c74d36afe33405af4517524df8f74b635af8c2c8e91c1552b8397b2" +checksum = "a9c1b1af02288f729e95b72bd17988c009aa72e26dcb59b3200f86d7aea726c9" dependencies = [ "aws-credential-types", "aws-runtime", @@ -393,17 +481,17 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", + "fastrand", "http 0.2.12", - "once_cell", "regex-lite", "tracing", ] [[package]] name = "aws-sdk-ssooidc" -version = "1.53.0" +version = "1.91.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d03a3d5ef14851625eafd89660a751776f938bf32f309308b20dcca41c44b568" +checksum = "4e8122301558dc7c6c68e878af918880b82ff41897a60c8c4e18e4dc4d93e9f1" dependencies = [ "aws-credential-types", "aws-runtime", @@ -415,17 +503,17 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", + "fastrand", "http 0.2.12", - "once_cell", "regex-lite", "tracing", ] [[package]] name = "aws-sdk-sts" -version = "1.53.0" +version = "1.73.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf3a9f073ae3a53b54421503063dfb87ff1ea83b876f567d92e8b8d9942ba91b" +checksum = "f1e9c3c24e36183e2f698235ed38dcfbbdff1d09b9232dc866c4be3011e0b47e" dependencies = [ "aws-credential-types", "aws-runtime", @@ -438,17 +526,17 @@ dependencies = [ "aws-smithy-types", "aws-smithy-xml", "aws-types", + "fastrand", "http 0.2.12", - "once_cell", "regex-lite", "tracing", ] [[package]] name = "aws-sigv4" -version = "1.2.6" +version = "1.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2" +checksum = "c35452ec3f001e1f2f6db107b6373f1f48f05ec63ba2c5c9fa91f07dad32af11" 
dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -460,7 +548,6 @@ dependencies = [ "hmac", "http 0.2.12", "http 1.2.0", - "once_cell", "percent-encoding", "sha2", "time", @@ -469,9 +556,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.3" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "427cb637d15d63d6f9aae26358e1c9a9c09d5aa490d64b09354c8217cfef0f28" +checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c" dependencies = [ "futures-util", "pin-project-lite", @@ -480,33 +567,73 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.60.11" +version = "0.62.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6" +checksum = "445d5d720c99eed0b4aa674ed00d835d9b1427dd73e04adaf2f94c6b2d6f9fca" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", + "futures-util", "http 0.2.12", + "http 1.2.0", "http-body 0.4.6", - "once_cell", "percent-encoding", "pin-project-lite", "pin-utils", "tracing", ] +[[package]] +name = "aws-smithy-http-client" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "623254723e8dfd535f566ee7b2381645f8981da086b5c4aa26c0c41582bb1d2c" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.26", + "h2 0.4.12", + "http 0.2.12", + "http 1.2.0", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.8.0", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.5", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.35", + "rustls-native-certs 0.8.2", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower", + "tracing", +] + [[package]] name = "aws-smithy-json" -version = "0.61.1" +version = "0.61.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee4e69cc50921eb913c6b662f8d909131bb3e6ad6cb6090d3a39b66fc5c52095" +checksum = "2db31f727935fc63c6eeae8b37b438847639ec330a9161ece694efba257e0c54" dependencies = [ "aws-smithy-types", ] +[[package]] +name = "aws-smithy-observability" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc" +dependencies = [ + "aws-smithy-runtime-api", +] + [[package]] name = "aws-smithy-query" version = "0.60.7" @@ -519,36 +646,33 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.7.6" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a05dd41a70fc74051758ee75b5c4db2c0ca070ed9229c3df50e9475cda1cb985" +checksum = "0bbe9d018d646b96c7be063dd07987849862b0e6d07c778aad7d93d1be6c1ef0" dependencies = [ "aws-smithy-async", "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "fastrand", - "h2 0.3.26", "http 0.2.12", + "http 1.2.0", "http-body 0.4.6", "http-body 1.0.1", - "httparse", - "hyper 0.14.32", - "hyper-rustls 0.24.2", - "once_cell", "pin-project-lite", "pin-utils", - "rustls 0.21.12", "tokio", "tracing", ] [[package]] name = "aws-smithy-runtime-api" -version = "1.7.3" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92165296a47a812b267b4f41032ff8069ab7ff783696d217f0994a0d7ab585cd" +checksum = "ec7204f9fd94749a7c53b26da1b961b4ac36bf070ef1e0b94bb09f79d4f6c193" 
dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -563,9 +687,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.2.11" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ddc9bd6c28aeb303477170ddd183760a956a03e083b3902a990238a7e3792d" +checksum = "25f535879a207fce0db74b679cfc3e91a3159c8144d717d55f5832aea9eef46e" dependencies = [ "base64-simd", "bytes", @@ -598,9 +722,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.3" +version = "1.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef" +checksum = "d79fb68e3d7fe5d4833ea34dc87d2e97d26d3086cb3da660bb6b1f76d98680b6" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -623,7 +747,7 @@ dependencies = [ "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.2", + "hyper 1.8.0", "hyper-util", "itoa", "matchit", @@ -713,6 +837,26 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags 2.9.4", + "cexpr", + "clang-sys", + "itertools", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.110", +] + [[package]] name = "bit-set" version = "0.8.0" @@ -784,9 +928,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.9.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" dependencies = [ "serde", ] @@ -803,13 +947,25 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.6" +version = "1.2.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d6dbb628b8f8555f86d0323c2eb39e3ec81901f4b83e091db8a6a76d316a333" +checksum = "35900b6c8d709fb1d854671ae27aeaa9eec2f8b01b364e1619a40da3e6fe2afe" dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", "shlex", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -832,6 +988,17 @@ dependencies = [ "inout", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.5.23" @@ -863,7 +1030,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -872,6 +1039,40 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", 
+] + +[[package]] +name = "coldsnap" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bc76cb5c88a7903ac3b8c8d6fa7f4ea57c02256f6f37ca2e89113c89a6af62b" +dependencies = [ + "argh", + "async-trait", + "aws-config", + "aws-sdk-ebs", + "aws-sdk-ec2", + "aws-types", + "base64 0.22.1", + "bytes", + "env_logger", + "futures", + "indicatif", + "log", + "nix", + "sha2", + "snafu", + "tempfile", + "tokio", +] + [[package]] name = "colorchoice" version = "1.0.3" @@ -887,6 +1088,19 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "console" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b430743a6eb14e9764d4260d4c0d8123087d504eeb9c48f2b2a5e810dd369df4" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.61.2", +] + [[package]] name = "const-hex" version = "1.16.0" @@ -932,11 +1146,13 @@ dependencies = [ "alloy-primitives", "anyhow", "aws-config", + "aws-sdk-ebs", "aws-sdk-ec2", "aws-types", "axum", "base64 0.22.1", "clap", + "coldsnap", "hex", "httpc-test", "rand_core 0.6.4", @@ -993,6 +1209,16 @@ dependencies = [ "libc", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -1089,7 +1315,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -1140,7 +1366,7 @@ checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", "unicode-xid", ] @@ -1173,7 +1399,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -1191,6 +1417,12 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "ecdsa" version = "0.16.9" @@ -1254,6 +1486,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -1263,6 +1501,29 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "env_filter" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -1345,6 +1606,12 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" +[[package]] +name = "find-msvc-tools" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" + [[package]] name = "fixed-hash" version = "0.8.0" @@ -1404,12 +1671,33 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "funty" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.31" @@ -1454,6 +1742,17 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.110", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ -1472,8 +1771,10 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ + "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -1524,6 +1825,12 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "group" version = "0.13.0" @@ -1556,9 +1863,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.7" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" dependencies = [ "atomic-waker", "bytes", @@ -1733,7 +2040,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "socket2 0.5.8", "tokio", "tower-service", "tracing", @@ -1742,20 +2049,22 @@ dependencies = [ [[package]] name = "hyper" -version = "1.5.2" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" +checksum = "1744436df46f0bde35af3eda22aeaba453aada65d8f1c171cd8a5f59030bd69f" dependencies = [ + "atomic-waker", "bytes", "futures-channel", - "futures-util", - "h2 0.4.7", + "futures-core", + "h2 0.4.12", "http 1.2.0", "http-body 1.0.1", "httparse", "httpdate", "itoa", "pin-project-lite", + "pin-utils", "smallvec", "tokio", "want", @@ -1772,7 +2081,7 @@ 
dependencies = [ "hyper 0.14.32", "log", "rustls 0.21.12", - "rustls-native-certs", + "rustls-native-certs 0.6.3", "tokio", "tokio-rustls 0.24.1", ] @@ -1785,12 +2094,13 @@ checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" dependencies = [ "futures-util", "http 1.2.0", - "hyper 1.5.2", + "hyper 1.8.0", "hyper-util", - "rustls 0.23.20", + "rustls 0.23.35", + "rustls-native-certs 0.8.2", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.1", + "tokio-rustls 0.26.4", "tower-service", "webpki-roots", ] @@ -1803,7 +2113,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.5.2", + "hyper 1.8.0", "hyper-util", "native-tls", "tokio", @@ -1813,18 +2123,23 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.10" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" dependencies = [ + "base64 0.22.1", "bytes", "futures-channel", + "futures-core", "futures-util", "http 1.2.0", "http-body 1.0.1", - "hyper 1.5.2", + "hyper 1.8.0", + "ipnet", + "libc", + "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.1", "tokio", "tower-service", "tracing", @@ -1945,7 +2260,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -1986,7 +2301,7 @@ checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -2000,6 +2315,19 @@ dependencies = [ "serde", ] +[[package]] +name = "indicatif" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9375e112e4b463ec1b1c6c011953545c65a30164fbab5b581df32b3abf0dcb88" +dependencies = [ + "console", + "portable-atomic", + "unicode-width", + "unit-prefix", + "web-time", +] + [[package]] name = "inout" version = "0.1.3" @@ -2045,6 +2373,40 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +[[package]] +name = "jiff" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", +] + +[[package]] +name = "jiff-static" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.110", +] + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.3", + "libc", +] + [[package]] name = "js-sys" version = "0.3.76" @@ -2098,9 +2460,19 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + 
+[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] [[package]] name = "libm" @@ -2215,6 +2587,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.2" @@ -2247,11 +2625,33 @@ dependencies = [ "openssl-probe", "openssl-sys", "schannel", - "security-framework", + "security-framework 2.11.1", "security-framework-sys", "tempfile", ] +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags 2.9.4", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -2363,14 +2763,14 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] name = "openssl-probe" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-src" @@ -2469,7 +2869,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -2597,6 +2997,21 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -2612,6 +3027,16 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.110", +] + [[package]] name = "primeorder" version = "0.13.6" @@ -2703,8 +3128,8 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.20", - "socket2", + "rustls 0.23.35", + "socket2 0.5.8", "thiserror 2.0.9", "tokio", "tracing", @@ -2721,7 +3146,7 @@ dependencies = [ "rand 0.8.5", "ring", "rustc-hash", - "rustls 0.23.20", + "rustls 0.23.35", "rustls-pki-types", "slab", "thiserror 2.0.9", @@ -2739,7 +3164,7 @@ dependencies = [ 
"cfg_aliases", "libc", "once_cell", - "socket2", + "socket2 0.5.8", "tracing", "windows-sys 0.59.0", ] @@ -2916,11 +3341,11 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2 0.4.7", + "h2 0.4.12", "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.2", + "hyper 1.8.0", "hyper-rustls 0.27.5", "hyper-tls", "hyper-util", @@ -2933,7 +3358,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.20", + "rustls 0.23.35", "rustls-pemfile 2.2.0", "rustls-pki-types", "serde", @@ -2943,7 +3368,7 @@ dependencies = [ "system-configuration", "tokio", "tokio-native-tls", - "tokio-rustls 0.26.1", + "tokio-rustls 0.26.4", "tower", "tower-service", "url", @@ -3055,6 +3480,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48fd7bd8a6377e15ad9d42a8ec25371b94ddc67abe7c8b9127bec79bebaaae18" +[[package]] +name = "rust-fuzzy-search" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a157657054ffe556d8858504af8a672a054a6e0bd9e8ee531059100c0fa11bb2" + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -3118,14 +3549,15 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.20" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ + "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.102.8", + "rustls-webpki 0.103.8", "subtle", "zeroize", ] @@ -3139,7 +3571,19 @@ dependencies = [ "openssl-probe", "rustls-pemfile 1.0.4", "schannel", - "security-framework", + "security-framework 2.11.1", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework 3.5.1", ] [[package]] @@ -3162,11 +3606,12 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" +checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a" dependencies = [ "web-time", + "zeroize", ] [[package]] @@ -3181,10 +3626,11 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.102.8" +version = "0.103.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -3260,7 +3706,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ "bitflags 2.9.4", - "core-foundation", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework" +version = "3.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +dependencies = [ + "bitflags 2.9.4", + "core-foundation 0.10.1", "core-foundation-sys", "libc", 
"security-framework-sys", @@ -3268,9 +3727,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.13.0" +version = "2.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1863fd3768cd83c56a7f60faa4dc0d403f1b6df0a38c3c25f44b7894e45370d5" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" dependencies = [ "core-foundation-sys", "libc", @@ -3327,7 +3786,7 @@ checksum = "8db53ae22f34573731bafa1db20f04027b2d25e02d8205921b569171699cdb33" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -3464,6 +3923,27 @@ dependencies = [ "serde", ] +[[package]] +name = "snafu" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.110", +] + [[package]] name = "socket2" version = "0.5.8" @@ -3474,6 +3954,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "socket2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + [[package]] name = "spin" version = "0.9.8" @@ -3529,7 +4019,7 @@ dependencies = [ "memchr", "once_cell", "percent-encoding", - "rustls 0.23.20", + "rustls 0.23.35", "serde", "serde_json", "sha2", @@ -3552,7 +4042,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -3575,7 +4065,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.94", + "syn 2.0.110", "tokio", "url", ] @@ -3785,9 +4275,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.94" +version = "2.0.110" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "987bc0be1cdea8b10216bd06e2ca407d40b9543468fafd3ddfb02f36e77f71f3" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" dependencies = [ "proc-macro2", "quote", @@ -3811,7 +4301,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -3821,7 +4311,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ "bitflags 2.9.4", - "core-foundation", + "core-foundation 0.9.4", "system-configuration-sys", ] @@ -3880,7 +4370,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -3891,7 +4381,7 @@ checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -3982,7 +4472,7 @@ dependencies = [ "parking_lot 0.12.3", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.5.8", "tokio-macros", "windows-sys 0.52.0", ] @@ -3995,7 +4485,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 
2.0.110", ] [[package]] @@ -4020,11 +4510,11 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.1" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.20", + "rustls 0.23.35", "tokio", ] @@ -4117,7 +4607,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -4222,12 +4712,24 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "unicode-xid" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unit-prefix" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" + [[package]] name = "untrusted" version = "0.9.0" @@ -4368,7 +4870,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", "wasm-bindgen-shared", ] @@ -4403,7 +4905,7 @@ checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4476,6 +4978,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-registry" version = "0.2.0" @@ -4533,6 +5041,24 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -4557,13 +5083,30 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + 
"windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -4576,6 +5119,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -4588,6 +5137,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -4600,12 +5155,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -4618,6 +5185,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -4630,6 +5203,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -4642,6 +5221,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -4654,6 +5239,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = 
"windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "winnow" version = "0.6.21" @@ -4716,7 +5307,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", "synstructure", ] @@ -4738,7 +5329,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -4758,7 +5349,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", "synstructure", ] @@ -4779,7 +5370,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] [[package]] @@ -4801,5 +5392,5 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.94", + "syn 2.0.110", ] diff --git a/operator/control-plane/Cargo.toml b/operator/control-plane/Cargo.toml index 5c2f0097a..0865d884a 100644 --- a/operator/control-plane/Cargo.toml +++ b/operator/control-plane/Cargo.toml @@ -20,11 +20,13 @@ path = "src/enclaver.rs" alloy-primitives = { version = "1.3.1", features = ["serde"] } anyhow = "1.0.95" aws-config = { version = "1.5.12", features = ["behavior-version-latest"] } -aws-sdk-ec2 = "1.99.0" +aws-sdk-ebs = "=1.84.0" +aws-sdk-ec2 = "=1.117.0" aws-types = "1.3.3" axum = "0.8.1" base64 = "0.22.1" clap = { version = "4.5.23", features = ["derive"] } +coldsnap = "0.9.0" hex = "0.4.3" rand_core = { version = "0.6.4", features = ["getrandom"] } reqwest = { version = "0.12.12", features = ["rustls-tls"], default-features = false } diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs index ab82ef207..8fe4c3688 100644 --- a/operator/control-plane/src/aws.rs +++ b/operator/control-plane/src/aws.rs @@ -8,6 +8,7 @@ use std::str::FromStr; use anyhow::{anyhow, Context, Result}; use aws_sdk_ec2::types::*; use aws_types::region::Region; +use coldsnap::{SnapshotUploader, SnapshotWaiter}; use rand_core::OsRng; use serde_json::Value; use ssh2::Session; @@ -22,6 +23,7 @@ use crate::market::{InfraProvider, JobId}; #[derive(Clone)] pub struct Aws { clients: HashMap, + ebs_clients: HashMap, key_name: String, // Path cannot be cloned, hence String key_location: String, @@ -51,10 +53,19 @@ impl Aws { .await; aws_sdk_ec2::Client::new(&config) }); + ebs_clients.insert(region.clone(), { + let config = aws_config::from_env() + .profile_name(&aws_profile) + .region(Region::new(region.clone())) + .load() + .await; + aws_sdk_ebs::Client::new(&config) + }); } Aws { clients, + ebs_clients, key_name, key_location, pub_key_location, @@ -67,6 +78,10 @@ impl Aws { &self.clients[region] } + async fn ebs_client(&self, region: &str) -> &aws_sdk_ebs::Client { + &self.ebs_clients[region] + } + pub async fn generate_key_pair(&self) -> Result<()> { let priv_check = Path::new(&self.key_location).exists(); let pub_check = Path::new(&self.pub_key_location).exists(); @@ -252,6 +267,7 @@ impl Aws { Ok(true) } + // [UPDATE NOTE] This function is obsolete, no enclaves pub async fn run_enclave_impl( &self, job_id: &str, @@ -790,7 +806,7 @@ EOF } /* AWS EC2 UTILITY */ - + // [UPDATE NOTE] Should return private IP, there won't be any 
Public IPs. pub async fn get_instance_ip(&self, instance_id: &str, region: &str) -> Result { Ok(self .client(region) @@ -821,22 +837,11 @@ EOF &self, job: &JobId, instance_type: InstanceType, - family: &str, architecture: &str, region: &str, + init_params: &[u8], + ami_id: &str, ) -> Result { - let instance_ami = self - .get_amis(region, family, architecture) - .await - .context("could not get amis")?; - - let enclave_options = EnclaveOptionsRequest::builder().enabled(true).build(); - let ebs = EbsBlockDevice::builder().volume_size(12).build(); - let block_device_mapping = BlockDeviceMapping::builder() - .device_name("/dev/sda1") - .ebs(ebs) - .build(); - let name_tag = Tag::builder().key("Name").value("JobRunner").build(); let managed_tag = Tag::builder().key("managedBy").value("marlin").build(); let project_tag = Tag::builder().key("project").value("oyster").build(); @@ -865,21 +870,20 @@ EOF .get_security_group(region) .await .context("could not get subnet")?; - + // [UPDATE NOTE] Add user data to launch instance Ok(self .client(region) .await .run_instances() - .image_id(instance_ami) + .image_id(ami_id) .instance_type(instance_type) .key_name(self.key_name.clone()) .min_count(1) .max_count(1) - .enclave_options(enclave_options) - .block_device_mappings(block_device_mapping) .tag_specifications(tags) .security_group_ids(sec_group) .subnet_id(subnet) + .user_data(String::from_utf8_lossy(init_params).to_string()) .send() .await .context("could not run instance")? @@ -905,73 +909,6 @@ EOF Ok(()) } - pub async fn get_amis(&self, region: &str, family: &str, architecture: &str) -> Result { - let project_filter = Filter::builder() - .name("tag:project") - .values("oyster") - .build(); - let name_filter = Filter::builder() - .name("name") - .values("marlin/oyster/worker-".to_owned() + family + "-" + architecture + "-????????") - .build(); - - let own_ami = self - .client(region) - .await - .describe_images() - .owners("self") - .filters(project_filter) - .filters(name_filter) - .send() - .await - .context("could not describe images")?; - - let own_ami = own_ami.images().iter().max_by_key(|x| &x.name); - - if own_ami.is_some() { - Ok(own_ami - .unwrap() - .image_id() - .ok_or(anyhow!("could not parse image id"))? - .to_string()) - } else { - self.get_community_amis(region, family, architecture) - .await - .context("could not get community ami") - } - } - - pub async fn get_community_amis( - &self, - region: &str, - family: &str, - architecture: &str, - ) -> Result { - let owner = "753722448458"; - let name_filter = Filter::builder() - .name("name") - .values("marlin/oyster/worker-".to_owned() + family + "-" + architecture + "-????????") - .build(); - - Ok(self - .client(region) - .await - .describe_images() - .owners(owner) - .filters(name_filter) - .send() - .await - .context("could not describe images")? - // response parsing from here - .images() - .iter() - .max_by_key(|x| &x.name) - .ok_or(anyhow!("no images found"))? - .image_id() - .ok_or(anyhow!("could not parse image id"))? 
- .to_string()) - } - pub async fn get_security_group(&self, region: &str) -> Result<String> { let filter = Filter::builder() .name("tag:project") .values("oyster") .build(); @@ -1018,6 +955,95 @@ EOF .to_string()) } + async fn get_job_snapshot_id( + &self, + job: &JobId, + region: &str, + ) -> Result<(bool, String)> { + let job_filter = Filter::builder().name("tag:jobId").values(&job.id).build(); + let operator_filter = Filter::builder() + .name("tag:operator") + .values(&job.operator) + .build(); + let chain_filter = Filter::builder() + .name("tag:chainID") + .values(&job.chain) + .build(); + let contract_filter = Filter::builder() + .name("tag:contractAddress") + .values(&job.contract) + .build(); + let res = self + .client(region) + .await + .describe_snapshots() + .owner_ids("self") + .filters(job_filter) + .filters(operator_filter) + .filters(contract_filter) + .filters(chain_filter) + .send() + .await + .context("could not describe snapshots")?; + + let own_snapshot = res.snapshots().iter().max_by_key(|x| &x.start_time); + if let Some(snapshot) = own_snapshot { + Ok(( + true, + snapshot + .snapshot_id() + .ok_or(anyhow!("could not parse snapshot id"))? + .to_string(), + )) + } else { + Ok((false, "".to_owned())) + } + } + + async fn get_job_ami_id( + &self, + job: &JobId, + region: &str, + ) -> Result<(bool, String)> { + let job_filter = Filter::builder().name("tag:jobId").values(&job.id).build(); + let operator_filter = Filter::builder() + .name("tag:operator") + .values(&job.operator) + .build(); + let chain_filter = Filter::builder() + .name("tag:chainID") + .values(&job.chain) + .build(); + let contract_filter = Filter::builder() + .name("tag:contractAddress") + .values(&job.contract) + .build(); + let res = self + .client(region) + .await + .describe_images() + .owners("self") + .filters(job_filter) + .filters(operator_filter) + .filters(contract_filter) + .filters(chain_filter) + .send() + .await + .context("could not describe images")?; + + let own_ami = res.images().iter().max_by_key(|x| &x.name); + if let Some(ami) = own_ami { + Ok(( + true, + ami.image_id() + .ok_or(anyhow!("could not parse image id"))? + .to_string(), + )) + } else { + Ok((false, "".to_owned())) + } + } + pub async fn get_job_instance_id( &self, job: &JobId, @@ -1103,6 +1129,7 @@ EOF .into()) } + // [UPDATE NOTE] This function is obsolete, no enclaves pub async fn get_enclave_state(&self, instance_id: &str, region: &str) -> Result<String> { let public_ip_address = self .get_instance_ip(instance_id, region) @@ -1308,11 +1335,11 @@ EOF Ok(()) } + // [UPDATE NOTE] Only instance spin-up is kept; running enclaves is no longer needed async fn spin_up_impl( &mut self, job: &JobId, instance_type: &str, - family: &str, region: &str, req_mem: i64, req_vcpu: i32, @@ -1347,14 +1374,189 @@ EOF } } + // [UPDATE NOTE] Check for an AMI corresponding to the given job. If it doesn't exist, check whether a snapshot exists. + // If no snapshot exists, download the image, upload it as a snapshot and register an AMI. If a snapshot exists, register an AMI from it. + + let (ami_exist, mut ami_id) = self.get_job_ami_id(job, region).await + .context("failed to get job ami")?; + + if !ami_exist { + // check if a snapshot exists + let (snapshot_exist, mut snapshot_id) = self.get_job_snapshot_id(job, region).await + .context("failed to get job snapshot")?; + if !snapshot_exist { + // 1. Download the image at image_url to a tmp file + // 2. check blacklist/whitelist + // 3.
Upload image as snapshot + + let tmp_file_path = format!("/tmp/image-{}.raw", job.id); + let mut tmp_file = File::create(&tmp_file_path) + .context(format!("Failed to create temporary file for image {}", tmp_file_path))?; + + // Download the image from the image_url + let resp = reqwest::get(image_url) + .await + .context(format!("Failed to start download file from {} for job ID {}", image_url, job.id))?; + let mut stream = resp.bytes_stream(); + + while let Some(item) = stream.next().await { + let chunk = item.context(format!("Failed to read chunk from response stream for job ID {}", job.id))?; + tmp_file.write_all(&chunk) + .context(format!("Failed to write chunk to temporary file for job ID {}", job.id))?; + } + + tmp_file.flush() + .context(format!("Failed to flush temporary file for job ID {}", job.id))?; + + let mut hasher = Sha256::new(); + let mut file = File::open(&tmp_file_path) + .context("Failed to open temporary file for hashing")?; + let mut buffer = [0; 8192]; + loop { + let n = file.read(&mut buffer) + .context("Failed to read temporary file")?; + if n == 0 { + break; + } + hasher.update(&buffer[..n]); + } + let file_hash = hex::encode(hasher.finalize()); + + if let Some(whitelist_list) = &self.whitelist { + let mut allowed = false; + for entry in whitelist_list { + if entry.contains(&file_hash) { + allowed = true; + break; + } + } + if !allowed { + return Err(anyhow!("Image hash {} not found in whitelist", file_hash)); + } + } + + if let Some(blacklist_list) = &self.blacklist { + for entry in blacklist_list { + if entry.contains(&file_hash) { + return Err(anyhow!("Image hash {} found in blacklist", file_hash)); + } + } + } + + + let uploader = SnapshotUploader::new(self.ebs_client(region).await.clone()); + let managed_tag = aws_sdk_ebs::types::Tag::builder().key("managedBy").value("marlin").build(); + let project_tag = aws_sdk_ebs::types::Tag::builder().key("project").value("oyster").build(); + let job_tag = aws_sdk_ebs::types::Tag::builder().key("jobId").value(&job.id).build(); + let operator_tag = aws_sdk_ebs::types::Tag::builder().key("operator").value(&job.operator).build(); + let chain_tag = aws_sdk_ebs::types::Tag::builder().key("chainID").value(&job.chain).build(); + let contract_tag = aws_sdk_ebs::types::Tag::builder() + .key("contractAddress") + .value(&job.contract) + .build(); + + let snapshot_tags = vec![ + managed_tag, + project_tag, + job_tag, + operator_tag, + contract_tag, + chain_tag, + ]; + snapshot_id = uploader.upload_from_file(Path::new(&tmp_file_path), None, None, snapshot_tags, None, None, None).await + .context("Failed to upload snapshot from image file")?; + info!(snapshot_id, "Snapshot uploaded"); + let waiter = SnapshotWaiter::new(self.ebs_client(region).await.clone()); + waiter.wait_for_completed(snapshot_id.as_str()).await + .context("Failed to wait for snapshot completion")?; + info!(snapshot_id, "Snapshot is now completed"); + + } + // Register AMI from snapshot + + let block_dev_mapping = aws_sdk_ec2::types::BlockDeviceMapping::builder() + .device_name("/dev/xvda") + .ebs( + aws_sdk_ec2::types::EbsBlockDevice::builder() + .snapshot_id(snapshot_id.clone()) + .build(), + ) + .build(); + + let instance_type = + InstanceType::from_str(instance_type).context("cannot parse instance type")?; + let resp = self + .client(region) + .await + .describe_instance_types() + .instance_types(instance_type.clone()) + .send() + .await + .context("could not describe instance types")?; + let mut architecture = "arm64".to_string(); + let instance_types =
resp.instance_types(); + for instance in instance_types { + let supported_architectures = instance + .processor_info() + .ok_or(anyhow!("error fetching instance processor info"))? + .supported_architectures(); + if let Some(arch) = supported_architectures.iter().next() { + arch.as_str().clone_into(&mut architecture); + info!(architecture); + } + } + + let resp = self.client(region).await + .register_image() + .name(format!("marlin/oyster/job-{}", job.id)) + .architecture(ArchitectureValues::from(architecture.as_str())) + .root_device_name("/dev/xvda") + .block_device_mappings(block_dev_mapping) + .tag_specifications( + TagSpecification::builder() + .resource_type(ResourceType::Image) + .tags( + Tag::builder().key("managedBy").value("marlin").build(), + ) + .tags( + Tag::builder().key("project").value("oyster").build(), + ) + .tags( + Tag::builder().key("jobId").value(&job.id).build(), + ) + .tags( + Tag::builder().key("operator").value(&job.operator).build(), + ) + .tags( + Tag::builder().key("contractAddress").value(&job.contract).build(), + ) + .tags( + Tag::builder().key("chainID").value(&job.chain).build(), + ) + .build() + ) + .send() + .await + .context(format!("Failed to register AMI from snapshot {} for job {}", snapshot_id, job.id))?; + + ami_id = resp.image_id() + .ok_or(anyhow!("could not parse image id"))? + .to_string(); + } + if !exist { // either no old instance or old instance was not enough, launch new one instance = self - .spin_up_instance(job, instance_type, family, region, req_mem, req_vcpu) + .spin_up_instance(job, instance_type, region, req_mem, req_vcpu, init_params, &ami_id) .await .context("failed to spin up instance")?; } + // [UPDATE NOTE] No enclave deployment needed. Review which steps in this function are still needed. + // Keep the following: + // 1. Rate limit configuration + // 2. User data setup + // 3. Pick user image self.run_enclave_impl( &job.id, family, @@ -1371,14 +1573,19 @@ EOF .context("failed to run enclave") } + + // [UPDATE NOTE] New things to add: + // 1. Pick AMI corresponding to given image_url + // 2. Set up user data pub async fn spin_up_instance( &self, job: &JobId, instance_type: &str, - family: &str, region: &str, req_mem: i64, req_vcpu: i32, + init_params: &[u8], + ami_id: &str, ) -> Result<String> { let instance_type = InstanceType::from_str(instance_type).context("cannot parse instance type")?; @@ -1426,7 +1633,7 @@ EOF return Err(anyhow!("Required memory or vcpus are more than available")); } let instance = self - .launch_instance(job, instance_type, family, &architecture, region) + .launch_instance(job, instance_type, &architecture, region, init_params, ami_id) .await .context("could not launch instance")?; sleep(Duration::from_secs(100)).await; @@ -1515,7 +1722,6 @@ impl InfraProvider for Aws { &mut self, job: &JobId, instance_type: &str, - family: &str, region: &str, req_mem: i64, req_vcpu: i32, @@ -1527,7 +1733,6 @@ self.spin_up_impl( job, instance_type, - family, region, req_mem, req_vcpu, @@ -1546,6 +1751,8 @@ .context("could not spin down enclave") } + // [UPDATE NOTE] Due to Gateway VM rate limit, the instance IP won't be equal to the elastic IP. Instead, Gateway VM + // secondary IPs are used.
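+ // Illustrative sketch (an assumption; the gateway mapping is not defined in this patch): the lookup + // could filter the gateway VM's network interfaces on a jobId tag via describe_network_interfaces() + // and return the matching secondary private address, rather than the instance's own public/elastic IP.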
async fn get_job_ip(&self, job: &JobId, region: &str) -> Result { let instance = self .get_job_instance_id(job, region) diff --git a/operator/control-plane/src/market.rs b/operator/control-plane/src/market.rs index 8648e46d3..004f5c7be 100644 --- a/operator/control-plane/src/market.rs +++ b/operator/control-plane/src/market.rs @@ -52,7 +52,6 @@ pub trait InfraProvider { &mut self, job: &JobId, instance_type: &str, - family: &str, region: &str, req_mem: i64, req_vcpu: i32, @@ -81,7 +80,6 @@ where &mut self, job: &JobId, instance_type: &str, - family: &str, region: &str, req_mem: i64, req_vcpu: i32, @@ -94,7 +92,6 @@ where .spin_up( job, instance_type, - family, region, req_mem, req_vcpu, @@ -596,7 +593,6 @@ struct JobState<'a> { last_settled: Duration, rate: U256, original_rate: U256, - family: String, min_rate: U256, bandwidth: u64, eif_url: String, @@ -633,8 +629,6 @@ impl<'a> JobState<'a> { last_settled: context.now_timestamp(), rate: U256::from(1), original_rate: U256::from(1), - // salmon is the default for jobs (usually old) without any family specified - family: "salmon".to_owned(), min_rate: U256::MAX, bandwidth: 0, eif_url: String::new(), @@ -728,7 +722,6 @@ impl<'a> JobState<'a> { .spin_up( &self.job_id, self.instance_type.as_str(), - self.family.as_str(), &self.region, self.req_mem, self.req_vcpus, @@ -1078,14 +1071,6 @@ impl<'a> JobState<'a> { info!(self.req_vcpus, "Required vcpu"); } - let family = metadata_json["family"].as_str(); - if update && family.is_some() && self.family != family.unwrap() { - return Err(anyhow!("Family change not allowed")); - } else if family.is_some() { - self.family = family.unwrap().to_owned(); - info!(self.family, "Family"); - } - let debug = metadata_json["debug"].as_bool().unwrap_or(false); self.debug = debug; From 50c5b7ea042f41a9d24e899d49c8c917dfe2084c Mon Sep 17 00:00:00 2001 From: Vikrant Garg Date: Thu, 20 Nov 2025 08:40:43 +0530 Subject: [PATCH 02/12] Add test for aws tpm instance --- operator/control-plane/Cargo.lock | 15 + operator/control-plane/Cargo.toml | 14 +- operator/control-plane/src/ami.rs | 63 -- operator/control-plane/src/aws.rs | 868 +++++-------------------- operator/control-plane/src/enclaver.rs | 57 -- operator/control-plane/src/main.rs | 2 +- operator/control-plane/src/market.rs | 121 +--- operator/control-plane/src/test.rs | 8 - 8 files changed, 182 insertions(+), 966 deletions(-) delete mode 100644 operator/control-plane/src/ami.rs delete mode 100644 operator/control-plane/src/enclaver.rs diff --git a/operator/control-plane/Cargo.lock b/operator/control-plane/Cargo.lock index a9799cab5..3b993f9f8 100644 --- a/operator/control-plane/Cargo.lock +++ b/operator/control-plane/Cargo.lock @@ -3369,11 +3369,13 @@ dependencies = [ "tokio", "tokio-native-tls", "tokio-rustls 0.26.4", + "tokio-util", "tower", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "webpki-roots", "windows-registry", @@ -4916,6 +4918,19 @@ version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "web-sys" version = "0.3.76" diff --git a/operator/control-plane/Cargo.toml 
b/operator/control-plane/Cargo.toml index 0865d884a..bd8e657e9 100644 --- a/operator/control-plane/Cargo.toml +++ b/operator/control-plane/Cargo.toml @@ -8,20 +8,12 @@ license = "AGPL-3.0-or-later" name = "cp" path = "src/lib.rs" -[[bin]] -name = "ami" -path = "src/ami.rs" - -[[bin]] -name = "enclaver" -path = "src/enclaver.rs" - [dependencies] alloy-primitives = { version = "1.3.1", features = ["serde"] } anyhow = "1.0.95" aws-config = { version = "1.5.12", features = ["behavior-version-latest"] } -aws-sdk-ebs = "=1.84.0" -aws-sdk-ec2 = "=1.117.0" +aws-sdk-ebs = "1.84.0" +aws-sdk-ec2 = "1.117.0" aws-types = "1.3.3" axum = "0.8.1" base64 = "0.22.1" @@ -29,7 +21,7 @@ clap = { version = "4.5.23", features = ["derive"] } coldsnap = "0.9.0" hex = "0.4.3" rand_core = { version = "0.6.4", features = ["getrandom"] } -reqwest = { version = "0.12.12", features = ["rustls-tls"], default-features = false } +reqwest = { version = "0.12.12", features = ["rustls-tls", "stream"], default-features = false } serde = "1.0.217" serde_json = "1.0.134" shell-escape = "0.1.5" diff --git a/operator/control-plane/src/ami.rs b/operator/control-plane/src/ami.rs deleted file mode 100644 index 77600508f..000000000 --- a/operator/control-plane/src/ami.rs +++ /dev/null @@ -1,63 +0,0 @@ -use cp::aws; - -use anyhow::Context; -use anyhow::Result; -use clap::Parser; - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -/// Control plane for Oyster -struct Cli { - /// AWS profile - #[clap(long, value_parser)] - profile: String, - - /// AWS region - #[clap(long, value_parser, default_value = "ap-south-1")] - region: String, - - /// AMI family - #[clap(long, value_parser, default_value = "salmon")] - family: String, -} - -#[tokio::main] -pub async fn main() -> Result<()> { - let cli = Cli::parse(); - - let aws = aws::Aws::new( - cli.profile, - &[cli.region.clone()], - String::new(), - None, - None, - ) - .await; - println!( - "amd64 community ami: {}", - aws.get_community_amis(&cli.region, &cli.family, "amd64") - .await - .context("failed to fetch amd64 community ami")? - ); - println!( - "arm64 community ami: {}", - aws.get_community_amis(&cli.region, &cli.family, "arm64") - .await - .context("failed to fetch arm64 community ami")? - ); - - println!( - "amd64 resolved ami: {}", - aws.get_amis(&cli.region, &cli.family, "amd64") - .await - .context("failed to fetch amd64 resolved ami")? - ); - println!( - "arm64 resolved ami: {}", - aws.get_amis(&cli.region, &cli.family, "arm64") - .await - .context("failed to fetch arm64 resolved ami")? 
- ); - - Ok(()) -} diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs index 8fe4c3688..35733e6b5 100644 --- a/operator/control-plane/src/aws.rs +++ b/operator/control-plane/src/aws.rs @@ -1,20 +1,19 @@ use std::collections::HashMap; use std::fs::File; use std::io::{BufReader, Read, Write}; -use std::net::TcpStream; use std::path::Path; use std::str::FromStr; use anyhow::{anyhow, Context, Result}; use aws_sdk_ec2::types::*; use aws_types::region::Region; +use base64::{prelude::BASE64_STANDARD, Engine}; use coldsnap::{SnapshotUploader, SnapshotWaiter}; use rand_core::OsRng; -use serde_json::Value; -use ssh2::Session; use ssh_key::sha2::{Digest, Sha256}; use ssh_key::{Algorithm, LineEnding, PrivateKey}; use tokio::time::{sleep, Duration}; +use tokio_stream::StreamExt; use tracing::{error, info}; use whoami::username; @@ -44,6 +43,7 @@ impl Aws { let pub_key_location = "/home/".to_owned() + &username() + "/.ssh/" + &key_name + ".pub"; let mut clients = HashMap::::new(); + let mut ebs_clients = HashMap::::new(); for region in regions { clients.insert(region.clone(), { let config = aws_config::from_env() @@ -180,630 +180,61 @@ impl Aws { .is_empty()) } - /* SSH UTILITY */ - - pub async fn ssh_connect(&self, ip_address: &str) -> Result { - let tcp = TcpStream::connect(ip_address)?; - - let mut sess = Session::new()?; - - sess.set_tcp_stream(tcp); - sess.handshake()?; - sess.userauth_pubkey_file("ubuntu", None, Path::new(&self.key_location), None)?; - info!(ip_address, "SSH connection established"); - Ok(sess) - } - - fn ssh_exec(sess: &Session, command: &str) -> Result<(String, String)> { - let mut channel = sess - .channel_session() - .context("Failed to get channel session")?; - let mut stdout = String::new(); - let mut stderr = String::new(); - channel - .exec(command) - .context("Failed to execute command: {command}")?; - channel - .read_to_string(&mut stdout) - .context("Failed to read stdout")?; - channel - .stderr() - .read_to_string(&mut stderr) - .context("Failed to read stderr")?; - channel.wait_close().context("Failed to wait for close")?; - - Ok((stdout, stderr)) - } - - fn check_eif_blacklist_whitelist(&self, sess: &Session) -> Result { - if self.whitelist.is_some() || self.blacklist.is_some() { - let (stdout, stderr) = Self::ssh_exec(sess, "sha256sum /home/ubuntu/enclave.eif") - .context("Failed to calculate image hash")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Error calculating hash of enclave image"); - } - - let line = stdout - .split_whitespace() - .next() - .ok_or(anyhow!("Failed to retrieve image hash: {stdout}"))?; - - info!(line, "Hash"); - - if let Some(whitelist_list) = self.whitelist { - info!("Checking whitelist..."); - let mut allowed = false; - for entry in whitelist_list { - if entry.contains(line) { - allowed = true; - break; - } - } - if allowed { - info!("EIF ALLOWED!"); - } else { - info!("EIF NOT ALLOWED!"); - return Ok(false); - } - } - - if let Some(blacklist_list) = self.blacklist { - info!("Checking blacklist..."); - let mut allowed = true; - for entry in blacklist_list { - if entry.contains(line) { - allowed = false; - break; - } - } - if allowed { - info!("EIF ALLOWED!"); - } else { - info!("EIF NOT ALLOWED!"); - return Ok(false); - } - } - } - Ok(true) - } // [UPDATE NOTE] This function is obsolete, no enclaves - pub async fn run_enclave_impl( - &self, - job_id: &str, - family: &str, - instance_id: &str, - region: &str, - image_url: &str, - req_vcpu: i32, - req_mem: i64, - bandwidth: u64, - debug: 
bool, - init_params: &[u8], - ) -> Result<()> { - if family != "salmon" && family != "tuna" { - return Err(anyhow!("unsupported image family")); - } - - // make a ssh session - let public_ip_address = self - .get_instance_ip(instance_id, region) - .await - .context("could not fetch instance ip")?; - let sess = &self - .ssh_connect(&(public_ip_address + ":22")) - .await - .context("error establishing ssh connection")?; - - // set up ephemeral ports for the host - Self::run_fragment_ephemeral_ports(sess)?; - // set up nitro enclaves allocator - Self::run_fragment_allocator(sess, req_vcpu, req_mem)?; - // download enclave image and perform whitelist/blacklist checks - self.run_fragment_download_and_check_image(sess, image_url)?; - // set up bandwidth rate limiting - Self::run_fragment_bandwidth(sess, bandwidth)?; - - if family == "tuna" { - // set up iptables rules - Self::run_fragment_iptables_tuna(sess)?; - // set up job id in the init server - Self::run_fragment_init_server(sess, job_id, init_params)?; - } else { - // set up iptables rules - Self::run_fragment_iptables_salmon(sess)?; - } - - // set up debug logger if enabled - Self::run_fragment_logger(sess, debug)?; - // run the enclave - Self::run_fragment_enclave(sess, req_vcpu, req_mem, debug)?; - - Ok(()) - } - - // Enclave deployment fragments start here - // - // IMPORTANT: Each fragment is expected to be declarative where it will take the system - // to the desired state by executing whatever commands necessary - - // Goal: set ephemeral ports to 61440-65535 - // cheap, so just always overwrites previous state - fn run_fragment_ephemeral_ports(sess: &Session) -> Result<()> { - let (_, stderr) = Self::ssh_exec( - sess, - "sudo sysctl -w net.ipv4.ip_local_port_range=\"61440 65535\"", - ) - .context("Failed to set ephemeral ports")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Failed to set ephemeral ports"); - } - - Ok(()) - } - - // Goal: allocate the specified cpus and memory for the enclave - // WARN: Making this declarative would mean potentially restarting enclaves, - // not sure how to handle this, instead just prevent them from being different in market - fn run_fragment_allocator(sess: &Session, req_vcpu: i32, req_mem: i64) -> Result<()> { - if Self::is_enclave_running(sess)? 
{ - // return if enclave is already running - return Ok(()); - } - - Self::ssh_exec( - sess, - // interpolation is safe since values are integers - &format!("echo -e '---\\nmemory_mib: {req_mem}\\ncpu_count: {req_vcpu}' | sudo tee /etc/nitro_enclaves/allocator.yaml"), - ) - .context("Failed to set allocator file")?; - - let (_, stderr) = Self::ssh_exec( - sess, - "sudo systemctl daemon-reload && sudo systemctl restart nitro-enclaves-allocator.service", - ) - .context("Failed to restart allocator service")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)) - .context("Error restarting nitro-enclaves-allocator service"); - } - - info!( - cpus = req_vcpu, - memory = req_mem, - "Nitro Enclave Allocator Service set up" - ); - - Ok(()) - } - - // Goal: make enclave.eif match the provided image url - // uses image_url.txt file to track state instead of redownloading every time - // WARN: the enclave image at the url might have changed, we would have to - // redownload the image every time to verify it, simply ignore for now - fn run_fragment_download_and_check_image(&self, sess: &Session, image_url: &str) -> Result<()> { - let (stdout, stderr) = - Self::ssh_exec(sess, "cat image_url.txt").context("Failed to read image_url.txt")?; - - // check stderr to handle old CVMs without a url txt file - // we assume url was different and redownload - if stderr.is_empty() && stdout == image_url { - // return if url has not changed - return Ok(()); - } - - Self::ssh_exec( - sess, - &format!( - "curl -sL -o enclave.eif --max-filesize 4000000000 --max-time 120 '{}'", - shell_escape::escape(image_url.into()), - ), - ) - .context("Failed to download enclave image")?; - - let is_eif_allowed = self - .check_eif_blacklist_whitelist(sess) - .context("Failed whitelist/blacklist check")?; - - if !is_eif_allowed { - return Err(anyhow!("EIF NOT ALLOWED")); - } - - // store eif_url only when the image is allowed - Self::ssh_exec( - sess, - &format!( - "echo \"{}\" > image_url.txt", - shell_escape::escape(image_url.into()), - ), - ) - .context("Failed to write EIF URL to txt file.")?; - - Ok(()) - } - - // Goal: set bandwidth rate - // TODO: this always resets tc rules, check if rate has changed - fn run_fragment_bandwidth(sess: &Session, bandwidth: u64) -> Result<()> { - let (stdout, stderr) = Self::ssh_exec(sess, "sudo tc qdisc show dev ens5 root") - .context("Failed to fetch tc config")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)) - .context("Error fetching network interface qdisc configuration"); - } - let entries: Vec<&str> = stdout.trim().split('\n').collect(); - let mut is_any_rule_set = true; - if entries[0].to_lowercase().contains("qdisc mq 0: root") && entries.len() == 1 { - is_any_rule_set = false; - } - - // remove previously defined rules - if is_any_rule_set { - let (_, stderr) = Self::ssh_exec(sess, "sudo tc qdisc del dev ens5 root")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)) - .context("Error removing network interface qdisc configuration"); - } - } - - let (_, stderr) = Self::ssh_exec( - sess, - // interpolation is safe since values are integers - &format!("sudo tc qdisc add dev ens5 root tbf rate {bandwidth}kbit burst 4000Mb latency 100ms"), - )?; - - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Error setting up bandwidth limit"); - } - - Ok(()) - } - - // Goal: set up iptables rules for salmon - // first two rules are just expected to be there - // rest of the rules are replaced if needed - fn run_fragment_iptables_salmon(sess: &Session) -> Result<()> 
{ - let iptables_rules: [&str; 5] = [ - "-P PREROUTING ACCEPT", - // expected to exist due to how the images are built - "-A PREROUTING -m addrtype --dst-type LOCAL -j DOCKER", - "-A PREROUTING -i ens5 -p tcp -m tcp --dport 80 -j REDIRECT --to-ports 1200", - "-A PREROUTING -i ens5 -p tcp -m tcp --dport 443 -j REDIRECT --to-ports 1200", - "-A PREROUTING -i ens5 -p tcp -m tcp --dport 1024:65535 -j REDIRECT --to-ports 1200", - ]; - let (stdout, stderr) = Self::ssh_exec(sess, "sudo iptables -t nat -S PREROUTING") - .context("Failed to query iptables")?; - - if !stderr.is_empty() || stdout.is_empty() { - return Err(anyhow!(stderr)).context("Failed to get iptables rules"); - } - - let rules: Vec<&str> = stdout.trim().split('\n').map(|s| s.trim()).collect(); - - for i in 0..2 { - if rules[i] != iptables_rules[i] { - return Err(anyhow!( - "Failed to match rule: got '{}' expected '{}'", - rules[i], - iptables_rules[i], - )); - } - } - - // return if rest of the rules match - if rules[2..] == iptables_rules[2..] { - return Ok(()); - } - - // rules have to be replaced - // remove existing rules beyond the docker one - for _ in 2..rules.len() { - // keep deleting rule 2 till nothing would be left - let (_, stderr) = Self::ssh_exec(sess, "sudo iptables -t nat -D PREROUTING 2") - .context("Failed to delete iptables rule")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Failed to delete iptables rule"); - } - } - - // set rules - for rule in iptables_rules[2..].iter() { - let (_, stderr) = Self::ssh_exec(sess, &format!("sudo iptables -t nat {rule}")) - .context("Failed to set iptables rule")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Failed to set iptables rule"); - } - } - - Ok(()) - } - - // Goal: set up iptables rules for tuna - // first two rules are just expected to be there - // rest of the rules are replaced if needed - fn run_fragment_iptables_tuna(sess: &Session) -> Result<()> { - let iptables_rules: [&str; 4] = [ - "-P INPUT ACCEPT", - "-A INPUT -i ens5 -p tcp -m tcp --dport 80 -j NFQUEUE --queue-num 0", - "-A INPUT -i ens5 -p tcp -m tcp --dport 443 -j NFQUEUE --queue-num 0", - "-A INPUT -i ens5 -p tcp -m tcp --dport 1024:61439 -j NFQUEUE --queue-num 0", - ]; - let (stdout, stderr) = - Self::ssh_exec(sess, "sudo iptables -S INPUT").context("Failed to query iptables")?; - - if !stderr.is_empty() || stdout.is_empty() { - return Err(anyhow!(stderr)).context("Failed to get iptables rules"); - } - - let rules: Vec<&str> = stdout.trim().split('\n').map(|s| s.trim()).collect(); - - for i in 0..1 { - if rules[i] != iptables_rules[i] { - return Err(anyhow!( - "Failed to match rule: got '{}' expected '{}'", - rules[i], - iptables_rules[i], - )); - } - } - - // return if rest of the rules match - if rules[1..] == iptables_rules[1..] 
{ - return Ok(()); - } - - // rules have to be replaced - // remove existing rules beyond the docker one - for _ in 1..rules.len() { - // keep deleting rule 1 till nothing would be left - let (_, stderr) = Self::ssh_exec(sess, "sudo iptables -D INPUT 1") - .context("Failed to delete iptables rule")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Failed to delete iptables rule"); - } - } - - // set rules - for rule in iptables_rules[1..].iter() { - let (_, stderr) = Self::ssh_exec(sess, &format!("sudo iptables {rule}")) - .context("Failed to set iptables rule")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Failed to set iptables rule"); - } - } - - Ok(()) - } - - // Goal: set up init server params - // assumes the .conf has not been modified externally - // cheap, so just always does `sed` - // init params are updated if they have changed - fn run_fragment_init_server(sess: &Session, job_id: &str, init_params: &[u8]) -> Result<()> { - // set job id - let (_, stderr) = Self::ssh_exec( - sess, - &format!( - "sudo sed -i -e 's/placeholder_job_id/{}/g' /etc/supervisor/conf.d/oyster-init-server.conf", - job_id.chars().filter(|c| c.is_ascii_alphanumeric()).collect::(), - ), - ) - .context("Failed to set job id for init server")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Failed to set job id for init server"); - } - - // Check if init params have changed - let params_changed = { - // Calculate hash of new params - let mut hasher = Sha256::new(); - hasher.update(init_params); - let new_hash = hex::encode(hasher.finalize()); - - // get old hash - let (old_hash, _) = Self::ssh_exec( - sess, - "sha256sum /home/ubuntu/init-params 2>/dev/null | cut -d ' ' -f 1", - ) - .context("Failed to set job id for init server")?; - - old_hash.trim() != new_hash - }; - - if !params_changed { - return Ok(()); - } - - info!("Init parameters changed, terminating enclave for restart"); - let (_, stderr) = Self::ssh_exec(sess, "nitro-cli terminate-enclave --all")?; - - if !stderr.is_empty() && !stderr.contains("Successfully terminated enclave") { - return Err(anyhow!(stderr)).context("Error terminating enclave"); - } - - // set init params - let mut init_params_file = sess - .scp_send( - Path::new("/home/ubuntu/init-params"), - 0o644, - init_params.len() as u64, - None, - ) - .context("failed to scp init params")?; - init_params_file - .write_all(init_params) - .context("failed to write init params")?; - init_params_file.send_eof().context("failed to send eof")?; - init_params_file - .wait_eof() - .context("failed to wait for eof")?; - init_params_file.close().context("failed to close")?; - init_params_file - .wait_close() - .context("failed to wait for close")?; - - let (_, stderr) = Self::ssh_exec(sess, "sudo supervisorctl update") - .context("Failed to update init server")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Failed to update init server"); - } - - Ok(()) - } - - // Goal: set up or tear down debug logger - // if debug is set, downloads logger and set it up, does not care about previous setup - // if debug is false, stops the logger if it is running - fn run_fragment_logger(sess: &Session, debug: bool) -> Result<()> { - if debug { - // check if logger is running - let (stdout, _) = Self::ssh_exec(sess, "sudo supervisorctl status logger") - .context("Failed to get logger status")?; - if stdout.contains("RUNNING") { - // logger is already running - return Ok(()); - } - - // check if logger is stopped - if 
stdout.contains("STOPPED") { - // logger is stopped, just start - let (_, stderr) = Self::ssh_exec(sess, "sudo supervisorctl start logger") - .context("Failed to start logger")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Failed to start logger"); - } - return Ok(()); - } - - // set up logger if debug flag is set - let (_, stderr) = Self::ssh_exec(sess, "curl -fsS https://artifacts.marlin.org/oyster/binaries/nitro-logger_v1.0.0_linux_`uname -m | sed 's/x86_64/amd64/g; s/aarch64/arm64/g'` -o /home/ubuntu/nitro-logger && chmod +x /home/ubuntu/nitro-logger") - .context("Failed to download logger")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Failed to download logger"); - } - - let (_, stderr) = Self::ssh_exec( - sess, - "< Result<()> { - let (stdout, stderr) = - Self::ssh_exec(sess, "nitro-cli describe-eif --eif-path enclave.eif") - .context("could not describe eif")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Error describing eif"); - } - - let eif_data: HashMap = - serde_json::from_str(&stdout).context("could not parse eif description")?; - - let (stdout, stderr) = Self::ssh_exec(sess, "nitro-cli describe-enclaves") - .context("could not describe enclaves")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Error describing enclaves"); - } - - let enclave_data: Vec> = - serde_json::from_str(&stdout).context("could not parse enclave description")?; - - if let Some(item) = enclave_data.first() { - if item["Measurements"] == eif_data["Measurements"] - && item["Flags"] == (if debug { "DEBUG_MODE" } else { "NONE" }) - { - // same enclave, correct debug mode, just return - return Ok(()); - } else { - // different enclave, kill it - let (_, stderr) = Self::ssh_exec(sess, "nitro-cli terminate-enclave --all")?; - - if !stderr.is_empty() && !stderr.contains("Successfully terminated enclave") { - return Err(anyhow!(stderr)).context("Error terminating enclave"); - } - } - } - - let (_, stderr) = Self::ssh_exec( - sess, - &format!( - "nitro-cli run-enclave --cpu-count {req_vcpu} --memory {req_mem} --eif-path enclave.eif --enclave-cid 88{}", - if debug { " --debug-mode" } else { "" } - ), - )?; - - if !stderr.is_empty() { - if !stderr.contains("Started enclave with enclave-cid") { - return Err(anyhow!(stderr)).context("Error running enclave image"); - } else { - info!(stderr); - } - } - - info!("Enclave running"); - - Ok(()) - } - - // Enclave deployment fragments end here - - fn is_enclave_running(sess: &Session) -> Result { - let (stdout, stderr) = Self::ssh_exec(sess, "nitro-cli describe-enclaves") - .context("could not describe enclaves")?; - if !stderr.is_empty() { - return Err(anyhow!(stderr)).context("Error describing enclaves"); - } - - Ok(stdout.trim() != "[]") - } + // pub async fn run_enclave_impl( + // &self, + // job_id: &str, + // family: &str, + // instance_id: &str, + // region: &str, + // image_url: &str, + // req_vcpu: i32, + // req_mem: i64, + // bandwidth: u64, + // debug: bool, + // init_params: &[u8], + // ) -> Result<()> { + // if family != "salmon" && family != "tuna" { + // return Err(anyhow!("unsupported image family")); + // } + + // // make a ssh session + // let public_ip_address = self + // .get_instance_ip(instance_id, region) + // .await + // .context("could not fetch instance ip")?; + // let sess = &self + // .ssh_connect(&(public_ip_address + ":22")) + // .await + // .context("error establishing ssh connection")?; + + // // set up ephemeral ports for the host + // 
Self::run_fragment_ephemeral_ports(sess)?;
+ // // set up nitro enclaves allocator
+ // Self::run_fragment_allocator(sess, req_vcpu, req_mem)?;
+ // // download enclave image and perform whitelist/blacklist checks
+ // self.run_fragment_download_and_check_image(sess, image_url)?;
+ // // set up bandwidth rate limiting
+ // Self::run_fragment_bandwidth(sess, bandwidth)?;
+
+ // if family == "tuna" {
+ // // set up iptables rules
+ // Self::run_fragment_iptables_tuna(sess)?;
+ // // set up job id in the init server
+ // Self::run_fragment_init_server(sess, job_id, init_params)?;
+ // } else {
+ // // set up iptables rules
+ // Self::run_fragment_iptables_salmon(sess)?;
+ // }
+
+ // // set up debug logger if enabled
+ // Self::run_fragment_logger(sess, debug)?;
+ // // run the enclave
+ // Self::run_fragment_enclave(sess, req_vcpu, req_mem, debug)?;
+
+ // Ok(())
+ // }
 /* AWS EC2 UTILITY */
 // [UPDATE NOTE] Should return private IP, there won't be any Public IPs.
@@ -837,7 +268,6 @@ EOF
 &self,
 job: &JobId,
 instance_type: InstanceType,
- architecture: &str,
 region: &str,
 init_params: &[u8],
 ami_id: &str,
@@ -877,13 +307,12 @@ EOF
 .run_instances()
 .image_id(ami_id)
 .instance_type(instance_type)
- .key_name(self.key_name.clone())
 .min_count(1)
 .max_count(1)
 .tag_specifications(tags)
 .security_group_ids(sec_group)
 .subnet_id(subnet)
- .user_data(String::from_utf8_lossy(init_params).to_string())
+ .user_data(BASE64_STANDARD.encode(init_params))
 .send()
 .await
 .context("could not run instance")?
@@ -1129,33 +558,6 @@ EOF
 .into())
 }
- // [UPDATE NOTE] This function is obsolete, no enclaves
- pub async fn get_enclave_state(&self, instance_id: &str, region: &str) -> Result<String> {
- let public_ip_address = self
- .get_instance_ip(instance_id, region)
- .await
- .context("could not fetch instance ip")?;
- let sess = self
- .ssh_connect(&(public_ip_address + ":22"))
- .await
- .context("error establishing ssh connection")?;
-
- let (stdout, stderr) = Self::ssh_exec(&sess, "nitro-cli describe-enclaves")
- .context("could not describe enclaves")?;
- if !stderr.is_empty() {
- return Err(anyhow!(stderr)).context("Error describing enclaves");
- }
-
- let enclave_data: Vec<HashMap<String, Value>> =
- serde_json::from_str(&stdout).context("could not parse enclave description")?;
-
- Ok(enclave_data
- .first()
- .and_then(|data| data.get("State").and_then(Value::as_str))
- .unwrap_or("No state found")
- .to_owned())
- }
-
 async fn allocate_ip_addr(&self, job: &JobId, region: &str) -> Result<(String, String)> {
 let (exist, alloc_id, public_ip) = self
 .get_job_elastic_ip(job, region)
@@ -1296,6 +698,7 @@ EOF
 )
 }
+ // [UPDATE NOTE] Associate IP address to secondary IP of gateway VM
 async fn associate_address(
 &self,
 instance_id: &str,
@@ -1345,10 +748,9 @@ EOF
 req_vcpu: i32,
 bandwidth: u64,
 image_url: &str,
- debug: bool,
 init_params: &[u8],
 ) -> Result<()> {
- let (mut exist, mut instance, state) = self
+ let (mut exist, instance, state) = self
 .get_job_instance_id(job, region)
 .await
 .context("failed to get job instance")?;
@@ -1377,7 +779,7 @@ EOF
 // [UPDATE NOTE] Check AMI corresponding to given job. If it doesn't exist then check if snapshot exists.
 // If that doesn't exist, download the image, upload it as a snapshot and register an AMI. If the snapshot exists, register the AMI from it.
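The lookup-or-create flow in the note above starts with a tag-filtered describe_images call; only when no AMI (and no snapshot) is found does the operator fall back to downloading and uploading the image. A minimal sketch of the AMI half of that idempotency check, assuming the jobId tag is applied at register_image time as the code in this series does (the helper name is illustrative, not part of the patch):

// Illustrative only: look up an AMI previously registered for this job.
async fn find_job_ami(
    client: &aws_sdk_ec2::Client,
    job_id: &str,
) -> anyhow::Result<Option<String>> {
    let resp = client
        .describe_images()
        .owners("self")
        .filters(
            aws_sdk_ec2::types::Filter::builder()
                .name("tag:jobId")
                .values(job_id)
                .build(),
        )
        .send()
        .await?;
    Ok(resp
        .images()
        .first()
        .and_then(|img| img.image_id())
        .map(str::to_owned))
}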
- let (mut ami_exist, mut ami_id) = self.get_job_ami_id(job, region).await + let (ami_exist, mut ami_id) = self.get_job_ami_id(job, region).await .context("failed to get job ami")?; if !ami_exist { @@ -1401,11 +803,11 @@ EOF while let Some(item) = stream.next().await { let chunk = item.context(format!("Failed to read chunk from response stream for job ID {}", job.id))?; - tmp_file.write_all(&chunk).await + tmp_file.write_all(&chunk) .context(format!("Failed to write chunk to temporary file for job ID {}", job.id))?; } - tmp_file.flush().await + tmp_file.flush() .context(format!("Failed to flush temporary file for job ID {}", job.id))?; let mut hasher = Sha256::new(); @@ -1444,7 +846,7 @@ EOF } - let uploader = SnapshotUploader::new(self.ebs_client(region).await?); + let uploader = SnapshotUploader::new(self.ebs_client(region).await.clone()); let managed_tag = aws_sdk_ebs::types::Tag::builder().key("managedBy").value("marlin").build(); let project_tag = aws_sdk_ebs::types::Tag::builder().key("project").value("oyster").build(); let job_tag = aws_sdk_ebs::types::Tag::builder().key("jobId").value(&job.id).build(); @@ -1463,10 +865,10 @@ EOF contract_tag, chain_tag, ]; - snapshot_id = uploader.upload_from_file(Path::new(&tmp_file_path), None, None, snapshot_tags, None, None, None).await + snapshot_id = uploader.upload_from_file(Path::new(&tmp_file_path), None, None, Some(snapshot_tags), None, None, None).await .context("Failed to upload snapshot from image file")?; info!(snapshot_id, "Snapshot uploaded"); - let waiter = SnapshotWaiter::new(self.ebs_client(region).await?); + let waiter = SnapshotWaiter::new(self.client(region).await.clone()); waiter.wait_for_completed(snapshot_id.as_str()).await .context("Failed to wait for snapshot completion")?; info!(snapshot_id, "Snapshot is now completed"); @@ -1474,11 +876,11 @@ EOF } // Register AMI from snapshot - let block_dev_mapping = aws_sdk_ec2::models::BlockDeviceMapping::builder() + let block_dev_mapping = BlockDeviceMapping::builder() .device_name("/dev/xvda") .ebs( - aws_sdk_ec2::models::EbsBlockDevice::builder() - .snapshot_id(snapshot_id) + EbsBlockDevice::builder() + .snapshot_id(snapshot_id.clone()) .build(), ) .build(); @@ -1505,13 +907,15 @@ EOF info!(architecture); } } - let resp = self.client(region).await .register_image() .name(format!("marlin/oyster/job-{}", job.id)) - .architecture(architecture) + .architecture(FromStr::from_str(&architecture)?) .root_device_name("/dev/xvda") .block_device_mappings(block_dev_mapping) + .tpm_support(TpmSupportValues::V20) + .virtualization_type("hvm".to_string()) + .boot_mode(BootModeValues::Uefi) .tag_specifications( TagSpecification::builder() .resource_type(ResourceType::Image) @@ -1546,31 +950,32 @@ EOF if !exist { // either no old instance or old instance was not enough, launch new one - instance = self - .spin_up_instance(job, instance_type, region, req_mem, req_vcpu, init_params, ami_id) + self + .spin_up_instance(job, instance_type, region, req_mem, req_vcpu, init_params, ami_id.as_str()) .await .context("failed to spin up instance")?; } + Ok(()) // [UPDATE NOTE] No enclave deployment needed. Check all the steps in this function if needed // Pick following: // 1. Rate limit configuration // 2. User Data setup // 3. 
Pick user image
- self.run_enclave_impl(
- &job.id,
- family,
- &instance,
- region,
- image_url,
- req_vcpu,
- req_mem,
- bandwidth,
- debug,
- init_params,
- )
- .await
- .context("failed to run enclave")
+ // self.run_enclave_impl(
+ // &job.id,
+ // family,
+ // &instance,
+ // region,
+ // image_url,
+ // req_vcpu,
+ // req_mem,
+ // bandwidth,
+ // debug,
+ // init_params,
+ // )
+ // .await
+ // .context("failed to run enclave")
 }
@@ -1597,24 +1002,11 @@ EOF
 .send()
 .await
 .context("could not describe instance types")?;
- let mut architecture = "amd64".to_string();
 let mut v_cpus: i32 = 4;
 let mut mem: i64 = 8192;
 let instance_types = resp.instance_types();
 for instance in instance_types {
- let supported_architectures = instance
- .processor_info()
- .ok_or(anyhow!("error fetching instance processor info"))?
- .supported_architectures();
- if let Some(arch) = supported_architectures.iter().next() {
- if arch.as_str() == "x86_64" {
- "amd64".clone_into(&mut architecture);
- } else {
- "arm64".clone_into(&mut architecture);
- }
- info!(architecture);
- }
 v_cpus = instance
 .v_cpu_info()
 .ok_or(anyhow!("error fetching instance v_cpu info"))?
@@ -1633,7 +1025,7 @@ EOF
 return Err(anyhow!("Required memory or vcpus are more than available"));
 }
 let instance = self
- .launch_instance(job, instance_type, &architecture, region, init_params, ami_id)
+ .launch_instance(job, instance_type, region, init_params, ami_id)
 .await
 .context("could not launch instance")?;
 sleep(Duration::from_secs(100)).await;
@@ -1727,7 +1119,6 @@ impl InfraProvider for Aws {
 req_vcpu: i32,
 bandwidth: u64,
 image_url: &str,
- debug: bool,
 init_params: &[u8],
 ) -> Result<()> {
 self.spin_up_impl(
@@ -1738,7 +1129,6 @@ impl InfraProvider for Aws {
 req_vcpu,
 bandwidth,
 image_url,
- debug,
 init_params,
 )
 .await
@@ -1791,12 +1181,78 @@ impl InfraProvider for Aws {
 if !exists || (state != "running" && state != "pending") {
 return Ok(false);
 }
+ // TODO: check whether state == pending is fine or not
+ Ok(true)
+ }
+}
-
- let res = self
- .get_enclave_state(&instance_id, region)
+// Test module for the Aws spin up/down flow
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::market::InfraProvider;
+ use crate::market::JobId;
+
+ #[tokio::test]
+ async fn test_aws_spin_up_down() {
+ let mut aws = Aws::new(
+ "cp".to_string(),
+ &["ap-southeast-2".to_string()],
+ "cp".to_string(),
+ None,
+ None
+ ).await;
+ let job = JobId {
+ id: "test-job".to_string(),
+ operator: "test-operator".to_string(),
+ chain: "test-chain".to_string(),
+ contract: "test-contract".to_string(),
+ };
+ let region = "ap-southeast-2";
+ let instance_type = "t4g.micro";
+ let req_mem = 1024;
+ let req_vcpu = 2;
+ let bandwidth = 100;
+ let image_url = "https://example.com";
+ let init_params = b"test-init-params";
+
+ // Spin up
+ let spin_up_result = aws
+ .spin_up(
+ &job,
+ instance_type,
+ region,
+ req_mem,
+ req_vcpu,
+ bandwidth,
+ image_url,
+ init_params,
+ )
+ .await;
+ assert!(spin_up_result.is_ok(), "Spin up failed: {:?}", spin_up_result.err());
+
+ // Check if running
+ let is_running = aws
+ .check_enclave_running(&job, region)
+ .await
+ .expect("Failed to check if enclave is running");
+ assert!(is_running, "Enclave should be running after spin up");
+
+ // Get job IP
+ let job_ip_result = aws.get_job_ip(&job, region).await;
+ assert!(job_ip_result.is_ok(), "Get job IP failed: {:?}", job_ip_result.err());
+ let job_ip = job_ip_result.unwrap();
+ println!("Job IP: {}", job_ip);
+
+ // Spin down
+ let spin_down_result =
aws.spin_down(&job, region).await; + assert!(spin_down_result.is_ok(), "Spin down failed: {:?}", spin_down_result.err()); + + // Check if not running + let is_running_after_down = aws + .check_enclave_running(&job, region) .await - .context("could not get current enclace state")?; - // There can be 2 states - RUNNING or TERMINATING - Ok(res == "RUNNING") + .expect("Failed to check if enclave is running after spin down"); + assert!(!is_running_after_down, "Enclave should not be running after spin down"); } -} +} \ No newline at end of file diff --git a/operator/control-plane/src/enclaver.rs b/operator/control-plane/src/enclaver.rs deleted file mode 100644 index 0f6c47ec3..000000000 --- a/operator/control-plane/src/enclaver.rs +++ /dev/null @@ -1,57 +0,0 @@ -use cp::aws; - -use anyhow::Context; -use anyhow::Result; -use clap::Parser; - -#[derive(Parser)] -#[clap(author, version, about, long_about = None)] -/// Control plane for Oyster -struct Cli { - /// AWS profile - #[clap(long, value_parser)] - profile: String, - - /// SSH key name - #[clap(long, value_parser)] - key_name: String, - - /// Instance id - #[clap(long, value_parser)] - instance: String, - - /// AWS region - #[clap(long, value_parser, default_value = "ap-south-1")] - region: String, - - /// AMI family - #[clap(long, value_parser, default_value = "salmon")] - family: String, - - /// Enclave image URL - #[clap(long, value_parser)] - url: String, -} - -#[tokio::main] -pub async fn main() -> Result<()> { - let cli = Cli::parse(); - - let aws = aws::Aws::new(cli.profile, &[cli.region.clone()], cli.key_name, None, None).await; - aws.run_enclave_impl( - "0x01020304", - &cli.family, - &cli.instance, - &cli.region, - &cli.url, - 2, - 4096, - 32, - false, - &[], - ) - .await - .context("could not deploy enclave")?; - - Ok(()) -} diff --git a/operator/control-plane/src/main.rs b/operator/control-plane/src/main.rs index 76a4f893c..faa883887 100644 --- a/operator/control-plane/src/main.rs +++ b/operator/control-plane/src/main.rs @@ -149,7 +149,7 @@ async fn run() -> Result<()> { info!(?cli.address_whitelist); info!(?cli.port); - let regions: Vec = cli.regions.split(',').map(|r| (r.into())).collect(); + let regions: Vec = cli.regions.split(',').map(|r| r.into()).collect(); let eif_whitelist = if !cli.whitelist.is_empty() { let eif_whitelist_vec: Vec = parse_file(cli.whitelist) diff --git a/operator/control-plane/src/market.rs b/operator/control-plane/src/market.rs index 004f5c7be..c7f694edf 100644 --- a/operator/control-plane/src/market.rs +++ b/operator/control-plane/src/market.rs @@ -57,7 +57,6 @@ pub trait InfraProvider { req_vcpu: i32, bandwidth: u64, image_url: &str, - debug: bool, init_params: &[u8], ) -> impl Future> + Send; @@ -85,7 +84,6 @@ where req_vcpu: i32, bandwidth: u64, image_url: &str, - debug: bool, init_params: &[u8], ) -> Result<()> { (**self) @@ -97,7 +95,6 @@ where req_vcpu, bandwidth, image_url, - debug, init_params, ) .await @@ -600,7 +597,6 @@ struct JobState<'a> { region: String, req_vcpus: i32, req_mem: i64, - debug: bool, init_params: Box<[u8]>, // whether instance should exist or not @@ -636,7 +632,6 @@ impl<'a> JobState<'a> { region: "ap-south-1".to_string(), req_vcpus: 2, req_mem: 4096, - debug: false, init_params: Box::new([0; 0]), infra_state: false, infra_change_time: Instant::now(), @@ -727,7 +722,6 @@ impl<'a> JobState<'a> { self.req_vcpus, self.bandwidth, &self.eif_url, - self.debug, &self.init_params, ) .await; @@ -1071,8 +1065,6 @@ impl<'a> JobState<'a> { info!(self.req_vcpus, "Required vcpu"); } - let 
debug = metadata_json["debug"].as_bool().unwrap_or(false); - self.debug = debug; let Some(url) = metadata_json["url"].as_str() else { return Err(anyhow!("EIF url not found! Exiting job")); @@ -1322,13 +1314,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -1374,13 +1364,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: b"some params".to_vec().into_boxed_slice(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -1426,13 +1414,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: true, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -1455,7 +1441,7 @@ mod tests { let job_id = format!("{:064x}", 1); let logs = vec![ - (0, Action::Open("{\"region\":\"ap-south-1\",\"url\":\"https://example.com/enclave.eif\",\"instance\":\"c6a.xlarge\",\"memory\":4096,\"vcpu\":2,\"family\":\"tuna\"}".to_string(),31000000000000u64,31000u64,0)), + (0, Action::Open("{\"region\":\"ap-south-1\",\"url\":\"https://example.com/enclave.eif\",\"instance\":\"c6a.xlarge\",\"memory\":4096,\"vcpu\":2}".to_string(),31000000000000u64,31000u64,0)), (301, Action::Close), ]; @@ -1478,13 +1464,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "tuna".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -1533,13 +1517,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -1589,13 +1571,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -1882,13 +1862,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -1937,13 +1915,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: 
"c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -1992,13 +1968,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2121,13 +2095,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2337,13 +2309,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/updated-enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2390,13 +2360,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2479,13 +2447,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: b"some params".to_vec().into_boxed_slice(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2532,13 +2498,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2585,13 +2549,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2601,82 +2563,11 @@ mod tests { time: start_time + Duration::from_secs(400), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/updated-enclave.eif".into(), - debug: false, - init_params: [].into(), - contract_address: "xyz".into(), - chain_id: "123".into(), - instance_id: compute_instance_id(0), - }), - TestAwsOutcome::SpinDown(test::SpinDownOutcome { - time: start_time + Duration::from_secs(505), 
- job: job_id, - region: "ap-south-1".into(), - }), - ], - }; - - run_test(start_time, logs, job_manager_params, test_results).await; - } - - #[tokio::test(start_paused = true)] - async fn test_debug_update_after_spin_up() { - let start_time = Instant::now(); - let job_id = format!("{:064x}", 1); - - let logs = vec![ - (0, Action::Open("{\"region\":\"ap-south-1\",\"url\":\"https://example.com/enclave.eif\",\"instance\":\"c6a.xlarge\",\"memory\":4096,\"vcpu\":2,\"debug\":true}".to_string(),31000000000000u64,31000u64,0)), - (400, Action::MetadataUpdated("{\"region\":\"ap-south-1\",\"url\":\"https://example.com/enclave.eif\",\"instance\":\"c6a.xlarge\",\"memory\":4096,\"vcpu\":2}".to_string())), - (505, Action::Close), - ]; - - let job_manager_params = JobManagerParams { - job_id: market::JobId { - id: job_id.clone(), - operator: "abc".into(), - contract: "xyz".into(), - chain: "123".into(), - }, - allowed_regions: vec!["ap-south-1".to_owned()], - address_whitelist: vec![], - address_blacklist: vec![], - }; - - let test_results = TestResults { - res: JobResult::Done, - outcomes: vec![ - TestAwsOutcome::SpinUp(test::SpinUpOutcome { - time: start_time + Duration::from_secs(300), - job: job_id.clone(), - instance_type: "c6a.xlarge".into(), - family: "salmon".into(), - region: "ap-south-1".into(), - req_mem: 4096, - req_vcpu: 2, - bandwidth: 76, - image_url: "https://example.com/enclave.eif".into(), - debug: true, - init_params: [].into(), - contract_address: "xyz".into(), - chain_id: "123".into(), - instance_id: compute_instance_id(0), - }), - TestAwsOutcome::SpinUp(test::SpinUpOutcome { - time: start_time + Duration::from_secs(400), - job: job_id.clone(), - instance_type: "c6a.xlarge".into(), - family: "salmon".into(), - region: "ap-south-1".into(), - req_mem: 4096, - req_vcpu: 2, - bandwidth: 76, - image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2724,13 +2615,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2777,13 +2666,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2793,13 +2680,11 @@ mod tests { time: start_time + Duration::from_secs(400), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: b"some params".to_vec().into_boxed_slice(), contract_address: "xyz".into(), chain_id: "123".into(), @@ -2846,13 +2731,11 @@ mod tests { time: start_time + Duration::from_secs(300), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), @@ 
-2862,13 +2745,11 @@ mod tests { time: start_time + Duration::from_secs(400), job: job_id.clone(), instance_type: "c6a.xlarge".into(), - family: "salmon".into(), region: "ap-south-1".into(), req_mem: 4096, req_vcpu: 2, bandwidth: 76, image_url: "https://example.com/enclave.eif".into(), - debug: false, init_params: [].into(), contract_address: "xyz".into(), chain_id: "123".into(), diff --git a/operator/control-plane/src/test.rs b/operator/control-plane/src/test.rs index f03237eae..f76b4cea4 100644 --- a/operator/control-plane/src/test.rs +++ b/operator/control-plane/src/test.rs @@ -19,13 +19,11 @@ pub struct SpinUpOutcome { pub time: Instant, pub job: String, pub instance_type: String, - pub family: String, pub region: String, pub req_mem: i64, pub req_vcpu: i32, pub bandwidth: u64, pub image_url: String, - pub debug: bool, pub init_params: Box<[u8]>, pub contract_address: String, pub chain_id: String, @@ -118,13 +116,11 @@ impl InfraProvider for TestAws { &mut self, job: &JobId, instance_type: &str, - family: &str, region: &str, req_mem: i64, req_vcpu: i32, bandwidth: u64, image_url: &str, - debug: bool, init_params: &[u8], ) -> Result<()> { let res = self.instances.get_key_value(&job.id); @@ -133,13 +129,11 @@ impl InfraProvider for TestAws { time: Instant::now(), job: job.id.clone(), instance_type: instance_type.to_owned(), - family: family.to_owned(), region: region.to_owned(), req_mem, req_vcpu, bandwidth, image_url: image_url.to_owned(), - debug, init_params: init_params.into(), contract_address: job.contract.clone(), chain_id: job.chain.clone(), @@ -159,13 +153,11 @@ impl InfraProvider for TestAws { time: Instant::now(), job: job.id.clone(), instance_type: instance_type.to_owned(), - family: family.to_owned(), region: region.to_owned(), req_mem, req_vcpu, bandwidth, image_url: image_url.to_owned(), - debug, init_params: init_params.into(), contract_address: job.contract.clone(), chain_id: job.chain.clone(), From 27fef142fd55b7055d0050aa51a8602a0eee3c13 Mon Sep 17 00:00:00 2001 From: Vikrant Garg Date: Fri, 21 Nov 2025 17:09:11 +0530 Subject: [PATCH 03/12] Add SSH connection and rate limiting configuration to AWS struct --- operator/control-plane/src/aws.rs | 482 +++++++++++++++++++++++++----- 1 file changed, 399 insertions(+), 83 deletions(-) diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs index 35733e6b5..ad1096af1 100644 --- a/operator/control-plane/src/aws.rs +++ b/operator/control-plane/src/aws.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::fs::File; use std::io::{BufReader, Read, Write}; +use std::net::TcpStream; use std::path::Path; use std::str::FromStr; @@ -12,9 +13,10 @@ use coldsnap::{SnapshotUploader, SnapshotWaiter}; use rand_core::OsRng; use ssh_key::sha2::{Digest, Sha256}; use ssh_key::{Algorithm, LineEnding, PrivateKey}; +use ssh2::Session; use tokio::time::{sleep, Duration}; use tokio_stream::StreamExt; -use tracing::{error, info}; +use tracing::{debug, error, info}; use whoami::username; use crate::market::{InfraProvider, JobId}; @@ -180,6 +182,40 @@ impl Aws { .is_empty()) } + /* SSH UTILITY */ + + pub async fn ssh_connect(&self, ip_address: &str) -> Result { + let tcp = TcpStream::connect(ip_address)?; + + let mut sess = Session::new()?; + + sess.set_tcp_stream(tcp); + sess.handshake()?; + sess.userauth_pubkey_file("ubuntu", None, Path::new(&self.key_location), None)?; + info!(ip_address, "SSH connection established"); + Ok(sess) + } + + fn ssh_exec(sess: &Session, command: &str) -> Result<(String, String)> { + let mut 
channel = sess
+ .channel_session()
+ .context("Failed to get channel session")?;
+ let mut stdout = String::new();
+ let mut stderr = String::new();
+ channel
+ .exec(command)
+ .with_context(|| format!("Failed to execute command: {command}"))?;
+ channel
+ .read_to_string(&mut stdout)
+ .context("Failed to read stdout")?;
+ channel
+ .stderr()
+ .read_to_string(&mut stderr)
+ .context("Failed to read stderr")?;
+ channel.wait_close().context("Failed to wait for close")?;
+
+ Ok((stdout, stderr))
+ }
 // [UPDATE NOTE] This function is obsolete, no enclaves
 // pub async fn run_enclave_impl(
@@ -238,7 +274,7 @@ impl Aws {
 /* AWS EC2 UTILITY */
 // [UPDATE NOTE] Should return private IP, there won't be any Public IPs.
- pub async fn get_instance_ip(&self, instance_id: &str, region: &str) -> Result<String> {
+ pub async fn get_instance_public_ip(&self, instance_id: &str, region: &str) -> Result<String> {
 Ok(self
 .client(region)
 .await
@@ -384,11 +420,7 @@ impl Aws {
 .to_string())
 }
- async fn get_job_snapshot_id(
- &self,
- job: &JobId,
- region: &str,
- ) -> Result<(bool, String)> {
+ async fn get_job_snapshot_id(&self, job: &JobId, region: &str) -> Result<(bool, String)> {
 let job_filter = Filter::builder().name("tag:jobId").values(&job.id).build();
 let operator_filter = Filter::builder()
 .name("tag:operator")
@@ -429,11 +461,7 @@ impl Aws {
 }
 }
- async fn get_job_ami_id(
- &self,
- job: &JobId,
- region: &str,
- ) -> Result<(bool, String)> {
+ async fn get_job_ami_id(&self, job: &JobId, region: &str) -> Result<(bool, String)> {
 let job_filter = Filter::builder().name("tag:jobId").values(&job.id).build();
 let operator_filter = Filter::builder()
 .name("tag:operator")
@@ -701,15 +729,17 @@ impl Aws {
 // [UPDATE NOTE] Associate IP address to secondary IP of gateway VM
 async fn associate_address(
 &self,
- instance_id: &str,
 alloc_id: &str,
 region: &str,
+ eni_id: &str,
+ sec_id: &str,
 ) -> Result<()> {
 self.client(region)
 .await
 .associate_address()
 .allocation_id(alloc_id)
- .instance_id(instance_id)
+ .network_interface_id(eni_id)
+ .private_ip_address(sec_id)
 .send()
 .await
 .context("could not associate elastic ip")?;
@@ -779,12 +809,16 @@ impl Aws {
 // [UPDATE NOTE] Check AMI corresponding to given job. If it doesn't exist then check if snapshot exists.
 // If that doesn't exist, download the image, upload it as a snapshot and register an AMI. If the snapshot exists, register the AMI from it.
- let (ami_exist, mut ami_id) = self.get_job_ami_id(job, region).await
+ let (ami_exist, mut ami_id) = self
+ .get_job_ami_id(job, region)
+ .await
 .context("failed to get job ami")?;
 if !ami_exist {
 // check snapshot exists
- let (snapshot_exist, mut snapshot_id) = self.get_job_snapshot_id(job, region).await
+ let (snapshot_exist, mut snapshot_id) = self
+ .get_job_snapshot_id(job, region)
+ .await
 .context("failed to get job snapshot")?;
 if !snapshot_exist {
 // 1. Download image in image_url to a tmp file
 // 2. Calculate sha256 of the image downloaded
 // 3.
Upload image as snapshot let tmp_file_path = format!("/tmp/image-{}.raw", job.id); - let mut tmp_file = File::create(&tmp_file_path) - .context(format!("Failed to create temporary file for image {}", tmp_file_path))?; + let mut tmp_file = File::create(&tmp_file_path).context(format!( + "Failed to create temporary file for image {}", + tmp_file_path + ))?; // Download the image from the image_url - let resp = reqwest::get(image_url) - .await - .context(format!("Failed to start download file from {} for job ID {}", image_url, job.id))?; + let resp = reqwest::get(image_url).await.context(format!( + "Failed to start download file from {} for job ID {}", + image_url, job.id + ))?; let mut stream = resp.bytes_stream(); while let Some(item) = stream.next().await { - let chunk = item.context(format!("Failed to read chunk from response stream for job ID {}", job.id))?; - tmp_file.write_all(&chunk) - .context(format!("Failed to write chunk to temporary file for job ID {}", job.id))?; + let chunk = item.context(format!( + "Failed to read chunk from response stream for job ID {}", + job.id + ))?; + tmp_file.write_all(&chunk).context(format!( + "Failed to write chunk to temporary file for job ID {}", + job.id + ))?; } - tmp_file.flush() - .context(format!("Failed to flush temporary file for job ID {}", job.id))?; + tmp_file.flush().context(format!( + "Failed to flush temporary file for job ID {}", + job.id + ))?; let mut hasher = Sha256::new(); let mut file = File::open(&tmp_file_path) .context("Failed to open temporary file for hashing")?; let mut buffer = [0; 8192]; loop { - let n = file.read(&mut buffer) + let n = file + .read(&mut buffer) .context("Failed to read temporary file")?; if n == 0 { break; @@ -845,13 +890,27 @@ impl Aws { } } - let uploader = SnapshotUploader::new(self.ebs_client(region).await.clone()); - let managed_tag = aws_sdk_ebs::types::Tag::builder().key("managedBy").value("marlin").build(); - let project_tag = aws_sdk_ebs::types::Tag::builder().key("project").value("oyster").build(); - let job_tag = aws_sdk_ebs::types::Tag::builder().key("jobId").value(&job.id).build(); - let operator_tag = aws_sdk_ebs::types::Tag::builder().key("operator").value(&job.operator).build(); - let chain_tag = aws_sdk_ebs::types::Tag::builder().key("chainID").value(&job.chain).build(); + let managed_tag = aws_sdk_ebs::types::Tag::builder() + .key("managedBy") + .value("marlin") + .build(); + let project_tag = aws_sdk_ebs::types::Tag::builder() + .key("project") + .value("oyster") + .build(); + let job_tag = aws_sdk_ebs::types::Tag::builder() + .key("jobId") + .value(&job.id) + .build(); + let operator_tag = aws_sdk_ebs::types::Tag::builder() + .key("operator") + .value(&job.operator) + .build(); + let chain_tag = aws_sdk_ebs::types::Tag::builder() + .key("chainID") + .value(&job.chain) + .build(); let contract_tag = aws_sdk_ebs::types::Tag::builder() .key("contractAddress") .value(&job.contract) @@ -865,14 +924,25 @@ impl Aws { contract_tag, chain_tag, ]; - snapshot_id = uploader.upload_from_file(Path::new(&tmp_file_path), None, None, Some(snapshot_tags), None, None, None).await + snapshot_id = uploader + .upload_from_file( + Path::new(&tmp_file_path), + None, + None, + Some(snapshot_tags), + None, + None, + None, + ) + .await .context("Failed to upload snapshot from image file")?; info!(snapshot_id, "Snapshot uploaded"); let waiter = SnapshotWaiter::new(self.client(region).await.clone()); - waiter.wait_for_completed(snapshot_id.as_str()).await + waiter + .wait_for_completed(snapshot_id.as_str()) + 
.await .context("Failed to wait for snapshot completion")?; info!(snapshot_id, "Snapshot is now completed"); - } // Register AMI from snapshot @@ -883,7 +953,7 @@ impl Aws { .snapshot_id(snapshot_id.clone()) .build(), ) - .build(); + .build(); let instance_type = InstanceType::from_str(instance_type).context("cannot parse instance type")?; @@ -907,7 +977,9 @@ impl Aws { info!(architecture); } } - let resp = self.client(region).await + let resp = self + .client(region) + .await .register_image() .name(format!("marlin/oyster/job-{}", job.id)) .architecture(FromStr::from_str(&architecture)?) @@ -919,41 +991,46 @@ impl Aws { .tag_specifications( TagSpecification::builder() .resource_type(ResourceType::Image) + .tags(Tag::builder().key("managedBy").value("marlin").build()) + .tags(Tag::builder().key("project").value("oyster").build()) + .tags(Tag::builder().key("jobId").value(&job.id).build()) + .tags(Tag::builder().key("operator").value(&job.operator).build()) .tags( - Tag::builder().key("managedBy").value("marlin").build(), - ) - .tags( - Tag::builder().key("project").value("oyster").build(), - ) - .tags( - Tag::builder().key("jobId").value(&job.id).build(), - ) - .tags( - Tag::builder().key("operator").value(&job.operator).build(), + Tag::builder() + .key("contractAddress") + .value(&job.contract) + .build(), ) - .tags( - Tag::builder().key("contractAddress").value(&job.contract).build(), - ) - .tags( - Tag::builder().key("chainID").value(&job.chain).build(), - ) - .build() + .tags(Tag::builder().key("chainID").value(&job.chain).build()) + .build(), ) .send() .await - .context(format!("Failed to register AMI from snapshot {} for job {}", snapshot_id, job.id))?; + .context(format!( + "Failed to register AMI from snapshot {} for job {}", + snapshot_id, job.id + ))?; - ami_id = resp.image_id() + ami_id = resp + .image_id() .ok_or(anyhow!("could not parse image id"))? .to_string(); } if !exist { // either no old instance or old instance was not enough, launch new one - self - .spin_up_instance(job, instance_type, region, req_mem, req_vcpu, init_params, ami_id.as_str()) - .await - .context("failed to spin up instance")?; + self.spin_up_instance( + job, + instance_type, + region, + req_mem, + req_vcpu, + init_params, + ami_id.as_str(), + bandwidth, + ) + .await + .context("failed to spin up instance")?; } Ok(()) @@ -978,7 +1055,6 @@ impl Aws { // .context("failed to run enclave") } - // [UPDATE NOTE] New things to add: // 1. Pick AMI corresponding to given image_url // 2. 
Setup user data @@ -991,6 +1067,7 @@ impl Aws { req_vcpu: i32, init_params: &[u8], ami_id: &str, + bandwidth: u64, ) -> Result { let instance_type = InstanceType::from_str(instance_type).context("cannot parse instance type")?; @@ -1030,7 +1107,7 @@ impl Aws { .context("could not launch instance")?; sleep(Duration::from_secs(100)).await; - let res = self.post_spin_up(job, &instance, region).await; + let res = self.post_spin_up(job, &instance, region, bandwidth).await; if let Err(err) = res { error!(?err, "Error during post spin up"); @@ -1042,19 +1119,248 @@ impl Aws { Ok(instance) } - async fn post_spin_up(&self, job: &JobId, instance: &str, region: &str) -> Result<()> { + async fn post_spin_up( + &self, + job: &JobId, + instance_id: &str, + region: &str, + bandwidth: u64, + ) -> Result<()> { + // [Update Note] do the networking here + // Allocate Elastic IP + // Check capacity on existing Rate Limit VM + // Create secondary IP on Rate Limit VM + // Modifictations on Rate Limit VM with NAT and tc + // associate secondary IP and Elastic IP + // Return let (alloc_id, ip) = self .allocate_ip_addr(job, region) .await .context("error allocating ip address")?; info!(ip, "Elastic Ip allocated"); + + let (rl_instance_id, sec_ip, eni_id) = self + .select_rate_limiter(region, bandwidth) + .await + .context("could not select rate limiter")?; + info!(sec_ip, "Secondary IP allocated on Rate Limiter"); + + self.configure_rate_limiter( + &instance_id, + &rl_instance_id, + &sec_ip, + &eni_id, + bandwidth, + region, + ) + .await + .context("could not configure rate limiter")?; - self.associate_address(instance, &alloc_id, region) + + self.associate_address(&alloc_id, region, &eni_id, &sec_ip) .await .context("could not associate ip address")?; Ok(()) } + async fn configure_rate_limiter( + &self, + instance_id: &str, + rl_instance_id: &str, + sec_ip: &str, + eni_id: &str, + bandwidth: u64, + region: &str, + ) -> Result<()> { + // TODO: rollback on failure + // SSH into Rate Limiter instance and configure NAT and tc + let rl_ip = self + .get_instance_public_ip(rl_instance_id, region) + .await + .context("could not get rate limiter instance ip")?; + + let sess = &self + .ssh_connect(&(rl_ip + ":22")) + .await + .context("error establishing ssh connection")?; + + // Get instance private IP + let private_ip = self + .get_instance_private_ip(instance_id, region) + .await + .context("could not get instance private ip")?; + + // OPTION: Use a script file in rate limit VM, which take sec ip and private ip, bandwidth as args and setup + // everything + // setup NAT + let nat_cmd = format!( + "sudo nft add rule ip raw prerouting ip saddr {} notrack ip saddr set {} && \ +sudo nft add rule ip raw prerouting ip daddr {} notrack ip daddr set {}", + private_ip, sec_ip, sec_ip, private_ip + ); + let (_, stderr) = Self::ssh_exec(sess, &nat_cmd).context("Failed to run nftable command")?; + + if !stderr.is_empty() { + error!(stderr = ?stderr, "Error setting up NAT on Rate Limiter"); + // TODO: rollback on failure + return Err(anyhow!(stderr)).context("Error setting up NAT on Rate Limiter"); + } + + // setup ip route table + let ip_rule_cmd = format!( + "sudo ip rule add from {} table {}", + sec_ip, eni_id + ); + // TODO: rollback on failure + let (_, stderr) = Self::ssh_exec(sess, &ip_rule_cmd).context("Failed to run ip rule command")?; + if !stderr.is_empty() { + error!(stderr = ?stderr, "Error setting up IP rule on Rate Limiter"); + return Err(anyhow!(stderr)).context("Error setting up IP rule on Rate Limiter"); + } + // 
setup tc + // TODO: rollback on failure + // TODO: get unique non-existent class_id + // TODO: get device name from eni_id + let class_id = 1; + let tc_class_cmd = format!( + "tc class add dev ens5 parent 1: classid 1:{} htb rate {} burst 15k", + class_id, + bandwidth + ); + let tc_filter_cmd = format!( + "tc filter add dev ens5 protocol ip parent 1:0 prio 1 u32 match ip src {} flowid 1:{}", + sec_ip, + class_id + ); + + let (_, stderr) = Self::ssh_exec(sess, &tc_class_cmd).context("Failed to run tc class command")?; + if !stderr.is_empty() { + error!(stderr = ?stderr, "Error setting up tc class on Rate Limiter"); + return Err(anyhow!(stderr)).context("Error setting up tc class on Rate Limiter"); + } + let (_, stderr) = Self::ssh_exec(sess, &tc_filter_cmd).context("Failed to run tc filter command")?; + if !stderr.is_empty() { + error!(stderr = ?stderr, "Error setting up tc filter on Rate Limiter"); + return Err(anyhow!(stderr)).context("Error setting up tc filter on Rate Limiter"); + } + + + Ok(()) + + } + + async fn get_instance_private_ip(&self, instance_id: &str, region: &str) -> Result { + Ok(self + .client(region) + .await + .describe_instances() + .filters( + Filter::builder() + .name("instance-id") + .values(instance_id) + .build(), + ) + .send() + .await + .context("could not describe instances")? + // response parsing from here + .reservations() + .first() + .ok_or(anyhow!("no reservation found"))? + .instances() + .first() + .ok_or(anyhow!("no instances with the given id"))? + .private_ip_address() + .ok_or(anyhow!("could not parse private ip address"))? + .to_string()) + } + + async fn select_rate_limiter(&self, region: &str, bandwidth: u64) -> Result<(String, String, String)> { + // get all the rate limiter vm from region + // check available bandwidth and secondary IP is allowed + // [Note] TODO manage concurrency resource issue + // bandwidth is in kbit/sec + let project_filter = Filter::builder() + .name("tag:project") + .values("oyster") + .build(); + let rl_filter = Filter::builder() + .name("tag:type") + .values("rate-limiter") + .build(); + let res = self + .client(region) + .await + .describe_instances() + .filters(project_filter) + .filters(rl_filter) + .send() + .await + .context("could not describe rate limit instances")?; + + let reservations = res.reservations(); + for reservation in reservations { + for instance in reservation.instances() { + let instance_id = instance + .instance_id() + .ok_or(anyhow!("could not parse instance id"))? + .to_string(); + // [TODO] atomically check & reserve available bandwidth (its kbit/sec, RL script takes in bits/sec) + // attach a secondary IP to instance + if instance.network_interfaces.is_none() { + debug!( + "No network interfaces found Rate Limit instance [{}]", + instance_id + ); + continue; + } + for eni in instance.network_interfaces() { + if let Some(eni_id) = eni.network_interface_id() { + let res = self + .client(region) + .await + .assign_private_ip_addresses() + .network_interface_id(eni_id) + .secondary_private_ip_address_count(1) + .send() + .await; + if let Ok(assigned_ip) = res { + if assigned_ip.assigned_private_ip_addresses.is_none() { + debug!( + "No secondary private IP address assigned Rate Limit instance [{}], ENI [{}]", + instance_id, + eni_id + ); + continue; + } else { + let sec_ip = assigned_ip + .assigned_private_ip_addresses() + .first() + .ok_or(anyhow!("no assigned private ip address found"))? + .private_ip_address() + .ok_or(anyhow!("no private ip address found"))? 
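// [Note] one option for the class_id TODO in configure_rate_limiter above is to
// derive the 16-bit tc minor id from the last two octets of the secondary IP
// (hypothetical helper, not part of this patch; assumes an IPv4 dotted quad and
// would still need a collision check against reserved ids such as 1:1):
// fn tc_class_id(sec_ip: &str) -> Result<u16> {
//     let octets: Vec<u8> = sec_ip
//         .split('.')
//         .skip(2)
//         .map(str::parse)
//         .collect::<Result<_, _>>()?;
//     Ok(((octets[0] as u16) << 8) | octets[1] as u16)
// }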
+ .to_string(); + return Ok((instance_id, sec_ip, eni_id.to_string())); + } + } else { + debug!( + ?res, + "Error assigning secondary private IP address Rate Limit instance [{}], ENI [{}]", + instance_id, + eni_id + ); + continue; + } + } + } + + } + } + Err(anyhow!( + "no rate limiter instance found with enough available bandwidth" + )) + } + async fn spin_down_impl(&self, job: &JobId, region: &str) -> Result<()> { let (exist, instance, state) = self .get_job_instance_id(job, region) @@ -1075,7 +1381,7 @@ impl Aws { Ok(()) } - + // TODO: manage RL VM ops for remove VM pub async fn spin_down_instance( &self, instance_id: &str, @@ -1142,7 +1448,7 @@ impl InfraProvider for Aws { } // [UPDATE NOTE] Due to Gateway VM rate limit, instance IP won't be equal to elastic IP. Instead, Gateway VM - // secondary IPs are used. + // secondary IPs are used. async fn get_job_ip(&self, job: &JobId, region: &str) -> Result { let instance = self .get_job_instance_id(job, region) @@ -1153,20 +1459,14 @@ impl InfraProvider for Aws { return Err(anyhow!("Instance not found for job - {}", job.id)); } - let instance_ip = self - .get_instance_ip(&instance.1, region) - .await - .context("could not get instance ip")?; - let (found, _, elastic_ip) = self .get_job_elastic_ip(job, region) .await .context("could not get job elastic ip")?; - // it is possible for the two above to differ while the instance is initializing (maybe - // terminating?), better to error out instead of potentially showing a temporary IP - if found && instance_ip == elastic_ip { - return Ok(instance_ip); + // It is possible that instance is still initializing and elastic IP is not yet associated + if found { + return Ok(elastic_ip); } Err(anyhow!("Instance is still initializing")) @@ -1200,8 +1500,9 @@ mod tests { &["ap-southeast-2".to_string()], "cp".to_string(), None, - None - ).await; + None, + ) + .await; let job = JobId { id: "test-job".to_string(), operator: "test-operator".to_string(), @@ -1229,7 +1530,11 @@ mod tests { init_params, ) .await; - assert!(spin_up_result.is_ok(), "Spin up failed: {:?}", spin_up_result.err()); + assert!( + spin_up_result.is_ok(), + "Spin up failed: {:?}", + spin_up_result.err() + ); // Check if running let is_running = aws @@ -1240,19 +1545,30 @@ mod tests { // Get job IP let job_ip_result = aws.get_job_ip(&job, region).await; - assert!(job_ip_result.is_ok(), "Get job IP failed: {:?}", job_ip_result.err()); + assert!( + job_ip_result.is_ok(), + "Get job IP failed: {:?}", + job_ip_result.err() + ); let job_ip = job_ip_result.unwrap(); println!("Job IP: {}", job_ip); // Spin down let spin_down_result = aws.spin_down(&job, region).await; - assert!(spin_down_result.is_ok(), "Spin down failed: {:?}", spin_down_result.err()); + assert!( + spin_down_result.is_ok(), + "Spin down failed: {:?}", + spin_down_result.err() + ); // Check if not running let is_running_after_down = aws .check_enclave_running(&job, region) .await .expect("Failed to check if enclave is running after spin down"); - assert!(!is_running_after_down, "Enclave should not be running after spin down"); + assert!( + !is_running_after_down, + "Enclave should not be running after spin down" + ); } -} \ No newline at end of file +} From e2b7eb9af1b81d0535f2fe6dc93866aab4102c42 Mon Sep 17 00:00:00 2001 From: Vikrant Garg Date: Mon, 1 Dec 2025 14:23:03 +0530 Subject: [PATCH 04/12] Add rate limiter config workflow --- operator/control-plane/Cargo.lock | 5 +- operator/control-plane/src/aws.rs | 366 ++++++++++++++++++--------- operator/control-plane/src/market.rs 
| 10 +- operator/control-plane/src/test.rs | 2 +- 4 files changed, 257 insertions(+), 126 deletions(-) diff --git a/operator/control-plane/Cargo.lock b/operator/control-plane/Cargo.lock index 3b993f9f8..c69161ea5 100644 --- a/operator/control-plane/Cargo.lock +++ b/operator/control-plane/Cargo.lock @@ -443,9 +443,9 @@ dependencies = [ [[package]] name = "aws-sdk-ec2" -version = "1.117.0" +version = "1.138.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90decc9e656577c7f61085483936209be42a167064ef93a98801b527cf1851f0" +checksum = "acdab897b457b28c20cb0cb2ac4da294644f6187dc5136143780e935a3a24399" dependencies = [ "aws-credential-types", "aws-runtime", @@ -460,7 +460,6 @@ dependencies = [ "aws-types", "fastrand", "http 0.2.12", - "once_cell", "regex-lite", "tracing", ] diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs index ad1096af1..e87abc13a 100644 --- a/operator/control-plane/src/aws.rs +++ b/operator/control-plane/src/aws.rs @@ -16,7 +16,7 @@ use ssh_key::{Algorithm, LineEnding, PrivateKey}; use ssh2::Session; use tokio::time::{sleep, Duration}; use tokio_stream::StreamExt; -use tracing::{debug, error, info}; +use tracing::{debug, error, info, warn}; use whoami::username; use crate::market::{InfraProvider, JobId}; @@ -273,7 +273,6 @@ impl Aws { // } /* AWS EC2 UTILITY */ - // [UPDATE NOTE] Should return private IP, there won't be any Public IPs. pub async fn get_instance_public_ip(&self, instance_id: &str, region: &str) -> Result { Ok(self .client(region) @@ -587,8 +586,8 @@ impl Aws { } async fn allocate_ip_addr(&self, job: &JobId, region: &str) -> Result<(String, String)> { - let (exist, alloc_id, public_ip) = self - .get_job_elastic_ip(job, region) + let (exist, alloc_id, public_ip, _, _, _, _) = self + .get_job_elastic_ip(job, region, false) .await .context("could not get elastic ip for job")?; @@ -636,11 +635,13 @@ impl Aws { )) } + // if with_association is true means, caller expected this elastic associated and return association details async fn get_job_elastic_ip( &self, job: &JobId, region: &str, - ) -> Result<(bool, String, String)> { + with_association: bool, + ) -> Result<(bool, String, String, String, String, String, String)> { let job_filter = Filter::builder().name("tag:jobId").values(&job.id).build(); let operator_filter = Filter::builder() .name("tag:operator") @@ -671,18 +672,72 @@ impl Aws { .addresses() .first() { - None => (false, String::new(), String::new()), - Some(addrs) => ( - true, - addrs - .allocation_id() - .ok_or(anyhow!("could not parse allocation id"))? - .to_string(), - addrs - .public_ip() - .ok_or(anyhow!("could not parse public ip"))? - .to_string(), + None => ( + false, + String::new(), + String::new(), + String::new(), + String::new(), + String::new(), + String::new(), ), + Some(addrs) => { + if with_association == false { + ( + true, + addrs + .allocation_id() + .ok_or(anyhow!("could not parse allocation id"))? + .to_string(), + addrs + .public_ip() + .ok_or(anyhow!("could not parse public ip"))? + .to_string(), + String::new(), + String::new(), + String::new(), + String::new(), + ) + } else if addrs.association_id().is_none() { + ( + false, + String::new(), + String::new(), + String::new(), + String::new(), + String::new(), + String::new(), + ) + } else { + ( + true, + addrs + .allocation_id() + .ok_or(anyhow!("could not parse allocation id"))? + .to_string(), + addrs + .public_ip() + .ok_or(anyhow!("could not parse public ip"))? 
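// [Note] the 7-field tuple returned here is easy to misread at call sites; a
// small named struct (hypothetical, not introduced by this patch) would document
// which fields are only populated in the with_association case:
// struct JobElasticIp {
//     exists: bool,
//     alloc_id: String,
//     public_ip: String,
//     private_ip: String,
//     association_id: String,
//     instance_id: String,
//     eni_id: String,
// }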
+ .to_string(), + addrs + .private_ip_address() + .ok_or(anyhow!("could not parse private ip"))? + .to_string(), + addrs + .association_id() + .ok_or(anyhow!("could not parse association id"))? + .to_string(), + addrs + .instance_id() + .ok_or(anyhow!("could not parse instance id"))? + .to_string(), + addrs + .network_interface_id() + .ok_or(anyhow!("could not parse network interface id"))? + .to_string(), + ) + } + }, }, ) } @@ -740,6 +795,7 @@ impl Aws { .allocation_id(alloc_id) .network_interface_id(eni_id) .private_ip_address(sec_id) + .allow_reassociation(true) .send() .await .context("could not associate elastic ip")?; @@ -793,7 +849,7 @@ impl Aws { } else if state == "stopping" || state == "stopped" { // instance unhealthy, terminate info!(instance, "Found existing unhealthy instance"); - self.spin_down_instance(&instance, job, region) + self.spin_down_instance(&instance, job, region, bandwidth) .await .context("failed to terminate instance")?; @@ -986,6 +1042,7 @@ impl Aws { .root_device_name("/dev/xvda") .block_device_mappings(block_dev_mapping) .tpm_support(TpmSupportValues::V20) + .ena_support(true) .virtualization_type("hvm".to_string()) .boot_mode(BootModeValues::Uefi) .tag_specifications( @@ -1111,7 +1168,7 @@ impl Aws { if let Err(err) = res { error!(?err, "Error during post spin up"); - self.spin_down_instance(&instance, job, region) + self.spin_down_instance(&instance, job, region, bandwidth) .await .context("could not spin down instance after error during post spin up")?; return Err(err).context("error during post spin up"); @@ -1139,23 +1196,10 @@ impl Aws { .context("error allocating ip address")?; info!(ip, "Elastic Ip allocated"); - let (rl_instance_id, sec_ip, eni_id) = self - .select_rate_limiter(region, bandwidth) + let (sec_ip, eni_id) = self + .select_rate_limiter(region, bandwidth, instance_id) .await .context("could not select rate limiter")?; - info!(sec_ip, "Secondary IP allocated on Rate Limiter"); - - self.configure_rate_limiter( - &instance_id, - &rl_instance_id, - &sec_ip, - &eni_id, - bandwidth, - region, - ) - .await - .context("could not configure rate limiter")?; - self.associate_address(&alloc_id, region, &eni_id, &sec_ip) .await @@ -1168,8 +1212,9 @@ impl Aws { instance_id: &str, rl_instance_id: &str, sec_ip: &str, - eni_id: &str, - bandwidth: u64, + eni_mac: &str, + bandwidth: u64, // in kbit/sec + instance_bandwidth_limit: u64, region: &str, ) -> Result<()> { // TODO: rollback on failure @@ -1192,59 +1237,19 @@ impl Aws { // OPTION: Use a script file in rate limit VM, which take sec ip and private ip, bandwidth as args and setup // everything - // setup NAT - let nat_cmd = format!( - "sudo nft add rule ip raw prerouting ip saddr {} notrack ip saddr set {} && \ -sudo nft add rule ip raw prerouting ip daddr {} notrack ip daddr set {}", - private_ip, sec_ip, sec_ip, private_ip + let add_rl_cmd = format!( + "sudo ~/add_rl.sh {} {} {} {} {}", + sec_ip, private_ip, eni_mac, bandwidth * 1000, instance_bandwidth_limit ); - let (_, stderr) = Self::ssh_exec(sess, &nat_cmd).context("Failed to run nftable command")?; - if !stderr.is_empty() { - error!(stderr = ?stderr, "Error setting up NAT on Rate Limiter"); - // TODO: rollback on failure - return Err(anyhow!(stderr)).context("Error setting up NAT on Rate Limiter"); - } + let (_, stderr) = Self::ssh_exec(sess, &add_rl_cmd).context("Failed to run add_rl.sh command")?; - // setup ip route table - let ip_rule_cmd = format!( - "sudo ip rule add from {} table {}", - sec_ip, eni_id - ); - // TODO: rollback on 
failure - let (_, stderr) = Self::ssh_exec(sess, &ip_rule_cmd).context("Failed to run ip rule command")?; if !stderr.is_empty() { - error!(stderr = ?stderr, "Error setting up IP rule on Rate Limiter"); - return Err(anyhow!(stderr)).context("Error setting up IP rule on Rate Limiter"); - } - // setup tc - // TODO: rollback on failure - // TODO: get unique non-existent class_id - // TODO: get device name from eni_id - let class_id = 1; - let tc_class_cmd = format!( - "tc class add dev ens5 parent 1: classid 1:{} htb rate {} burst 15k", - class_id, - bandwidth - ); - let tc_filter_cmd = format!( - "tc filter add dev ens5 protocol ip parent 1:0 prio 1 u32 match ip src {} flowid 1:{}", - sec_ip, - class_id - ); - - let (_, stderr) = Self::ssh_exec(sess, &tc_class_cmd).context("Failed to run tc class command")?; - if !stderr.is_empty() { - error!(stderr = ?stderr, "Error setting up tc class on Rate Limiter"); - return Err(anyhow!(stderr)).context("Error setting up tc class on Rate Limiter"); - } - let (_, stderr) = Self::ssh_exec(sess, &tc_filter_cmd).context("Failed to run tc filter command")?; - if !stderr.is_empty() { - error!(stderr = ?stderr, "Error setting up tc filter on Rate Limiter"); - return Err(anyhow!(stderr)).context("Error setting up tc filter on Rate Limiter"); + error!(stderr = ?stderr, "Error setting up Rate Limiter"); + // TODO: rollback on failure + return Err(anyhow!(stderr)).context("Error setting up Rate Limiter"); } - Ok(()) } @@ -1275,10 +1280,15 @@ sudo nft add rule ip raw prerouting ip daddr {} notrack ip daddr set {}", .to_string()) } - async fn select_rate_limiter(&self, region: &str, bandwidth: u64) -> Result<(String, String, String)> { + // TODO: update the route table of user subnet to send traffic via rate limiter instance + async fn select_rate_limiter( + &self, + region: &str, + bandwidth: u64, + instance_id: &str, + ) -> Result<(String, String)> { // get all the rate limiter vm from region // check available bandwidth and secondary IP is allowed - // [Note] TODO manage concurrency resource issue // bandwidth is in kbit/sec let project_filter = Filter::builder() .name("tag:project") @@ -1301,21 +1311,29 @@ sudo nft add rule ip raw prerouting ip daddr {} notrack ip daddr set {}", let reservations = res.reservations(); for reservation in reservations { for instance in reservation.instances() { - let instance_id = instance + let rl_instance_id = instance .instance_id() .ok_or(anyhow!("could not parse instance id"))? .to_string(); - // [TODO] atomically check & reserve available bandwidth (its kbit/sec, RL script takes in bits/sec) // attach a secondary IP to instance if instance.network_interfaces.is_none() { debug!( "No network interfaces found Rate Limit instance [{}]", - instance_id + rl_instance_id ); continue; } + let instance_bandwidth_limit: u64 = 10e10 as u64; // TODO fetch from tag or instance metadata for eni in instance.network_interfaces() { + if let Some(eni_id) = eni.network_interface_id() { + let Some(eni_mac) = eni.mac_address() else { + debug!( + "MAC address not found for ENI {}. 
Skipping ENI", + eni_id + ); + continue; + }; let res = self .client(region) .await @@ -1328,7 +1346,7 @@ sudo nft add rule ip raw prerouting ip daddr {} notrack ip daddr set {}", if assigned_ip.assigned_private_ip_addresses.is_none() { debug!( "No secondary private IP address assigned Rate Limit instance [{}], ENI [{}]", - instance_id, + rl_instance_id, eni_id ); continue; @@ -1340,13 +1358,34 @@ sudo nft add rule ip raw prerouting ip daddr {} notrack ip daddr set {}", .private_ip_address() .ok_or(anyhow!("no private ip address found"))? .to_string(); - return Ok((instance_id, sec_ip, eni_id.to_string())); + + // RL IP, secondary IP, + if self.configure_rate_limiter( + instance_id, + &rl_instance_id, + &sec_ip, + eni_mac, + bandwidth, + instance_bandwidth_limit, + region + ).await.is_err() { + warn!( + "Error configuring Rate Limit instance [{}], ENI [{}]", + rl_instance_id, + eni_id + ); + self.unassign_secondary_ip(eni_id, sec_ip.as_str(), region) + .await + .context("could not unassign secondary ip")?; + continue; + } + return Ok((sec_ip, eni_id.to_string())); } } else { debug!( ?res, "Error assigning secondary private IP address Rate Limit instance [{}], ENI [{}]", - instance_id, + rl_instance_id, eni_id ); continue; @@ -1361,7 +1400,7 @@ sudo nft add rule ip raw prerouting ip daddr {} notrack ip daddr set {}", )) } - async fn spin_down_impl(&self, job: &JobId, region: &str) -> Result<()> { + async fn spin_down_impl(&self, job: &JobId, region: &str, bandwidth: u64) -> Result<()> { let (exist, instance, state) = self .get_job_instance_id(job, region) .await @@ -1369,47 +1408,129 @@ sudo nft add rule ip raw prerouting ip daddr {} notrack ip daddr set {}", if !exist || state == "shutting-down" || state == "terminated" { // instance does not really exist anyway, we are done + // TODO: cleanup required for RL config and elastic IP? info!("Instance does not exist or is already terminated"); return Ok(()); } // terminate instance info!(instance, "Terminating existing instance"); - self.spin_down_instance(&instance, job, region) + self.spin_down_instance(&instance, job, region, bandwidth) .await .context("failed to terminate instance")?; Ok(()) } - // TODO: manage RL VM ops for remove VM - pub async fn spin_down_instance( + + async fn remove_rate_limiter_config( + &self, + rl_instance_id: &str, + sec_ip: &str, + private_ip: &str, + eni_mac: &str, + bandwidth: u64, // in kbit/sec + ) -> Result<()> { + let rl_ip = self + .get_instance_public_ip(rl_instance_id, "ap-southeast-2") + .await + .context("could not get rate limiter instance ip")?; + + let sess = &self + .ssh_connect(&(rl_ip + ":22")) + .await + .context("error establishing ssh connection")?; + + let remove_rl_cmd = format!( + "sudo ~/remove_rl.sh {} {} {} {}", + sec_ip, private_ip, eni_mac, bandwidth * 1000 + ); + + let (_, stderr) = Self::ssh_exec(sess, &remove_rl_cmd) + .context("Failed to run remove_rl.sh command")?; + + if !stderr.is_empty() { + error!(stderr = ?stderr, "Error removing Rate Limiter configuration"); + } + return Ok(()); + } + + async fn get_eni_mac_address(&self, eni_id: &str, region: &str) -> Result { + Ok(self + .client(region) + .await + .describe_network_interfaces() + .network_interface_ids(eni_id) + .send() + .await + .context("could not describe network interfaces")? + // response parsing from here + .network_interfaces() + .first() + .ok_or(anyhow!("no network interface found"))? + .mac_address() + .ok_or(anyhow!("could not parse mac address"))? 
+ .to_string()) + } + + async fn unassign_secondary_ip( + &self, + eni_id: &str, + sec_ip: &str, + region: &str, + ) -> Result<()> { + self.client(region) + .await + .unassign_private_ip_addresses() + .network_interface_id(eni_id) + .private_ip_addresses(sec_ip) + .send() + .await + .context("could not unassign secondary private ip address")?; + Ok(()) + } + + // TODO: handle all error cases, continue cleanup even if some steps fail or will it be retried later? + async fn spin_down_instance( &self, instance_id: &str, job: &JobId, region: &str, + bandwidth: u64, ) -> Result<()> { - let (exist, _, association_id) = self - .get_instance_elastic_ip(instance_id, region) + let (exist, alloc_id, _, sec_ip, association_id, rl_instance_id, eni_id) = self + .get_job_elastic_ip(job, region, true) .await - .context("could not get elastic ip of instance")?; + .context("could not get elastic ip of job")?; + if exist { self.disassociate_address(association_id.as_str(), region) .await .context("could not disassociate address")?; - } - self.terminate_instance(instance_id, region) - .await - .context("could not terminate instance")?; - let (exist, alloc_id, _) = self - .get_job_elastic_ip(job, region) - .await - .context("could not get elastic ip of job")?; - if exist { + let eni_mac = self + .get_eni_mac_address(eni_id.as_str(), region) + .await + .context("could not get eni mac address")?; + let private_ip = self.get_instance_private_ip(instance_id, region) + .await + .context("could not get private ip of instance")?; + + self.remove_rate_limiter_config(&rl_instance_id, &sec_ip, &private_ip, &eni_mac, bandwidth) + .await + .context("could not remove rate limiter config")?; + + self.unassign_secondary_ip(eni_id.as_str(), sec_ip.as_str(), region) + .await + .context("could not unassign secondary ip")?; + self.release_address(alloc_id.as_str(), region) .await .context("could not release address")?; info!("Elastic IP released"); } + + self.terminate_instance(instance_id, region) + .await + .context("could not terminate instance")?; Ok(()) } @@ -1441,8 +1562,8 @@ impl InfraProvider for Aws { .context("could not spin up enclave") } - async fn spin_down(&mut self, job: &JobId, region: &str) -> Result<()> { - self.spin_down_impl(job, region) + async fn spin_down(&mut self, job: &JobId, region: &str, bandwidth: u64) -> Result<()> { + self.spin_down_impl(job, region, bandwidth) .await .context("could not spin down enclave") } @@ -1459,8 +1580,8 @@ impl InfraProvider for Aws { return Err(anyhow!("Instance not found for job - {}", job.id)); } - let (found, _, elastic_ip) = self - .get_job_elastic_ip(job, region) + let (found, _, elastic_ip, _, _, _, _) = self + .get_job_elastic_ip(job, region, true) .await .context("could not get job elastic ip")?; @@ -1489,16 +1610,27 @@ impl InfraProvider for Aws { // write a test module for AWS struct spin up function #[cfg(test)] mod tests { + use tracing_subscriber::EnvFilter; + use super::*; use crate::market::InfraProvider; use crate::market::JobId; #[tokio::test] async fn test_aws_spin_up_down() { + let mut filter = EnvFilter::new("info,aws_config=warn"); + if let Ok(var) = std::env::var("RUST_LOG") { + filter = filter.add_directive(var.parse().unwrap()); + } + tracing_subscriber::fmt() + .with_max_level(tracing::Level::INFO) + .with_env_filter(filter) + .init(); + let mut aws = Aws::new( "cp".to_string(), &["ap-southeast-2".to_string()], - "cp".to_string(), + "rlgen".to_string(), None, None, ) @@ -1537,11 +1669,11 @@ mod tests { ); // Check if running - let is_running = aws 
- .check_enclave_running(&job, region) - .await - .expect("Failed to check if enclave is running"); - assert!(is_running, "Enclave should be running after spin up"); + // let is_running = aws + // .check_enclave_running(&job, region) + // .await + // .expect("Failed to check if enclave is running"); + // assert!(is_running, "Enclave should be running after spin up"); // Get job IP let job_ip_result = aws.get_job_ip(&job, region).await; @@ -1554,7 +1686,7 @@ mod tests { println!("Job IP: {}", job_ip); // Spin down - let spin_down_result = aws.spin_down(&job, region).await; + let spin_down_result = aws.spin_down(&job, region, bandwidth).await; assert!( spin_down_result.is_ok(), "Spin down failed: {:?}", diff --git a/operator/control-plane/src/market.rs b/operator/control-plane/src/market.rs index c7f694edf..f922cf4ea 100644 --- a/operator/control-plane/src/market.rs +++ b/operator/control-plane/src/market.rs @@ -60,7 +60,7 @@ pub trait InfraProvider { init_params: &[u8], ) -> impl Future> + Send; - fn spin_down(&mut self, job: &JobId, region: &str) -> impl Future> + Send; + fn spin_down(&mut self, job: &JobId, region: &str, bandwidth: u64) -> impl Future> + Send; fn get_job_ip(&self, job: &JobId, region: &str) -> impl Future> + Send; @@ -100,8 +100,8 @@ where .await } - async fn spin_down(&mut self, job: &JobId, region: &str) -> Result<()> { - (**self).spin_down(job, region).await + async fn spin_down(&mut self, job: &JobId, region: &str, bandwidth: u64) -> Result<()> { + (**self).spin_down(job, region, bandwidth).await } async fn get_job_ip(&self, job: &JobId, region: &str) -> Result { @@ -592,7 +592,7 @@ struct JobState<'a> { original_rate: U256, min_rate: U256, bandwidth: u64, - eif_url: String, + eif_url: String, // [Update Note] TODO: Change name of eif instance_type: String, region: String, req_vcpus: i32, @@ -733,7 +733,7 @@ impl<'a> JobState<'a> { true } else { // terminate mode - let res = infra_provider.spin_down(&self.job_id, &self.region).await; + let res = infra_provider.spin_down(&self.job_id, &self.region, self.bandwidth).await; if let Err(err) = res { error!(?err, "Failed to terminate instance"); return false; diff --git a/operator/control-plane/src/test.rs b/operator/control-plane/src/test.rs index f76b4cea4..25e5cb039 100644 --- a/operator/control-plane/src/test.rs +++ b/operator/control-plane/src/test.rs @@ -167,7 +167,7 @@ impl InfraProvider for TestAws { Ok(()) } - async fn spin_down(&mut self, job: &JobId, region: &str) -> Result<()> { + async fn spin_down(&mut self, job: &JobId, region: &str, bandwidth: u64) -> Result<()> { self.outcomes .push(TestAwsOutcome::SpinDown(SpinDownOutcome { time: Instant::now(), From 427c66f05f091d6f17f6d3c8a8b845363a2a1f7e Mon Sep 17 00:00:00 2001 From: Vikrant Garg Date: Tue, 2 Dec 2025 12:00:30 +0530 Subject: [PATCH 05/12] Add instance type bandwidth parser --- operator/control-plane/Cargo.lock | 11 +- operator/control-plane/Cargo.toml | 1 + operator/control-plane/src/aws.rs | 202 +++++++++++++++--------------- 3 files changed, 107 insertions(+), 107 deletions(-) diff --git a/operator/control-plane/Cargo.lock b/operator/control-plane/Cargo.lock index c69161ea5..912388529 100644 --- a/operator/control-plane/Cargo.lock +++ b/operator/control-plane/Cargo.lock @@ -1155,6 +1155,7 @@ dependencies = [ "hex", "httpc-test", "rand_core 0.6.4", + "regex", "reqwest", "serde", "serde_json", @@ -3279,13 +3280,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.1" +version = "1.12.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.9", + "regex-automata 0.4.13", "regex-syntax 0.8.5", ] @@ -3300,9 +3301,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", diff --git a/operator/control-plane/Cargo.toml b/operator/control-plane/Cargo.toml index bd8e657e9..26ff39390 100644 --- a/operator/control-plane/Cargo.toml +++ b/operator/control-plane/Cargo.toml @@ -21,6 +21,7 @@ clap = { version = "4.5.23", features = ["derive"] } coldsnap = "0.9.0" hex = "0.4.3" rand_core = { version = "0.6.4", features = ["getrandom"] } +regex = "1.12.2" reqwest = { version = "0.12.12", features = ["rustls-tls", "stream"], default-features = false } serde = "1.0.217" serde_json = "1.0.134" diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs index e87abc13a..3e5394fe5 100644 --- a/operator/control-plane/src/aws.rs +++ b/operator/control-plane/src/aws.rs @@ -11,6 +11,7 @@ use aws_types::region::Region; use base64::{prelude::BASE64_STANDARD, Engine}; use coldsnap::{SnapshotUploader, SnapshotWaiter}; use rand_core::OsRng; +use regex::Regex; use ssh_key::sha2::{Digest, Sha256}; use ssh_key::{Algorithm, LineEnding, PrivateKey}; use ssh2::Session; @@ -217,61 +218,6 @@ impl Aws { Ok((stdout, stderr)) } - // [UPDATE NOTE] This function is obsolete, no enclaves - // pub async fn run_enclave_impl( - // &self, - // job_id: &str, - // family: &str, - // instance_id: &str, - // region: &str, - // image_url: &str, - // req_vcpu: i32, - // req_mem: i64, - // bandwidth: u64, - // debug: bool, - // init_params: &[u8], - // ) -> Result<()> { - // if family != "salmon" && family != "tuna" { - // return Err(anyhow!("unsupported image family")); - // } - - // // make a ssh session - // let public_ip_address = self - // .get_instance_ip(instance_id, region) - // .await - // .context("could not fetch instance ip")?; - // let sess = &self - // .ssh_connect(&(public_ip_address + ":22")) - // .await - // .context("error establishing ssh connection")?; - - // // set up ephemeral ports for the host - // Self::run_fragment_ephemeral_ports(sess)?; - // // set up nitro enclaves allocator - // Self::run_fragment_allocator(sess, req_vcpu, req_mem)?; - // // download enclave image and perform whitelist/blacklist checks - // self.run_fragment_download_and_check_image(sess, image_url)?; - // // set up bandwidth rate limiting - // Self::run_fragment_bandwidth(sess, bandwidth)?; - - // if family == "tuna" { - // // set up iptables rules - // Self::run_fragment_iptables_tuna(sess)?; - // // set up job id in the init server - // Self::run_fragment_init_server(sess, job_id, init_params)?; - // } else { - // // set up iptables rules - // Self::run_fragment_iptables_salmon(sess)?; - // } - - // // set up debug logger if enabled - // Self::run_fragment_logger(sess, debug)?; - // // run the enclave - // Self::run_fragment_enclave(sess, req_vcpu, req_mem, debug)?; - - // Ok(()) - // } - /* AWS EC2 UTILITY */ pub async fn get_instance_public_ip(&self, instance_id: &str, region: &str) 
-> Result { Ok(self @@ -335,7 +281,6 @@ impl Aws { .get_security_group(region) .await .context("could not get subnet")?; - // [UPDATE NOTE] Add user data to launch instance Ok(self .client(region) .await @@ -781,20 +726,19 @@ impl Aws { ) } - // [UPDATE NOTE] Associate IP address to secondary IP of gateway VM async fn associate_address( &self, alloc_id: &str, region: &str, eni_id: &str, - sec_id: &str, + private_ip: &str, ) -> Result<()> { self.client(region) .await .associate_address() .allocation_id(alloc_id) .network_interface_id(eni_id) - .private_ip_address(sec_id) + .private_ip_address(private_ip) .allow_reassociation(true) .send() .await @@ -824,7 +768,6 @@ impl Aws { Ok(()) } - // [UPDATE NOTE] Spin up instance is only kept, no run enclaves needed async fn spin_up_impl( &mut self, job: &JobId, @@ -862,7 +805,7 @@ impl Aws { } } - // [UPDATE NOTE] Check AMI corresponding to given job. If dosen't exist then check if snapshot exists. + // Check AMI corresponding to given job. If dosen't exist then check if snapshot exists. // If doesn't exist download image upload as snapshot and register AMI. If snapshot exists register AMI from it. let (ami_exist, mut ami_id) = self @@ -1091,30 +1034,8 @@ impl Aws { } Ok(()) - // [UPDATE NOTE] No enclave deployment needed. Check all the steps in this function if needed - // Pick following: - // 1. Rate limit configuration - // 2. User Data setup - // 3. Pick user image - // self.run_enclave_impl( - // &job.id, - // family, - // &instance, - // region, - // image_url, - // req_vcpu, - // req_mem, - // bandwidth, - // debug, - // init_params, - // ) - // .await - // .context("failed to run enclave") } - // [UPDATE NOTE] New things to add: - // 1. Pick AMI corresponding to given image_url - // 2. Setup user data pub async fn spin_up_instance( &self, job: &JobId, @@ -1183,13 +1104,9 @@ impl Aws { region: &str, bandwidth: u64, ) -> Result<()> { - // [Update Note] do the networking here - // Allocate Elastic IP - // Check capacity on existing Rate Limit VM - // Create secondary IP on Rate Limit VM - // Modifictations on Rate Limit VM with NAT and tc + // allocate Elastic IP + // select and configure rate limiter // associate secondary IP and Elastic IP - // Return let (alloc_id, ip) = self .allocate_ip_addr(job, region) .await @@ -1280,6 +1197,48 @@ impl Aws { .to_string()) } + pub async fn get_instance_bandwidth_limit(&self, instance_type: InstanceType) -> Result { + let res = self + .client("ap-southeast-2") + .await + .describe_instance_types() + .instance_types(instance_type) + .send() + .await + .context("could not describe instance types")?; + let mut bandwidth_limit_res: &str = ""; + let instance_types = res.instance_types(); + for instance in instance_types { + bandwidth_limit_res = instance + .network_info() + .ok_or(anyhow!("error fetching instance network info"))? + .network_performance() + .ok_or(anyhow!("error fetching instance network performance"))?; + info!(bandwidth_limit_res); + } + // bandwidth_limit is string like "Up to 12.5 Gigabit", "Up to 10 Gigabit", "10 Gigabit" + // We need to parse this string and return bandwidth in bit/sec + let re = Regex::new(r"^(?i)(?:Up to\s+)?([\d\.]+)\s+Gigabit$") + .context(anyhow!("Failed to initialise bandwidth capturing regular expression"))?; + let captures = re + .captures(bandwidth_limit_res) + .ok_or(anyhow!("Could not parse bandwidth limit from string"))?; + + let bandwidth_limit_str = captures + .get(1) + .ok_or(anyhow!("Could not capture bandwidth limit value"))? 
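// [Note] worked examples for this parser: "Up to 12.5 Gigabit" -> 12_500_000_000
// bps and "10 Gigabit" -> 10_000_000_000 bps, while non-Gigabit values EC2 can
// report for smaller instance types (e.g. "Moderate" or "High") do not match the
// regex and surface as an error here rather than as a bogus limit.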
+ .as_str(); + + let value: f64 = bandwidth_limit_str + .parse() + .context("Could not parse bandwidth limit value to float")?; + + const MULTIPLIER: f64 = 1_000_000_000.0; // Gigabit to bit + + let bandwidth_limit_bps = (value * MULTIPLIER).round() as u64; + + Ok(bandwidth_limit_bps) + } // TODO: update the route table of user subnet to send traffic via rate limiter instance async fn select_rate_limiter( &self, @@ -1323,7 +1282,10 @@ impl Aws { ); continue; } - let instance_bandwidth_limit: u64 = 10e10 as u64; // TODO fetch from tag or instance metadata + let instance_bandwidth_limit = self.get_instance_bandwidth_limit( + instance.instance_type().ok_or(anyhow!("could not parse instance type"))?.clone() + ).await + .context("could not get instance bandwidth limit")?; for eni in instance.network_interfaces() { if let Some(eni_id) = eni.network_interface_id() { @@ -1568,24 +1530,13 @@ impl InfraProvider for Aws { .context("could not spin down enclave") } - // [UPDATE NOTE] Due to Gateway VM rate limit, instance IP won't be equal to elastic IP. Instead, Gateway VM - // secondary IPs are used. async fn get_job_ip(&self, job: &JobId, region: &str) -> Result { - let instance = self - .get_job_instance_id(job, region) - .await - .context("could not get instance id for job instance ip")?; - - if !instance.0 { - return Err(anyhow!("Instance not found for job - {}", job.id)); - } - + let (found, _, elastic_ip, _, _, _, _) = self .get_job_elastic_ip(job, region, true) .await .context("could not get job elastic ip")?; - // It is possible that instance is still initializing and elastic IP is not yet associated if found { return Ok(elastic_ip); } @@ -1703,4 +1654,51 @@ mod tests { "Enclave should not be running after spin down" ); } + + #[tokio::test] + async fn test_get_bandwidth_limit() { + let aws = Aws::new( + "cp".to_string(), + &["ap-southeast-2".to_string()], + "rlgen".to_string(), + None, + None, + ) + .await; + let instance_type = InstanceType::C6aXlarge; + let bandwidth_limit_result = aws + .get_instance_bandwidth_limit(instance_type) + .await; + assert!( + bandwidth_limit_result.is_ok(), + "Get instance bandwidth limit failed: {:?}", + bandwidth_limit_result.err() + ); + let bandwidth_limit = bandwidth_limit_result.unwrap(); + println!("Instance Bandwidth Limit: {} bps", bandwidth_limit); + + let instance_type = InstanceType::M6a12xlarge; + let bandwidth_limit_result = aws + .get_instance_bandwidth_limit(instance_type) + .await; + assert!( + bandwidth_limit_result.is_ok(), + "Get instance bandwidth limit failed: {:?}", + bandwidth_limit_result.err() + ); + let bandwidth_limit = bandwidth_limit_result.unwrap(); + println!("Instance Bandwidth Limit: {} bps", bandwidth_limit); + + let instance_type = InstanceType::M5Xlarge; + let bandwidth_limit_result = aws + .get_instance_bandwidth_limit(instance_type) + .await; + assert!( + bandwidth_limit_result.is_ok(), + "Get instance bandwidth limit failed: {:?}", + bandwidth_limit_result.err() + ); + let bandwidth_limit = bandwidth_limit_result.unwrap(); + println!("Instance Bandwidth Limit: {} bps", bandwidth_limit); + } } From 9722eae80fb0d42966139f21f106b5d9759e9c0f Mon Sep 17 00:00:00 2001 From: Vikrant Garg Date: Thu, 4 Dec 2025 15:48:07 +0530 Subject: [PATCH 06/12] Add deregister AMI and delete snapshot in spin_down --- operator/control-plane/src/aws.rs | 63 ++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs index 3e5394fe5..9f6298661 
100644 --- a/operator/control-plane/src/aws.rs +++ b/operator/control-plane/src/aws.rs @@ -1112,7 +1112,7 @@ impl Aws { .await .context("error allocating ip address")?; info!(ip, "Elastic Ip allocated"); - + let (sec_ip, eni_id) = self .select_rate_limiter(region, bandwidth, instance_id) .await @@ -1287,7 +1287,7 @@ impl Aws { ).await .context("could not get instance bandwidth limit")?; for eni in instance.network_interfaces() { - + if let Some(eni_id) = eni.network_interface_id() { let Some(eni_mac) = eni.mac_address() else { debug!( @@ -1320,7 +1320,7 @@ impl Aws { .private_ip_address() .ok_or(anyhow!("no private ip address found"))? .to_string(); - + // RL IP, secondary IP, if self.configure_rate_limiter( instance_id, @@ -1354,7 +1354,7 @@ impl Aws { } } } - + } } Err(anyhow!( @@ -1381,6 +1381,49 @@ impl Aws { .await .context("failed to terminate instance")?; + self.deregister_ami(job, region).await.context("failed to deregister ami")?; + self.delete_snapshot(job, region) + .await + .context("failed to delete snapshot")?; + + Ok(()) + } + + async fn deregister_ami(&self, job: &JobId, region: &str) -> Result<()> { + let (ami_exist, ami_id) = self + .get_job_ami_id(job, region) + .await + .context("failed to get job ami")?; + if !ami_exist { + return Ok(()); + } + self.client(region) + .await + .deregister_image() + .image_id(ami_id) + .send() + .await + .context("could not deregister ami")?; + Ok(()) + } + + async fn delete_snapshot(&self, job: &JobId, region: &str) -> Result<()> { + let (ss_exist, snapshot_id) = self + .get_job_snapshot_id(job, region) + .await + .context("failed to get job snapshot")?; + if !ss_exist { + info!("No snapshot to delete"); + return Ok(()); + } + info!(snapshot_id, "Deleting snapshot"); + self.client(region) + .await + .delete_snapshot() + .snapshot_id(snapshot_id) + .send() + .await + .context("could not delete snapshot")?; Ok(()) } @@ -1451,7 +1494,7 @@ impl Aws { Ok(()) } - // TODO: handle all error cases, continue cleanup even if some steps fail or will it be retried later? + // TODO: handle all error cases, continue cleanup even if some steps fail or will it be retried later? 
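// [Note] one way to approach the TODO above (illustrative only): run each cleanup
// step, collect failures instead of bailing at the first `?`, and report them
// together so a later retry can finish the remaining steps, e.g.:
// let mut errors: Vec<anyhow::Error> = Vec::new();
// if let Err(e) = self.deregister_ami(job, region).await { errors.push(e); }
// if let Err(e) = self.delete_snapshot(job, region).await { errors.push(e); }
// if !errors.is_empty() { return Err(anyhow!("cleanup incomplete: {:?}", errors)); }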
async fn spin_down_instance( &self, instance_id: &str, @@ -1463,7 +1506,7 @@ impl Aws { .get_job_elastic_ip(job, region, true) .await .context("could not get elastic ip of job")?; - + if exist { self.disassociate_address(association_id.as_str(), region) .await @@ -1475,7 +1518,7 @@ impl Aws { let private_ip = self.get_instance_private_ip(instance_id, region) .await .context("could not get private ip of instance")?; - + self.remove_rate_limiter_config(&rl_instance_id, &sec_ip, &private_ip, &eni_mac, bandwidth) .await .context("could not remove rate limiter config")?; @@ -1489,7 +1532,7 @@ impl Aws { .context("could not release address")?; info!("Elastic IP released"); } - + self.terminate_instance(instance_id, region) .await .context("could not terminate instance")?; @@ -1531,7 +1574,7 @@ impl InfraProvider for Aws { } async fn get_job_ip(&self, job: &JobId, region: &str) -> Result { - + let (found, _, elastic_ip, _, _, _, _) = self .get_job_elastic_ip(job, region, true) .await @@ -1577,7 +1620,7 @@ mod tests { .with_max_level(tracing::Level::INFO) .with_env_filter(filter) .init(); - + let mut aws = Aws::new( "cp".to_string(), &["ap-southeast-2".to_string()], From 257b61f1a3867d8b9b0c2858cd4ce741d92e03cd Mon Sep 17 00:00:00 2001 From: Vikrant Garg Date: Tue, 9 Dec 2025 17:23:34 +0530 Subject: [PATCH 07/12] Handle spin_down after a spin_down failure --- operator/control-plane/src/aws.rs | 117 ++++++++++++++++++++++++------ 1 file changed, 96 insertions(+), 21 deletions(-) diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs index 9f6298661..5ad57eea3 100644 --- a/operator/control-plane/src/aws.rs +++ b/operator/control-plane/src/aws.rs @@ -342,16 +342,22 @@ impl Aws { } pub async fn get_subnet(&self, region: &str) -> Result { - let filter = Filter::builder() + let project_filter = Filter::builder() .name("tag:project") .values("oyster") .build(); + let type_filter = Filter::builder() + .name("tag:type") + .values("cvm") + .build(); + Ok(self .client(region) .await .describe_subnets() - .filters(filter) + .filters(type_filter) + .filters(project_filter) .send() .await .context("could not describe subnets")? 
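[Note] In DescribeSubnets, as in the other Describe* calls in this file, separate Filter entries are ANDed while values within a single filter are ORed, so the two .filters(...) calls above match only subnets tagged with both project=oyster and type=cvm. A minimal sketch of the OR form, in case more than one subnet type ever needs to match (illustrative only):

    let type_filter = Filter::builder()
        .name("tag:type")
        .values("cvm")
        .values("rate-limiter") // values within one filter are ORed
        .build();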
@@ -531,7 +537,7 @@ impl Aws { } async fn allocate_ip_addr(&self, job: &JobId, region: &str) -> Result<(String, String)> { - let (exist, alloc_id, public_ip, _, _, _, _) = self + let (exist, alloc_id, public_ip, _, _, _) = self .get_job_elastic_ip(job, region, false) .await .context("could not get elastic ip for job")?; @@ -580,13 +586,14 @@ impl Aws { )) } + // TODO: clean up return params (private IP, instance ID, interface ID) // if with_association is true means, caller expected this elastic associated and return association details async fn get_job_elastic_ip( &self, job: &JobId, region: &str, with_association: bool, - ) -> Result<(bool, String, String, String, String, String, String)> { + ) -> Result<(bool, String, String, String, String, String)> { let job_filter = Filter::builder().name("tag:jobId").values(&job.id).build(); let operator_filter = Filter::builder() .name("tag:operator") @@ -624,10 +631,21 @@ impl Aws { String::new(), String::new(), String::new(), - String::new(), ), Some(addrs) => { if with_association == false { + // store private ip, eni id from tags + let tags = addrs.tags(); + let mut private_ip = String::new(); + let mut eni_id = String::new(); + for tag in tags { + if tag.key().unwrap_or("") == "PrivateIpAddress" { + private_ip = tag.value().unwrap_or("").to_string(); + } else if tag.key().unwrap_or("") == "NetworkInterfaceId" { + eni_id = tag.value().unwrap_or("").to_string(); + } + } + ( true, addrs @@ -638,9 +656,8 @@ impl Aws { .public_ip() .ok_or(anyhow!("could not parse public ip"))? .to_string(), - String::new(), - String::new(), - String::new(), + private_ip, + eni_id, String::new(), ) } else if addrs.association_id().is_none() { @@ -651,7 +668,6 @@ impl Aws { String::new(), String::new(), String::new(), - String::new(), ) } else { ( @@ -668,18 +684,14 @@ impl Aws { .private_ip_address() .ok_or(anyhow!("could not parse private ip"))? .to_string(), - addrs - .association_id() - .ok_or(anyhow!("could not parse association id"))? - .to_string(), - addrs - .instance_id() - .ok_or(anyhow!("could not parse instance id"))? - .to_string(), addrs .network_interface_id() .ok_or(anyhow!("could not parse network interface id"))? .to_string(), + addrs + .association_id() + .ok_or(anyhow!("could not parse association id"))? + .to_string(), ) } }, @@ -733,6 +745,25 @@ impl Aws { eni_id: &str, private_ip: &str, ) -> Result<()> { + let tag_private_ip = Tag::builder() + .key("PrivateIpAddress") + .value(private_ip) + .build(); + let tag_eni_id = Tag::builder() + .key("NetworkInterfaceId") + .value(eni_id) + .build(); + let tags = vec![tag_private_ip, tag_eni_id]; + + self.client(region) + .await + .create_tags() + .resources(alloc_id) + .set_tags(Some(tags)) + .send() + .await + .context("failed to set private IP & eni id tag for elastic IP")?; + self.client(region) .await .associate_address() @@ -1370,12 +1401,12 @@ impl Aws { if !exist || state == "shutting-down" || state == "terminated" { // instance does not really exist anyway, we are done - // TODO: cleanup required for RL config and elastic IP? 
info!("Instance does not exist or is already terminated"); return Ok(()); } - // terminate instance + + // cleanup instance and related resources info!(instance, "Terminating existing instance"); self.spin_down_instance(&instance, job, region, bandwidth) .await @@ -1459,6 +1490,26 @@ impl Aws { return Ok(()); } + async fn get_rate_limiter_instance_id(&self, eni_id: &str, region: &str) -> Result { + Ok(self + .client(region) + .await + .describe_network_interfaces() + .network_interface_ids(eni_id) + .send() + .await + .context("could not describe network interfaces")? + // response parsing from here + .network_interfaces() + .first() + .ok_or(anyhow!("no network interface found"))? + .attachment() + .ok_or(anyhow!("no attachment found for network interface"))? + .instance_id() + .ok_or(anyhow!("could not parse instance id"))? + .to_string()) + } + async fn get_eni_mac_address(&self, eni_id: &str, region: &str) -> Result { Ok(self .client(region) @@ -1502,7 +1553,14 @@ impl Aws { region: &str, bandwidth: u64, ) -> Result<()> { - let (exist, alloc_id, _, sec_ip, association_id, rl_instance_id, eni_id) = self + // Check elastic ip association and cleanup + // check rate limiter config and cleanup + // check aws secondary ip assignment and unassign + // check elastic ip and release + // terminate instance if exist + + // disassociation of elastic IP if association exists + let (exist, _, _, _, _, association_id) = self .get_job_elastic_ip(job, region, true) .await .context("could not get elastic ip of job")?; @@ -1511,18 +1569,35 @@ impl Aws { self.disassociate_address(association_id.as_str(), region) .await .context("could not disassociate address")?; + + } + + // get eni_id, sec_ip from elastic ip tags + let (exist, alloc_id, _, sec_ip, eni_id, _) = self + .get_job_elastic_ip(job, region, false) + .await + .context("could not get elastic ip of job")?; + + if exist { let eni_mac = self .get_eni_mac_address(eni_id.as_str(), region) .await .context("could not get eni mac address")?; + let rl_instance_id = self + .get_rate_limiter_instance_id(eni_id.as_str(), region) + .await + .context("could not get rate limiter instance id")?; let private_ip = self.get_instance_private_ip(instance_id, region) .await .context("could not get private ip of instance")?; + // FIXME: make sure bandwidth isn't reduced twice + // check exist and remove self.remove_rate_limiter_config(&rl_instance_id, &sec_ip, &private_ip, &eni_mac, bandwidth) .await .context("could not remove rate limiter config")?; + // while unassiging check if error is related to ip not assigned then ignore self.unassign_secondary_ip(eni_id.as_str(), sec_ip.as_str(), region) .await .context("could not unassign secondary ip")?; @@ -1575,7 +1650,7 @@ impl InfraProvider for Aws { async fn get_job_ip(&self, job: &JobId, region: &str) -> Result { - let (found, _, elastic_ip, _, _, _, _) = self + let (found, _, elastic_ip, _, _, _) = self .get_job_elastic_ip(job, region, true) .await .context("could not get job elastic ip")?; From 4e3f7ec948ec9bb25e39f05e90f6045b1428e82f Mon Sep 17 00:00:00 2001 From: Vikrant Garg Date: Fri, 19 Dec 2025 15:05:04 +0530 Subject: [PATCH 08/12] make unassign secondary ip succed if given ip is removed already --- operator/control-plane/src/aws.rs | 63 ++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs index 5ad57eea3..4a8eca330 100644 --- a/operator/control-plane/src/aws.rs +++ b/operator/control-plane/src/aws.rs 
@@ -586,7 +586,6 @@ impl Aws { )) } - // TODO: clean up return params (private IP, instance ID, interface ID) // if with_association is true means, caller expected this elastic associated and return association details async fn get_job_elastic_ip( &self, @@ -634,7 +633,7 @@ impl Aws { ), Some(addrs) => { if with_association == false { - // store private ip, eni id from tags + // load private ip, eni id from tags let tags = addrs.tags(); let mut private_ip = String::new(); let mut eni_id = String::new(); @@ -1228,9 +1227,9 @@ impl Aws { .to_string()) } - pub async fn get_instance_bandwidth_limit(&self, instance_type: InstanceType) -> Result { + pub async fn get_instance_bandwidth_limit(&self, instance_type: InstanceType, region: &str) -> Result { let res = self - .client("ap-southeast-2") + .client(region) .await .describe_instance_types() .instance_types(instance_type) @@ -1270,7 +1269,7 @@ impl Aws { Ok(bandwidth_limit_bps) } - // TODO: update the route table of user subnet to send traffic via rate limiter instance + async fn select_rate_limiter( &self, region: &str, @@ -1314,7 +1313,8 @@ impl Aws { continue; } let instance_bandwidth_limit = self.get_instance_bandwidth_limit( - instance.instance_type().ok_or(anyhow!("could not parse instance type"))?.clone() + instance.instance_type().ok_or(anyhow!("could not parse instance type"))?.clone(), + region ).await .context("could not get instance bandwidth limit")?; for eni in instance.network_interfaces() { @@ -1534,14 +1534,22 @@ impl Aws { sec_ip: &str, region: &str, ) -> Result<()> { - self.client(region) + let res = self.client(region) .await .unassign_private_ip_addresses() .network_interface_id(eni_id) .private_ip_addresses(sec_ip) .send() - .await - .context("could not unassign secondary private ip address")?; + .await; + if let Err(err) = res { + let svc_err = err.as_service_error(); + if !svc_err.is_none() && svc_err.unwrap().meta().code() == Some("InvalidParameterValue") { + info!("Secondary IP [{}] already unassigned from ENI [{}]", sec_ip, eni_id); + return Ok(()); + } + error!(?err, "Error unassigning secondary private IP address ENI [{}], IP [{}]", eni_id, sec_ip); + return Err(err).context("could not unassign secondary ip"); + } Ok(()) } @@ -1597,7 +1605,6 @@ impl Aws { .await .context("could not remove rate limiter config")?; - // while unassiging check if error is related to ip not assigned then ignore self.unassign_secondary_ip(eni_id.as_str(), sec_ip.as_str(), region) .await .context("could not unassign secondary ip")?; @@ -1775,6 +1782,7 @@ mod tests { #[tokio::test] async fn test_get_bandwidth_limit() { + let region = "ap-southeast-2"; let aws = Aws::new( "cp".to_string(), &["ap-southeast-2".to_string()], @@ -1785,7 +1793,7 @@ mod tests { .await; let instance_type = InstanceType::C6aXlarge; let bandwidth_limit_result = aws - .get_instance_bandwidth_limit(instance_type) + .get_instance_bandwidth_limit(instance_type, region) .await; assert!( bandwidth_limit_result.is_ok(), @@ -1797,7 +1805,7 @@ mod tests { let instance_type = InstanceType::M6a12xlarge; let bandwidth_limit_result = aws - .get_instance_bandwidth_limit(instance_type) + .get_instance_bandwidth_limit(instance_type, region) .await; assert!( bandwidth_limit_result.is_ok(), @@ -1809,7 +1817,7 @@ mod tests { let instance_type = InstanceType::M5Xlarge; let bandwidth_limit_result = aws - .get_instance_bandwidth_limit(instance_type) + .get_instance_bandwidth_limit(instance_type, region) .await; assert!( bandwidth_limit_result.is_ok(), @@ -1819,4 +1827,33 @@ mod tests { 
let bandwidth_limit = bandwidth_limit_result.unwrap(); println!("Instance Bandwidth Limit: {} bps", bandwidth_limit); } + + // TODO: complete test by adding create instance and assign secondary ip before unassigning + #[tokio::test] + async fn test_unassign_secondary_ip() { + let mut filter = EnvFilter::new("info,aws_config=warn"); + if let Ok(var) = std::env::var("RUST_LOG") { + filter = filter.add_directive(var.parse().unwrap()); + } + tracing_subscriber::fmt() + .with_max_level(tracing::Level::INFO) + .with_env_filter(filter) + .init(); + let aws = Aws::new( + "cp".to_string(), + &["ap-southeast-2".to_string()], + "rlgen".to_string(), + None, + None, + ) + .await; + let eni_id = "eni-0e378f556e57a37df"; // replace with a valid ENI ID for testing + let sec_ip = "172.31.42.188"; // replace with a valid secondary IP for testing + let unassign_result = aws.unassign_secondary_ip(eni_id, sec_ip, "ap-southeast-2").await; + assert!( + unassign_result.is_ok(), + "Unassign secondary IP failed: {:?}", + unassign_result.err() + ); + } } From 50a0a2842a74e7e5cb8cc65b4755661f21ddf437 Mon Sep 17 00:00:00 2001 From: Vikrant Garg Date: Fri, 19 Dec 2025 16:36:38 +0530 Subject: [PATCH 09/12] Update rate limiter calls with job ID --- operator/control-plane/Cargo.lock | 57 +++++++++++++++---------------- operator/control-plane/Cargo.toml | 1 + operator/control-plane/src/aws.rs | 24 ++++++++----- 3 files changed, 43 insertions(+), 39 deletions(-) diff --git a/operator/control-plane/Cargo.lock b/operator/control-plane/Cargo.lock index 912388529..e3704c28f 100644 --- a/operator/control-plane/Cargo.lock +++ b/operator/control-plane/Cargo.lock @@ -1167,6 +1167,7 @@ dependencies = [ "tokio-stream", "tracing", "tracing-subscriber", + "uuid", "whoami", ] @@ -2409,9 +2410,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.76" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -4775,9 +4776,14 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.11.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +dependencies = [ + "getrandom 0.3.3", + "js-sys", + "wasm-bindgen", +] [[package]] name = "valuable" @@ -4853,34 +4859,22 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.99" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.99" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn 2.0.110", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.49" +version = "0.4.56" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -4891,9 +4885,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.99" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4901,22 +4895,25 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.99" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn 2.0.110", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.99" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] [[package]] name = "wasm-streams" @@ -4933,9 +4930,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.76" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/operator/control-plane/Cargo.toml b/operator/control-plane/Cargo.toml index 26ff39390..90fe3e0a1 100644 --- a/operator/control-plane/Cargo.toml +++ b/operator/control-plane/Cargo.toml @@ -37,6 +37,7 @@ whoami = "1.5.2" [dev-dependencies] httpc-test = "0.1.10" +uuid = { version = "1.19.0", features = ["v4"] } [profile.release] lto = true diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs index 4a8eca330..fed3530c8 100644 --- a/operator/control-plane/src/aws.rs +++ b/operator/control-plane/src/aws.rs @@ -1144,7 +1144,7 @@ impl Aws { info!(ip, "Elastic Ip allocated"); let (sec_ip, eni_id) = self - .select_rate_limiter(region, bandwidth, instance_id) + .select_rate_limiter(job, region, bandwidth, instance_id) .await .context("could not select rate limiter")?; @@ -1156,6 +1156,7 @@ impl Aws { async fn configure_rate_limiter( &self, + job: &JobId, instance_id: &str, rl_instance_id: &str, sec_ip: &str, @@ -1185,8 +1186,8 @@ impl Aws { // OPTION: Use a script file in rate limit VM, which take sec ip and private ip, bandwidth as args and setup // everything let add_rl_cmd = format!( - "sudo ~/add_rl.sh {} {} {} {} {}", - sec_ip, private_ip, eni_mac, bandwidth * 1000, instance_bandwidth_limit + "sudo ~/add_rl.sh {} {} {} {} {} {}", + job.id, sec_ip, private_ip, eni_mac, bandwidth * 1000, instance_bandwidth_limit ); let (_, stderr) = Self::ssh_exec(sess, &add_rl_cmd).context("Failed to run add_rl.sh command")?; @@ -1272,6 +1273,7 @@ impl Aws { async fn select_rate_limiter( &self, + job: &JobId, region: &str, bandwidth: u64, instance_id: &str, @@ -1354,6 +1356,7 @@ impl Aws { // RL IP, secondary IP, if 
self.configure_rate_limiter(
+                                job,
                                 instance_id,
                                 &rl_instance_id,
                                 &sec_ip,
@@ -1460,6 +1463,7 @@ impl Aws {
 
     async fn remove_rate_limiter_config(
         &self,
+        job: &JobId,
         rl_instance_id: &str,
         sec_ip: &str,
         private_ip: &str,
@@ -1477,8 +1481,8 @@ impl Aws {
             .context("error establishing ssh connection")?;
 
         let remove_rl_cmd = format!(
-            "sudo ~/remove_rl.sh {} {} {} {}",
-            sec_ip, private_ip, eni_mac, bandwidth * 1000
+            "sudo ~/remove_rl.sh {} {} {} {} {}",
+            job.id, sec_ip, private_ip, eni_mac, bandwidth * 1000
         );
 
         let (_, stderr) = Self::ssh_exec(sess, &remove_rl_cmd)
@@ -1599,9 +1603,7 @@ impl Aws {
             .await
             .context("could not get private ip of instance")?;
 
-        // FIXME: make sure bandwidth isn't reduced twice
-        // check exist and remove
-        self.remove_rate_limiter_config(&rl_instance_id, &sec_ip, &private_ip, &eni_mac, bandwidth)
+        self.remove_rate_limiter_config(job, &rl_instance_id, &sec_ip, &private_ip, &eni_mac, bandwidth)
             .await
             .context("could not remove rate limiter config")?;
 
@@ -1711,8 +1713,9 @@ mod tests {
             None,
         )
         .await;
+        let job_id = uuid::Uuid::new_v4().to_string();
         let job = JobId {
-            id: "test-job".to_string(),
+            id: "test-job-".to_string() + &job_id,
             operator: "test-operator".to_string(),
             chain: "test-chain".to_string(),
             contract: "test-contract".to_string(),
@@ -1761,6 +1764,9 @@ mod tests {
         let job_ip = job_ip_result.unwrap();
         println!("Job IP: {}", job_ip);
 
+        println!("Sleeping for 30 seconds...");
+        sleep(Duration::from_secs(30)).await;
+
         // Spin down
         let spin_down_result = aws.spin_down(&job, region, bandwidth).await;
         assert!(

From ca38d3ec322dd882ebbc6fdd434527f31d70e4a1 Mon Sep 17 00:00:00 2001
From: Vikrant Garg
Date: Mon, 12 Jan 2026 17:15:49 +0530
Subject: [PATCH 10/12] Remove secondary IP config on RL VM

---
 operator/control-plane/src/aws.rs | 463 ++++++++----------------
 1 file changed, 124 insertions(+), 339 deletions(-)

diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs
index fed3530c8..a3f1f4069 100644
--- a/operator/control-plane/src/aws.rs
+++ b/operator/control-plane/src/aws.rs
@@ -219,7 +219,8 @@ impl Aws {
     }
 
     /* AWS EC2 UTILITY */
-    pub async fn get_instance_public_ip(&self, instance_id: &str, region: &str) -> Result<String> {
+
+    pub async fn get_instance_ip(&self, instance_id: &str, region: &str) -> Result<String> {
         Ok(self
             .client(region)
             .await
@@ -245,6 +246,7 @@ impl Aws {
             .to_string())
     }
 
+    // launch instance with given params and return instance id and private ip
     pub async fn launch_instance(
         &self,
         job: &JobId,
@@ -252,7 +254,7 @@ impl Aws {
         region: &str,
         init_params: &[u8],
         ami_id: &str,
-    ) -> Result<String> {
+    ) -> Result<(String, String)> {
         let name_tag = Tag::builder().key("Name").value("JobRunner").build();
         let managed_tag = Tag::builder().key("managedBy").value("marlin").build();
         let project_tag = Tag::builder().key("project").value("oyster").build();
@@ -281,7 +283,7 @@ impl Aws {
             .get_security_group(region)
             .await
             .context("could not get subnet")?;
-        Ok(self
+        let instance = self
             .client(region)
             .await
             .run_instances()
@@ -300,9 +302,18 @@ impl Aws {
             .instances()
             .first()
             .ok_or(anyhow!("no instance found"))?
+            .clone();
+
+        let instance_id = instance
             .instance_id()
-            .ok_or(anyhow!("could not parse group id"))?
-            .to_string())
+            .ok_or(anyhow!("could not parse instance id"))?
+            .to_string();
+
+        let private_ip = instance
+            .private_ip_address()
+            .ok_or(anyhow!("could not parse private ip"))?
+ .to_string(); + Ok((instance_id, private_ip)) } async fn terminate_instance(&self, instance_id: &str, region: &str) -> Result<()> { @@ -451,11 +462,12 @@ impl Aws { } } + // return (exist, instance_id, state, rl_instance_id, private_ip) pub async fn get_job_instance_id( &self, job: &JobId, region: &str, - ) -> Result<(bool, String, String)> { + ) -> Result<(bool, String, String, String, String)> { let job_filter = Filter::builder().name("tag:jobId").values(&job.id).build(); let operator_filter = Filter::builder() .name("tag:operator") @@ -484,12 +496,18 @@ impl Aws { let reservations = res.reservations(); if reservations.is_empty() { - Ok((false, "".to_owned(), "".to_owned())) + Ok((false, "".to_owned(), "".to_owned(), "".to_owned(), "".to_owned())) } else { let instance = reservations[0] .instances() .first() .ok_or(anyhow!("instance not found"))?; + let mut rl_instance_id = String::new(); + for tag in instance.tags() { + if tag.key().unwrap_or("") == "rlInstanceId" { + rl_instance_id = tag.value().unwrap_or("").to_string(); + } + } Ok(( true, instance @@ -503,6 +521,11 @@ impl Aws { .ok_or(anyhow!("could not parse instance state name"))? .as_str() .to_owned(), + rl_instance_id, + instance + .private_ip_address() + .ok_or(anyhow!("could not parse private ip"))? + .to_string() )) } } @@ -537,7 +560,7 @@ impl Aws { } async fn allocate_ip_addr(&self, job: &JobId, region: &str) -> Result<(String, String)> { - let (exist, alloc_id, public_ip, _, _, _) = self + let (exist, alloc_id, public_ip, _) = self .get_job_elastic_ip(job, region, false) .await .context("could not get elastic ip for job")?; @@ -592,7 +615,7 @@ impl Aws { job: &JobId, region: &str, with_association: bool, - ) -> Result<(bool, String, String, String, String, String)> { + ) -> Result<(bool, String, String, String)> { let job_filter = Filter::builder().name("tag:jobId").values(&job.id).build(); let operator_filter = Filter::builder() .name("tag:operator") @@ -628,23 +651,11 @@ impl Aws { String::new(), String::new(), String::new(), - String::new(), - String::new(), ), Some(addrs) => { if with_association == false { // load private ip, eni id from tags - let tags = addrs.tags(); - let mut private_ip = String::new(); - let mut eni_id = String::new(); - for tag in tags { - if tag.key().unwrap_or("") == "PrivateIpAddress" { - private_ip = tag.value().unwrap_or("").to_string(); - } else if tag.key().unwrap_or("") == "NetworkInterfaceId" { - eni_id = tag.value().unwrap_or("").to_string(); - } - } - + ( true, addrs @@ -655,8 +666,6 @@ impl Aws { .public_ip() .ok_or(anyhow!("could not parse public ip"))? .to_string(), - private_ip, - eni_id, String::new(), ) } else if addrs.association_id().is_none() { @@ -665,8 +674,6 @@ impl Aws { String::new(), String::new(), String::new(), - String::new(), - String::new(), ) } else { ( @@ -679,14 +686,6 @@ impl Aws { .public_ip() .ok_or(anyhow!("could not parse public ip"))? .to_string(), - addrs - .private_ip_address() - .ok_or(anyhow!("could not parse private ip"))? - .to_string(), - addrs - .network_interface_id() - .ok_or(anyhow!("could not parse network interface id"))? - .to_string(), addrs .association_id() .ok_or(anyhow!("could not parse association id"))? 
@@ -698,78 +697,17 @@ impl Aws {
         )
     }
 
-    async fn get_instance_elastic_ip(
-        &self,
-        instance: &str,
-        region: &str,
-    ) -> Result<(bool, String, String)> {
-        let instance_id_filter = Filter::builder()
-            .name("instance-id")
-            .values(instance)
-            .build();
-
-        Ok(
-            match self
-                .client(region)
-                .await
-                .describe_addresses()
-                .filters(instance_id_filter)
-                .send()
-                .await
-                .context("could not describe elastic ips")?
-                // response parsing starts here
-                .addresses()
-                .first()
-            {
-                None => (false, String::new(), String::new()),
-                Some(addrs) => (
-                    true,
-                    addrs
-                        .allocation_id()
-                        .ok_or(anyhow!("could not parse allocation id"))?
-                        .to_string(),
-                    addrs
-                        .association_id()
-                        .ok_or(anyhow!("could not parse public ip"))?
-                        .to_string(),
-                ),
-            },
-        )
-    }
-
     async fn associate_address(
         &self,
+        instance_id: &str,
         alloc_id: &str,
         region: &str,
-        eni_id: &str,
-        private_ip: &str,
     ) -> Result<()> {
-        let tag_private_ip = Tag::builder()
-            .key("PrivateIpAddress")
-            .value(private_ip)
-            .build();
-        let tag_eni_id = Tag::builder()
-            .key("NetworkInterfaceId")
-            .value(eni_id)
-            .build();
-        let tags = vec![tag_private_ip, tag_eni_id];
-
-        self.client(region)
-            .await
-            .create_tags()
-            .resources(alloc_id)
-            .set_tags(Some(tags))
-            .send()
-            .await
-            .context("failed to set private IP & eni id tag for elastic IP")?;
-
-        self.client(region)
+        self.client(region)
             .await
             .associate_address()
             .allocation_id(alloc_id)
-            .network_interface_id(eni_id)
-            .private_ip_address(private_ip)
-            .allow_reassociation(true)
+            .instance_id(instance_id)
             .send()
             .await
             .context("could not associate elastic ip")?;
@@ -809,7 +747,7 @@ impl Aws {
         image_url: &str,
         init_params: &[u8],
     ) -> Result<()> {
-        let (mut exist, instance, state) = self
+        let (mut exist, instance, state, rl_instance_id, private_ip) = self
             .get_job_instance_id(job, region)
             .await
             .context("failed to get job instance")?;
@@ -822,7 +760,7 @@ impl Aws {
         } else if state == "stopping" || state == "stopped" {
             // instance unhealthy, terminate
             info!(instance, "Found existing unhealthy instance");
-            self.spin_down_instance(&instance, job, region, bandwidth)
+            self.spin_down_instance(&instance, job, &private_ip, region, bandwidth, &rl_instance_id)
                 .await
                 .context("failed to terminate instance")?;
 
@@ -1109,58 +1047,58 @@ impl Aws {
         if req_mem > mem || req_vcpu > v_cpus {
             return Err(anyhow!("Required memory or vcpus are more than available"));
         }
-        let instance = self
+        let (instance_id, private_ip) = self
             .launch_instance(job, instance_type, region, init_params, ami_id)
             .await
             .context("could not launch instance")?;
         sleep(Duration::from_secs(100)).await;
 
-        let res = self.post_spin_up(job, &instance, region, bandwidth).await;
+        let res = self.post_spin_up(job, &instance_id, &private_ip, region, bandwidth).await;
         if let Err(err) = res {
             error!(?err, "Error during post spin up");
-            self.spin_down_instance(&instance, job, region, bandwidth)
+            self.spin_down_instance(&instance_id, job, &private_ip, region, bandwidth, "")
                 .await
                 .context("could not spin down instance after error during post spin up")?;
             return Err(err).context("error during post spin up");
         }
 
-        Ok(instance)
+        Ok(instance_id)
     }
 
     async fn post_spin_up(
         &self,
         job: &JobId,
         instance_id: &str,
+        private_ip: &str,
         region: &str,
         bandwidth: u64,
     ) -> Result<()> {
         // allocate Elastic IP
+        // associate Elastic IP
         // select and configure rate limiter
-        // associate secondary IP and Elastic IP
         let (alloc_id, ip) = self
             .allocate_ip_addr(job, region)
             .await
             .context("error allocating ip address")?;
         info!(ip, "Elastic Ip allocated");
 
-        let (sec_ip, eni_id) = self
-            .select_rate_limiter(job, region, bandwidth, instance_id)
+        self.associate_address(instance_id, &alloc_id, region)
+            .await
+            .context("could not associate ip address")?;
+
+        self
+            .select_rate_limiter(job, instance_id, private_ip, region, bandwidth)
             .await
             .context("could not select rate limiter")?;
 
-        self.associate_address(&alloc_id, region, &eni_id, &sec_ip)
-            .await
-            .context("could not associate ip address")?;
         Ok(())
     }
 
     async fn configure_rate_limiter(
         &self,
         job: &JobId,
-        instance_id: &str,
+        private_ip: &str,
         rl_instance_id: &str,
-        sec_ip: &str,
-        eni_mac: &str,
         bandwidth: u64, // in kbit/sec
         instance_bandwidth_limit: u64,
         region: &str,
@@ -1168,7 +1106,7 @@ impl Aws {
         // TODO: rollback on failure
         // SSH into Rate Limiter instance and configure NAT and tc
         let rl_ip = self
-            .get_instance_public_ip(rl_instance_id, region)
+            .get_instance_ip(rl_instance_id, region)
             .await
             .context("could not get rate limiter instance ip")?;
 
@@ -1177,17 +1115,10 @@ impl Aws {
             .await
             .context("error establishing ssh connection")?;
 
-        // Get instance private IP
-        let private_ip = self
-            .get_instance_private_ip(instance_id, region)
-            .await
-            .context("could not get instance private ip")?;
-
-        // OPTION: Use a script file in rate limit VM, which take sec ip and private ip, bandwidth as args and setup
-        // everything
+        // Use a script file in the rate limiter VM which takes the job ID, private IP, and bandwidth as args and sets everything up
        let add_rl_cmd = format!(
-            "sudo ~/add_rl.sh {} {} {} {} {} {}",
-            job.id, sec_ip, private_ip, eni_mac, bandwidth * 1000, instance_bandwidth_limit
+            "sudo ~/add_rl.sh {} {} {} {}",
+            job.id, private_ip, bandwidth * 1000, instance_bandwidth_limit
        );
 
        let (_, stderr) = Self::ssh_exec(sess, &add_rl_cmd).context("Failed to run add_rl.sh command")?;
@@ -1202,32 +1133,6 @@ impl Aws {
 
     }
 
-    async fn get_instance_private_ip(&self, instance_id: &str, region: &str) -> Result<String> {
-        Ok(self
-            .client(region)
-            .await
-            .describe_instances()
-            .filters(
-                Filter::builder()
-                    .name("instance-id")
-                    .values(instance_id)
-                    .build(),
-            )
-            .send()
-            .await
-            .context("could not describe instances")?
-            // response parsing from here
-            .reservations()
-            .first()
-            .ok_or(anyhow!("no reservation found"))?
-            .instances()
-            .first()
-            .ok_or(anyhow!("no instances with the given id"))?
-            .private_ip_address()
-            .ok_or(anyhow!("could not parse private ip address"))?
-            .to_string())
-    }
-
     pub async fn get_instance_bandwidth_limit(&self, instance_type: InstanceType, region: &str) -> Result<u64> {
         let res = self
             .client(region)
@@ -1271,15 +1176,17 @@ impl Aws {
         Ok(bandwidth_limit_bps)
     }
 
+    // TODO: return error if all rate limiters are full
     async fn select_rate_limiter(
         &self,
         job: &JobId,
+        instance_id: &str,
+        private_ip: &str,
         region: &str,
         bandwidth: u64,
-        instance_id: &str,
-    ) -> Result<(String, String)> {
+    ) -> Result<()> {
         // get all the rate limiter vm from region
-        // check available bandwidth and secondary IP is allowed
+        // check available bandwidth
         // bandwidth is in kbit/sec
         let project_filter = Filter::builder()
             .name("tag:project")
@@ -1320,72 +1227,43 @@ impl Aws {
             ).await
             .context("could not get instance bandwidth limit")?;
             for eni in instance.network_interfaces() {
-                if let Some(eni_id) = eni.network_interface_id() {
-                    let Some(eni_mac) = eni.mac_address() else {
+                let eni_id = eni.network_interface_id().unwrap_or_default();
+                if eni.mac_address().is_none() {
                         debug!(
                             "MAC address not found for ENI {}. Skipping ENI",
                             eni_id
                         );
                         continue;
                     };
-                    let res = self
-                        .client(region)
-                        .await
-                        .assign_private_ip_addresses()
-                        .network_interface_id(eni_id)
-                        .secondary_private_ip_address_count(1)
-                        .send()
-                        .await;
-                    if let Ok(assigned_ip) = res {
-                        if assigned_ip.assigned_private_ip_addresses.is_none() {
-                            debug!(
-                                "No secondary private IP address assigned Rate Limit instance [{}], ENI [{}]",
-                                rl_instance_id,
-                                eni_id
-                            );
-                            continue;
-                        } else {
-                            let sec_ip = assigned_ip
-                                .assigned_private_ip_addresses()
-                                .first()
-                                .ok_or(anyhow!("no assigned private ip address found"))?
-                                .private_ip_address()
-                                .ok_or(anyhow!("no private ip address found"))?
-                                .to_string();
-
-                            // RL IP, secondary IP,
-                            if self.configure_rate_limiter(
-                                job,
-                                instance_id,
-                                &rl_instance_id,
-                                &sec_ip,
-                                eni_mac,
-                                bandwidth,
-                                instance_bandwidth_limit,
-                                region
-                            ).await.is_err() {
-                                warn!(
-                                    "Error configuring Rate Limit instance [{}], ENI [{}]",
-                                    rl_instance_id,
-                                    eni_id
-                                );
-                                self.unassign_secondary_ip(eni_id, sec_ip.as_str(), region)
-                                    .await
-                                    .context("could not unassign secondary ip")?;
-                                continue;
-                            }
-                            return Ok((sec_ip, eni_id.to_string()));
-                        }
-                    } else {
-                        debug!(
-                            ?res,
-                            "Error assigning secondary private IP address Rate Limit instance [{}], ENI [{}]",
+                if self.configure_rate_limiter(
+                    job,
+                    private_ip,
+                    &rl_instance_id,
+                    bandwidth,
+                    instance_bandwidth_limit,
+                    region
+                ).await.is_err() {
+                    warn!(
+                        "Error configuring Rate Limit instance [{}], ENI [{}]",
                         rl_instance_id,
                         eni_id
                     );
                     continue;
                 }
+                let tag_rl_id = Tag::builder()
+                    .key("rlInstanceId")
+                    .value(&rl_instance_id)
+                    .build();
+                self.client(region)
+                    .await
+                    .create_tags()
+                    .resources(instance_id)
+                    .tags(tag_rl_id)
+                    .send()
+                    .await
+                    .context("could not tag job instance with rl instance id")?;
+                // TODO: revert rate limiter config on failure
+                return Ok(());
             }
         }
@@ -1397,7 +1274,7 @@ impl Aws {
     }
 
     async fn spin_down_impl(&self, job: &JobId, region: &str, bandwidth: u64) -> Result<()> {
-        let (exist, instance, state) = self
+        let (exist, instance, state, rl_instance_id, private_ip) = self
             .get_job_instance_id(job, region)
             .await
             .context("failed to get job instance")?;
@@ -1411,7 +1288,7 @@ impl Aws {
 
         // cleanup instance and related resources
         info!(instance, "Terminating existing instance");
-        self.spin_down_instance(&instance, job, region, bandwidth)
+        self.spin_down_instance(&instance, job, &private_ip, region, bandwidth, &rl_instance_id)
             .await
             .context("failed to terminate instance")?;
 
@@ -1460,14 +1341,13 @@ impl Aws {
     async fn remove_rate_limiter_config(
         &self,
         job: &JobId,
-        rl_instance_id: &str,
-        sec_ip: &str,
         private_ip: &str,
-        eni_mac: &str,
+        rl_instance_id: &str,
         bandwidth: u64, // in kbit/sec
+        region: &str,
     ) -> Result<()> {
         let rl_ip = self
-            .get_instance_public_ip(rl_instance_id, "ap-southeast-2")
+            .get_instance_ip(rl_instance_id, region)
             .await
             .context("could not get rate limiter instance ip")?;
 
@@ -1477,8 +1357,8 @@ impl Aws {
             .context("error establishing ssh connection")?;
 
         let remove_rl_cmd = format!(
-            "sudo ~/remove_rl.sh {} {} {} {} {}",
-            job.id, sec_ip, private_ip, eni_mac, bandwidth * 1000
+            "sudo ~/remove_rl.sh {} {} {}",
+            job.id, private_ip, bandwidth * 1000
         );
 
         let (_, stderr) = Self::ssh_exec(sess, &remove_rl_cmd)
@@ -1494,85 +1370,28 @@ impl Aws {
         return Ok(());
     }
 
-    async fn get_rate_limiter_instance_id(&self, eni_id: &str, region: &str) -> Result<String> {
-        Ok(self
-            .client(region)
-            .await
-            .describe_network_interfaces()
-            .network_interface_ids(eni_id)
-            .send()
-            .await
-            .context("could not describe network interfaces")?
-            // response parsing from here
-            .network_interfaces()
-            .first()
-            .ok_or(anyhow!("no network interface found"))?
-            .attachment()
-            .ok_or(anyhow!("no attachment found for network interface"))?
-            .instance_id()
-            .ok_or(anyhow!("could not parse instance id"))?
-            .to_string())
-    }
-
-    async fn get_eni_mac_address(&self, eni_id: &str, region: &str) -> Result<String> {
-        Ok(self
-            .client(region)
-            .await
-            .describe_network_interfaces()
-            .network_interface_ids(eni_id)
-            .send()
-            .await
-            .context("could not describe network interfaces")?
-            // response parsing from here
-            .network_interfaces()
-            .first()
-            .ok_or(anyhow!("no network interface found"))?
-            .mac_address()
-            .ok_or(anyhow!("could not parse mac address"))?
-            .to_string())
-    }
-
-    async fn unassign_secondary_ip(
-        &self,
-        eni_id: &str,
-        sec_ip: &str,
-        region: &str,
-    ) -> Result<()> {
-        let res = self.client(region)
-            .await
-            .unassign_private_ip_addresses()
-            .network_interface_id(eni_id)
-            .private_ip_addresses(sec_ip)
-            .send()
-            .await;
-        if let Err(err) = res {
-            let svc_err = err.as_service_error();
-            if svc_err.is_some_and(|e| e.meta().code() == Some("InvalidParameterValue")) {
-                info!("Secondary IP [{}] already unassigned from ENI [{}]", sec_ip, eni_id);
-                return Ok(());
-            }
-            error!(?err, "Error unassigning secondary private IP address from ENI [{}], IP [{}]", eni_id, sec_ip);
-            return Err(err).context("could not unassign secondary ip");
-        }
-        Ok(())
-    }
-
-    // TODO: handle all error cases, continue cleanup even if some steps fail or will it be retried later?
+    // TODO: handle all error cases
     async fn spin_down_instance(
         &self,
         instance_id: &str,
         job: &JobId,
+        private_ip: &str,
         region: &str,
         bandwidth: u64,
+        rl_instance_id: &str,
     ) -> Result<()> {
-        // Check elastic ip association and cleanup
         // check rate limiter config and cleanup
-        // check aws secondary ip assignment and unassign
+        // check elastic ip association and cleanup
         // check elastic ip and release
         // terminate instance if exist
 
-        // disassociation of elastic IP if association exists
-        let (exist, _, _, _, _, association_id) = self
+        if !rl_instance_id.is_empty() {
+            self.remove_rate_limiter_config(job, private_ip, &rl_instance_id, bandwidth, region)
+                .await
+                .context("could not remove rate limiter config")?;
+        }
+
+        let (exist, _, _, association_id) = self
             .get_job_elastic_ip(job, region, true)
             .await
             .context("could not get elastic ip of job")?;
 
         if exist {
             self.disassociate_address(association_id.as_str(), region)
                 .await
                 .context("could not disassociate address")?;
         }
-
-        // get eni_id, sec_ip from elastic ip tags
-        let (exist, alloc_id, _, sec_ip, eni_id, _) = self
+
+        let (exist, alloc_id, _, _) = self
             .get_job_elastic_ip(job, region, false)
             .await
             .context("could not get elastic ip of job")?;
 
         if exist {
-            let eni_mac = self
-                .get_eni_mac_address(eni_id.as_str(), region)
-                .await
-                .context("could not get eni mac address")?;
-            let rl_instance_id = self
-                .get_rate_limiter_instance_id(eni_id.as_str(), region)
-                .await
-                .context("could not get rate limiter instance id")?;
-            let private_ip = self.get_instance_private_ip(instance_id, region)
-                .await
-                .context("could not get private ip of instance")?;
-
-            self.remove_rate_limiter_config(job, &rl_instance_id, &sec_ip, &private_ip, &eni_mac, bandwidth)
-                .await
-                .context("could not remove rate limiter config")?;
-
-            self.unassign_secondary_ip(eni_id.as_str(), sec_ip.as_str(), region)
-                .await
-                .context("could not unassign secondary ip")?;
-
             self.release_address(alloc_id.as_str(), region)
                 .await
                 .context("could not release address")?;
@@ -1659,20 +1457,36 @@ impl InfraProvider for Aws {
 
     async fn get_job_ip(&self, job: &JobId, region: &str) -> Result<String> {
-        let (found, _, elastic_ip, _, _, _) = self
+        let instance = self
+            .get_job_instance_id(job, region)
+            .await
+            .context("could not get instance id for job instance ip")?;
+
+        if !instance.0 {
+            return Err(anyhow!("Instance not found for job - {}", job.id));
+        }
+
+        let instance_ip = self
+            .get_instance_ip(&instance.1, region)
+            .await
+            .context("could not get instance ip")?;
+
+        let (found, _, elastic_ip, _) = self
             .get_job_elastic_ip(job, region, true)
             .await
             .context("could not get job elastic ip")?;
 
-        if found {
-            return Ok(elastic_ip);
+        // it is possible for the two above to differ while the instance is initializing (maybe
+        // terminating?), better to error out instead of potentially showing a temporary IP
+        if found && instance_ip == elastic_ip {
+            return Ok(instance_ip);
         }
 
         Err(anyhow!("Instance is still initializing"))
     }
 
     async fn check_enclave_running(&mut self, job: &JobId, region: &str) -> Result<bool> {
-        let (exists, instance_id, state) = self
+        let (exists, _, state, _, _) = self
            .get_job_instance_id(job, region)
            .await
            .context("could not get instance id for job")?;
@@ -1833,33 +1647,4 @@ mod tests {
         let bandwidth_limit = bandwidth_limit_result.unwrap();
         println!("Instance Bandwidth Limit: {} bps", bandwidth_limit);
     }
-
-    // TODO: complete test by adding create instance and assign secondary ip before unassigning
-    #[tokio::test]
-    async fn test_unassign_secondary_ip() {
-        let mut filter = EnvFilter::new("info,aws_config=warn");
-        if let Ok(var) = std::env::var("RUST_LOG") {
-            filter = filter.add_directive(var.parse().unwrap());
-        }
-        tracing_subscriber::fmt()
-            .with_max_level(tracing::Level::INFO)
-            .with_env_filter(filter)
-            .init();
-        let aws = Aws::new(
-            "cp".to_string(),
-            &["ap-southeast-2".to_string()],
-            "rlgen".to_string(),
-            None,
-            None,
-        )
-        .await;
-        let eni_id = "eni-0e378f556e57a37df"; // replace with a valid ENI ID for testing
-        let sec_ip = "172.31.42.188"; // replace with a valid secondary IP for testing
-        let unassign_result = aws.unassign_secondary_ip(eni_id, sec_ip, "ap-southeast-2").await;
-        assert!(
-            unassign_result.is_ok(),
-            "Unassign secondary IP failed: {:?}",
-            unassign_result.err()
-        );
-    }
 }

From c4ff0f879debd34ab700e2ef811382af233e61a7 Mon Sep 17 00:00:00 2001
From: Vikrant Garg
Date: Wed, 14 Jan 2026 15:52:01 +0530
Subject: [PATCH 11/12] Fix rl config cmd

---
 operator/control-plane/src/aws.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs
index a3f1f4069..4ba6fd804 100644
--- a/operator/control-plane/src/aws.rs
+++ b/operator/control-plane/src/aws.rs
@@ -1117,11 +1117,11 @@ impl Aws {
 
         // Use a script file in the rate limiter VM which takes the job ID, private IP, and bandwidth as args and sets everything up
         let add_rl_cmd = format!(
-            "sudo ~/add_rl.sh {} {} {} {}",
+            "add_rl {} {} {} {}",
             job.id, private_ip, bandwidth * 1000, instance_bandwidth_limit
         );
 
-        let (_, stderr) = Self::ssh_exec(sess, &add_rl_cmd).context("Failed to run add_rl.sh command")?;
+        let (_, stderr) = Self::ssh_exec(sess, &add_rl_cmd).context("Failed to run add_rl command")?;
 
         if !stderr.is_empty() {
             error!(stderr = ?stderr, "Error setting up Rate Limiter");
@@ -1357,17 +1357,17 @@ impl Aws {
             .context("error establishing ssh connection")?;
 
         let remove_rl_cmd = format!(
-            "sudo ~/remove_rl.sh {} {} {}",
+            "remove_rl {} {} {}",
             job.id, private_ip,
bandwidth * 1000 ); let (_, stderr) = Self::ssh_exec(sess, &remove_rl_cmd) - .context("Failed to run remove_rl.sh command")?; + .context("Failed to run remove_rl command")?; if !stderr.is_empty() { error!(stderr = ?stderr, "Error removing Rate Limiter configuration"); } - return Ok(()); + Ok(()) } // TODO: handle all error cases From 02ac03ef4372bc2e1cee7dc8e59d3371a869aea5 Mon Sep 17 00:00:00 2001 From: Vikrant Garg Date: Wed, 14 Jan 2026 17:35:11 +0530 Subject: [PATCH 12/12] Upgrade ssh2 version to support ssh connection with nixos --- operator/control-plane/Cargo.lock | 93 ++++++++----------------------- operator/control-plane/Cargo.toml | 2 +- operator/control-plane/src/aws.rs | 19 +++---- 3 files changed, 32 insertions(+), 82 deletions(-) diff --git a/operator/control-plane/Cargo.lock b/operator/control-plane/Cargo.lock index e3704c28f..294f68062 100644 --- a/operator/control-plane/Cargo.lock +++ b/operator/control-plane/Cargo.lock @@ -842,7 +842,7 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.9.4", + "bitflags", "cexpr", "clang-sys", "itertools", @@ -871,12 +871,6 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.9.4" @@ -1734,7 +1728,7 @@ checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" dependencies = [ "futures-core", "lock_api", - "parking_lot 0.12.3", + "parking_lot", ] [[package]] @@ -2338,15 +2332,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", -] - [[package]] name = "ipnet" version = "2.10.1" @@ -2493,9 +2478,9 @@ dependencies = [ [[package]] name = "libssh2-sys" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dc8a030b787e2119a731f1951d6a773e2280c660f8ec4b0f5e1505a386e71ee" +checksum = "220e4f05ad4a218192533b300327f5150e809b54c4ec83b5a1d91833601811b9" dependencies = [ "cc", "libc", @@ -2637,7 +2622,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.9.4", + "bitflags", "cfg-if", "cfg_aliases", "libc", @@ -2747,7 +2732,7 @@ version = "0.10.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" dependencies = [ - "bitflags 2.9.4", + "bitflags", "cfg-if", "foreign-types", "libc", @@ -2879,17 +2864,6 @@ version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - 
[[package]] name = "parking_lot" version = "0.12.3" @@ -2897,21 +2871,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", - "parking_lot_core 0.9.10", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -2922,7 +2882,7 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.8", + "redox_syscall", "smallvec", "windows-targets 0.52.6", ] @@ -3084,7 +3044,7 @@ checksum = "2bb0be07becd10686a0bb407298fb425360a5c44a663774406340c59a22de4ce" dependencies = [ "bit-set", "bit-vec", - "bitflags 2.9.4", + "bitflags", "lazy_static", "num-traits", "rand 0.9.2", @@ -3261,22 +3221,13 @@ dependencies = [ "rand_core 0.9.3", ] -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "bitflags 2.9.4", + "bitflags", ] [[package]] @@ -3531,7 +3482,7 @@ version = "0.38.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" dependencies = [ - "bitflags 2.9.4", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -3708,7 +3659,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.9.4", + "bitflags", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -3721,7 +3672,7 @@ version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ - "bitflags 2.9.4", + "bitflags", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -4081,7 +4032,7 @@ checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.9.4", + "bitflags", "byteorder", "bytes", "crc", @@ -4123,7 +4074,7 @@ checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.9.4", + "bitflags", "byteorder", "crc", "dotenvy", @@ -4220,14 +4171,14 @@ dependencies = [ [[package]] name = "ssh2" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7fe461910559f6d5604c3731d00d2aafc4a83d1665922e280f42f9a168d5455" +checksum = "2f84d13b3b8a0d4e91a2629911e951db1bb8671512f5c09d7d4ba34500ba68c8" dependencies = [ - "bitflags 1.3.2", + "bitflags", "libc", "libssh2-sys", - "parking_lot 0.11.2", + "parking_lot", ] [[package]] @@ -4313,7 +4264,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - 
"bitflags 2.9.4", + "bitflags", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -4472,7 +4423,7 @@ dependencies = [ "bytes", "libc", "mio", - "parking_lot 0.12.3", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2 0.5.8", @@ -4963,7 +4914,7 @@ version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "372d5b87f58ec45c384ba03563b03544dc5fadc3983e434b286913f5b4a9bb6d" dependencies = [ - "redox_syscall 0.5.8", + "redox_syscall", "wasite", "web-sys", ] diff --git a/operator/control-plane/Cargo.toml b/operator/control-plane/Cargo.toml index 90fe3e0a1..36a81e014 100644 --- a/operator/control-plane/Cargo.toml +++ b/operator/control-plane/Cargo.toml @@ -28,7 +28,7 @@ serde_json = "1.0.134" shell-escape = "0.1.5" sqlx = { version = "0.8.6", features = ["runtime-tokio-rustls", "postgres", "json"] } ssh-key = { version = "0.6.7", features = ["ed25519"] } -ssh2 = { version = "0.9.4", features = ["vendored-openssl"] } +ssh2 = { version = "0.9.5", features = ["vendored-openssl"] } tokio = { version = "1.42.0", features = ["full", "test-util"] } tokio-stream = "0.1" tracing = "0.1.41" diff --git a/operator/control-plane/src/aws.rs b/operator/control-plane/src/aws.rs index 4ba6fd804..5ced2779f 100644 --- a/operator/control-plane/src/aws.rs +++ b/operator/control-plane/src/aws.rs @@ -192,7 +192,7 @@ impl Aws { sess.set_tcp_stream(tcp); sess.handshake()?; - sess.userauth_pubkey_file("ubuntu", None, Path::new(&self.key_location), None)?; + sess.userauth_pubkey_file("root", None, Path::new(&self.key_location), None)?; info!(ip_address, "SSH connection established"); Ok(sess) } @@ -257,7 +257,7 @@ impl Aws { ) -> Result<(String, String)> { let name_tag = Tag::builder().key("Name").value("JobRunner").build(); let managed_tag = Tag::builder().key("managedBy").value("marlin").build(); - let project_tag = Tag::builder().key("project").value("oyster").build(); + let project_tag = Tag::builder().key("project").value("marlin-cvm").build(); let job_tag = Tag::builder().key("jobId").value(&job.id).build(); let operator_tag = Tag::builder().key("operator").value(&job.operator).build(); let chain_tag = Tag::builder().key("chainID").value(&job.chain).build(); @@ -332,7 +332,7 @@ impl Aws { pub async fn get_security_group(&self, region: &str) -> Result { let filter = Filter::builder() .name("tag:project") - .values("oyster") + .values("marlin-cvm") .build(); Ok(self @@ -355,7 +355,7 @@ impl Aws { pub async fn get_subnet(&self, region: &str) -> Result { let project_filter = Filter::builder() .name("tag:project") - .values("oyster") + .values("marlin-cvm") .build(); let type_filter = Filter::builder() @@ -571,7 +571,7 @@ impl Aws { } let managed_tag = Tag::builder().key("managedBy").value("marlin").build(); - let project_tag = Tag::builder().key("project").value("oyster").build(); + let project_tag = Tag::builder().key("project").value("marlin-cvm").build(); let job_tag = Tag::builder().key("jobId").value(&job.id).build(); let operator_tag = Tag::builder().key("operator").value(&job.operator).build(); let chain_tag = Tag::builder().key("chainID").value(&job.chain).build(); @@ -864,7 +864,7 @@ impl Aws { .build(); let project_tag = aws_sdk_ebs::types::Tag::builder() .key("project") - .value("oyster") + .value("marlin-cvm") .build(); let job_tag = aws_sdk_ebs::types::Tag::builder() .key("jobId") @@ -960,7 +960,7 @@ impl Aws { TagSpecification::builder() .resource_type(ResourceType::Image) .tags(Tag::builder().key("managedBy").value("marlin").build()) - 
.tags(Tag::builder().key("project").value("oyster").build()) + .tags(Tag::builder().key("project").value("marlin-cvm").build()) .tags(Tag::builder().key("jobId").value(&job.id).build()) .tags(Tag::builder().key("operator").value(&job.operator).build()) .tags( @@ -1176,7 +1176,6 @@ impl Aws { Ok(bandwidth_limit_bps) } - // TODO: return error if all rate limiters are full async fn select_rate_limiter( &self, job: &JobId, @@ -1190,11 +1189,11 @@ impl Aws { // bandwidth is in kbit/sec let project_filter = Filter::builder() .name("tag:project") - .values("oyster") + .values("marlin-cvm") .build(); let rl_filter = Filter::builder() .name("tag:type") - .values("rate-limiter") + .values("limiter") .build(); let res = self .client(region)
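

Reference sketch: get_instance_bandwidth_limit is exercised throughout the series above, but its body is only partially visible in these hunks. Below is a minimal sketch of deriving a baseline limit in bits/sec from DescribeInstanceTypes, assuming aws-sdk-ec2 exposes NetworkCardInfo::baseline_bandwidth_in_gbps; the in-tree implementation may parse different fields.

use anyhow::{anyhow, Context, Result};
use aws_sdk_ec2::{types::InstanceType, Client};

// Look up an instance type's baseline network bandwidth and convert it to bits/sec.
async fn baseline_bandwidth_bps(client: &Client, instance_type: InstanceType) -> Result<u64> {
    let res = client
        .describe_instance_types()
        .instance_types(instance_type)
        .send()
        .await
        .context("could not describe instance types")?;

    let gbps = res
        .instance_types()
        .first()
        .ok_or(anyhow!("no instance type info returned"))?
        .network_info()
        .ok_or(anyhow!("no network info for instance type"))?
        .network_cards()
        .first()
        .and_then(|card| card.baseline_bandwidth_in_gbps())
        .ok_or(anyhow!("no baseline bandwidth published"))?;

    // 1 Gbps = 10^9 bits/sec; burst above the baseline is not guaranteed by EC2,
    // so rate limiter placement should budget against the baseline figure.
    Ok((gbps * 1_000_000_000f64) as u64)
}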
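
Reference sketch: patch 10 records the chosen rate limiter on the job instance as an rlInstanceId tag and reads it back during spin down via a for loop over instance.tags(). An equivalent lookup written as a small helper, illustrative only:

use aws_sdk_ec2::types::Instance;

// Read the rate limiter instance id recorded on a job instance, if any.
fn rl_instance_id_tag(instance: &Instance) -> Option<String> {
    instance
        .tags()
        .iter()
        .find(|tag| tag.key() == Some("rlInstanceId"))
        .and_then(|tag| tag.value())
        .map(str::to_string)
}

Returning Option<String> rather than an empty string would let spin_down distinguish "no rate limiter assigned" from a missing tag explicitly.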