From 9d6ebbc1719c475e8f451cec7bb51a10af166a32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matou=C5=A1=20Dzivjak?= Date: Fri, 10 Oct 2025 16:43:08 +0200 Subject: [PATCH 1/2] feat: authenticating when using remote archive Add `auth-token` option to the CLI that allows configuring authentication when fetching registry from a remote archive. `auth-token` is passed and handled separately from the virtual directory path string as the usual `@` character separating the userinfo portion of the URL from the host is reserved for the `refspec`. Taking the token from the userinfo portion of the URL would be an alternative to the approach in this commit but would make the parsing a little bit more cumbersome. I am open to both approaches and happy to adjust the code to whatever the OpenTelemetry team deems better. Fixes: https://github.com/open-telemetry/weaver/issues/430 --- crates/weaver_codegen_test/build.rs | 4 ++-- crates/weaver_common/src/vdir.rs | 27 ++++++++++++++++++---- crates/weaver_resolver/src/lib.rs | 14 +++++++---- crates/weaver_semconv/src/registry.rs | 2 +- crates/weaver_semconv/src/registry_repo.rs | 5 ++-- crates/weaver_semconv_gen/src/lib.rs | 2 +- src/registry/check.rs | 5 +++- src/registry/diff.rs | 6 +++-- src/registry/emit.rs | 1 + src/registry/generate.rs | 6 ++++- src/registry/mod.rs | 5 ++++ src/registry/resolve.rs | 2 ++ src/registry/search.rs | 2 +- src/registry/stats.rs | 2 +- src/registry/update_markdown.rs | 5 ++-- src/util.rs | 5 ++-- tests/resolution_process.rs | 2 +- 17 files changed, 68 insertions(+), 27 deletions(-) diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index bc3be7e50..bb8532e0c 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -43,8 +43,8 @@ fn main() { let registry_path = VirtualDirectoryPath::LocalFolder { path: SEMCONV_REGISTRY_PATH.into(), }; - let registry_repo = - RegistryRepo::try_new("main", &registry_path).unwrap_or_else(|e| process_error(&logger, 
e)); + let registry_repo = RegistryRepo::try_new("main", &registry_path, None) + .unwrap_or_else(|e| process_error(&logger, e)); let semconv_specs = SchemaResolver::load_semconv_specs(&registry_repo, true, FOLLOW_SYMLINKS) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() diff --git a/crates/weaver_common/src/vdir.rs b/crates/weaver_common/src/vdir.rs index a9c647c9e..6bf82b891 100644 --- a/crates/weaver_common/src/vdir.rs +++ b/crates/weaver_common/src/vdir.rs @@ -276,7 +276,10 @@ impl VirtualDirectory { /// - Extracting local archives. /// /// Returns an [`Error`] if any operation fails (e.g. network issues, invalid paths, extraction failures). - pub fn try_new(vdir_path: &VirtualDirectoryPath) -> Result<Self, Error> { + pub fn try_new( + vdir_path: &VirtualDirectoryPath, + auth_token: Option<&String>, + ) -> Result<Self, Error> { let vdir_path_repr = vdir_path.to_string(); let vdir = match vdir_path { LocalFolder { path } => Ok(Self { @@ -297,7 +300,13 @@ impl VirtualDirectory { // Create a temporary directory for the virtual directory that will be deleted // when the `VirtualDirectory` goes out of scope. let tmp_dir = Self::create_tmp_repo()?; - Self::try_from_remote_archive(url, sub_folder.as_ref(), tmp_dir, vdir_path_repr) + Self::try_from_remote_archive( + url, + sub_folder.as_ref(), + tmp_dir, + vdir_path_repr, + auth_token, + ) } }; vdir @@ -593,6 +602,7 @@ impl VirtualDirectory { /// The sub_folder is used to filter the entries inside the archive to unpack. /// The temporary directory is created in the `.weaver/vdir_cache`. /// The temporary directory is deleted when the [`VirtualDirectory`] goes out of scope. + /// The auth_token is used to authenticate with the remote using the Bearer scheme. /// /// Arguments: /// - `id`: The unique identifier for the registry. 
@@ -605,11 +615,18 @@ impl VirtualDirectory { sub_folder: Option<&String>, target_dir: TempDir, vdir_path: String, + auth_token: Option<&String>, ) -> Result<Self, Error> { let tmp_path = target_dir.path().to_path_buf(); + let mut req = ureq::get(url); + + if let Some(auth_token) = auth_token { + req = req.set("Authorization", format!("Bearer {}", auth_token).as_str()); + }; + // Download the archive from the URL - let response = ureq::get(url).call().map_err(|e| InvalidRegistryArchive { + let response = req.call().map_err(|e| InvalidRegistryArchive { archive: url.to_owned(), error: e.to_string(), })?; @@ -845,7 +862,7 @@ mod tests { let vdir_path = VirtualDirectoryPath::LocalFolder { path: "../../crates/weaver_codegen_test/semconv_registry".to_owned(), }; - let repo = VirtualDirectory::try_new(&vdir_path).unwrap(); + let repo = VirtualDirectory::try_new(&vdir_path, None).unwrap(); let repo_path = repo.path().to_path_buf(); assert!(repo_path.exists()); assert!( @@ -859,7 +876,7 @@ mod tests { } fn check_archive(vdir_path: VirtualDirectoryPath, file_to_check: Option<&str>) { - let repo = VirtualDirectory::try_new(&vdir_path).unwrap(); + let repo = VirtualDirectory::try_new(&vdir_path, None).unwrap(); let repo_path = repo.path().to_path_buf(); // At this point, the repo should be cloned into a temporary directory. 
assert!(repo_path.exists()); diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 60cfafa87..fb78a98cb 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -449,7 +449,11 @@ impl SchemaResolver { })) } else { let dependency = &dependencies[0]; - match RegistryRepo::try_new(&dependency.name, &dependency.registry_path) { + match RegistryRepo::try_new( + &dependency.name, + &dependency.registry_path, + None, + ) { Ok(registry_repo_dep) => Some(Self::load_semconv_specs_with_depth( &registry_repo_dep, true, @@ -660,7 +664,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("main", &registry_path)?; + let registry_repo = RegistryRepo::try_new("main", &registry_path, None)?; let result = SchemaResolver::load_semconv_specs(&registry_repo, true, true); match result { WResult::Ok(semconv_specs) => { @@ -692,7 +696,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("app", &registry_path)?; + let registry_repo = RegistryRepo::try_new("app", &registry_path, None)?; let result = SchemaResolver::load_semconv_specs(&registry_repo, true, true); match result { @@ -814,7 +818,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("app", &registry_path)?; + let registry_repo = RegistryRepo::try_new("app", &registry_path, None)?; // Try with depth limit of 1 - should fail at acme->otel transition let mut visited_registries = HashSet::new(); @@ -850,7 +854,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/circular-registry-test/registry_a".to_owned(), }; - let registry_repo = RegistryRepo::try_new("registry_a", &registry_path)?; + let registry_repo = 
RegistryRepo::try_new("registry_a", &registry_path, None)?; let result = SchemaResolver::load_semconv_specs(&registry_repo, true, true); match result { diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index e209bec72..bd1882a66 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -389,7 +389,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data".to_owned(), }; - let registry_repo = RegistryRepo::try_new("test", &registry_path).unwrap(); + let registry_repo = RegistryRepo::try_new("test", &registry_path, None).unwrap(); let registry = SemConvRegistry::from_semconv_specs(&registry_repo, semconv_specs).unwrap(); assert_eq!(registry.id(), "test"); assert_eq!(registry.semconv_spec_count(), 2); diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 74a8daa9b..5aec3e1d1 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -36,10 +36,11 @@ impl RegistryRepo { pub fn try_new( registry_id_if_no_manifest: &str, registry_path: &VirtualDirectoryPath, + auth_token: Option<&String>, ) -> Result<Self, Error> { let mut registry_repo = Self { id: Arc::from(registry_id_if_no_manifest), - registry: VirtualDirectory::try_new(registry_path) + registry: VirtualDirectory::try_new(registry_path, auth_token) .map_err(Error::VirtualDirectoryError)?, manifest: None, }; @@ -115,7 +116,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "../../crates/weaver_codegen_test/semconv_registry".to_owned(), }; - let repo = RegistryRepo::try_new("main", &registry_path).unwrap(); + let repo = RegistryRepo::try_new("main", &registry_path, None).unwrap(); let repo_path = repo.path().to_path_buf(); assert!(repo_path.exists()); assert!( diff --git a/crates/weaver_semconv_gen/src/lib.rs b/crates/weaver_semconv_gen/src/lib.rs index 43b81b479..be8da7f42 100644 --- 
a/crates/weaver_semconv_gen/src/lib.rs +++ b/crates/weaver_semconv_gen/src/lib.rs @@ -403,7 +403,7 @@ mod tests { path: "data".to_owned(), }; let mut diag_msgs = DiagnosticMessages::empty(); - let registry_repo = RegistryRepo::try_new("main", &registry_path)?; + let registry_repo = RegistryRepo::try_new("main", &registry_path, None)?; let generator = SnippetGenerator::try_from_registry_repo( &registry_repo, template, diff --git a/src/registry/check.rs b/src/registry/check.rs index 1235b45f2..74340e4f5 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -52,7 +52,7 @@ pub(crate) fn command(args: &RegistryCheckArgs) -> Result Result Result, } /// Set of common parameters used for policy checks. diff --git a/src/registry/resolve.rs b/src/registry/resolve.rs index 45f90516c..5d0aa5485 100644 --- a/src/registry/resolve.rs +++ b/src/registry/resolve.rs @@ -107,6 +107,7 @@ mod tests { }, follow_symlinks: false, include_unreferenced: false, + auth_token: None, }, lineage: true, output: None, @@ -138,6 +139,7 @@ mod tests { }, follow_symlinks: false, include_unreferenced: false, + auth_token: None, }, lineage: true, output: None, diff --git a/src/registry/search.rs b/src/registry/search.rs index 5f2cf3f72..8cd854af2 100644 --- a/src/registry/search.rs +++ b/src/registry/search.rs @@ -368,7 +368,7 @@ pub(crate) fn command(args: &RegistrySearchArgs) -> Result Result Result<(ResolvedRegistry, Option), DiagnosticMessages> { let registry_path = &registry_args.registry; - let main_registry_repo = RegistryRepo::try_new("main", registry_path)?; + let main_registry_repo = + RegistryRepo::try_new("main", registry_path, registry_args.auth_token.as_ref())?; // Load the semantic convention specs let main_semconv_specs = load_semconv_specs(&main_registry_repo, registry_args.follow_symlinks) @@ -263,7 +264,7 @@ pub(crate) fn prepare_main_registry( .policies .iter() .map(|path| { - VirtualDirectory::try_new(path).map_err(|e| { + VirtualDirectory::try_new(path, 
registry_args.auth_token.as_ref()).map_err(|e| { DiagnosticMessages::from_error(weaver_common::Error::InvalidVirtualDirectory { path: path.to_string(), error: e.to_string(), diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 67789502a..1440119fb 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -36,7 +36,7 @@ fn test_cli_interface() { sub_folder: Some(SEMCONV_REGISTRY_MODEL.to_owned()), refspec: None, }; - let registry_repo = RegistryRepo::try_new("main", &registry_path).unwrap_or_else(|e| { + let registry_repo = RegistryRepo::try_new("main", &registry_path, None).unwrap_or_else(|e| { panic!("Failed to create the registry repo, error: {e}"); }); let semconv_specs = SchemaResolver::load_semconv_specs(&registry_repo, true, false) From 989d460ae6ef7463b58f387d8e03a0b904ca1fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matou=C5=A1=20Dzivjak?= Date: Fri, 10 Oct 2025 17:06:49 +0200 Subject: [PATCH 2/2] add tests --- crates/weaver_common/src/test.rs | 52 +++++++++++++++++++++++++++++++- crates/weaver_common/src/vdir.rs | 35 ++++++++++++++++----- 2 files changed, 79 insertions(+), 8 deletions(-) diff --git a/crates/weaver_common/src/test.rs b/crates/weaver_common/src/test.rs index b230de46f..0ff9de13f 100644 --- a/crates/weaver_common/src/test.rs +++ b/crates/weaver_common/src/test.rs @@ -5,7 +5,7 @@ use paris::error; use std::path::PathBuf; -use rouille::{match_assets, Server}; +use rouille::{match_assets, Response, Server}; use std::sync::mpsc::Sender; /// An error that can occur while starting the HTTP server. @@ -33,13 +33,40 @@ impl ServeStaticFiles { /// Creates a new HTTP server that serves static files from a directory. /// Note: This server is only available for testing purposes. pub fn from(static_path: impl Into<PathBuf>) -> Result<Self, HttpServerError> { + Self::from_impl(static_path, None) + } + + /// Same as [`Self::from`], but requires `Authorization: Bearer <token>` on every request. + /// Note: This server is only available for testing purposes. 
+ pub fn from_with_bearer( + static_path: impl Into<PathBuf>, + token: impl Into<String>, + ) -> Result<Self, HttpServerError> { + Self::from_impl(static_path, Some(token.into())) + } + + fn from_impl( + static_path: impl Into<PathBuf>, + token: Option<String>, + ) -> Result<Self, HttpServerError> { let static_path = static_path.into(); + let server = Server::new("127.0.0.1:0", move |request| { + if let Some(token) = token.as_ref() { + if !request + .header("Authorization") + .map(|h| h == format!("Bearer {}", token)) + .unwrap_or(false) + { + return Response::text("Unauthorized").with_status_code(401); + } + } match_assets(request, &static_path) }) .map_err(|e| HttpServerError { error: e.to_string(), })?; + let port = server.server_addr().port(); let (_, kill_switch) = server.stoppable(); Ok(Self { kill_switch, port }) @@ -93,4 +120,27 @@ mod tests { assert!(result.is_err()); assert!(matches!(result.unwrap_err(), ureq::Error::Status(404, _))); } + + #[test] + fn test_http_server_with_bearer_auth() { + let token = "token"; + let server = ServeStaticFiles::from_with_bearer("tests/test_data", token).unwrap(); + + let resp = ureq::get(&server.relative_path_to_url("file_a.yaml")).call(); + assert!(resp.is_err()); + assert!(matches!(resp.unwrap_err(), ureq::Error::Status(401, _))); + + let resp = ureq::get(&server.relative_path_to_url("file_a.yaml")) + .set("Authorization", "wrong_token") + .call(); + assert!(resp.is_err()); + assert!(matches!(resp.unwrap_err(), ureq::Error::Status(401, _))); + + let content = ureq::get(&server.relative_path_to_url("file_a.yaml")) + .set("Authorization", &format!("Bearer {}", token)) + .call() + .unwrap(); + assert_eq!(content.status(), 200); + assert_eq!(content.into_string().unwrap(), "file: A"); + } } diff --git a/crates/weaver_common/src/vdir.rs b/crates/weaver_common/src/vdir.rs index 6bf82b891..20bab1876 100644 --- a/crates/weaver_common/src/vdir.rs +++ b/crates/weaver_common/src/vdir.rs @@ -875,8 +875,12 @@ mod tests { assert!(repo_path.exists()); } - fn check_archive(vdir_path: VirtualDirectoryPath, 
file_to_check: Option<&str>) { - let repo = VirtualDirectory::try_new(&vdir_path, None).unwrap(); + fn check_archive( + vdir_path: VirtualDirectoryPath, + file_to_check: Option<&str>, + auth_token: Option<&String>, + ) { + let repo = VirtualDirectory::try_new(&vdir_path, auth_token).unwrap(); let repo_path = repo.path().to_path_buf(); // At this point, the repo should be cloned into a temporary directory. assert!(repo_path.exists()); @@ -902,7 +906,7 @@ mod tests { sub_folder: Some("model".to_owned()), refspec: None, }; - check_archive(registry_path, None); + check_archive(registry_path, None, None); } #[test] @@ -910,7 +914,7 @@ mod tests { let registry_path = "../../test_data/semantic-conventions-1.26.0.tar.gz[model]" .parse::<VirtualDirectoryPath>() .unwrap(); - check_archive(registry_path, Some("general.yaml")); + check_archive(registry_path, Some("general.yaml"), None); } #[test] @@ -918,7 +922,7 @@ mod tests { let registry_path = "../../test_data/semantic-conventions-1.26.0.zip[model]" .parse::<VirtualDirectoryPath>() .unwrap(); - check_archive(registry_path, Some("general.yaml")); + check_archive(registry_path, Some("general.yaml"), None); } #[test] @@ -930,7 +934,7 @@ mod tests { ) .parse::<VirtualDirectoryPath>() .unwrap(); - check_archive(registry_path, Some("general.yaml")); + check_archive(registry_path, Some("general.yaml"), None); } #[test] @@ -942,6 +946,23 @@ mod tests { ) .parse::<VirtualDirectoryPath>() .unwrap(); - check_archive(registry_path, Some("general.yaml")); + check_archive(registry_path, Some("general.yaml"), None); + } + + #[test] + fn test_semconv_registry_authentication() { + let token = "token"; + let server = ServeStaticFiles::from_with_bearer("tests/test_data", token).unwrap(); + let registry_path = format!( + "{}[model]", + server.relative_path_to_url("semconv_registry_v1.26.0.zip") + ) + .parse::<VirtualDirectoryPath>() + .unwrap(); + check_archive( + registry_path, + Some("general.yaml"), + Some(&token.to_string()), + ); } }