Skip to content

Commit 4042e61

Browse files
authored
feat(fetchers): add PackageRegistryFetcher for PyPI, crates.io, npm (#65)
## What

Adds a `PackageRegistryFetcher` handling PyPI, crates.io, and npm package URLs with structured metadata via their JSON APIs.

Closes #54

## Why

Agents evaluating dependencies need quick access to version, license, and dependency info without parsing noisy registry HTML. Directly supports the AGENTS.md requirement to avoid non-permissive licenses.

## How

- Single fetcher with sub-matching for three registries
- PyPI: `pypi.org/project/{name}` via `pypi.org/pypi/{name}/json`
- crates.io: `crates.io/crates/{name}` via `crates.io/api/v1/crates/{name}`
- npm: `npmjs.com/package/{name}` via `registry.npmjs.org/{name}`
- Supports version-specific PyPI URLs and scoped npm packages
- Format field: `"package_registry"`

## Risk

- Low

### Checklist

- [x] Unit tests passed
- [x] Clippy clean
- [x] Formatting applied
1 parent ad3d30a commit 4042e61

File tree

3 files changed

+525
-4
lines changed

3 files changed

+525
-4
lines changed

crates/fetchkit/src/fetchers/mod.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod docs_site;
88
mod github_code;
99
mod github_issue;
1010
mod github_repo;
11+
mod package_registry;
1112
mod stackoverflow;
1213
mod twitter;
1314

@@ -16,6 +17,7 @@ pub use docs_site::DocsSiteFetcher;
1617
pub use github_code::GitHubCodeFetcher;
1718
pub use github_issue::GitHubIssueFetcher;
1819
pub use github_repo::GitHubRepoFetcher;
20+
pub use package_registry::PackageRegistryFetcher;
1921
pub use stackoverflow::StackOverflowFetcher;
2022
pub use twitter::TwitterFetcher;
2123

@@ -135,6 +137,7 @@ impl FetcherRegistry {
135137
registry.register(Box::new(GitHubRepoFetcher::new()));
136138
registry.register(Box::new(TwitterFetcher::new()));
137139
registry.register(Box::new(StackOverflowFetcher::new()));
140+
registry.register(Box::new(PackageRegistryFetcher::new()));
138141
// DocsSiteFetcher for docs sites and llms.txt
139142
registry.register(Box::new(DocsSiteFetcher::new()));
140143
// Default fetcher last (catches all remaining URLs)
@@ -293,14 +296,15 @@ mod tests {
293296
#[test]
294297
fn test_registry_with_defaults() {
295298
let registry = FetcherRegistry::with_defaults();
296-
assert_eq!(registry.fetchers.len(), 7);
299+
assert_eq!(registry.fetchers.len(), 8);
297300
assert_eq!(registry.fetchers[0].name(), "github_code");
298301
assert_eq!(registry.fetchers[1].name(), "github_issue");
299302
assert_eq!(registry.fetchers[2].name(), "github_repo");
300303
assert_eq!(registry.fetchers[3].name(), "twitter_tweet");
301304
assert_eq!(registry.fetchers[4].name(), "stackoverflow");
302-
assert_eq!(registry.fetchers[5].name(), "docs_site");
303-
assert_eq!(registry.fetchers[6].name(), "default");
305+
assert_eq!(registry.fetchers[5].name(), "package_registry");
306+
assert_eq!(registry.fetchers[6].name(), "docs_site");
307+
assert_eq!(registry.fetchers[7].name(), "default");
304308
}
305309

306310
#[test]

0 commit comments

Comments
 (0)