Skip to content

Commit ad3d30a

Browse files
authored
feat(fetchers): add StackOverflowFetcher for clean Q&A extraction (#64)
## What

Adds a `StackOverflowFetcher` for Stack Overflow and Stack Exchange network question URLs, returning structured Q&A content via the Stack Exchange API.

Closes #53

## Why

Coding agents frequently encounter Stack Overflow links. The DefaultFetcher returns full pages with ads, sidebars, and noise. This fetcher yields just the signal: question, answers, and votes.

## How

- Matches `stackoverflow.com/questions/{id}` and SE network sites (serverfault, superuser, askubuntu, *.stackexchange.com)
- Fetches via Stack Exchange API v2.3 with `withbody_markdown` filter
- Returns: question title, body, score, views, tags, author, top N answers sorted by votes
- Accepted answers marked with indicator
- Format field: `"stackoverflow_qa"`

## Risk

- Low
- Only adds a new fetcher; no changes to existing behavior

### Checklist

- [x] Unit tests passed
- [x] Clippy clean (`-D warnings`)
- [x] Docs build without warnings
- [x] Formatting applied
1 parent 0276684 commit ad3d30a

File tree

3 files changed

+435
-6
lines changed

3 files changed

+435
-6
lines changed

crates/fetchkit/src/fetchers/mod.rs

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,15 @@ mod docs_site;
88
mod github_code;
99
mod github_issue;
1010
mod github_repo;
11+
mod stackoverflow;
1112
mod twitter;
1213

1314
pub use default::DefaultFetcher;
1415
pub use docs_site::DocsSiteFetcher;
1516
pub use github_code::GitHubCodeFetcher;
1617
pub use github_issue::GitHubIssueFetcher;
1718
pub use github_repo::GitHubRepoFetcher;
19+
pub use stackoverflow::StackOverflowFetcher;
1820
pub use twitter::TwitterFetcher;
1921

2022
use crate::client::FetchOptions;
@@ -121,8 +123,9 @@ impl FetcherRegistry {
121123
/// 2. GitHubIssueFetcher - handles GitHub issue/PR URLs
122124
/// 3. GitHubRepoFetcher - handles GitHub repository URLs
123125
/// 4. TwitterFetcher - handles Twitter/X tweet URLs
124-
/// 5. DocsSiteFetcher - handles docs sites and llms.txt URLs
125-
/// 6. DefaultFetcher - handles all remaining HTTP/HTTPS URLs
126+
/// 5. StackOverflowFetcher - handles Stack Exchange Q&A URLs
127+
/// 6. DocsSiteFetcher - handles docs sites and llms.txt URLs
128+
/// 7. DefaultFetcher - handles all remaining HTTP/HTTPS URLs
126129
pub fn with_defaults() -> Self {
127130
let mut registry = Self::new();
128131
// Register specialized fetchers first (higher priority)
@@ -131,6 +134,7 @@ impl FetcherRegistry {
131134
registry.register(Box::new(GitHubIssueFetcher::new()));
132135
registry.register(Box::new(GitHubRepoFetcher::new()));
133136
registry.register(Box::new(TwitterFetcher::new()));
137+
registry.register(Box::new(StackOverflowFetcher::new()));
134138
// DocsSiteFetcher for docs sites and llms.txt
135139
registry.register(Box::new(DocsSiteFetcher::new()));
136140
// Default fetcher last (catches all remaining URLs)
@@ -289,13 +293,14 @@ mod tests {
289293
#[test]
290294
fn test_registry_with_defaults() {
291295
let registry = FetcherRegistry::with_defaults();
292-
assert_eq!(registry.fetchers.len(), 6);
296+
assert_eq!(registry.fetchers.len(), 7);
293297
assert_eq!(registry.fetchers[0].name(), "github_code");
294298
assert_eq!(registry.fetchers[1].name(), "github_issue");
295299
assert_eq!(registry.fetchers[2].name(), "github_repo");
296300
assert_eq!(registry.fetchers[3].name(), "twitter_tweet");
297-
assert_eq!(registry.fetchers[4].name(), "docs_site");
298-
assert_eq!(registry.fetchers[5].name(), "default");
301+
assert_eq!(registry.fetchers[4].name(), "stackoverflow");
302+
assert_eq!(registry.fetchers[5].name(), "docs_site");
303+
assert_eq!(registry.fetchers[6].name(), "default");
299304
}
300305

301306
#[test]

0 commit comments

Comments
 (0)