diff --git a/.changeset/web-search-initial.md b/.changeset/web-search-initial.md new file mode 100644 index 0000000..87dc4df --- /dev/null +++ b/.changeset/web-search-initial.md @@ -0,0 +1,42 @@ +--- +'@link-assistant/web-search': minor +--- + +Add web search microservice with multi-provider aggregation + +**Features:** + +- Multi-provider search aggregation (Google, DuckDuckGo, Bing) +- Multiple merge strategies: Reciprocal Rank Fusion (RRF), weighted scoring, interleaving +- Configurable provider weights for reranking +- URL normalization for proper deduplication across providers +- API-first design with fallback to web scraping +- browser-commander integration for direct browser search testing + +**JavaScript Library:** + +- Search provider interfaces with API support and scraping fallback +- Result merger with RRF, weighted, and interleave strategies +- WebSearchEngine class for multi-provider search +- Express.js REST API server +- CLI tool for command-line usage + +**Rust Library:** + +- Async search providers using reqwest and scraper +- Result merger with same strategies as JavaScript version +- WebSearchEngine with async search +- Axum REST API server +- CLI tool with clap + +**REST API Endpoints:** + +- GET /search?q= - Search all providers +- POST /search - Search with options in body +- GET /search/:provider?q= - Search single provider +- GET /providers - List available providers +- GET /health - Health check + +**CI/CD:** + +- Added rust.yml workflow for Rust CI (lint, test matrix, build) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 78f532a..3961e89 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -309,7 +309,7 @@ jobs: - name: Run tests (Deno) if: matrix.runtime == 'deno' - run: deno test --allow-read + run: deno test --allow-read --allow-env # Release - only runs on main after tests pass (for push events) release: diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml 
new file mode 100644 index 0000000..e49158f --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,117 @@ +name: Rust CI + +on: + push: + branches: + - main + paths: + - 'rust/**' + - '.github/workflows/rust.yml' + pull_request: + types: [opened, synchronize, reopened] + paths: + - 'rust/**' + - '.github/workflows/rust.yml' + +concurrency: + group: rust-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: -Dwarnings + +defaults: + run: + working-directory: rust + +jobs: + # === LINT AND FORMAT CHECK === + lint: + name: Lint and Format + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - name: Cache cargo registry + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + rust/target + key: ${{ runner.os }}-cargo-${{ hashFiles('rust/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Check formatting + run: cargo fmt --all -- --check + + - name: Run Clippy + run: cargo clippy --all-targets --all-features + + # === TEST === + test: + name: Test (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + needs: [lint] + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + steps: + - uses: actions/checkout@v4 + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Cache cargo registry + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + rust/target + key: ${{ runner.os }}-cargo-${{ hashFiles('rust/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Run tests + run: cargo test --all-features --verbose + + - name: Run doc tests + run: cargo test --doc --verbose + + # === BUILD === + build: + name: Build Package + runs-on: ubuntu-latest + needs: [lint, test] + if: always() && !cancelled() && needs.lint.result == 'success' && needs.test.result == 'success' + 
steps: + - uses: actions/checkout@v4 + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Cache cargo registry + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + rust/target + key: ${{ runner.os }}-cargo-build-${{ hashFiles('rust/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-build- + + - name: Build release + run: cargo build --release --verbose + + - name: Check package + run: cargo package --list diff --git a/.prettierignore b/.prettierignore index 7790297..82cf46f 100644 --- a/.prettierignore +++ b/.prettierignore @@ -5,5 +5,7 @@ dist package-lock.json .eslintcache CLAUDE.md +# Rust build artifacts +rust/target # Case study raw data files (downloaded from external sources) docs/case-studies/*/data/ diff --git a/README.md b/README.md index bd05bf6..7792477 100644 --- a/README.md +++ b/README.md @@ -1,196 +1,288 @@ -# js-ai-driven-development-pipeline-template +# @link-assistant/web-search -A comprehensive template for AI-driven JavaScript/TypeScript development with full CI/CD pipeline support. +A web search microservice that aggregates results from multiple search engines with intelligent result merging and reranking. 
## Features +- **Multi-provider search**: Aggregate results from Google, DuckDuckGo, and Bing +- **Result merging**: Combine results using RRF, weighted scoring, or interleaving +- **Configurable weights**: Adjust provider weights for custom reranking +- **URL deduplication**: Automatic normalization and deduplication across providers +- **Browser testing**: Integration with browser-commander for direct browser search - **Multi-runtime support**: Works with Bun, Node.js, and Deno -- **Universal testing**: Uses [test-anywhere](https://github.com/link-foundation/test-anywhere) for cross-runtime tests -- **Automated releases**: Changesets-based versioning with GitHub Actions -- **Code quality**: ESLint + Prettier with pre-commit hooks via Husky -- **Package manager agnostic**: Works with bun, npm, yarn, pnpm, and deno + +## Installation + +```bash +# With npm +npm install @link-assistant/web-search + +# With bun +bun add @link-assistant/web-search + +# With yarn +yarn add @link-assistant/web-search +``` ## Quick Start -### Using This Template +### As a Library + +```javascript +import { + WebSearchEngine, + createSearchEngine, +} from '@link-assistant/web-search'; + +// Create a search engine +const engine = createSearchEngine(); + +// Search across all providers +const results = await engine.search('artificial intelligence'); -1. Click "Use this template" on GitHub to create a new repository -2. Clone your new repository -3. Update `package.json` with your package name and description -4. Update the `PACKAGE_NAME` constant in these scripts: - - `scripts/validate-changeset.mjs` - - `scripts/merge-changesets.mjs` - - `scripts/publish-to-npm.mjs` - - `scripts/format-release-notes.mjs` - - `scripts/create-manual-changeset.mjs` -5. Install dependencies: `bun install` -6. Start developing! 
+// Search with options +const results = await engine.search('machine learning', { + limit: 20, + providers: ['google', 'duckduckgo'], + strategy: 'rrf', + weights: { google: 1.5, duckduckgo: 1.0 }, +}); -### Development +// Search single provider +const googleResults = await engine.searchSingle('deep learning', 'google'); +``` + +### As a REST API Server ```bash -# Install dependencies -bun install +# Start the server +npx web-search serve --port 3000 -# Run tests -bun test +# Or with bun +bunx web-search serve --port 3000 +``` -# Or with other runtimes: -npm test -deno test --allow-read +API Endpoints: -# Lint code -bun run lint +- `GET /search?q=` - Search all providers +- `POST /search` - Search with options in body +- `GET /search/:provider?q=` - Search single provider +- `GET /providers` - List available providers +- `GET /health` - Health check -# Format code -bun run format +Example: + +```bash +curl "http://localhost:3000/search?q=rust+programming&limit=10&strategy=rrf" +``` + +### As a CLI Tool + +```bash +# Search from command line +npx web-search "artificial intelligence" + +# With options +npx web-search "machine learning" --limit 20 --providers google,bing --format json + +# Output just URLs +npx web-search "deep learning" --format urls +``` + +## Merge Strategies + +### Reciprocal Rank Fusion (RRF) + +Default strategy. Combines results by their rank positions across providers. + +```javascript +const results = await engine.search(query, { strategy: 'rrf' }); +``` -# Check all (lint + format + file size) -bun run check +### Weighted Scoring + +Score results based on provider weights and rank positions. + +```javascript +const results = await engine.search(query, { + strategy: 'weighted', + weights: { google: 2.0, duckduckgo: 1.0, bing: 0.5 }, +}); ``` -## Project Structure +### Interleaving + +Round-robin style interleaving of results from each provider. +```javascript +const results = await engine.search(query, { strategy: 'interleave' }); ``` -. 
-├── .changeset/ # Changeset configuration -├── .github/workflows/ # GitHub Actions CI/CD -├── .husky/ # Git hooks (pre-commit) -├── examples/ # Usage examples -├── scripts/ # Build and release scripts -├── src/ # Source code -│ ├── index.js # Main entry point -│ └── index.d.ts # TypeScript definitions -├── tests/ # Test files -├── .eslintrc.js # ESLint configuration -├── .prettierrc # Prettier configuration -├── bunfig.toml # Bun configuration -├── deno.json # Deno configuration -└── package.json # Node.js package manifest + +## Search Providers + +### Google + +- Uses Custom Search API when credentials are configured +- Falls back to web scraping otherwise + +```javascript +import { GoogleProvider } from '@link-assistant/web-search'; + +const provider = new GoogleProvider({ + apiKey: 'your-api-key', + searchEngineId: 'your-cx-id', +}); ``` -## Design Choices +### DuckDuckGo -### Multi-Runtime Support +- Uses HTML scraping (no API required) -This template is designed to work seamlessly with all major JavaScript runtimes: +```javascript +import { DuckDuckGoProvider } from '@link-assistant/web-search'; -- **Bun**: Primary runtime with highest performance, uses native test support (`bun test`) -- **Node.js**: Alternative runtime, uses built-in test runner (`node --test`) -- **Deno**: Secure runtime with built-in TypeScript support (`deno test`) +const provider = new DuckDuckGoProvider(); +``` -The [test-anywhere](https://github.com/link-foundation/test-anywhere) framework provides a unified testing API that works identically across all runtimes. 
+### Bing -### Package Manager Agnostic +- Uses Web Search API when configured +- Falls back to web scraping otherwise -While `package.json` is the source of truth for dependencies, the template supports: +```javascript +import { BingProvider } from '@link-assistant/web-search'; -- **bun**: Primary choice, uses `bun.lockb` -- **npm**: Uses `package-lock.json` -- **yarn**: Uses `yarn.lock` -- **pnpm**: Uses `pnpm-lock.yaml` -- **deno**: Uses `deno.json` for configuration +const provider = new BingProvider({ + apiKey: 'your-bing-api-key', +}); +``` -Note: `package-lock.json` is not committed by default to allow any package manager. +### Browser-Based Search -### Code Quality +- Uses browser-commander for direct browser search +- Useful for testing and when scraping is blocked -- **ESLint**: Configured with recommended rules + Prettier integration -- **Prettier**: Consistent code formatting -- **Husky + lint-staged**: Pre-commit hooks ensure code quality -- **File size limit**: Scripts must stay under 1000 lines for maintainability +```javascript +import { createBrowserProvider } from '@link-assistant/web-search'; -### Release Workflow +const provider = createBrowserProvider({ + engine: 'google', + browserOptions: { headless: true }, +}); +``` -The release workflow uses [Changesets](https://github.com/changesets/changesets) for version management: +## API Reference -1. **Creating a changeset**: Run `bun run changeset` to document changes -2. **PR validation**: CI checks for valid changeset in each PR -3. **Automated versioning**: Merging to `main` triggers version bump -4. **npm publishing**: Automated via OIDC trusted publishing (no tokens needed) -5. 
**GitHub releases**: Auto-created with formatted release notes +### WebSearchEngine -#### Manual Releases +```javascript +const engine = new WebSearchEngine(config); -Two manual release modes are available via GitHub Actions: +// Search methods +await engine.search(query, options); +await engine.searchSingle(query, providerName, options); -- **Instant release**: Immediately bump version and publish -- **Changeset PR**: Create a PR with changeset for review +// Provider management +engine.getAvailableProviders(); +engine.getProviderStatus(); +engine.setProviderWeight(name, weight); +engine.setProviderEnabled(name, enabled); +engine.getProvider(name); +``` -### CI/CD Pipeline +### Merge Functions + +```javascript +import { + mergeResults, + mergeWithRRF, + mergeWithWeights, + mergeWithInterleave, +} from '@link-assistant/web-search'; + +// Merge results from multiple providers +const merged = mergeResults(resultsByProvider, { + strategy: 'rrf', + weights: { google: 1.5 }, + rrfK: 60, + removeDuplicates: true, +}); +``` -The GitHub Actions workflow (`.github/workflows/release.yml`) provides: +## Rust Library -1. **Changeset check**: Validates PR has exactly one changeset (added by that PR) -2. **Lint & format**: Ensures code quality standards -3. **Test matrix**: 3 runtimes × 3 OS = 9 test combinations -4. **Changeset merge**: Combines multiple pending changesets at release time -5. **Release**: Automated versioning and npm publishing +A Rust implementation is also available in the `rust/` directory. -#### Robust Changeset Handling +```bash +cd rust +cargo build --release +``` -The CI/CD pipeline is designed to handle concurrent PRs gracefully: +### Rust CLI -- **PR Validation**: Only validates changesets **added by the current PR**, not pre-existing ones from other merged PRs. This prevents false failures when multiple PRs merge before a release cycle completes. 
+```bash +# Search +./target/release/web-search "artificial intelligence" --limit 10 -- **Release-time Merging**: If multiple changesets exist when releasing, they are automatically merged into a single changeset with: - - The highest version bump type (major > minor > patch) - - All descriptions preserved in chronological order +# Start server +./target/release/web-search serve --port 3000 +``` -This design decouples PR validation from the need to pull changes from the default branch, reducing conflicts and ensuring that even if CI/CD fails, all unpublished changesets will still get published when the error is resolved. +### Rust Library Usage -## Configuration +```rust +use web_search::{WebSearchEngine, SearchOptions, MergeStrategy}; -### Updating Package Name +let engine = WebSearchEngine::new(); -After creating a repository from this template, update the package name in: +let results = engine.search_with_options( + "machine learning", + SearchOptions { limit: Some(10), ..Default::default() }, + None, + Some(MergeOptions { strategy: MergeStrategy::Rrf, ..Default::default() }) +).await?; +``` -1. `package.json`: `"name": "your-package-name"` -2. `.changeset/config.json`: Package references -3. Scripts that reference the package name (see Quick Start) +## Development -### ESLint Rules +```bash +# Install dependencies +bun install -Customize ESLint in `eslint.config.js`. 
Current configuration: +# Run tests +bun test -- ES Modules support -- Prettier integration -- No console restrictions (common in CLI tools) -- Strict equality enforcement -- Async/await best practices -- **Strict unused variables rule**: No exceptions - all unused variables, arguments, and caught errors must be removed (no `_` prefix exceptions) +# Run with other runtimes +npm test +deno test --allow-read --allow-net -### Prettier Options +# Lint code +bun run lint -Configured in `.prettierrc`: +# Format code +bun run format +``` + +### Rust Development + +```bash +cd rust -- Single quotes -- Semicolons -- 2-space indentation -- 80-character line width -- ES5 trailing commas -- LF line endings +# Run tests +cargo test -## Scripts Reference +# Run clippy +cargo clippy -| Script | Description | -| ---------------------- | --------------------------------------- | -| `bun test` | Run tests with Bun | -| `bun run lint` | Check code with ESLint | -| `bun run lint:fix` | Fix ESLint issues automatically | -| `bun run format` | Format code with Prettier | -| `bun run format:check` | Check formatting without changing files | -| `bun run check` | Run all checks (lint + format) | -| `bun run changeset` | Create a new changeset | +# Format code +cargo fmt +``` -## Contributing +## Environment Variables -1. Fork the repository -2. Create a feature branch: `git checkout -b feature/my-feature` -3. Make your changes -4. Create a changeset: `bun run changeset` -5. Commit your changes (pre-commit hooks will run automatically) -6. 
Push and create a Pull Request + +- `GOOGLE_API_KEY` - Google Custom Search API key +- `GOOGLE_CX` - Google Custom Search Engine ID +- `BING_API_KEY` - Bing Web Search API key ## License diff --git a/bin/web-search.js b/bin/web-search.js new file mode 100644 index 0000000..bb36330 --- /dev/null +++ b/bin/web-search.js @@ -0,0 +1,276 @@ +#!/usr/bin/env node +/** + * Web Search CLI + * Command-line interface for web search aggregation + * + * Usage: + * web-search [options] Search the web + * web-search --serve [--port 3000] Start as API server + */ + +import { fileURLToPath } from 'url'; +import { dirname, resolve } from 'path'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +// eslint-disable-next-line complexity -- CLI argument parsing inherently has many branches +function parseArgs(args) { + const result = { + query: null, + serve: false, + port: 3000, + providers: null, + limit: 10, + strategy: 'rrf', + format: 'text', + language: null, + region: null, + safeSearch: false, + help: false, + version: false, + verbose: false, + }; + + const positional = []; + + for (let i = 0; i < args.length; i++) { + const arg = args[i]; + + if (arg === '--serve' || arg === '-s') { + result.serve = true; + } else if (arg === '--port' || arg === '-p') { + result.port = parseInt(args[++i], 10) || 3000; + } else if (arg === '--providers') { + result.providers = args[++i]?.split(',').map((p) => p.trim()); + } else if (arg === '--limit' || arg === '-l') { + result.limit = parseInt(args[++i], 10) || 10; + } else if (arg === '--strategy') { + result.strategy = args[++i] || 'rrf'; + } else if (arg === '--format' || arg === '-f') { + result.format = args[++i] || 'text'; + } else if (arg === '--language' || arg === '--lang') { + result.language = args[++i]; + } else if (arg === '--region') { + result.region = args[++i]; + } else if (arg === '--safe' || arg === '--safeSearch') { + result.safeSearch = true; + } else if (arg === 
'--help' || arg === '-h') { + result.help = true; + } else if (arg === '--version' || arg === '-v') { + result.version = true; + } else if (arg === '--verbose' || arg === '-V') { + result.verbose = true; + } else if (!arg.startsWith('-')) { + positional.push(arg); + } + } + + if (positional.length > 0) { + result.query = positional.join(' '); + } + + return result; +} + +function showHelp() { + console.log(` +web-search - Multi-provider web search aggregator + +Usage: + web-search [options] Search the web + web-search --serve [--port ] Start as API server + +Search Options: + --providers Comma-separated list of providers (google,duckduckgo,bing) + --limit, -l Maximum results per provider (default: 10) + --strategy Merge strategy: rrf, weighted, interleave (default: rrf) + --language Language code (e.g., en, de) + --region Region code (e.g., us, de) + --safe Enable safe search filtering + +Output Options: + --format, -f Output format: text, json, urls (default: text) + --verbose, -V Show detailed output + +Server Options: + --serve, -s Start as HTTP API server + --port, -p Port to listen on (default: 3000) + +General Options: + --help, -h Show this help message + --version, -v Show version number + +Environment Variables: + GOOGLE_API_KEY Google Custom Search API key + GOOGLE_CX Google Custom Search Engine ID + BING_API_KEY Bing Search API key + PORT Server port (default: 3000) + +Examples: + web-search "javascript tutorial" + web-search "rust programming" --providers google,duckduckgo --limit 5 + web-search "climate change" --format json | jq . 
+ web-search --serve --port 8080 + +API Endpoints (in server mode): + GET /search?q= Search all providers + POST /search Search with JSON body + GET /search/:provider?q= Search single provider + GET /providers List available providers + GET /health Health check +`); +} + +async function showVersion() { + const fs = await import('fs'); + const packagePath = resolve(__dirname, '..', 'package.json'); + const packageJson = JSON.parse(fs.readFileSync(packagePath, 'utf-8')); + console.log(`web-search v${packageJson.version}`); +} + +async function startServer(port) { + const { app } = await import('../src/server.js'); + + return new Promise((resolve, reject) => { + const server = app.listen(port, () => { + console.log(`Web Search API listening on http://localhost:${port}`); + console.log(''); + console.log('Available endpoints:'); + console.log(' GET /search?q= - Search all providers'); + console.log(' POST /search - Search with JSON body'); + console.log( + ' GET /search/:provider?q= - Search single provider' + ); + console.log(' GET /providers - List available providers'); + console.log(' GET /health - Health check'); + console.log(''); + console.log('Press Ctrl+C to stop the server'); + resolve(server); + }); + + server.on('error', reject); + + function shutdown(signal) { + console.log(`\nReceived ${signal}, shutting down...`); + server.close(() => { + console.log('Server closed'); + process.exit(0); + }); + setTimeout(() => { + console.error('Force exiting after 2s'); + process.exit(1); + }, 2000); + } + + process.on('SIGTERM', () => shutdown('SIGTERM')); + process.on('SIGINT', () => shutdown('SIGINT')); + }); +} + +async function performSearch(query, options) { + const { WebSearchEngine } = await import('../src/search.js'); + + const searchEngine = new WebSearchEngine({ + providers: options.providers || ['duckduckgo', 'google', 'bing'], + google: { + apiKey: process.env.GOOGLE_API_KEY, + searchEngineId: process.env.GOOGLE_CX, + }, + bing: { + apiKey: 
process.env.BING_API_KEY, + }, + }); + + const searchOptions = { + providers: options.providers, + limit: options.limit, + language: options.language, + region: options.region, + safeSearch: options.safeSearch, + strategy: options.strategy, + }; + + if (options.verbose) { + console.error(`Searching for: "${query}"`); + console.error(`Providers: ${options.providers?.join(', ') || 'all'}`); + console.error(`Strategy: ${options.strategy}`); + console.error(''); + } + + const results = await searchEngine.search(query, searchOptions); + + if (options.format === 'json') { + console.log( + JSON.stringify( + { + query, + count: results.length, + results, + }, + null, + 2 + ) + ); + } else if (options.format === 'urls') { + for (const result of results) { + console.log(result.url); + } + } else { + if (results.length === 0) { + console.log('No results found.'); + return; + } + + console.log(`Found ${results.length} results for "${query}":\n`); + + for (const result of results) { + console.log(`${result.rank}. ${result.title}`); + console.log(` ${result.url}`); + if (result.snippet) { + console.log( + ` ${result.snippet.slice(0, 150)}${result.snippet.length > 150 ? '...' 
: ''}` + ); + } + console.log(` [${result.sources?.join(', ') || result.source}]`); + console.log(''); + } + } +} + +async function main() { + const args = parseArgs(process.argv.slice(2)); + + if (args.help) { + showHelp(); + return; + } + + if (args.version) { + await showVersion(); + return; + } + + if (args.serve) { + await startServer(args.port); + return; + } + + if (!args.query) { + console.error('Error: Missing search query'); + console.error('Run with --help for usage information'); + process.exit(1); + } + + try { + await performSearch(args.query, args); + } catch (error) { + console.error('Error:', error.message); + process.exit(1); + } +} + +main().catch((err) => { + console.error('Fatal error:', err.message); + process.exit(1); +}); diff --git a/eslint.config.js b/eslint.config.js index 25498b0..dbeef78 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -22,6 +22,14 @@ export default [ __filename: 'readonly', // Node.js 18+ globals fetch: 'readonly', + URL: 'readonly', + URLSearchParams: 'readonly', + setTimeout: 'readonly', + setInterval: 'readonly', + clearTimeout: 'readonly', + clearInterval: 'readonly', + // Browser globals (for browser-commander integration) + document: 'readonly', // Runtime-specific globals Bun: 'readonly', Deno: 'readonly', diff --git a/package-lock.json b/package-lock.json index 032b12f..7b6a6c9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,13 +1,19 @@ { - "name": "my-package", + "name": "@link-assistant/web-search", "version": "0.5.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "my-package", + "name": "@link-assistant/web-search", "version": "0.5.0", "license": "Unlicense", + "dependencies": { + "express": "^4.21.2" + }, + "bin": { + "web-search": "bin/web-search.js" + }, "devDependencies": { "@changesets/cli": "^2.29.7", "eslint": "^9.38.0", @@ -860,6 +866,19 @@ "dev": true, "license": "MIT" }, + "node_modules/accepts": { + "version": "1.3.8", + "resolved": 
"https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", + "integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==", + "license": "MIT", + "dependencies": { + "mime-types": "~2.1.34", + "negotiator": "0.6.3" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/acorn": { "version": "8.15.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", @@ -959,6 +978,12 @@ "dev": true, "license": "Python-2.0" }, + "node_modules/array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==", + "license": "MIT" + }, "node_modules/array-union": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", @@ -1030,6 +1055,57 @@ "node": ">=8.9" } }, + "node_modules/body-parser": { + "version": "1.20.4", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.4.tgz", + "integrity": "sha512-ZTgYYLMOXY9qKU/57FAo8F+HA2dGX7bqGc71txDRC1rS4frdFI5R7NhluHxH6M0YItAP0sHB4uqAOcYKxO6uGA==", + "license": "MIT", + "dependencies": { + "bytes": "~3.1.2", + "content-type": "~1.0.5", + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "~1.2.0", + "http-errors": "~2.0.1", + "iconv-lite": "~0.4.24", + "on-finished": "~2.4.1", + "qs": "~6.14.0", + "raw-body": "~2.5.3", + "type-is": "~1.6.18", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8", + "npm": "1.2.8000 || >= 1.4.16" + } + }, + "node_modules/body-parser/node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/body-parser/node_modules/iconv-lite": { + "version": "0.4.24", + "resolved": 
"https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/body-parser/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "license": "MIT" + }, "node_modules/brace-expansion": { "version": "1.1.12", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", @@ -1058,7 +1134,6 @@ "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.8" @@ -1068,7 +1143,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -1082,7 +1156,6 @@ "version": "1.0.4", "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.2", @@ -1301,6 +1374,42 @@ "@babel/types": "^7.6.1" } }, + "node_modules/content-disposition": { + "version": "0.5.4", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", + "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==", + "license": "MIT", + 
"dependencies": { + "safe-buffer": "5.2.1" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/content-type": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.7.tgz", + "integrity": "sha512-NXdYc3dLr47pBkpUCHtKSwIOQXLVn8dZEuywboCOJY/osA0wFSLlSawr3KN8qXJEyX66FcONTH8EIlVuK0yyFA==", + "license": "MIT" + }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", @@ -1341,6 +1450,25 @@ "dev": true, "license": "MIT" }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/destroy": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz", + "integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==", + "license": "MIT", + "engines": { + "node": ">= 0.8", + "npm": "1.2.8000 || >= 1.4.16" + } + }, "node_modules/detect-indent": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/detect-indent/-/detect-indent-6.1.0.tgz", @@ -1375,7 +1503,6 @@ "version": "1.0.1", "resolved": 
"https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.1", @@ -1386,6 +1513,12 @@ "node": ">= 0.4" } }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==", + "license": "MIT" + }, "node_modules/emoji-regex": { "version": "10.6.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.6.0.tgz", @@ -1393,6 +1526,15 @@ "dev": true, "license": "MIT" }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/end-of-stream": { "version": "1.4.5", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", @@ -1434,7 +1576,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -1444,7 +1585,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -1454,7 +1594,6 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", "integrity": 
"sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0" @@ -1463,6 +1602,12 @@ "node": ">= 0.4" } }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "license": "MIT" + }, "node_modules/escape-string-regexp": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", @@ -1756,6 +1901,15 @@ "node": ">=0.10.0" } }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/eventemitter3": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-5.0.1.tgz", @@ -1810,6 +1964,67 @@ "dev": true, "license": "ISC" }, + "node_modules/express": { + "version": "4.22.1", + "resolved": "https://registry.npmjs.org/express/-/express-4.22.1.tgz", + "integrity": "sha512-F2X8g9P1X7uCPZMA3MVf9wcTqlyNp7IhH5qPCI0izhaOIYXaW9L535tGA3qmjRzpH+bZczqq7hVKxTR4NWnu+g==", + "license": "MIT", + "dependencies": { + "accepts": "~1.3.8", + "array-flatten": "1.1.1", + "body-parser": "~1.20.3", + "content-disposition": "~0.5.4", + "content-type": "~1.0.4", + "cookie": "~0.7.1", + "cookie-signature": "~1.0.6", + "debug": "2.6.9", + "depd": "2.0.0", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "~1.3.1", + "fresh": "~0.5.2", + "http-errors": "~2.0.0", + "merge-descriptors": "1.0.3", + "methods": "~1.1.2", + "on-finished": "~2.4.1", + "parseurl": "~1.3.3", + "path-to-regexp": "~0.1.12", + "proxy-addr": "~2.0.7", + 
"qs": "~6.14.0", + "range-parser": "~1.2.1", + "safe-buffer": "5.2.1", + "send": "~0.19.0", + "serve-static": "~1.16.2", + "setprototypeof": "1.2.0", + "statuses": "~2.0.1", + "type-is": "~1.6.18", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + }, + "engines": { + "node": ">= 0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/express/node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/express/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "license": "MIT" + }, "node_modules/extendable-error": { "version": "0.1.7", "resolved": "https://registry.npmjs.org/extendable-error/-/extendable-error-0.1.7.tgz", @@ -1911,6 +2126,39 @@ "node": ">=8" } }, + "node_modules/finalhandler": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.2.tgz", + "integrity": "sha512-aA4RyPcd3badbdABGDuTXCMTtOneUCAYH/gxoYRTZlIJdF0YPWuGqiAsIrhNnnqdXGswYk6dGujem4w80UJFhg==", + "license": "MIT", + "dependencies": { + "debug": "2.6.9", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "on-finished": "~2.4.1", + "parseurl": "~1.3.3", + "statuses": "~2.0.2", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/finalhandler/node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + 
"node_modules/finalhandler/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "license": "MIT" + }, "node_modules/find-up": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz", @@ -1946,6 +2194,24 @@ "dev": true, "license": "ISC" }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/fs-extra": { "version": "7.0.1", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-7.0.1.tgz", @@ -1965,7 +2231,6 @@ "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "dev": true, "license": "MIT", "funding": { "url": "https://github.com/sponsors/ljharb" @@ -1988,7 +2253,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.2", @@ -2013,7 +2277,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", "integrity": 
"sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", - "dev": true, "license": "MIT", "dependencies": { "dunder-proto": "^1.0.1", @@ -2100,7 +2363,6 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -2130,7 +2392,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -2159,7 +2420,6 @@ "version": "2.0.2", "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "dev": true, "license": "MIT", "dependencies": { "function-bind": "^1.1.2" @@ -2168,6 +2428,26 @@ "node": ">= 0.4" } }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "license": "MIT", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/human-id": { "version": "4.1.3", "resolved": "https://registry.npmjs.org/human-id/-/human-id-4.1.3.tgz", @@ -2268,6 +2548,21 @@ "node": ">=0.8.19" } }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": 
"sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, "node_modules/is-core-module": { "version": "2.16.1", "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.16.1.tgz", @@ -2768,12 +3063,29 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" } }, + "node_modules/media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/merge-descriptors": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz", + "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/merge-stream": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", @@ -2791,6 +3103,15 @@ "node": ">= 8" } }, + "node_modules/methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==", + "license": "MIT", + "engines": { + 
"node": ">= 0.6" + } + }, "node_modules/micromatch": { "version": "4.0.8", "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", @@ -2805,6 +3126,39 @@ "node": ">=8.6" } }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/mimic-fn": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", @@ -2855,7 +3209,6 @@ "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true, "license": "MIT" }, "node_modules/nano-spawn": { @@ -2878,6 +3231,15 @@ "dev": true, "license": "MIT" }, + "node_modules/negotiator": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", + "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/node-sarif-builder": { "version": "2.0.3", 
"resolved": "https://registry.npmjs.org/node-sarif-builder/-/node-sarif-builder-2.0.3.tgz", @@ -2953,6 +3315,30 @@ "node": ">=0.10.0" } }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "license": "MIT", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -3089,6 +3475,15 @@ "node": ">=6" } }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -3116,6 +3511,12 @@ "dev": true, "license": "MIT" }, + "node_modules/path-to-regexp": { + "version": "0.1.12", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz", + "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==", + "license": "MIT" + }, "node_modules/path-type": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", @@ -3218,6 +3619,19 @@ "asap": "~2.0.3" } }, + "node_modules/proxy-addr": 
{ + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "license": "MIT", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, "node_modules/pug": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/pug/-/pug-3.0.3.tgz", @@ -3375,6 +3789,21 @@ "node": ">=6" } }, + "node_modules/qs": { + "version": "6.14.1", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.1.tgz", + "integrity": "sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/quansync": { "version": "0.2.11", "resolved": "https://registry.npmjs.org/quansync/-/quansync-0.2.11.tgz", @@ -3413,6 +3842,42 @@ ], "license": "MIT" }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.3.tgz", + "integrity": "sha512-s4VSOf6yN0rvbRZGxs8Om5CWj6seneMwK3oDb4lWDH0UPhWcxwOWw5+qk24bxq87szX1ydrwylIOp2uG1ojUpA==", + "license": "MIT", + "dependencies": { + "bytes": "~3.1.2", + "http-errors": "~2.0.1", + "iconv-lite": "~0.4.24", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/raw-body/node_modules/iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": 
"sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/read-yaml-file": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/read-yaml-file/-/read-yaml-file-1.1.0.tgz", @@ -3560,11 +4025,30 @@ "queue-microtask": "^1.2.2" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "dev": true, "license": "MIT" }, "node_modules/semver": { @@ -3580,6 +4064,66 @@ "node": ">=10" } }, + "node_modules/send": { + "version": "0.19.2", + "resolved": "https://registry.npmjs.org/send/-/send-0.19.2.tgz", + "integrity": "sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg==", + "license": "MIT", + "dependencies": { + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "~0.5.2", + "http-errors": "~2.0.1", + "mime": "1.6.0", + "ms": "2.1.3", + "on-finished": "~2.4.1", + "range-parser": "~1.2.1", + "statuses": "~2.0.2" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/send/node_modules/debug": { + "version": "2.6.9", + "resolved": 
"https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "license": "MIT", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/send/node_modules/debug/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", + "license": "MIT" + }, + "node_modules/serve-static": { + "version": "1.16.3", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.3.tgz", + "integrity": "sha512-x0RTqQel6g5SY7Lg6ZreMmsOzncHFU7nhnRWkKgWuMTu5NN0DR5oruckMqRvacAN9d5w6ARnRBXl9xhDCgfMeA==", + "license": "MIT", + "dependencies": { + "encodeurl": "~2.0.0", + "escape-html": "~1.0.3", + "parseurl": "~1.3.3", + "send": "~0.19.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "license": "ISC" + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -3603,6 +4147,78 @@ "node": ">=8" } }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + 
"node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/signal-exit": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", @@ -3681,6 +4297,15 @@ "dev": true, "license": "BSD-3-Clause" }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": 
"sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/string-argv": { "version": "0.3.2", "resolved": "https://registry.npmjs.org/string-argv/-/string-argv-0.3.2.tgz", @@ -3861,6 +4486,15 @@ "node": ">=8.0" } }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, "node_modules/token-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/token-stream/-/token-stream-1.0.0.tgz", @@ -3881,6 +4515,19 @@ "node": ">= 0.8.0" } }, + "node_modules/type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "license": "MIT", + "dependencies": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/universalify": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", @@ -3891,6 +4538,15 @@ "node": ">= 4.0.0" } }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/uri-js": { "version": "4.4.1", "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", @@ -3901,6 +4557,24 @@ "punycode": "^2.1.0" } }, + "node_modules/utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": 
"sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==", + "license": "MIT", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/void-elements": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/void-elements/-/void-elements-3.1.0.tgz", diff --git a/package.json b/package.json index dbc8f2d..ff6b11c 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { - "name": "my-package", + "name": "@link-assistant/web-search", "version": "0.5.0", - "description": "A JavaScript/TypeScript package template for AI-driven development", + "description": "A web search microservice that aggregates results from multiple search engines (Google, DuckDuckGo, Bing) with reranking support", "type": "module", "main": "src/index.js", "types": "src/index.d.ts", @@ -11,8 +11,13 @@ "import": "./src/index.js" } }, + "bin": { + "web-search": "./bin/web-search.js" + }, "scripts": { "test": "node --test tests/", + "start": "node src/server.js", + "dev": "node --watch src/server.js", "lint": "eslint .", "lint:fix": "eslint . 
--fix", "format": "prettier --write .", @@ -26,20 +31,28 @@ "changeset:status": "changeset status --since=origin/main" }, "keywords": [ - "template", - "javascript", - "typescript", - "ai-driven" + "web-search", + "search-engine", + "google", + "duckduckgo", + "bing", + "search-api", + "microservice", + "reranking", + "meta-search" ], - "author": "", + "author": "Link Assistant", "license": "Unlicense", "repository": { "type": "git", - "url": "https://github.com/link-foundation/js-ai-driven-development-pipeline-template" + "url": "https://github.com/link-assistant/web-search" }, "engines": { "node": ">=20.0.0" }, + "dependencies": { + "express": "^4.21.2" + }, "devDependencies": { "@changesets/cli": "^2.29.7", "eslint": "^9.38.0", diff --git a/rust/.gitignore b/rust/.gitignore new file mode 100644 index 0000000..ad0b627 --- /dev/null +++ b/rust/.gitignore @@ -0,0 +1,4 @@ +/target +Cargo.lock +**/*.rs.bk +*.pdb diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 0000000..77501b9 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "web-search" +version = "0.1.0" +edition = "2021" +description = "A multi-provider web search aggregator with reranking support" +license = "Unlicense" +repository = "https://github.com/link-assistant/web-search" +keywords = ["web-search", "search-engine", "google", "duckduckgo", "bing"] +categories = ["web-programming", "api-bindings"] +readme = "README.md" + +[lib] +name = "web_search" +path = "src/lib.rs" + +[[bin]] +name = "web-search" +path = "src/main.rs" + +[dependencies] +tokio = { version = "1.43", features = ["full"] } +reqwest = { version = "0.12", features = ["json"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +scraper = "0.22" +thiserror = "2.0" +async-trait = "0.1" +clap = { version = "4.5", features = ["derive"] } +axum = "0.8" +tower-http = { version = "0.6", features = ["cors"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = 
["env-filter"] } +url = "2.5" +urlencoding = "2.1" + +[dev-dependencies] +tokio-test = "0.4" +wiremock = "0.6" + +[features] +default = ["server"] +server = [] diff --git a/rust/README.md b/rust/README.md new file mode 100644 index 0000000..08142fa --- /dev/null +++ b/rust/README.md @@ -0,0 +1,118 @@ +# Web Search (Rust) + +A multi-provider web search aggregator written in Rust with support for result merging and reranking. + +## Features + +- **Multiple Search Providers**: Google, DuckDuckGo, and Bing +- **API Support**: Uses official APIs when credentials are provided, falls back to scraping +- **Result Merging**: Combine results from multiple providers with deduplication +- **Reranking Strategies**: RRF (Reciprocal Rank Fusion), weighted scoring, or interleaving +- **REST API Server**: Built with Axum for high performance +- **CLI Tool**: Command-line interface for quick searches + +## Installation + +### From Source + +```bash +cd rust +cargo build --release +``` + +### As Library + +Add to your `Cargo.toml`: + +```toml +[dependencies] +web-search = { git = "https://github.com/link-assistant/web-search", path = "rust" } +``` + +## Usage + +### CLI + +```bash +# Basic search +web-search "rust programming" + +# Search with specific providers +web-search "rust async" --providers google,duckduckgo + +# Output as JSON +web-search "web scraping" --format json + +# Start API server +web-search serve --port 8080 +``` + +### Library + +```rust +use web_search::{WebSearchEngine, SearchOptions}; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let engine = WebSearchEngine::new(); + + let results = engine + .search("rust programming", SearchOptions::default()) + .await?; + + for result in results { + println!("{}: {}", result.title, result.url); + } + + Ok(()) +} +``` + +### REST API + +```bash +# Start server +web-search serve --port 3000 + +# Search all providers +curl "http://localhost:3000/search?q=rust+programming" + +# Search single provider +curl 
"http://localhost:3000/search/duckduckgo?q=rust+programming" + +# Get provider status +curl "http://localhost:3000/providers" +``` + +## API Endpoints + +| Endpoint | Method | Description | +| ----------------------------- | ------ | ------------------------ | +| `/search?q=` | GET | Search all providers | +| `/search/:provider?q=` | GET | Search single provider | +| `/providers` | GET | List available providers | +| `/health` | GET | Health check | + +### Query Parameters + +| Parameter | Description | Default | +| ------------ | ------------------------------------------ | ------- | +| `q` | Search query (required) | - | +| `providers` | Comma-separated provider list | all | +| `limit` | Max results per provider | 10 | +| `strategy` | Merge strategy (rrf, weighted, interleave) | rrf | +| `language` | Language code (e.g., en) | - | +| `region` | Region code (e.g., us) | - | +| `safeSearch` | Enable safe search | false | + +## Environment Variables + +| Variable | Description | +| ---------------- | ------------------------------ | +| `GOOGLE_API_KEY` | Google Custom Search API key | +| `GOOGLE_CX` | Google Custom Search Engine ID | +| `BING_API_KEY` | Bing Search API key | + +## License + +[Unlicense](../LICENSE) - Public Domain diff --git a/rust/src/error.rs b/rust/src/error.rs new file mode 100644 index 0000000..5e5737a --- /dev/null +++ b/rust/src/error.rs @@ -0,0 +1,42 @@ +//! 
Error types for web search operations + +use thiserror::Error; + +/// Errors that can occur during web search operations +#[derive(Error, Debug)] +pub enum SearchError { + /// HTTP request failed + #[error("HTTP request failed: {0}")] + RequestError(#[from] reqwest::Error), + + /// Failed to parse HTML response + #[error("Failed to parse HTML: {0}")] + ParseError(String), + + /// Invalid URL + #[error("Invalid URL: {0}")] + UrlError(#[from] url::ParseError), + + /// Provider not found + #[error("Unknown provider: {0}")] + UnknownProvider(String), + + /// Provider is disabled + #[error("Provider {0} is disabled")] + ProviderDisabled(String), + + /// API error from search provider + #[error("API error from {provider}: {message}")] + ApiError { provider: String, message: String }, + + /// Invalid configuration + #[error("Invalid configuration: {0}")] + ConfigError(String), + + /// JSON serialization/deserialization error + #[error("JSON error: {0}")] + JsonError(#[from] serde_json::Error), +} + +/// Result type alias for search operations +pub type SearchResult<T> = std::result::Result<T, SearchError>; diff --git a/rust/src/lib.rs b/rust/src/lib.rs new file mode 100644 index 0000000..3be9906 --- /dev/null +++ b/rust/src/lib.rs @@ -0,0 +1,31 @@ +//! Web Search - Multi-provider web search aggregator +//! +//! A library for aggregating search results from multiple search engines +//! (Google, DuckDuckGo, Bing) with support for result merging and reranking. +//! +//! # Example +//! +//! ```no_run +//! use web_search::{WebSearchEngine, SearchOptions}; +//! +//! #[tokio::main] +//! async fn main() -> Result<(), Box<dyn std::error::Error>> { +//! let engine = WebSearchEngine::new(); +//! let results = engine.search("rust programming", SearchOptions::default()).await?; +//! +//! for result in results { +//! println!("{}: {}", result.title, result.url); +//! } +//! Ok(()) +//! } +//!
``` + +pub mod error; +pub mod merger; +pub mod providers; +pub mod search; + +pub use error::SearchError; +pub use merger::{MergeOptions, MergeStrategy}; +pub use providers::{SearchOptions, SearchResult}; +pub use search::WebSearchEngine; diff --git a/rust/src/main.rs b/rust/src/main.rs new file mode 100644 index 0000000..c86f6a9 --- /dev/null +++ b/rust/src/main.rs @@ -0,0 +1,411 @@ +//! Web Search CLI and Server +//! +//! Usage: +//! web-search [options] Search the web +//! web-search --serve [--port ] Start as API server + +use std::collections::HashMap; +use std::net::SocketAddr; + +use axum::{ + extract::{Path, Query, State}, + http::StatusCode, + response::Json, + routing::get, + Router, +}; +use clap::{Parser, Subcommand}; +use serde::{Deserialize, Serialize}; +use tower_http::cors::CorsLayer; +use tracing_subscriber::EnvFilter; + +use web_search::{MergeOptions, MergeStrategy, SearchOptions, WebSearchEngine}; + +#[derive(Parser)] +#[command(name = "web-search")] +#[command(about = "Multi-provider web search aggregator")] +#[command(version)] +struct Cli { + #[command(subcommand)] + command: Option, + + /// Search query (when not using subcommands) + #[arg(trailing_var_arg = true)] + query: Vec, + + /// Providers to use (comma-separated) + #[arg(long, value_delimiter = ',')] + providers: Option>, + + /// Maximum results per provider + #[arg(short, long, default_value = "10")] + limit: usize, + + /// Merge strategy (rrf, weighted, interleave) + #[arg(long, default_value = "rrf")] + strategy: String, + + /// Output format (text, json, urls) + #[arg(short, long, default_value = "text")] + format: String, + + /// Language code + #[arg(long)] + language: Option, + + /// Region code + #[arg(long)] + region: Option, + + /// Enable safe search + #[arg(long)] + safe: bool, + + /// Verbose output + #[arg(short = 'V', long)] + verbose: bool, +} + +#[derive(Subcommand)] +enum Commands { + /// Start as HTTP API server + Serve { + /// Port to listen on + #[arg(short, 
long, default_value = "3000")] + port: u16, + }, +} + +#[derive(Clone)] +struct AppState { + engine: std::sync::Arc, +} + +#[derive(Debug, Deserialize)] +struct SearchQuery { + q: Option, + query: Option, + providers: Option, + limit: Option, + strategy: Option, + language: Option, + region: Option, + #[serde(rename = "safeSearch")] + safe_search: Option, + safe: Option, +} + +#[derive(Debug, Serialize)] +struct SearchResponse { + query: String, + count: usize, + options: SearchResponseOptions, + results: Vec, +} + +#[derive(Debug, Serialize)] +struct SearchResponseOptions { + providers: Vec, + strategy: String, + limit: usize, +} + +#[derive(Debug, Serialize)] +struct ErrorResponse { + error: String, + #[serde(skip_serializing_if = "Option::is_none")] + message: Option, +} + +#[derive(Debug, Serialize)] +struct HealthResponse { + status: String, + providers: HashMap, +} + +async fn health_handler(State(state): State) -> Json { + Json(HealthResponse { + status: "healthy".to_string(), + providers: state.engine.get_provider_status().await, + }) +} + +async fn providers_handler( + State(state): State, +) -> Json> { + Json(state.engine.get_provider_status().await) +} + +async fn search_handler( + State(state): State, + Query(params): Query, +) -> Result, (StatusCode, Json)> { + let query = params.q.or(params.query).ok_or_else(|| { + ( + StatusCode::BAD_REQUEST, + Json(ErrorResponse { + error: "Missing required parameter: q or query".to_string(), + message: None, + }), + ) + })?; + + let providers = params + .providers + .map(|p| p.split(',').map(|s| s.trim().to_string()).collect()); + let limit = params.limit.unwrap_or(10); + let strategy = match params.strategy.as_deref() { + Some("weighted") => MergeStrategy::Weighted, + Some("interleave") => MergeStrategy::Interleave, + _ => MergeStrategy::Rrf, + }; + let safe_search = params.safe_search.or(params.safe); + + let search_options = SearchOptions { + limit: Some(limit), + language: params.language, + region: 
params.region, + safe_search, + }; + + let merge_options = MergeOptions { + strategy, + weights: HashMap::new(), + rrf_k: None, + remove_duplicates: true, + }; + + let results = state + .engine + .search_with_options( + &query, + search_options, + providers.clone(), + Some(merge_options), + ) + .await + .map_err(|e| { + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(ErrorResponse { + error: "Search failed".to_string(), + message: Some(e.to_string()), + }), + ) + })?; + + let strategy_str = match strategy { + MergeStrategy::Rrf => "rrf", + MergeStrategy::Weighted => "weighted", + MergeStrategy::Interleave => "interleave", + }; + + Ok(Json(SearchResponse { + query, + count: results.len(), + options: SearchResponseOptions { + providers: providers.unwrap_or_else(|| state.engine.get_available_providers()), + strategy: strategy_str.to_string(), + limit, + }, + results, + })) +} + +async fn search_provider_handler( + State(state): State, + Path(provider): Path, + Query(params): Query, +) -> Result, (StatusCode, Json)> { + let query = params.q.or(params.query).ok_or_else(|| { + ( + StatusCode::BAD_REQUEST, + Json(ErrorResponse { + error: "Missing required parameter: q or query".to_string(), + message: None, + }), + ) + })?; + + let search_options = SearchOptions { + limit: params.limit, + language: params.language, + region: params.region, + safe_search: params.safe_search.or(params.safe), + }; + + let results = state + .engine + .search_single(&query, &provider, search_options) + .await + .map_err(|e| { + let status = if e.to_string().contains("Unknown provider") { + StatusCode::BAD_REQUEST + } else { + StatusCode::INTERNAL_SERVER_ERROR + }; + ( + status, + Json(ErrorResponse { + error: e.to_string(), + message: None, + }), + ) + })?; + + Ok(Json(serde_json::json!({ + "query": query, + "provider": provider, + "count": results.len(), + "results": results, + }))) +} + +async fn start_server(port: u16) -> Result<(), Box> { + let state = AppState { + engine: 
std::sync::Arc::new(WebSearchEngine::new()), + }; + + let app = Router::new() + .route("/health", get(health_handler)) + .route("/providers", get(providers_handler)) + .route("/search", get(search_handler)) + .route("/search/{provider}", get(search_provider_handler)) + .layer(CorsLayer::permissive()) + .with_state(state); + + let addr = SocketAddr::from(([0, 0, 0, 0], port)); + println!("Web Search API listening on http://localhost:{}", port); + println!(); + println!("Available endpoints:"); + println!(" GET /search?q= - Search all providers"); + println!(" GET /search/:provider?q= - Search single provider"); + println!(" GET /providers - List available providers"); + println!(" GET /health - Health check"); + println!(); + println!("Press Ctrl+C to stop the server"); + + let listener = tokio::net::TcpListener::bind(addr).await?; + axum::serve(listener, app).await?; + + Ok(()) +} + +async fn perform_search(cli: &Cli) -> Result<(), Box> { + let query = cli.query.join(" "); + if query.is_empty() { + eprintln!("Error: Missing search query"); + eprintln!("Run with --help for usage information"); + std::process::exit(1); + } + + let engine = WebSearchEngine::new(); + + let strategy = match cli.strategy.as_str() { + "weighted" => MergeStrategy::Weighted, + "interleave" => MergeStrategy::Interleave, + _ => MergeStrategy::Rrf, + }; + + let search_options = SearchOptions { + limit: Some(cli.limit), + language: cli.language.clone(), + region: cli.region.clone(), + safe_search: if cli.safe { Some(true) } else { None }, + }; + + let merge_options = MergeOptions { + strategy, + weights: HashMap::new(), + rrf_k: None, + remove_duplicates: true, + }; + + if cli.verbose { + eprintln!("Searching for: \"{}\"", query); + eprintln!( + "Providers: {}", + cli.providers + .as_ref() + .map(|p| p.join(", ")) + .unwrap_or_else(|| "all".to_string()) + ); + eprintln!("Strategy: {}", cli.strategy); + eprintln!(); + } + + let results = engine + .search_with_options( + &query, + search_options, 
+ cli.providers.clone(), + Some(merge_options), + ) + .await?; + + match cli.format.as_str() { + "json" => { + let response = serde_json::json!({ + "query": query, + "count": results.len(), + "results": results, + }); + println!("{}", serde_json::to_string_pretty(&response)?); + } + "urls" => { + for result in &results { + println!("{}", result.url); + } + } + _ => { + if results.is_empty() { + println!("No results found."); + return Ok(()); + } + + println!("Found {} results for \"{}\":\n", results.len(), query); + + for result in &results { + println!("{}. {}", result.rank, result.title); + println!(" {}", result.url); + if !result.snippet.is_empty() { + let snippet = if result.snippet.len() > 150 { + format!("{}...", &result.snippet[..150]) + } else { + result.snippet.clone() + }; + println!(" {}", snippet); + } + let sources = result + .sources + .as_ref() + .map(|s| s.join(", ")) + .unwrap_or_else(|| result.source.clone()); + println!(" [{}]", sources); + println!(); + } + } + } + + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt() + .with_env_filter(EnvFilter::from_default_env()) + .init(); + + let cli = Cli::parse(); + + match &cli.command { + Some(Commands::Serve { port }) => { + start_server(*port).await?; + } + None => { + perform_search(&cli).await?; + } + } + + Ok(()) +} diff --git a/rust/src/merger.rs b/rust/src/merger.rs new file mode 100644 index 0000000..e84281b --- /dev/null +++ b/rust/src/merger.rs @@ -0,0 +1,301 @@ +//! 
Search result merger with reranking support + +use std::collections::{HashMap, HashSet}; + +use crate::providers::SearchResult; + +/// Merge strategy for combining results +#[derive(Debug, Clone, Copy, Default)] +pub enum MergeStrategy { + /// Reciprocal Rank Fusion (default) + #[default] + Rrf, + /// Weighted scoring + Weighted, + /// Interleaved (round-robin) + Interleave, +} + +/// Options for merging search results +#[derive(Debug, Clone, Default)] +pub struct MergeOptions { + /// Merge strategy to use + pub strategy: MergeStrategy, + /// Weights for each provider (provider name -> weight) + pub weights: HashMap, + /// RRF k parameter (default: 60) + pub rrf_k: Option, + /// Whether to remove duplicate URLs (default: true) + pub remove_duplicates: bool, +} + +impl MergeOptions { + /// Create new merge options with default values + pub fn new() -> Self { + Self { + strategy: MergeStrategy::Rrf, + weights: HashMap::new(), + rrf_k: None, + remove_duplicates: true, + } + } + + /// Set the merge strategy + pub fn with_strategy(mut self, strategy: MergeStrategy) -> Self { + self.strategy = strategy; + self + } + + /// Set provider weights + pub fn with_weights(mut self, weights: HashMap) -> Self { + self.weights = weights; + self + } + + /// Set the RRF k parameter + pub fn with_rrf_k(mut self, k: f64) -> Self { + self.rrf_k = Some(k); + self + } +} + +/// Normalize URL for deduplication +fn normalize_url(url: &str) -> String { + match url::Url::parse(url) { + Ok(parsed) => { + let mut normalized = format!("{}{}", parsed.host_str().unwrap_or(""), parsed.path()); + normalized = normalized.trim_end_matches('/').to_lowercase(); + normalized + } + Err(_) => url.to_lowercase(), + } +} + +/// Calculate RRF score +fn rrf_score(rank: usize, k: f64) -> f64 { + 1.0 / (k + rank as f64) +} + +/// Merge results using Reciprocal Rank Fusion +pub fn merge_with_rrf( + results_by_provider: &HashMap>, + options: &MergeOptions, +) -> Vec { + let k = options.rrf_k.unwrap_or(60.0); + let 
mut scores_by_url: HashMap = HashMap::new(); + let mut results_by_url: HashMap = HashMap::new(); + let mut sources_by_url: HashMap> = HashMap::new(); + + for (provider, results) in results_by_provider { + let weight = options.weights.get(provider).copied().unwrap_or(1.0); + + for result in results { + let normalized_url = normalize_url(&result.url); + let score = rrf_score(result.rank, k) * weight; + + *scores_by_url.entry(normalized_url.clone()).or_insert(0.0) += score; + + sources_by_url + .entry(normalized_url.clone()) + .or_default() + .insert(result.source.clone()); + + results_by_url + .entry(normalized_url) + .or_insert_with(|| result.clone()); + } + } + + let mut merged: Vec<_> = scores_by_url + .into_iter() + .map(|(url, score)| { + let mut result = results_by_url.remove(&url).unwrap(); + result.score = Some(score); + let sources: Vec<_> = sources_by_url + .get(&url) + .map(|s| s.iter().cloned().collect()) + .unwrap_or_default(); + if sources.len() > 1 { + result.sources = Some(sources); + } + (score, result) + }) + .collect(); + + merged.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); + + merged + .into_iter() + .enumerate() + .map(|(i, (_, mut result))| { + result.rank = i + 1; + result + }) + .collect() +} + +/// Merge results using weighted scoring +pub fn merge_with_weights( + results_by_provider: &HashMap>, + options: &MergeOptions, +) -> Vec { + let max_rank = 100.0; + let mut scores_by_url: HashMap = HashMap::new(); + let mut results_by_url: HashMap = HashMap::new(); + let mut sources_by_url: HashMap> = HashMap::new(); + + for (provider, results) in results_by_provider { + let weight = options.weights.get(provider).copied().unwrap_or(1.0); + + for result in results { + let normalized_url = normalize_url(&result.url); + let score = ((max_rank - result.rank as f64 + 1.0) / max_rank) * weight; + + *scores_by_url.entry(normalized_url.clone()).or_insert(0.0) += score; + + sources_by_url + .entry(normalized_url.clone()) + 
.or_default() + .insert(result.source.clone()); + + results_by_url + .entry(normalized_url) + .or_insert_with(|| result.clone()); + } + } + + let mut merged: Vec<_> = scores_by_url + .into_iter() + .map(|(url, score)| { + let mut result = results_by_url.remove(&url).unwrap(); + result.score = Some(score); + let sources: Vec<_> = sources_by_url + .get(&url) + .map(|s| s.iter().cloned().collect()) + .unwrap_or_default(); + if sources.len() > 1 { + result.sources = Some(sources); + } + (score, result) + }) + .collect(); + + merged.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); + + merged + .into_iter() + .enumerate() + .map(|(i, (_, mut result))| { + result.rank = i + 1; + result + }) + .collect() +} + +/// Merge results using interleaving (round-robin) +pub fn merge_with_interleave( + results_by_provider: &HashMap>, + options: &MergeOptions, +) -> Vec { + let mut results = Vec::new(); + let mut seen_urls: HashSet = HashSet::new(); + + let providers: Vec<_> = results_by_provider.keys().collect(); + let max_len = results_by_provider + .values() + .map(|v| v.len()) + .max() + .unwrap_or(0); + + for i in 0..max_len { + for provider in &providers { + if let Some(provider_results) = results_by_provider.get(*provider) { + if i < provider_results.len() { + let result = &provider_results[i]; + + if options.remove_duplicates { + let normalized = normalize_url(&result.url); + if seen_urls.contains(&normalized) { + continue; + } + seen_urls.insert(normalized); + } + + let mut new_result = result.clone(); + new_result.rank = results.len() + 1; + results.push(new_result); + } + } + } + } + + results +} + +/// Merge search results using the specified strategy +pub fn merge_results( + results_by_provider: &HashMap>, + options: &MergeOptions, +) -> Vec { + match options.strategy { + MergeStrategy::Rrf => merge_with_rrf(results_by_provider, options), + MergeStrategy::Weighted => merge_with_weights(results_by_provider, options), + MergeStrategy::Interleave 
=> merge_with_interleave(results_by_provider, options), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn create_test_result(url: &str, title: &str, source: &str, rank: usize) -> SearchResult { + SearchResult { + title: title.to_string(), + url: url.to_string(), + snippet: String::new(), + source: source.to_string(), + rank, + score: None, + sources: None, + } + } + + #[test] + fn test_rrf_merge() { + let mut results_by_provider = HashMap::new(); + + results_by_provider.insert( + "google".to_string(), + vec![ + create_test_result("https://example.com/1", "Result 1", "google", 1), + create_test_result("https://example.com/2", "Result 2", "google", 2), + ], + ); + + results_by_provider.insert( + "bing".to_string(), + vec![ + create_test_result("https://example.com/2", "Result 2", "bing", 1), + create_test_result("https://example.com/3", "Result 3", "bing", 2), + ], + ); + + let options = MergeOptions::new(); + let merged = merge_with_rrf(&results_by_provider, &options); + + assert_eq!(merged.len(), 3); + assert!(merged[0].url.contains("example.com/2")); + } + + #[test] + fn test_url_normalization() { + assert_eq!( + normalize_url("https://example.com/path/"), + normalize_url("https://example.com/path") + ); + assert_eq!( + normalize_url("https://Example.COM/Path"), + normalize_url("https://example.com/path") + ); + } +} diff --git a/rust/src/providers/base.rs b/rust/src/providers/base.rs new file mode 100644 index 0000000..eb3b020 --- /dev/null +++ b/rust/src/providers/base.rs @@ -0,0 +1,66 @@ +//! 
Base provider trait and common types + +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +use crate::error::SearchError; + +/// A single search result +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResult { + /// The title of the search result + pub title: String, + /// The URL of the search result + pub url: String, + /// The description/snippet of the search result + pub snippet: String, + /// The search provider that returned this result + pub source: String, + /// The rank position in the original results (1-based) + pub rank: usize, + /// Computed score after merging (optional) + #[serde(skip_serializing_if = "Option::is_none")] + pub score: Option, + /// Sources that returned this result (after deduplication) + #[serde(skip_serializing_if = "Option::is_none")] + pub sources: Option>, +} + +/// Options for search queries +#[derive(Debug, Clone, Default)] +pub struct SearchOptions { + /// Maximum number of results to return + pub limit: Option, + /// Language code (e.g., "en", "de") + pub language: Option, + /// Region code (e.g., "us", "de") + pub region: Option, + /// Enable safe search filtering + pub safe_search: Option, +} + +/// Trait that all search providers must implement +#[async_trait] +pub trait SearchProvider: Send + Sync { + /// Get the provider name + fn name(&self) -> &str; + + /// Check if the provider is available/enabled + fn is_available(&self) -> bool; + + /// Get the provider weight for reranking + fn weight(&self) -> f64; + + /// Set the provider weight for reranking + fn set_weight(&mut self, weight: f64); + + /// Enable or disable the provider + fn set_enabled(&mut self, enabled: bool); + + /// Perform a search + async fn search( + &self, + query: &str, + options: &SearchOptions, + ) -> Result, SearchError>; +} diff --git a/rust/src/providers/bing.rs b/rust/src/providers/bing.rs new file mode 100644 index 0000000..26b6836 --- /dev/null +++ b/rust/src/providers/bing.rs @@ -0,0 +1,281 @@ +//! 
Bing search provider + +use async_trait::async_trait; +use serde::Deserialize; + +use super::base::{SearchOptions, SearchProvider, SearchResult}; +use crate::error::SearchError; + +/// Bing Web Search API response +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct BingApiResponse { + web_pages: Option, +} + +#[derive(Debug, Deserialize)] +struct BingWebPages { + value: Vec, +} + +#[derive(Debug, Deserialize)] +struct BingWebPage { + name: String, + url: String, + snippet: Option, +} + +/// Configuration for Bing provider +#[derive(Debug, Clone, Default)] +pub struct BingConfig { + /// Bing Search API key + pub api_key: Option, +} + +/// Bing search provider +pub struct BingProvider { + name: String, + enabled: bool, + weight: f64, + client: reqwest::Client, + config: BingConfig, + api_url: String, +} + +impl BingProvider { + /// Create a new Bing provider with optional API credentials + pub fn new(config: BingConfig) -> Self { + Self { + name: "bing".to_string(), + enabled: true, + weight: 1.0, + client: reqwest::Client::builder() + .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + .build() + .expect("Failed to create HTTP client"), + config, + api_url: "https://api.bing.microsoft.com/v7.0/search".to_string(), + } + } + + /// Create a new Bing provider from environment variables + pub fn from_env() -> Self { + Self::new(BingConfig { + api_key: std::env::var("BING_API_KEY").ok(), + }) + } + + /// Check if API credentials are configured + pub fn has_api_credentials(&self) -> bool { + self.config.api_key.is_some() + } + + async fn search_with_api( + &self, + query: &str, + options: &SearchOptions, + ) -> Result, SearchError> { + let api_key = self.config.api_key.as_ref().unwrap(); + let limit = options.limit.unwrap_or(10).min(50); + + let mut url = format!( + "{}?q={}&count={}&responseFilter=Webpages", + self.api_url, + urlencoding::encode(query), + limit + ); + + if 
let Some(ref region) = options.region { + let lang = options.language.as_deref().unwrap_or("en"); + url.push_str(&format!("&mkt={}-{}", lang, region.to_uppercase())); + } + + let safe_search = match options.safe_search { + Some(true) => "Strict", + Some(false) => "Off", + None => "Moderate", + }; + url.push_str(&format!("&safeSearch={}", safe_search)); + + let response = self + .client + .get(&url) + .header("Ocp-Apim-Subscription-Key", api_key) + .send() + .await?; + + if !response.status().is_success() { + let error_text = response.text().await.unwrap_or_default(); + return Err(SearchError::ApiError { + provider: self.name.clone(), + message: error_text, + }); + } + + let api_response: BingApiResponse = response.json().await?; + + let results = api_response + .web_pages + .map(|wp| wp.value) + .unwrap_or_default() + .into_iter() + .enumerate() + .map(|(i, page)| SearchResult { + title: page.name, + url: page.url, + snippet: page.snippet.unwrap_or_default(), + source: self.name.clone(), + rank: i + 1, + score: None, + sources: None, + }) + .collect(); + + Ok(results) + } + + async fn search_with_scraping( + &self, + query: &str, + options: &SearchOptions, + ) -> Result, SearchError> { + let limit = options.limit.unwrap_or(10); + let mut url = format!( + "https://www.bing.com/search?q={}&count={}", + urlencoding::encode(query), + limit.min(30) + ); + + if let Some(ref region) = options.region { + url.push_str(&format!("&cc={}", region.to_uppercase())); + } + + let response = self + .client + .get(&url) + .header( + "Accept", + "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + ) + .header("Accept-Language", "en-US,en;q=0.5") + .send() + .await?; + + if !response.status().is_success() { + return Err(SearchError::ApiError { + provider: self.name.clone(), + message: format!("HTTP {}", response.status()), + }); + } + + let html = response.text().await?; + Ok(self.parse_scraped_results(&html, limit)) + } + + fn parse_scraped_results(&self, html: &str, 
limit: usize) -> Vec { + use scraper::{Html, Selector}; + + let document = Html::parse_document(html); + let mut results = Vec::new(); + + let algo_selector = Selector::parse(".b_algo").unwrap(); + let link_selector = Selector::parse("a").unwrap(); + let h2_selector = Selector::parse("h2").unwrap(); + let snippet_selector = Selector::parse("p").unwrap(); + + for element in document.select(&algo_selector) { + if results.len() >= limit { + break; + } + + let link = element.select(&link_selector).next(); + let h2 = element.select(&h2_selector).next(); + let snippet = element.select(&snippet_selector).next(); + + if let Some(link_elem) = link { + let url = link_elem.value().attr("href").unwrap_or_default(); + if url.is_empty() || url.contains("bing.com") || url.starts_with('/') { + continue; + } + + let title = h2 + .map(|h| h.text().collect::()) + .unwrap_or_else(|| link_elem.text().collect::()) + .trim() + .to_string(); + + let snippet_text = snippet + .map(|s| s.text().collect::()) + .unwrap_or_default() + .trim() + .to_string(); + + if title.is_empty() { + continue; + } + + results.push(SearchResult { + title, + url: url.to_string(), + snippet: snippet_text, + source: self.name.clone(), + rank: results.len() + 1, + score: None, + sources: None, + }); + } + } + + results + } +} + +impl Default for BingProvider { + fn default() -> Self { + Self::from_env() + } +} + +#[async_trait] +impl SearchProvider for BingProvider { + fn name(&self) -> &str { + &self.name + } + + fn is_available(&self) -> bool { + self.enabled + } + + fn weight(&self) -> f64 { + self.weight + } + + fn set_weight(&mut self, weight: f64) { + self.weight = weight.clamp(0.0, 1.0); + } + + fn set_enabled(&mut self, enabled: bool) { + self.enabled = enabled; + } + + async fn search( + &self, + query: &str, + options: &SearchOptions, + ) -> Result, SearchError> { + if query.is_empty() { + return Ok(Vec::new()); + } + + if self.has_api_credentials() { + match self.search_with_api(query, options).await { 
+ Ok(results) => return Ok(results), + Err(e) => { + tracing::warn!("Bing API search failed, falling back to scraping: {}", e); + } + } + } + + self.search_with_scraping(query, options).await + } +} diff --git a/rust/src/providers/duckduckgo.rs b/rust/src/providers/duckduckgo.rs new file mode 100644 index 0000000..5a0c196 --- /dev/null +++ b/rust/src/providers/duckduckgo.rs @@ -0,0 +1,149 @@ +//! DuckDuckGo search provider + +use async_trait::async_trait; +use scraper::{Html, Selector}; + +use super::base::{SearchOptions, SearchProvider, SearchResult}; +use crate::error::SearchError; + +/// DuckDuckGo search provider +pub struct DuckDuckGoProvider { + name: String, + enabled: bool, + weight: f64, + client: reqwest::Client, + base_url: String, +} + +impl DuckDuckGoProvider { + /// Create a new DuckDuckGo provider + pub fn new() -> Self { + Self { + name: "duckduckgo".to_string(), + enabled: true, + weight: 1.0, + client: reqwest::Client::builder() + .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + .build() + .expect("Failed to create HTTP client"), + base_url: "https://html.duckduckgo.com/html/".to_string(), + } + } + + fn parse_results(&self, html: &str, limit: usize) -> Vec { + let document = Html::parse_document(html); + let mut results = Vec::new(); + + let result_selector = + Selector::parse(".result__a").unwrap_or_else(|_| Selector::parse("a").unwrap()); + let snippet_selector = Selector::parse(".result__snippet") + .unwrap_or_else(|_| Selector::parse(".result__body").unwrap()); + + let links: Vec<_> = document.select(&result_selector).collect(); + let snippets: Vec<_> = document.select(&snippet_selector).collect(); + + for (i, link) in links.iter().enumerate() { + if results.len() >= limit { + break; + } + + let url = link.value().attr("href").unwrap_or_default(); + if url.is_empty() || url.starts_with("//duckduckgo.com") || url.contains("ad_provider") + { + continue; + } + + let 
decoded_url = urlencoding::decode(url).unwrap_or_else(|_| url.into()); + let title = link.text().collect::().trim().to_string(); + let snippet = snippets + .get(i) + .map(|s| s.text().collect::().trim().to_string()) + .unwrap_or_default(); + + results.push(SearchResult { + title: if title.is_empty() { + "Untitled".to_string() + } else { + title + }, + url: decoded_url.to_string(), + snippet, + source: self.name.clone(), + rank: results.len() + 1, + score: None, + sources: None, + }); + } + + results + } +} + +impl Default for DuckDuckGoProvider { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl SearchProvider for DuckDuckGoProvider { + fn name(&self) -> &str { + &self.name + } + + fn is_available(&self) -> bool { + self.enabled + } + + fn weight(&self) -> f64 { + self.weight + } + + fn set_weight(&mut self, weight: f64) { + self.weight = weight.clamp(0.0, 1.0); + } + + fn set_enabled(&mut self, enabled: bool) { + self.enabled = enabled; + } + + async fn search( + &self, + query: &str, + options: &SearchOptions, + ) -> Result, SearchError> { + if query.is_empty() { + return Ok(Vec::new()); + } + + let limit = options.limit.unwrap_or(10); + let mut params = vec![("q", query.to_string())]; + + if let Some(ref region) = options.region { + params.push(("kl", region.clone())); + } else { + params.push(("kl", "wt-wt".to_string())); + } + + if let Some(safe) = options.safe_search { + params.push(("kp", if safe { "1" } else { "-2" }.to_string())); + } + + let response = self + .client + .post(&self.base_url) + .form(¶ms) + .send() + .await?; + + if !response.status().is_success() { + return Err(SearchError::ApiError { + provider: self.name.clone(), + message: format!("HTTP {}", response.status()), + }); + } + + let html = response.text().await?; + Ok(self.parse_results(&html, limit)) + } +} diff --git a/rust/src/providers/google.rs b/rust/src/providers/google.rs new file mode 100644 index 0000000..2fa0912 --- /dev/null +++ b/rust/src/providers/google.rs 
@@ -0,0 +1,285 @@ +//! Google search provider + +use async_trait::async_trait; +use serde::Deserialize; + +use super::base::{SearchOptions, SearchProvider, SearchResult}; +use crate::error::SearchError; + +/// Google Custom Search API response +#[derive(Debug, Deserialize)] +struct GoogleApiResponse { + items: Option<Vec<GoogleApiItem>>, +} + +#[derive(Debug, Deserialize)] +struct GoogleApiItem { + title: String, + link: String, + snippet: Option<String>, +} + +/// Configuration for Google provider +#[derive(Debug, Clone, Default)] +pub struct GoogleConfig { + /// Google Custom Search API key + pub api_key: Option<String>, + /// Google Custom Search Engine ID + pub search_engine_id: Option<String>, +} + +/// Google search provider +pub struct GoogleProvider { + name: String, + enabled: bool, + weight: f64, + client: reqwest::Client, + config: GoogleConfig, + api_url: String, +} + +impl GoogleProvider { + /// Create a new Google provider with optional API credentials + pub fn new(config: GoogleConfig) -> Self { + Self { + name: "google".to_string(), + enabled: true, + weight: 1.0, + client: reqwest::Client::builder() + .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + .build() + .expect("Failed to create HTTP client"), + config, + api_url: "https://www.googleapis.com/customsearch/v1".to_string(), + } + } + + /// Create a new Google provider from environment variables + pub fn from_env() -> Self { + Self::new(GoogleConfig { + api_key: std::env::var("GOOGLE_API_KEY").ok(), + search_engine_id: std::env::var("GOOGLE_CX").ok(), + }) + } + + /// Check if API credentials are configured + pub fn has_api_credentials(&self) -> bool { + self.config.api_key.is_some() && self.config.search_engine_id.is_some() + } + + async fn search_with_api( + &self, + query: &str, + options: &SearchOptions, + ) -> Result<Vec<SearchResult>, SearchError> { + let api_key = self.config.api_key.as_ref().unwrap(); + let cx = self.config.search_engine_id.as_ref().unwrap(); + let 
limit = options.limit.unwrap_or(10).min(10); + + let mut url = format!( + "{}?key={}&cx={}&q={}&num={}", + self.api_url, + api_key, + cx, + urlencoding::encode(query), + limit + ); + + if let Some(ref lang) = options.language { + url.push_str(&format!("&lr=lang_{}", lang)); + } + + if let Some(ref region) = options.region { + url.push_str(&format!("&gl={}", region)); + } + + if let Some(safe) = options.safe_search { + url.push_str(&format!("&safe={}", if safe { "active" } else { "off" })); + } + + let response = self.client.get(&url).send().await?; + + if !response.status().is_success() { + let error_text = response.text().await.unwrap_or_default(); + return Err(SearchError::ApiError { + provider: self.name.clone(), + message: error_text, + }); + } + + let api_response: GoogleApiResponse = response.json().await?; + + let results = api_response + .items + .unwrap_or_default() + .into_iter() + .enumerate() + .map(|(i, item)| SearchResult { + title: item.title, + url: item.link, + snippet: item.snippet.unwrap_or_default(), + source: self.name.clone(), + rank: i + 1, + score: None, + sources: None, + }) + .collect(); + + Ok(results) + } + + async fn search_with_scraping( + &self, + query: &str, + options: &SearchOptions, + ) -> Result<Vec<SearchResult>, SearchError> { + let limit = options.limit.unwrap_or(10); + let mut url = format!( + "https://www.google.com/search?q={}&num={}", + urlencoding::encode(query), + limit.min(20) + ); + + if let Some(ref lang) = options.language { + url.push_str(&format!("&hl={}", lang)); + } + + if let Some(ref region) = options.region { + url.push_str(&format!("&gl={}", region)); + } + + let response = self + .client + .get(&url) + .header( + "Accept", + "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + ) + .header("Accept-Language", "en-US,en;q=0.5") + .send() + .await?; + + if !response.status().is_success() { + return Err(SearchError::ApiError { + provider: self.name.clone(), + message: format!("HTTP {}", response.status()), + }); + 
} + + let html = response.text().await?; + Ok(self.parse_scraped_results(&html, limit)) + } + + fn parse_scraped_results(&self, html: &str, limit: usize) -> Vec<SearchResult> { + use scraper::{Html, Selector}; + + let document = Html::parse_document(html); + let mut results = Vec::new(); + let mut seen_urls = std::collections::HashSet::new(); + + let link_selector = Selector::parse("a").unwrap(); + let h3_selector = Selector::parse("h3").unwrap(); + + for element in document.select(&link_selector) { + if results.len() >= limit { + break; + } + + let href = element.value().attr("href").unwrap_or_default(); + let url = if href.starts_with("/url?q=") { + href.strip_prefix("/url?q=") + .and_then(|u| u.split('&').next()) + .map(|u| { + urlencoding::decode(u) + .unwrap_or_else(|_| u.into()) + .to_string() + }) + } else if href.starts_with("http") && !href.contains("google.com") { + Some(href.to_string()) + } else { + None + }; + + if let Some(url) = url { + if seen_urls.contains(&url) || url.contains("google.com") { + continue; + } + + let title = element + .select(&h3_selector) + .next() + .map(|h3| h3.text().collect::<String>()) + .unwrap_or_default() + .trim() + .to_string(); + + if title.is_empty() { + continue; + } + + seen_urls.insert(url.clone()); + results.push(SearchResult { + title, + url, + snippet: String::new(), + source: self.name.clone(), + rank: results.len() + 1, + score: None, + sources: None, + }); + } + } + + results + } +} + +impl Default for GoogleProvider { + fn default() -> Self { + Self::from_env() + } +} + +#[async_trait] +impl SearchProvider for GoogleProvider { + fn name(&self) -> &str { + &self.name + } + + fn is_available(&self) -> bool { + self.enabled + } + + fn weight(&self) -> f64 { + self.weight + } + + fn set_weight(&mut self, weight: f64) { + self.weight = weight.clamp(0.0, 1.0); + } + + fn set_enabled(&mut self, enabled: bool) { + self.enabled = enabled; + } + + async fn search( + &self, + query: &str, + options: &SearchOptions, + ) -> Result<Vec<SearchResult>, 
SearchError> { + if query.is_empty() { + return Ok(Vec::new()); + } + + if self.has_api_credentials() { + match self.search_with_api(query, options).await { + Ok(results) => return Ok(results), + Err(e) => { + tracing::warn!("Google API search failed, falling back to scraping: {}", e); + } + } + } + + self.search_with_scraping(query, options).await + } +} diff --git a/rust/src/providers/mod.rs b/rust/src/providers/mod.rs new file mode 100644 index 0000000..29c6967 --- /dev/null +++ b/rust/src/providers/mod.rs @@ -0,0 +1,11 @@ +//! Search provider implementations + +mod base; +mod bing; +mod duckduckgo; +mod google; + +pub use base::{SearchOptions, SearchProvider, SearchResult}; +pub use bing::{BingConfig, BingProvider}; +pub use duckduckgo::DuckDuckGoProvider; +pub use google::{GoogleConfig, GoogleProvider}; diff --git a/rust/src/search.rs b/rust/src/search.rs new file mode 100644 index 0000000..4987cf8 --- /dev/null +++ b/rust/src/search.rs @@ -0,0 +1,244 @@ +//! Web Search Engine - main entry point + +use std::collections::HashMap; +use std::sync::Arc; + +use tokio::sync::RwLock; + +use crate::error::SearchError; +use crate::merger::{merge_results, MergeOptions, MergeStrategy}; +use crate::providers::{ + BingConfig, BingProvider, DuckDuckGoProvider, GoogleConfig, GoogleProvider, SearchOptions, + SearchProvider, SearchResult, +}; + +/// Configuration for the web search engine +#[derive(Debug, Clone, Default)] +pub struct WebSearchConfig { + /// Providers to use by default + pub providers: Vec<String>, + /// Google API key + pub google_api_key: Option<String>, + /// Google Custom Search Engine ID + pub google_cx: Option<String>, + /// Bing API key + pub bing_api_key: Option<String>, + /// Default weights for providers + pub weights: HashMap<String, f64>, + /// Default merge strategy + pub merge_strategy: MergeStrategy, +} + +impl WebSearchConfig { + /// Create config from environment variables + pub fn from_env() -> Self { + Self { + providers: vec![ + "duckduckgo".to_string(), + "google".to_string(), + 
"bing".to_string(), + ], + google_api_key: std::env::var("GOOGLE_API_KEY").ok(), + google_cx: std::env::var("GOOGLE_CX").ok(), + bing_api_key: std::env::var("BING_API_KEY").ok(), + weights: HashMap::new(), + merge_strategy: MergeStrategy::Rrf, + } + } +} + +/// Web Search Engine +pub struct WebSearchEngine { + providers: HashMap>>>, + default_providers: Vec, + default_weights: HashMap, + default_strategy: MergeStrategy, +} + +impl WebSearchEngine { + /// Create a new web search engine with default configuration + pub fn new() -> Self { + Self::with_config(WebSearchConfig::from_env()) + } + + /// Create a new web search engine with custom configuration + pub fn with_config(config: WebSearchConfig) -> Self { + let mut providers: HashMap>>> = HashMap::new(); + + providers.insert( + "duckduckgo".to_string(), + Arc::new(RwLock::new(Box::new(DuckDuckGoProvider::new()))), + ); + + providers.insert( + "google".to_string(), + Arc::new(RwLock::new(Box::new(GoogleProvider::new(GoogleConfig { + api_key: config.google_api_key, + search_engine_id: config.google_cx, + })))), + ); + + providers.insert( + "bing".to_string(), + Arc::new(RwLock::new(Box::new(BingProvider::new(BingConfig { + api_key: config.bing_api_key, + })))), + ); + + Self { + providers, + default_providers: config.providers, + default_weights: config.weights, + default_strategy: config.merge_strategy, + } + } + + /// Search across multiple providers + pub async fn search( + &self, + query: &str, + options: SearchOptions, + ) -> Result, SearchError> { + self.search_with_options(query, options, None, None).await + } + + /// Search with additional merge options + pub async fn search_with_options( + &self, + query: &str, + options: SearchOptions, + providers: Option>, + merge_options: Option, + ) -> Result, SearchError> { + if query.is_empty() { + return Ok(Vec::new()); + } + + let providers_to_use = providers.unwrap_or_else(|| self.default_providers.clone()); + let merge_opts = merge_options.unwrap_or_else(|| 
MergeOptions { + strategy: self.default_strategy, + weights: self.default_weights.clone(), + rrf_k: None, + remove_duplicates: true, + }); + + let mut handles = Vec::new(); + + for provider_name in &providers_to_use { + let provider = self.providers.get(provider_name).cloned(); + if provider.is_none() { + continue; + } + + let provider = provider.unwrap(); + let query = query.to_string(); + let opts = options.clone(); + let name = provider_name.clone(); + + handles.push(tokio::spawn(async move { + let provider = provider.read().await; + if !provider.is_available() { + return (name, Vec::new()); + } + match provider.search(&query, &opts).await { + Ok(results) => (name, results), + Err(e) => { + tracing::error!("Provider {} failed: {}", name, e); + (name, Vec::new()) + } + } + })); + } + + let mut results_by_provider = HashMap::new(); + + for handle in handles { + if let Ok((name, results)) = handle.await { + results_by_provider.insert(name, results); + } + } + + Ok(merge_results(&results_by_provider, &merge_opts)) + } + + /// Search with a single provider + pub async fn search_single( + &self, + query: &str, + provider_name: &str, + options: SearchOptions, + ) -> Result, SearchError> { + let provider = self + .providers + .get(provider_name) + .ok_or_else(|| SearchError::UnknownProvider(provider_name.to_string()))?; + + let provider = provider.read().await; + + if !provider.is_available() { + return Err(SearchError::ProviderDisabled(provider_name.to_string())); + } + + provider.search(query, &options).await + } + + /// Get available provider names + pub fn get_available_providers(&self) -> Vec { + self.providers.keys().cloned().collect() + } + + /// Get provider status + pub async fn get_provider_status(&self) -> HashMap { + let mut status = HashMap::new(); + + for (name, provider) in &self.providers { + let p = provider.read().await; + status.insert( + name.clone(), + ProviderStatus { + enabled: p.is_available(), + weight: p.weight(), + }, + ); + } + + status + } + 
+ /// Set provider weight + pub async fn set_provider_weight(&self, name: &str, weight: f64) -> Result<(), SearchError> { + let provider = self + .providers + .get(name) + .ok_or_else(|| SearchError::UnknownProvider(name.to_string()))?; + + provider.write().await.set_weight(weight); + Ok(()) + } + + /// Enable or disable a provider + pub async fn set_provider_enabled(&self, name: &str, enabled: bool) -> Result<(), SearchError> { + let provider = self + .providers + .get(name) + .ok_or_else(|| SearchError::UnknownProvider(name.to_string()))?; + + provider.write().await.set_enabled(enabled); + Ok(()) + } +} + +impl Default for WebSearchEngine { + fn default() -> Self { + Self::new() + } +} + +/// Provider status information +#[derive(Debug, Clone, serde::Serialize)] +pub struct ProviderStatus { + /// Whether the provider is enabled + pub enabled: bool, + /// Provider weight for reranking + pub weight: f64, +} diff --git a/scripts/create-manual-changeset.mjs b/scripts/create-manual-changeset.mjs index ba5559e..954d05d 100644 --- a/scripts/create-manual-changeset.mjs +++ b/scripts/create-manual-changeset.mjs @@ -15,8 +15,7 @@ import { writeFileSync } from 'fs'; import { randomBytes } from 'crypto'; -// TODO: Update this to match your package name in package.json -const PACKAGE_NAME = 'my-package'; +const PACKAGE_NAME = '@link-assistant/web-search'; // Load use-m dynamically const { use } = eval( diff --git a/scripts/format-release-notes.mjs b/scripts/format-release-notes.mjs index c6b6aaa..ddab938 100644 --- a/scripts/format-release-notes.mjs +++ b/scripts/format-release-notes.mjs @@ -23,8 +23,7 @@ * Note: Uses --release-version instead of --version to avoid conflict with yargs' built-in --version flag. 
*/ -// TODO: Update this to match your package name in package.json -const PACKAGE_NAME = 'my-package'; +const PACKAGE_NAME = '@link-assistant/web-search'; // Load use-m dynamically const { use } = eval( diff --git a/scripts/merge-changesets.mjs b/scripts/merge-changesets.mjs index 04a4b85..ced9d8c 100644 --- a/scripts/merge-changesets.mjs +++ b/scripts/merge-changesets.mjs @@ -25,8 +25,7 @@ import { } from 'fs'; import { join } from 'path'; -// TODO: Update this to match your package name in package.json -const PACKAGE_NAME = 'my-package'; +const PACKAGE_NAME = '@link-assistant/web-search'; const CHANGESET_DIR = '.changeset'; // Version bump type priority (higher number = higher priority) diff --git a/scripts/publish-to-npm.mjs b/scripts/publish-to-npm.mjs index 1d4b3d5..c9be12a 100644 --- a/scripts/publish-to-npm.mjs +++ b/scripts/publish-to-npm.mjs @@ -31,8 +31,7 @@ import { parseJsRootConfig, } from './js-paths.mjs'; -// TODO: Update this to match your package name in package.json -const PACKAGE_NAME = 'my-package'; +const PACKAGE_NAME = '@link-assistant/web-search'; // Load use-m dynamically const { use } = eval( diff --git a/scripts/validate-changeset.mjs b/scripts/validate-changeset.mjs index e1a86de..9c7f6d5 100644 --- a/scripts/validate-changeset.mjs +++ b/scripts/validate-changeset.mjs @@ -16,8 +16,7 @@ import { execSync } from 'child_process'; import { readFileSync, readdirSync, existsSync } from 'fs'; import { join } from 'path'; -// TODO: Update this to match your package name in package.json -const PACKAGE_NAME = 'my-package'; +const PACKAGE_NAME = '@link-assistant/web-search'; const CHANGESET_DIR = '.changeset'; /** diff --git a/src/index.d.ts b/src/index.d.ts index 658cd82..07e6040 100644 --- a/src/index.d.ts +++ b/src/index.d.ts @@ -1,27 +1,283 @@ /** - * Example module type definitions - * Replace this with your actual type definitions + * Web Search - Multi-provider web search aggregator + * TypeScript type definitions */ /** - * Adds two numbers 
- * @param a - First number - * @param b - Second number - * @returns Sum of a and b + * A single search result */ -export declare const add: (a: number, b: number) => number; +export interface SearchResult { + /** The title of the search result */ + title: string; + /** The URL of the search result */ + url: string; + /** The description/snippet of the search result */ + snippet: string; + /** The search provider that returned this result */ + source: string; + /** The rank position in the original results (1-based) */ + rank: number; + /** Computed score after merging (optional) */ + score?: number; + /** Sources that returned this result (after deduplication) */ + sources?: string[]; +} /** - * Multiplies two numbers - * @param a - First number - * @param b - Second number - * @returns Product of a and b + * Options for search queries */ -export declare const multiply: (a: number, b: number) => number; +export interface SearchOptions { + /** Maximum number of results to return per provider */ + limit?: number; + /** Language code (e.g., 'en', 'de') */ + language?: string; + /** Region code (e.g., 'us', 'de') */ + region?: string; + /** Enable safe search filtering */ + safeSearch?: boolean; + /** Providers to use for this search */ + providers?: string[]; + /** Merge strategy */ + strategy?: 'rrf' | 'weighted' | 'interleave'; + /** Provider weights */ + weights?: Record; +} /** - * Delays execution for specified milliseconds - * @param ms - Milliseconds to wait - * @returns Promise that resolves after the delay + * Options for merging search results */ -export declare const delay: (ms: number) => Promise; +export interface MergeOptions { + /** Merge strategy */ + strategy?: 'rrf' | 'weighted' | 'interleave'; + /** Provider weights */ + weights?: Record; + /** RRF k parameter (default: 60) */ + rrfK?: number; + /** Remove duplicate URLs (default: true) */ + removeDuplicates?: boolean; +} + +/** + * Provider status information + */ +export interface ProviderStatus 
{ + /** Whether the provider is enabled */ + enabled: boolean; + /** Provider weight for reranking */ + weight: number; + /** Whether the provider has API credentials */ + hasApi: boolean; +} + +/** + * Configuration for Google provider + */ +export interface GoogleConfig { + /** Google Custom Search API key */ + apiKey?: string; + /** Google Custom Search Engine ID */ + searchEngineId?: string; +} + +/** + * Configuration for Bing provider + */ +export interface BingConfig { + /** Bing Search API key */ + apiKey?: string; +} + +/** + * Configuration for WebSearchEngine + */ +export interface WebSearchConfig { + /** Default providers to use */ + providers?: string[]; + /** Google provider configuration */ + google?: GoogleConfig; + /** Bing provider configuration */ + bing?: BingConfig; + /** Default weights for providers */ + weights?: Record; + /** Default merge strategy */ + mergeStrategy?: 'rrf' | 'weighted' | 'interleave'; +} + +/** + * Base class for search providers + */ +export declare abstract class BaseSearchProvider { + /** Provider name */ + readonly name: string; + /** Whether the provider is enabled */ + enabled: boolean; + /** Provider weight for reranking */ + weight: number; + + constructor(name: string); + + /** + * Search for results using this provider + */ + abstract search( + query: string, + options?: SearchOptions + ): Promise; + + /** + * Check if the provider is available/configured + */ + isAvailable(): boolean; + + /** + * Get the provider name + */ + getName(): string; + + /** + * Get the provider weight + */ + getWeight(): number; + + /** + * Set the provider weight + */ + setWeight(weight: number): void; +} + +/** + * Google search provider + */ +export declare class GoogleProvider extends BaseSearchProvider { + constructor(config?: GoogleConfig); + hasApiCredentials(): boolean; + search(query: string, options?: SearchOptions): Promise; +} + +/** + * DuckDuckGo search provider + */ +export declare class DuckDuckGoProvider extends 
BaseSearchProvider { + constructor(); + search(query: string, options?: SearchOptions): Promise; +} + +/** + * Bing search provider + */ +export declare class BingProvider extends BaseSearchProvider { + constructor(config?: BingConfig); + hasApiCredentials(): boolean; + search(query: string, options?: SearchOptions): Promise; +} + +/** + * Browser-based search provider using browser-commander + */ +export declare class BrowserSearchProvider extends BaseSearchProvider { + constructor(config?: { + engine?: 'google' | 'duckduckgo' | 'bing'; + browserCommander?: unknown; + browserOptions?: Record; + }); + search(query: string, options?: SearchOptions): Promise; +} + +/** + * Web Search Engine - main class for multi-provider search + */ +export declare class WebSearchEngine { + constructor(config?: WebSearchConfig); + + /** + * Search across multiple providers + */ + search(query: string, options?: SearchOptions): Promise; + + /** + * Search with a single provider + */ + searchSingle( + query: string, + providerName: string, + options?: SearchOptions + ): Promise; + + /** + * Get available provider names + */ + getAvailableProviders(): string[]; + + /** + * Get provider status information + */ + getProviderStatus(): Record; + + /** + * Set provider weight + */ + setProviderWeight(name: string, weight: number): void; + + /** + * Enable or disable a provider + */ + setProviderEnabled(name: string, enabled: boolean): void; + + /** + * Get a provider by name + */ + getProvider(name: string): BaseSearchProvider | undefined; +} + +/** + * Create a default web search engine instance + */ +export declare function createSearchEngine( + config?: WebSearchConfig +): WebSearchEngine; + +/** + * Get list of available provider names + */ +export declare function getAvailableProviders(): string[]; + +/** + * Create a browser search provider + */ +export declare function createBrowserProvider(config?: { + engine?: 'google' | 'duckduckgo' | 'bing'; + browserCommander?: unknown; + 
browserOptions?: Record; +}): BrowserSearchProvider; + +/** + * Merge search results using the specified strategy + */ +export declare function mergeResults( + resultsByProvider: Record, + options?: MergeOptions +): SearchResult[]; + +/** + * Merge results using Reciprocal Rank Fusion + */ +export declare function mergeWithRRF( + resultsByProvider: Record, + options?: MergeOptions +): SearchResult[]; + +/** + * Merge results using weighted scoring + */ +export declare function mergeWithWeights( + resultsByProvider: Record, + options?: MergeOptions +): SearchResult[]; + +/** + * Merge results using interleaving (round-robin) + */ +export declare function mergeWithInterleave( + resultsByProvider: Record, + options?: MergeOptions +): SearchResult[]; diff --git a/src/index.js b/src/index.js index 22705fd..44964f7 100644 --- a/src/index.js +++ b/src/index.js @@ -1,28 +1,21 @@ /** - * Example module entry point - * Replace this with your actual implementation + * Web Search - Multi-provider web search aggregator + * + * A microservice that aggregates search results from multiple search engines + * (Google, DuckDuckGo, Bing) with support for result merging and reranking. 
*/ -/** - * Example function that adds two numbers - * @param {number} a - First number - * @param {number} b - Second number - * @returns {number} Sum of a and b - */ -export const add = (a, b) => a + b; - -/** - * Example function that multiplies two numbers - * @param {number} a - First number - * @param {number} b - Second number - * @returns {number} Product of a and b - */ -export const multiply = (a, b) => a * b; - -/** - * Example async function - * @param {number} ms - Milliseconds to wait - * @returns {Promise} - */ -export const delay = (ms) => - new Promise((resolve) => globalThis.setTimeout(resolve, ms)); +export { WebSearchEngine, createSearchEngine } from './search.js'; +export { + BaseSearchProvider, + GoogleProvider, + DuckDuckGoProvider, + BingProvider, + getAvailableProviders, +} from './providers/index.js'; +export { + mergeResults, + mergeWithRRF, + mergeWithWeights, + mergeWithInterleave, +} from './merger.js'; diff --git a/src/merger.js b/src/merger.js new file mode 100644 index 0000000..a6c6e0b --- /dev/null +++ b/src/merger.js @@ -0,0 +1,204 @@ +/** + * Search result merger with reranking support + * Combines results from multiple search providers and reranks them + */ + +/** + * @typedef {import('./providers/base.js').SearchResult} SearchResult + */ + +/** + * @typedef {Object} MergeOptions + * @property {'rrf' | 'weighted' | 'interleave'} [strategy] - Merge strategy + * @property {Object} [weights] - Weight for each provider + * @property {number} [rrfK] - RRF k parameter (default: 60) + * @property {boolean} [removeDuplicates] - Remove duplicate URLs (default: true) + */ + +/** + * Reciprocal Rank Fusion (RRF) score calculation + * @param {number} rank - The rank position (1-based) + * @param {number} k - The k parameter (default: 60) + * @returns {number} + */ +function rrfScore(rank, k = 60) { + return 1 / (k + rank); +} + +/** + * Normalize URL for deduplication + * @param {string} url - The URL to normalize + * @returns {string} + */ 
+function normalizeUrl(url) { + try { + const parsed = new URL(url); + let normalized = `${parsed.hostname}${parsed.pathname}`; + normalized = normalized.replace(/\/+$/, ''); + normalized = normalized.toLowerCase(); + return normalized; + } catch { + return url.toLowerCase(); + } +} + +/** + * Merge and rerank search results using Reciprocal Rank Fusion + * @param {Object} resultsByProvider - Results grouped by provider + * @param {MergeOptions} [options] + * @returns {SearchResult[]} + */ +export function mergeWithRRF(resultsByProvider, options = {}) { + const k = options.rrfK || 60; + const weights = options.weights || {}; + const removeDuplicates = options.removeDuplicates !== false; + + const scoresByUrl = new Map(); + const resultByUrl = new Map(); + + for (const [provider, results] of Object.entries(resultsByProvider)) { + const providerWeight = weights[provider] || 1.0; + + for (const result of results) { + const normalizedUrl = normalizeUrl(result.url); + const score = rrfScore(result.rank, k) * providerWeight; + + if (scoresByUrl.has(normalizedUrl)) { + scoresByUrl.set(normalizedUrl, scoresByUrl.get(normalizedUrl) + score); + const existing = resultByUrl.get(normalizedUrl); + if (!existing.sources) { + existing.sources = [existing.source]; + } + if (!existing.sources.includes(result.source)) { + existing.sources.push(result.source); + } + } else { + scoresByUrl.set(normalizedUrl, score); + resultByUrl.set(normalizedUrl, { ...result }); + } + } + } + + const merged = Array.from(scoresByUrl.entries()) + .sort((a, b) => b[1] - a[1]) + .map(([url, score], index) => ({ + ...resultByUrl.get(url), + score, + rank: index + 1, + })); + + return removeDuplicates + ? 
merged + : Array.from(resultByUrl.values()).map((r, i) => ({ ...r, rank: i + 1 })); +} + +/** + * Merge and rerank search results using weighted scoring + * @param {Object} resultsByProvider - Results grouped by provider + * @param {MergeOptions} [options] + * @returns {SearchResult[]} + */ +export function mergeWithWeights(resultsByProvider, options = {}) { + const weights = options.weights || {}; + const maxRank = 100; + const removeDuplicates = options.removeDuplicates !== false; + + const scoresByUrl = new Map(); + const resultByUrl = new Map(); + + for (const [provider, results] of Object.entries(resultsByProvider)) { + const providerWeight = weights[provider] || 1.0; + + for (const result of results) { + const normalizedUrl = normalizeUrl(result.url); + const score = ((maxRank - result.rank + 1) / maxRank) * providerWeight; + + if (scoresByUrl.has(normalizedUrl)) { + scoresByUrl.set(normalizedUrl, scoresByUrl.get(normalizedUrl) + score); + const existing = resultByUrl.get(normalizedUrl); + if (!existing.sources) { + existing.sources = [existing.source]; + } + if (!existing.sources.includes(result.source)) { + existing.sources.push(result.source); + } + } else { + scoresByUrl.set(normalizedUrl, score); + resultByUrl.set(normalizedUrl, { ...result }); + } + } + } + + const merged = Array.from(scoresByUrl.entries()) + .sort((a, b) => b[1] - a[1]) + .map(([url, score], index) => ({ + ...resultByUrl.get(url), + score, + rank: index + 1, + })); + + return removeDuplicates + ? 
merged + : Array.from(resultByUrl.values()).map((r, i) => ({ ...r, rank: i + 1 })); +} + +/** + * Merge results by interleaving (round-robin) + * @param {Object} resultsByProvider - Results grouped by provider + * @param {MergeOptions} [options] + * @returns {SearchResult[]} + */ +export function mergeWithInterleave(resultsByProvider, options = {}) { + const removeDuplicates = options.removeDuplicates !== false; + const results = []; + const seenUrls = new Set(); + + const providers = Object.entries(resultsByProvider); + const indices = providers.map(() => 0); + const maxLength = Math.max(...providers.map(([, r]) => r.length)); + + for (let i = 0; i < maxLength; i++) { + for (let j = 0; j < providers.length; j++) { + const [, providerResults] = providers[j]; + if (indices[j] < providerResults.length) { + const result = providerResults[indices[j]]; + indices[j]++; + + if (removeDuplicates) { + const normalizedUrl = normalizeUrl(result.url); + if (seenUrls.has(normalizedUrl)) { + continue; + } + seenUrls.add(normalizedUrl); + } + + results.push({ + ...result, + rank: results.length + 1, + }); + } + } + } + + return results; +} + +/** + * Merge search results using the specified strategy + * @param {Object} resultsByProvider - Results grouped by provider + * @param {MergeOptions} [options] + * @returns {SearchResult[]} + */ +export function mergeResults(resultsByProvider, options = {}) { + const strategy = options.strategy || 'rrf'; + + switch (strategy) { + case 'weighted': + return mergeWithWeights(resultsByProvider, options); + case 'interleave': + return mergeWithInterleave(resultsByProvider, options); + case 'rrf': + default: + return mergeWithRRF(resultsByProvider, options); + } +} diff --git a/src/providers/base.js b/src/providers/base.js new file mode 100644 index 0000000..1958092 --- /dev/null +++ b/src/providers/base.js @@ -0,0 +1,83 @@ +/** + * Base search provider class + * All search providers must implement this interface + */ + +/** + * @typedef 
{Object} SearchResult + * @property {string} title - The title of the search result + * @property {string} url - The URL of the search result + * @property {string} snippet - The description/snippet of the search result + * @property {string} source - The search provider that returned this result + * @property {number} [rank] - The rank position in the original results + */ + +/** + * @typedef {Object} SearchOptions + * @property {number} [limit] - Maximum number of results to return + * @property {string} [language] - Language code (e.g., 'en', 'de') + * @property {string} [region] - Region code (e.g., 'us', 'de') + * @property {boolean} [safeSearch] - Enable safe search filtering + */ + +/** + * Base class for search providers + * @abstract + */ +export class BaseSearchProvider { + /** + * @param {string} name - The name of the search provider + */ + constructor(name) { + if (new.target === BaseSearchProvider) { + throw new Error('BaseSearchProvider is an abstract class'); + } + this.name = name; + this.enabled = true; + this.weight = 1.0; + } + + /** + * Search for results using this provider + * @abstract + * @param {string} query - The search query + * @param {SearchOptions} [options] - Search options + * @returns {Promise} - Array of search results + */ + // eslint-disable-next-line require-await -- Abstract method, implementations will use await + async search() { + throw new Error('search() must be implemented by subclass'); + } + + /** + * Check if the provider is available/configured + * @returns {boolean} + */ + isAvailable() { + return this.enabled; + } + + /** + * Get the provider name + * @returns {string} + */ + getName() { + return this.name; + } + + /** + * Get the provider weight for reranking + * @returns {number} + */ + getWeight() { + return this.weight; + } + + /** + * Set the provider weight for reranking + * @param {number} weight - Weight value (0.0 to 1.0) + */ + setWeight(weight) { + this.weight = Math.max(0, Math.min(1, weight)); + } +} 
diff --git a/src/providers/bing.js b/src/providers/bing.js new file mode 100644 index 0000000..dd3eb0f --- /dev/null +++ b/src/providers/bing.js @@ -0,0 +1,251 @@ +/** + * Bing search provider + * Uses Bing Web Search API when API key is available + * Falls back to web scraping for basic functionality + */ + +import { BaseSearchProvider } from './base.js'; + +/** + * Bing search provider implementation + * @extends BaseSearchProvider + */ +export class BingProvider extends BaseSearchProvider { + /** + * @param {Object} [config] + * @param {string} [config.apiKey] - Bing Search API key + */ + constructor(config = {}) { + super('bing'); + this.apiKey = config.apiKey || process.env.BING_API_KEY; + this.apiUrl = 'https://api.bing.microsoft.com/v7.0/search'; + this.webUrl = 'https://www.bing.com/search'; + } + + /** + * Check if API credentials are configured + * @returns {boolean} + */ + hasApiCredentials() { + return Boolean(this.apiKey); + } + + /** + * Search Bing for results + * @param {string} query - The search query + * @param {import('./base.js').SearchOptions} [options] - Search options + * @returns {Promise} + */ + async search(query, options = {}) { + if (!query || typeof query !== 'string') { + return []; + } + + if (this.hasApiCredentials()) { + return await this.searchWithApi(query, options); + } + + return await this.searchWithScraping(query, options); + } + + /** + * Search using Bing Web Search API + * @param {string} query + * @param {import('./base.js').SearchOptions} options + * @returns {Promise} + */ + async searchWithApi(query, options) { + const limit = Math.min(options.limit || 10, 50); + + try { + const params = new URLSearchParams({ + q: query, + count: String(limit), + responseFilter: 'Webpages', + }); + + if (options.region) { + params.set( + 'mkt', + `${options.language || 'en'}-${options.region.toUpperCase()}` + ); + } + + if (options.safeSearch === true) { + params.set('safeSearch', 'Strict'); + } else if (options.safeSearch === false) { 
+ params.set('safeSearch', 'Off'); + } else { + params.set('safeSearch', 'Moderate'); + } + + const response = await fetch(`${this.apiUrl}?${params}`, { + headers: { + 'Ocp-Apim-Subscription-Key': this.apiKey, + }, + }); + + if (!response.ok) { + const error = await response.text(); + throw new Error(`Bing API error: ${response.status} - ${error}`); + } + + const data = await response.json(); + return this.parseApiResults(data); + } catch (error) { + console.error(`Bing API search error: ${error.message}`); + return this.searchWithScraping(query, options); + } + } + + /** + * Parse Bing Web Search API response + * @param {Object} data - API response data + * @returns {import('./base.js').SearchResult[]} + */ + parseApiResults(data) { + if (!data.webPages || !data.webPages.value) { + return []; + } + + return data.webPages.value.map((item, index) => ({ + title: item.name || 'Untitled', + url: item.url, + snippet: item.snippet || '', + source: this.name, + rank: index + 1, + })); + } + + /** + * Search using web scraping (fallback) + * @param {string} query + * @param {import('./base.js').SearchOptions} options + * @returns {Promise} + */ + async searchWithScraping(query, options) { + const limit = options.limit || 10; + + try { + const params = new URLSearchParams({ + q: query, + count: String(Math.min(limit, 30)), + }); + + if (options.region) { + params.set('cc', options.region.toUpperCase()); + } + + if (options.safeSearch === true) { + params.set('safeSearch', 'Strict'); + } else if (options.safeSearch === false) { + params.set('safeSearch', 'Off'); + } + + const response = await fetch(`${this.webUrl}?${params}`, { + headers: { + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + Accept: + 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + }, + }); + + if (!response.ok) { + throw new Error(`Bing returned status 
${response.status}`); + } + + const html = await response.text(); + return this.parseScrapedResults(html, limit); + } catch (error) { + console.error(`Bing scraping search error: ${error.message}`); + return []; + } + } + + /** + * Parse scraped Bing HTML results + * @param {string} html - The HTML response + * @param {number} limit - Maximum number of results + * @returns {import('./base.js').SearchResult[]} + */ + parseScrapedResults(html, limit) { + const results = []; + + const resultPattern = + /]+class="b_algo"[^>]*>.*?]+href="([^"]+)"[^>]*>([^<]*(?:<[^>]+>[^<]*)*)<\/a>.*?]*>([^<]*(?:<[^>]+>[^<]*)*)<\/p>/gs; + + let match; + while ( + (match = resultPattern.exec(html)) !== null && + results.length < limit + ) { + const url = match[1]; + const title = this.stripHtml(match[2]); + const snippet = this.stripHtml(match[3]); + + if (url.includes('bing.com') || url.startsWith('/')) { + continue; + } + + results.push({ + title: this.decodeHtmlEntities(title) || 'Untitled', + url, + snippet: this.decodeHtmlEntities(snippet) || '', + source: this.name, + rank: results.length + 1, + }); + } + + if (results.length === 0) { + const simplePattern = + /]+href="(https?:\/\/[^"]+)"[^>]*>.*?]*>([^<]+)<\/h2>/gs; + while ( + (match = simplePattern.exec(html)) !== null && + results.length < limit + ) { + const url = match[1]; + const title = match[2]; + + if (url.includes('bing.com') || url.includes('microsoft.com/maps')) { + continue; + } + + results.push({ + title: this.decodeHtmlEntities(title) || 'Untitled', + url, + snippet: '', + source: this.name, + rank: results.length + 1, + }); + } + } + + return results; + } + + /** + * Strip HTML tags from string + * @param {string} html + * @returns {string} + */ + stripHtml(html) { + return html.replace(/<[^>]+>/g, '').trim(); + } + + /** + * Decode HTML entities + * @param {string} text + * @returns {string} + */ + decodeHtmlEntities(text) { + return text + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + 
.replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/ /g, ' '); + } +} diff --git a/src/providers/browser.js b/src/providers/browser.js new file mode 100644 index 0000000..1b185cf --- /dev/null +++ b/src/providers/browser.js @@ -0,0 +1,204 @@ +/** + * Browser-based search provider + * Uses browser-commander for direct browser search testing + * This is primarily used for testing and verification purposes + */ + +import { BaseSearchProvider } from './base.js'; + +/** + * Browser search provider using browser-commander + * Performs actual browser searches for testing and verification + * @extends BaseSearchProvider + */ +export class BrowserSearchProvider extends BaseSearchProvider { + /** + * @param {Object} [config] + * @param {string} [config.engine] - Search engine to use ('google', 'duckduckgo', 'bing') + * @param {Object} [config.browserCommander] - Browser-commander instance (optional) + */ + constructor(config = {}) { + super('browser'); + this.targetEngine = config.engine || 'duckduckgo'; + this.browserCommander = config.browserCommander; + this.browserOptions = config.browserOptions || {}; + } + + /** + * Get the search URL for the target engine + * @param {string} query - Search query + * @param {string} engine - Target search engine + * @returns {string} - Search URL + */ + getSearchUrl(query, engine) { + const encodedQuery = encodeURIComponent(query); + const urls = { + google: `https://www.google.com/search?q=${encodedQuery}`, + duckduckgo: `https://duckduckgo.com/?q=${encodedQuery}`, + bing: `https://www.bing.com/search?q=${encodedQuery}`, + }; + return urls[engine] || urls.duckduckgo; + } + + /** + * Parse search results from the page using the browser + * @param {Object} page - Browser page object + * @param {string} engine - Target search engine + * @param {number} limit - Max results + * @returns {Promise} + */ + async parseResultsFromPage(page, engine, limit) { + const selectors = { + google: { + container: '.g', + titleSelector: 'h3', + 
linkSelector: 'a', + snippetSelector: '.VwiC3b, [data-snf], .s3v9rd', + }, + duckduckgo: { + container: '.result, .react-results--main article', + titleSelector: 'h2 a, a[data-testid="result-title-a"]', + linkSelector: 'h2 a, a[data-testid="result-title-a"]', + snippetSelector: '.result__snippet, [data-result="snippet"]', + }, + bing: { + container: '.b_algo', + titleSelector: 'h2 a', + linkSelector: 'h2 a', + snippetSelector: 'p', + }, + }; + + const sel = selectors[engine] || selectors.duckduckgo; + + const results = await page.evaluate( + (containerSel, titleSel, linkSel, snippetSel, maxResults) => { + const containers = document.querySelectorAll(containerSel); + const parsed = []; + + for (const container of containers) { + if (parsed.length >= maxResults) { + break; + } + + const titleEl = container.querySelector(titleSel); + const linkEl = container.querySelector(linkSel); + const snippetEl = container.querySelector(snippetSel); + + if (!linkEl || !linkEl.href) { + continue; + } + + const url = linkEl.href; + if ( + url.includes('google.com/search') || + url.includes('bing.com/search') || + url.includes('duckduckgo.com/?q') + ) { + continue; + } + + parsed.push({ + title: titleEl?.textContent?.trim() || 'Untitled', + url, + snippet: snippetEl?.textContent?.trim() || '', + }); + } + + return parsed; + }, + sel.container, + sel.titleSelector, + sel.linkSelector, + sel.snippetSelector, + limit + ); + + return results.map((r, i) => ({ + ...r, + source: `browser-${engine}`, + rank: i + 1, + })); + } + + /** + * Search using browser automation + * @param {string} query - The search query + * @param {import('./base.js').SearchOptions} [options] - Search options + * @returns {Promise} + */ + async search(query, options = {}) { + if (!query || typeof query !== 'string') { + return []; + } + + if (!this.browserCommander) { + console.warn( + 'BrowserSearchProvider: browser-commander not configured, returning empty results' + ); + return []; + } + + const limit = 
options.limit || 10; + const engine = this.targetEngine; + + try { + const searchUrl = this.getSearchUrl(query, engine); + + const results = await this.browserCommander.runAction( + async (page, signal) => { + await page.goto(searchUrl, { + waitUntil: 'networkidle0', + timeout: 30000, + }); + + if (signal.aborted) { + return []; + } + + await new Promise((resolve) => setTimeout(resolve, 2000)); + + return this.parseResultsFromPage(page, engine, limit); + } + ); + + return results; + } catch (error) { + console.error(`Browser search error (${engine}): ${error.message}`); + return []; + } + } + + /** + * Create a browser commander instance with Puppeteer + * @param {Object} [puppeteerOptions] - Puppeteer launch options + * @returns {Promise} - Browser commander instance + */ + static async createWithPuppeteer(puppeteerOptions = {}) { + try { + const browserCommander = await import('browser-commander'); + const puppeteer = await import('puppeteer'); + + const browser = await puppeteer.default.launch({ + headless: true, + args: ['--no-sandbox', '--disable-setuid-sandbox'], + ...puppeteerOptions, + }); + + const commander = new browserCommander.BrowserCommander({ browser }); + return commander; + } catch (error) { + console.error('Failed to create browser commander:', error.message); + return null; + } + } +} + +/** + * Create a browser search provider with browser-commander + * @param {Object} [config] - Configuration options + * @returns {BrowserSearchProvider} + */ +export function createBrowserProvider(config = {}) { + return new BrowserSearchProvider(config); +} diff --git a/src/providers/duckduckgo.js b/src/providers/duckduckgo.js new file mode 100644 index 0000000..471850f --- /dev/null +++ b/src/providers/duckduckgo.js @@ -0,0 +1,138 @@ +/** + * DuckDuckGo search provider + * Uses DuckDuckGo's HTML search page since they don't have a public API + */ + +import { BaseSearchProvider } from './base.js'; + +/** + * DuckDuckGo search provider implementation + * 
@extends BaseSearchProvider + */ +export class DuckDuckGoProvider extends BaseSearchProvider { + constructor() { + super('duckduckgo'); + this.baseUrl = 'https://html.duckduckgo.com/html/'; + } + + /** + * Search DuckDuckGo for results + * @param {string} query - The search query + * @param {import('./base.js').SearchOptions} [options] - Search options + * @returns {Promise} + */ + async search(query, options = {}) { + if (!query || typeof query !== 'string') { + return []; + } + + const limit = options.limit || 10; + + try { + const params = new URLSearchParams({ + q: query, + kl: options.region || 'wt-wt', + }); + + if (options.safeSearch === false) { + params.set('kp', '-2'); + } else if (options.safeSearch === true) { + params.set('kp', '1'); + } + + const response = await fetch(this.baseUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + }, + body: params.toString(), + }); + + if (!response.ok) { + throw new Error(`DuckDuckGo returned status ${response.status}`); + } + + const html = await response.text(); + return this.parseResults(html, limit); + } catch (error) { + console.error(`DuckDuckGo search error: ${error.message}`); + return []; + } + } + + /** + * Parse DuckDuckGo HTML response to extract search results + * @param {string} html - The HTML response + * @param {number} limit - Maximum number of results + * @returns {import('./base.js').SearchResult[]} + */ + parseResults(html, limit) { + const results = []; + + const resultPattern = + /]+class="result__a"[^>]*href="([^"]+)"[^>]*>([^<]+)<\/a>/g; + const snippetPattern = + /]+class="result__snippet"[^>]*>([^<]*(?:<[^>]+>[^<]*)*)<\/a>/g; + + const urls = []; + const titles = []; + const snippets = []; + + let match; + while ((match = resultPattern.exec(html)) !== null) { + urls.push(decodeURIComponent(match[1])); + 
titles.push(this.decodeHtmlEntities(match[2].trim())); + } + + while ((match = snippetPattern.exec(html)) !== null) { + snippets.push(this.decodeHtmlEntities(this.stripHtml(match[1]))); + } + + for (let i = 0; i < Math.min(urls.length, limit); i++) { + const url = urls[i]; + if ( + !url || + url.startsWith('//duckduckgo.com') || + url.includes('ad_provider') + ) { + continue; + } + + results.push({ + title: titles[i] || 'Untitled', + url, + snippet: snippets[i] || '', + source: this.name, + rank: i + 1, + }); + } + + return results; + } + + /** + * Strip HTML tags from string + * @param {string} html + * @returns {string} + */ + stripHtml(html) { + return html.replace(/<[^>]+>/g, '').trim(); + } + + /** + * Decode HTML entities + * @param {string} text + * @returns {string} + */ + decodeHtmlEntities(text) { + return text + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/ /g, ' '); + } +} diff --git a/src/providers/google.js b/src/providers/google.js new file mode 100644 index 0000000..b19d659 --- /dev/null +++ b/src/providers/google.js @@ -0,0 +1,226 @@ +/** + * Google search provider + * Uses Google Custom Search JSON API when API key is available + * Falls back to web scraping for basic functionality + */ + +import { BaseSearchProvider } from './base.js'; + +/** + * Google search provider implementation + * @extends BaseSearchProvider + */ +export class GoogleProvider extends BaseSearchProvider { + /** + * @param {Object} [config] + * @param {string} [config.apiKey] - Google Custom Search API key + * @param {string} [config.searchEngineId] - Google Custom Search Engine ID (cx) + */ + constructor(config = {}) { + super('google'); + this.apiKey = config.apiKey || process.env.GOOGLE_API_KEY; + this.searchEngineId = config.searchEngineId || process.env.GOOGLE_CX; + this.apiUrl = 'https://www.googleapis.com/customsearch/v1'; + this.webUrl = 'https://www.google.com/search'; + } + + /** + * Check if 
API credentials are configured + * @returns {boolean} + */ + hasApiCredentials() { + return Boolean(this.apiKey && this.searchEngineId); + } + + /** + * Search Google for results + * @param {string} query - The search query + * @param {import('./base.js').SearchOptions} [options] - Search options + * @returns {Promise} + */ + async search(query, options = {}) { + if (!query || typeof query !== 'string') { + return []; + } + + if (this.hasApiCredentials()) { + return await this.searchWithApi(query, options); + } + + return await this.searchWithScraping(query, options); + } + + /** + * Search using Google Custom Search API + * @param {string} query + * @param {import('./base.js').SearchOptions} options + * @returns {Promise} + */ + async searchWithApi(query, options) { + const limit = Math.min(options.limit || 10, 10); + + try { + const params = new URLSearchParams({ + key: this.apiKey, + cx: this.searchEngineId, + q: query, + num: String(limit), + }); + + if (options.language) { + params.set('lr', `lang_${options.language}`); + } + + if (options.region) { + params.set('gl', options.region); + } + + if (options.safeSearch === true) { + params.set('safe', 'active'); + } else if (options.safeSearch === false) { + params.set('safe', 'off'); + } + + const response = await fetch(`${this.apiUrl}?${params}`); + + if (!response.ok) { + const error = await response.text(); + throw new Error(`Google API error: ${response.status} - ${error}`); + } + + const data = await response.json(); + return this.parseApiResults(data); + } catch (error) { + console.error(`Google API search error: ${error.message}`); + return this.searchWithScraping(query, options); + } + } + + /** + * Parse Google Custom Search API response + * @param {Object} data - API response data + * @returns {import('./base.js').SearchResult[]} + */ + parseApiResults(data) { + if (!data.items || !Array.isArray(data.items)) { + return []; + } + + return data.items.map((item, index) => ({ + title: item.title || 
'Untitled', + url: item.link, + snippet: item.snippet || '', + source: this.name, + rank: index + 1, + })); + } + + /** + * Search using web scraping (fallback) + * @param {string} query + * @param {import('./base.js').SearchOptions} options + * @returns {Promise} + */ + async searchWithScraping(query, options) { + const limit = options.limit || 10; + + try { + const params = new URLSearchParams({ + q: query, + num: String(Math.min(limit, 20)), + }); + + if (options.language) { + params.set('hl', options.language); + } + + if (options.region) { + params.set('gl', options.region); + } + + if (options.safeSearch === true) { + params.set('safe', 'active'); + } + + const response = await fetch(`${this.webUrl}?${params}`, { + headers: { + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + Accept: + 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + }, + }); + + if (!response.ok) { + throw new Error(`Google returned status ${response.status}`); + } + + const html = await response.text(); + return this.parseScrapedResults(html, limit); + } catch (error) { + console.error(`Google scraping search error: ${error.message}`); + return []; + } + } + + /** + * Parse scraped Google HTML results + * @param {string} html - The HTML response + * @param {number} limit - Maximum number of results + * @returns {import('./base.js').SearchResult[]} + */ + parseScrapedResults(html, limit) { + const results = []; + + const resultPattern = + /]+href="\/url\?q=([^&"]+)[^"]*"[^>]*>.*?]*>([^<]+)<\/h3>/gs; + const alternativePattern = + /]+href="(https?:\/\/[^"]+)"[^>]*>.*?]*>([^<]+)<\/h3>/gs; + + let match; + const seen = new Set(); + + for (const pattern of [resultPattern, alternativePattern]) { + while ((match = pattern.exec(html)) !== null && results.length < limit) { + const url = decodeURIComponent(match[1]).split('&')[0]; + const title = 
this.decodeHtmlEntities(match[2].trim()); + + if ( + seen.has(url) || + url.includes('google.com') || + url.startsWith('/search') + ) { + continue; + } + + seen.add(url); + + results.push({ + title: title || 'Untitled', + url, + snippet: '', + source: this.name, + rank: results.length + 1, + }); + } + } + + return results; + } + + /** + * Decode HTML entities + * @param {string} text + * @returns {string} + */ + decodeHtmlEntities(text) { + return text + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/ /g, ' '); + } +} diff --git a/src/providers/index.js b/src/providers/index.js new file mode 100644 index 0000000..9afd840 --- /dev/null +++ b/src/providers/index.js @@ -0,0 +1,18 @@ +/** + * Search providers module + * Exports all available search providers + */ + +export { BaseSearchProvider } from './base.js'; +export { GoogleProvider } from './google.js'; +export { DuckDuckGoProvider } from './duckduckgo.js'; +export { BingProvider } from './bing.js'; +export { BrowserSearchProvider, createBrowserProvider } from './browser.js'; + +/** + * Get list of available provider names + * @returns {string[]} + */ +export function getAvailableProviders() { + return ['google', 'duckduckgo', 'bing']; +} diff --git a/src/search.js b/src/search.js new file mode 100644 index 0000000..30883c5 --- /dev/null +++ b/src/search.js @@ -0,0 +1,225 @@ +/** + * Web Search Engine + * Main class for performing multi-provider web searches with merging and reranking + */ + +import { GoogleProvider } from './providers/google.js'; +import { DuckDuckGoProvider } from './providers/duckduckgo.js'; +import { BingProvider } from './providers/bing.js'; +import { mergeResults } from './merger.js'; + +/** + * @typedef {import('./providers/base.js').SearchResult} SearchResult + * @typedef {import('./providers/base.js').SearchOptions} SearchOptions + * @typedef {import('./merger.js').MergeOptions} MergeOptions + */ + +/** + * @typedef 
{Object} WebSearchConfig + * @property {string[]} [providers] - List of providers to use + * @property {Object} [google] - Google provider configuration + * @property {string} [google.apiKey] - Google API key + * @property {string} [google.searchEngineId] - Google Custom Search Engine ID + * @property {Object} [bing] - Bing provider configuration + * @property {string} [bing.apiKey] - Bing API key + * @property {Object} [weights] - Weights for each provider + * @property {'rrf' | 'weighted' | 'interleave'} [mergeStrategy] - Default merge strategy + */ + +/** + * @typedef {Object} SearchEngineOptions + * @property {SearchOptions & MergeOptions} options + * @property {string[]} [providers] - Override providers list + */ + +/** + * Web Search Engine class + */ +export class WebSearchEngine { + /** + * @param {WebSearchConfig} [config] + */ + constructor(config = {}) { + this.providers = new Map(); + this.defaultProviders = config.providers || [ + 'duckduckgo', + 'google', + 'bing', + ]; + this.defaultWeights = config.weights || {}; + this.defaultMergeStrategy = config.mergeStrategy || 'rrf'; + + this.initializeProviders(config); + } + + /** + * Initialize search providers based on configuration + * @param {WebSearchConfig} config + */ + initializeProviders(config) { + this.providers.set('google', new GoogleProvider(config.google)); + this.providers.set('duckduckgo', new DuckDuckGoProvider()); + this.providers.set('bing', new BingProvider(config.bing)); + + for (const [name, weight] of Object.entries(this.defaultWeights)) { + const provider = this.providers.get(name); + if (provider) { + provider.setWeight(weight); + } + } + } + + /** + * Get a provider by name + * @param {string} name + * @returns {import('./providers/base.js').BaseSearchProvider|undefined} + */ + getProvider(name) { + return this.providers.get(name.toLowerCase()); + } + + /** + * Set provider weight + * @param {string} name - Provider name + * @param {number} weight - Weight value (0.0 to 1.0) + */ + 
setProviderWeight(name, weight) { + const provider = this.providers.get(name.toLowerCase()); + if (provider) { + provider.setWeight(weight); + } + } + + /** + * Enable or disable a provider + * @param {string} name - Provider name + * @param {boolean} enabled - Whether to enable the provider + */ + setProviderEnabled(name, enabled) { + const provider = this.providers.get(name.toLowerCase()); + if (provider) { + provider.enabled = enabled; + } + } + + /** + * Perform a search across multiple providers + * @param {string} query - Search query + * @param {Object} [options] - Search and merge options + * @param {number} [options.limit] - Maximum results per provider + * @param {string} [options.language] - Language code + * @param {string} [options.region] - Region code + * @param {boolean} [options.safeSearch] - Enable safe search + * @param {string[]} [options.providers] - Providers to use + * @param {'rrf' | 'weighted' | 'interleave'} [options.strategy] - Merge strategy + * @param {Object} [options.weights] - Provider weights + * @returns {Promise} + */ + async search(query, options = {}) { + if (!query || typeof query !== 'string' || query.trim().length === 0) { + return []; + } + + const providersToUse = options.providers || this.defaultProviders; + const weights = options.weights || this.defaultWeights; + const strategy = options.strategy || this.defaultMergeStrategy; + + const searchPromises = []; + const providerNames = []; + + for (const name of providersToUse) { + const provider = this.providers.get(name.toLowerCase()); + if (provider && provider.isAvailable()) { + providerNames.push(name); + searchPromises.push( + provider.search(query, { + limit: options.limit, + language: options.language, + region: options.region, + safeSearch: options.safeSearch, + }) + ); + } + } + + const results = await Promise.allSettled(searchPromises); + const resultsByProvider = {}; + + for (let i = 0; i < results.length; i++) { + const result = results[i]; + const providerName = 
providerNames[i]; + + if (result.status === 'fulfilled' && Array.isArray(result.value)) { + resultsByProvider[providerName] = result.value; + } else { + console.error( + `Provider ${providerName} failed:`, + result.status === 'rejected' ? result.reason : 'Invalid result' + ); + resultsByProvider[providerName] = []; + } + } + + return mergeResults(resultsByProvider, { + strategy, + weights, + removeDuplicates: true, + }); + } + + /** + * Search with a single provider only + * @param {string} query - Search query + * @param {string} providerName - Provider name + * @param {SearchOptions} [options] - Search options + * @returns {Promise} + */ + async searchSingle(query, providerName, options = {}) { + const provider = this.providers.get(providerName.toLowerCase()); + if (!provider) { + throw new Error(`Unknown provider: ${providerName}`); + } + + if (!provider.isAvailable()) { + throw new Error(`Provider ${providerName} is not available`); + } + + return await provider.search(query, options); + } + + /** + * Get list of available provider names + * @returns {string[]} + */ + getAvailableProviders() { + return Array.from(this.providers.keys()); + } + + /** + * Get provider status information + * @returns {Object} + */ + getProviderStatus() { + const status = {}; + for (const [name, provider] of this.providers) { + status[name] = { + enabled: provider.enabled, + weight: provider.getWeight(), + hasApi: + typeof provider.hasApiCredentials === 'function' + ? 
provider.hasApiCredentials() + : false, + }; + } + return status; + } +} + +/** + * Create a default web search engine instance + * @param {WebSearchConfig} [config] + * @returns {WebSearchEngine} + */ +export function createSearchEngine(config) { + return new WebSearchEngine(config); +} diff --git a/src/server.js b/src/server.js new file mode 100644 index 0000000..0fc82cc --- /dev/null +++ b/src/server.js @@ -0,0 +1,268 @@ +/** + * Web Search REST API Server + * Express.js microservice for web search aggregation + */ + +import express from 'express'; +import { fileURLToPath } from 'url'; +import { WebSearchEngine } from './search.js'; + +const app = express(); +const port = process.env.PORT || 3000; + +app.use(express.json()); + +const searchEngine = new WebSearchEngine({ + providers: ['duckduckgo', 'google', 'bing'], + google: { + apiKey: process.env.GOOGLE_API_KEY, + searchEngineId: process.env.GOOGLE_CX, + }, + bing: { + apiKey: process.env.BING_API_KEY, + }, +}); + +/** + * Health check endpoint + * GET /health + */ +app.get('/health', (req, res) => { + res.json({ + status: 'healthy', + providers: searchEngine.getProviderStatus(), + }); +}); + +/** + * Main search endpoint + * GET /search?q=&providers=&limit=&strategy= + */ +app.get('/search', async (req, res) => { + const { q, query } = req.query; + const searchQuery = q || query; + + if (!searchQuery) { + return res.status(400).json({ + error: 'Missing required parameter: q or query', + }); + } + + const options = { + limit: parseInt(req.query.limit, 10) || 10, + language: req.query.language || req.query.lang, + region: req.query.region, + safeSearch: + req.query.safeSearch === 'true' || + req.query.safe === 'true' || + req.query.safeSearch === '1', + strategy: req.query.strategy || 'rrf', + }; + + if (req.query.providers) { + options.providers = req.query.providers.split(',').map((p) => p.trim()); + } + + if (req.query.weights) { + try { + options.weights = JSON.parse(req.query.weights); + } catch { + 
return res.status(400).json({ + error: 'Invalid weights parameter: must be valid JSON', + }); + } + } + + try { + const results = await searchEngine.search(searchQuery, options); + res.json({ + query: searchQuery, + count: results.length, + options: { + providers: options.providers || searchEngine.getAvailableProviders(), + strategy: options.strategy, + limit: options.limit, + }, + results, + }); + } catch (error) { + console.error('Search error:', error); + res.status(500).json({ + error: 'Search failed', + message: error.message, + }); + } +}); + +/** + * POST /search - Search with JSON body + */ +app.post('/search', async (req, res) => { + const { + query, + q, + providers, + limit, + language, + region, + safeSearch, + strategy, + weights, + } = req.body; + const searchQuery = query || q; + + if (!searchQuery) { + return res.status(400).json({ + error: 'Missing required parameter: query or q', + }); + } + + const options = { + providers, + limit: limit || 10, + language, + region, + safeSearch, + strategy: strategy || 'rrf', + weights, + }; + + try { + const results = await searchEngine.search(searchQuery, options); + res.json({ + query: searchQuery, + count: results.length, + options: { + providers: options.providers || searchEngine.getAvailableProviders(), + strategy: options.strategy, + limit: options.limit, + }, + results, + }); + } catch (error) { + console.error('Search error:', error); + res.status(500).json({ + error: 'Search failed', + message: error.message, + }); + } +}); + +/** + * Search with single provider + * GET /search/:provider?q= + */ +app.get('/search/:provider', async (req, res) => { + const { provider } = req.params; + const { q, query } = req.query; + const searchQuery = q || query; + + if (!searchQuery) { + return res.status(400).json({ + error: 'Missing required parameter: q or query', + }); + } + + const options = { + limit: parseInt(req.query.limit, 10) || 10, + language: req.query.language || req.query.lang, + region: 
req.query.region, + safeSearch: + req.query.safeSearch === 'true' || + req.query.safe === 'true' || + req.query.safeSearch === '1', + }; + + try { + const results = await searchEngine.searchSingle( + searchQuery, + provider, + options + ); + res.json({ + query: searchQuery, + provider, + count: results.length, + results, + }); + } catch (error) { + if (error.message.includes('Unknown provider')) { + return res.status(400).json({ + error: error.message, + availableProviders: searchEngine.getAvailableProviders(), + }); + } + console.error(`Search error (${provider}):`, error); + res.status(500).json({ + error: 'Search failed', + message: error.message, + }); + } +}); + +/** + * Get available providers + * GET /providers + */ +app.get('/providers', (req, res) => { + res.json({ + providers: searchEngine.getProviderStatus(), + }); +}); + +const isMainModule = + process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]; + +let server; +if (isMainModule) { + console.log('Process PID:', process.pid); + server = app.listen(port, () => { + console.log(`Web Search API listening on http://localhost:${port}`); + console.log(''); + console.log('Available endpoints:'); + console.log(' GET /search?q= - Search all providers'); + console.log(' POST /search - Search with JSON body'); + console.log(' GET /search/:provider?q= - Search single provider'); + console.log(' GET /providers - List available providers'); + console.log(' GET /health - Health check'); + console.log(''); + console.log('Query parameters:'); + console.log(' q, query - Search query (required)'); + console.log(' providers - Comma-separated list of providers'); + console.log(' limit - Max results per provider (default: 10)'); + console.log(' strategy - Merge strategy: rrf, weighted, interleave'); + console.log(' weights - JSON object with provider weights'); + console.log(' language - Language code (e.g., en, de)'); + console.log(' region - Region code (e.g., us, de)'); + console.log(' safeSearch - Enable 
safe search (true/false)'); + console.log(''); + console.log('Press Ctrl+C to stop the server'); + }); + + function shutdown(signal) { + console.log(`Received shutdown signal (${signal}), closing server...`); + server.close(() => { + console.log('Server closed. Exiting process.'); + process.exit(0); + }); + setTimeout(() => { + console.error('Force exiting after 2s'); + process.exit(1); + }, 2000); + } + + process.on('SIGTERM', () => shutdown('SIGTERM')); + process.on('SIGINT', () => shutdown('SIGINT')); + + process.on('exit', (code) => { + console.log('Process exit event with code:', code); + }); + process.on('uncaughtException', (err) => { + console.error('Uncaught Exception:', err); + }); + process.on('unhandledRejection', (reason, promise) => { + console.error('Unhandled Rejection at:', promise, 'reason:', reason); + }); +} + +export { app, searchEngine }; diff --git a/tests/index.test.js b/tests/index.test.js index 15fbcb6..05287a0 100644 --- a/tests/index.test.js +++ b/tests/index.test.js @@ -1,35 +1,286 @@ /** - * Example test file using test-anywhere + * Unit tests for web-search library * Works with Node.js, Bun, and Deno */ import { describe, it, expect } from 'test-anywhere'; -import { add, multiply } from '../src/index.js'; +import { + mergeWithRRF, + mergeWithWeights, + mergeWithInterleave, + mergeResults, +} from '../src/merger.js'; -describe('add function', () => { - it('should add two positive numbers', () => { - expect(add(2, 3)).toBe(5); +describe('merger', () => { + const createResult = (url, title, source, rank) => ({ + title, + url, + snippet: `Snippet for ${title}`, + source, + rank, }); - it('should add negative numbers', () => { - expect(add(-1, -2)).toBe(-3); + describe('mergeWithRRF', () => { + it('should merge results from multiple providers', () => { + const resultsByProvider = { + google: [ + createResult('https://example.com/1', 'Result 1', 'google', 1), + createResult('https://example.com/2', 'Result 2', 'google', 2), + ], + bing: [ + 
createResult('https://example.com/2', 'Result 2', 'bing', 1), + createResult('https://example.com/3', 'Result 3', 'bing', 2), + ], + }; + + const merged = mergeWithRRF(resultsByProvider); + + expect(merged.length).toBe(3); + expect(merged[0].url.includes('example.com/2')).toBe(true); + expect(merged[0].sources !== undefined).toBe(true); + expect(merged[0].sources.length).toBe(2); + }); + + it('should handle empty results', () => { + const merged = mergeWithRRF({}); + expect(merged.length).toBe(0); + }); + + it('should apply provider weights', () => { + const resultsByProvider = { + google: [createResult('https://a.com', 'A', 'google', 1)], + bing: [createResult('https://b.com', 'B', 'bing', 1)], + }; + + const merged = mergeWithRRF(resultsByProvider, { + weights: { google: 2.0, bing: 0.5 }, + }); + + expect(merged[0].url).toBe('https://a.com'); + }); + + it('should deduplicate URLs by normalized form', () => { + const resultsByProvider = { + google: [createResult('https://example.com/path/', 'R1', 'google', 1)], + bing: [createResult('https://EXAMPLE.COM/path', 'R1', 'bing', 1)], + }; + + const merged = mergeWithRRF(resultsByProvider); + expect(merged.length).toBe(1); + }); }); - it('should add zero', () => { - expect(add(5, 0)).toBe(5); + describe('mergeWithWeights', () => { + it('should merge using weighted scoring', () => { + const resultsByProvider = { + google: [ + createResult('https://a.com', 'A', 'google', 1), + createResult('https://b.com', 'B', 'google', 2), + ], + bing: [createResult('https://b.com', 'B', 'bing', 1)], + }; + + const merged = mergeWithWeights(resultsByProvider); + + expect(merged.length).toBe(2); + expect(merged[0].score !== undefined).toBe(true); + }); + + it('should respect weight overrides', () => { + const resultsByProvider = { + google: [createResult('https://a.com', 'A', 'google', 1)], + bing: [createResult('https://b.com', 'B', 'bing', 1)], + }; + + const merged = mergeWithWeights(resultsByProvider, { + weights: { bing: 2.0, 
google: 0.1 }, + }); + + expect(merged[0].url).toBe('https://b.com'); + }); + }); + + describe('mergeWithInterleave', () => { + it('should interleave results round-robin style', () => { + const resultsByProvider = { + google: [ + createResult('https://g1.com', 'G1', 'google', 1), + createResult('https://g2.com', 'G2', 'google', 2), + ], + bing: [ + createResult('https://b1.com', 'B1', 'bing', 1), + createResult('https://b2.com', 'B2', 'bing', 2), + ], + }; + + const merged = mergeWithInterleave(resultsByProvider); + + expect(merged.length).toBe(4); + }); + + it('should remove duplicates when enabled', () => { + const resultsByProvider = { + google: [createResult('https://same.com', 'Same', 'google', 1)], + bing: [createResult('https://same.com', 'Same', 'bing', 1)], + }; + + const merged = mergeWithInterleave(resultsByProvider, { + removeDuplicates: true, + }); + + expect(merged.length).toBe(1); + }); + }); + + describe('mergeResults', () => { + it('should use RRF strategy by default', () => { + const resultsByProvider = { + google: [createResult('https://a.com', 'A', 'google', 1)], + }; + + const merged = mergeResults(resultsByProvider); + expect(merged.length).toBe(1); + }); + + it('should use weighted strategy when specified', () => { + const resultsByProvider = { + google: [createResult('https://a.com', 'A', 'google', 1)], + }; + + const merged = mergeResults(resultsByProvider, { strategy: 'weighted' }); + expect(merged[0].score !== undefined).toBe(true); + }); + + it('should use interleave strategy when specified', () => { + const resultsByProvider = { + google: [createResult('https://a.com', 'A', 'google', 1)], + bing: [createResult('https://b.com', 'B', 'bing', 1)], + }; + + const merged = mergeResults(resultsByProvider, { + strategy: 'interleave', + }); + expect(merged.length).toBe(2); + }); }); }); -describe('multiply function', () => { - it('should multiply two positive numbers', () => { - expect(multiply(2, 3)).toBe(6); +describe('providers', () => { + 
describe('BaseSearchProvider', () => { + it('should export BaseSearchProvider', async () => { + const { BaseSearchProvider } = await import('../src/providers/base.js'); + expect(BaseSearchProvider !== undefined).toBe(true); + }); }); - it('should multiply by zero', () => { - expect(multiply(5, 0)).toBe(0); + describe('GoogleProvider', () => { + it('should create GoogleProvider instance', async () => { + const { GoogleProvider } = await import('../src/providers/google.js'); + const provider = new GoogleProvider(); + + expect(provider.getName()).toBe('google'); + expect(provider.isAvailable()).toBe(true); + }); + + it('should report API credentials status', async () => { + const { GoogleProvider } = await import('../src/providers/google.js'); + + const withoutApi = new GoogleProvider(); + expect(withoutApi.hasApiCredentials()).toBe(false); + + const withApi = new GoogleProvider({ + apiKey: 'test-key', + searchEngineId: 'test-cx', + }); + expect(withApi.hasApiCredentials()).toBe(true); + }); + }); + + describe('DuckDuckGoProvider', () => { + it('should create DuckDuckGoProvider instance', async () => { + const { DuckDuckGoProvider } = + await import('../src/providers/duckduckgo.js'); + const provider = new DuckDuckGoProvider(); + + expect(provider.getName()).toBe('duckduckgo'); + expect(provider.isAvailable()).toBe(true); + }); + }); + + describe('BingProvider', () => { + it('should create BingProvider instance', async () => { + const { BingProvider } = await import('../src/providers/bing.js'); + const provider = new BingProvider(); + + expect(provider.getName()).toBe('bing'); + expect(provider.isAvailable()).toBe(true); + }); + + it('should report API credentials status', async () => { + const { BingProvider } = await import('../src/providers/bing.js'); + + const withoutApi = new BingProvider(); + expect(withoutApi.hasApiCredentials()).toBe(false); + + const withApi = new BingProvider({ apiKey: 'test-key' }); + expect(withApi.hasApiCredentials()).toBe(true); + }); + 
}); +}); + +describe('search engine', () => { + it('should create WebSearchEngine instance', async () => { + const { WebSearchEngine } = await import('../src/search.js'); + const engine = new WebSearchEngine(); + + expect(engine.getAvailableProviders().includes('google')).toBe(true); + expect(engine.getAvailableProviders().includes('duckduckgo')).toBe(true); + expect(engine.getAvailableProviders().includes('bing')).toBe(true); + }); + + it('should get provider status', async () => { + const { WebSearchEngine } = await import('../src/search.js'); + const engine = new WebSearchEngine(); + + const status = engine.getProviderStatus(); + + expect(status.google !== undefined).toBe(true); + expect(status.google.enabled).toBe(true); + expect(typeof status.google.weight).toBe('number'); + }); + + it('should allow setting provider weight', async () => { + const { WebSearchEngine } = await import('../src/search.js'); + const engine = new WebSearchEngine(); + + engine.setProviderWeight('google', 0.5); + const status = engine.getProviderStatus(); + + expect(status.google.weight).toBe(0.5); }); - it('should multiply negative numbers', () => { - expect(multiply(-2, 3)).toBe(-6); + it('should return empty array for empty query', async () => { + const { WebSearchEngine } = await import('../src/search.js'); + const engine = new WebSearchEngine(); + + const results = await engine.search(''); + expect(results.length).toBe(0); + }); +}); + +describe('exports', () => { + it('should export all main components', async () => { + const exports = await import('../src/index.js'); + + expect(exports.WebSearchEngine !== undefined).toBe(true); + expect(exports.createSearchEngine !== undefined).toBe(true); + expect(exports.BaseSearchProvider !== undefined).toBe(true); + expect(exports.GoogleProvider !== undefined).toBe(true); + expect(exports.DuckDuckGoProvider !== undefined).toBe(true); + expect(exports.BingProvider !== undefined).toBe(true); + expect(exports.mergeResults !== 
undefined).toBe(true); + expect(exports.mergeWithRRF !== undefined).toBe(true); + expect(exports.mergeWithWeights !== undefined).toBe(true); + expect(exports.mergeWithInterleave !== undefined).toBe(true); }); });