diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b23389b..1df25a0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,9 +4,14 @@ on: workflow_dispatch: inputs: version: - description: "Release version (e.g. 0.1.0)" + description: "Release version (e.g. 1.2.0 or 1.2.0-beta.1)" required: true type: string + prerelease: + description: "Mark as pre-release (beta)" + required: false + type: boolean + default: false permissions: contents: write @@ -62,7 +67,12 @@ jobs: APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: npx electron-builder --mac --arm64 --publish always + run: | + if [ "${{ github.event.inputs.prerelease }}" = "true" ]; then + npx electron-builder --mac --arm64 --publish always -c.publish.releaseType=prerelease + else + npx electron-builder --mac --arm64 --publish always + fi - name: Add release notes env: @@ -72,3 +82,9 @@ jobs: -f tag_name="v${{ github.event.inputs.version }}" \ --jq '.body') gh release edit "v${{ github.event.inputs.version }}" --notes "$NOTES" + + - name: Mark as pre-release + if: ${{ github.event.inputs.prerelease == 'true' }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: gh release edit "v${{ github.event.inputs.version }}" --prerelease diff --git a/CLAUDE.md b/CLAUDE.md index c8da0c8..f1239a7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -56,9 +56,18 @@ export const useStore = create((set, get) => ({ - **Framework**: Vitest with `globals: true` - **Database tests**: Create temp SQLite file in `os.tmpdir()`, clean up in `afterEach` - **Mock paths**: `vi.mock('../paths', () => ({ getDataDir: () => '' }))` -- **Test file location**: `src/main/__tests__/` +- **Test file location**: `src/main/__tests__/`, `src/renderer/__tests__/` - **Run**: `npm run test` (rebuilds native modules before/after) +### Test Requirements + +Follow **test-driven development 
(TDD)**: write a failing test first, then implement the feature to make it pass. Tests are mandatory for all code changes. Always run `npm run test` after making changes. + +- **Data flow defaults**: When a DB query can return `undefined`/`null` for a field that the UI consumes, test that the hydration layer provides a sensible default. The UI should never receive `undefined` for a field it conditionally renders on. +- **Event-driven UI state**: When backend events drive UI state changes (e.g., `INDEXING_PROGRESS` → badge states), test the full state machine: every event type must be covered, including transitions between states and the final/reset state. +- **Conditional rendering**: When UI elements render conditionally (e.g., `{value && <Component />}`), ensure tests verify that the condition is met for all expected cases — not just the happy path. A missing default value that makes a field `undefined` will silently hide UI elements. +- **Renderer logic tests**: Extract non-trivial derivation logic (state machines, computed props) into pure functions and test them in `src/renderer/__tests__/`. This avoids needing a DOM environment while still catching logic bugs. + ### Adding a New DB Domain 1. 
Create `src/main/db/.ts` with query/mutation functions diff --git a/electron-builder.yml b/electron-builder.yml index 360950e..d13a47b 100644 --- a/electron-builder.yml +++ b/electron-builder.yml @@ -10,6 +10,10 @@ asarUnpack: - "**/node_modules/better-sqlite3/**" - "**/node_modules/bindings/**" - "**/node_modules/file-uri-to-path/**" + - "**/node_modules/onnxruntime-node/**" + - "**/node_modules/onnxruntime-common/**" + - "**/node_modules/@huggingface/transformers/**" + - "**/dist/main/main/services/embedding-worker.js" icon: resources/icon.icns publish: provider: github diff --git a/package-lock.json b/package-lock.json index 0ef5326..22cd49e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,14 +1,15 @@ { "name": "papershelf", - "version": "1.2.1", + "version": "2.0.0-beta.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "papershelf", - "version": "1.2.1", + "version": "2.0.0-beta.4", "hasInstallScript": true, "dependencies": { + "@huggingface/transformers": "^3.8.1", "@modelcontextprotocol/sdk": "^1.26.0", "better-sqlite3": "^12.6.2", "electron-log": "^5.4.3", @@ -1009,6 +1010,16 @@ "node": ">= 10.0.0" } }, + "node_modules/@emnapi/runtime": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.8.1.tgz", + "integrity": "sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg==", + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.27.3", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.3.tgz", @@ -1383,101 +1394,587 @@ "node": ">=18" } }, - "node_modules/@esbuild/sunos-x64": { - "version": "0.27.3", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.3.tgz", - "integrity": "sha512-PanZ+nEz+eWoBJ8/f8HKxTTD172SKwdXebZ0ndd953gt1HRBbhMsaNqjTyYLGLPdoWHy4zLU7bDVJztF5f3BHA==", + "node_modules/@esbuild/sunos-x64": { + 
"version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.3.tgz", + "integrity": "sha512-PanZ+nEz+eWoBJ8/f8HKxTTD172SKwdXebZ0ndd953gt1HRBbhMsaNqjTyYLGLPdoWHy4zLU7bDVJztF5f3BHA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.3.tgz", + "integrity": "sha512-B2t59lWWYrbRDw/tjiWOuzSsFh1Y/E95ofKz7rIVYSQkUYBjfSgf6oeYPNWHToFRr2zx52JKApIcAS/D5TUBnA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.3.tgz", + "integrity": "sha512-QLKSFeXNS8+tHW7tZpMtjlNb7HKau0QDpwm49u0vUp9y1WOF+PEzkU84y9GqYaAVW8aH8f3GcBck26jh54cX4Q==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.27.3", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.3.tgz", + "integrity": "sha512-4uJGhsxuptu3OcpVAzli+/gWusVGwZZHTlS63hh++ehExkVT8SgiEf7/uC/PclrPPkLhZqGgCTjd0VWLo6xMqA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@hapi/hoek": { + "version": "9.3.0", + "resolved": "https://registry.npmjs.org/@hapi/hoek/-/hoek-9.3.0.tgz", + "integrity": "sha512-/c6rf4UJlmHlC9b5BaNvzAcFv7HZ2QHaV0D4/HNlBdvFnvQq8RI4kYdhyPCl7Xj+oWvTWQ8ujhqS53LIgAe6KQ==", + "dev": true, + "license": "BSD-3-Clause" + }, + "node_modules/@hapi/topo": { + "version": "5.1.0", + "resolved": 
"https://registry.npmjs.org/@hapi/topo/-/topo-5.1.0.tgz", + "integrity": "sha512-foQZKJig7Ob0BMAYBfcJk8d77QtOe7Wo4ox7ff1lQYoNNAb6jwcY1ncdoy2e9wQZzvNy7ODZCYJkK8kzmcAnAg==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "@hapi/hoek": "^9.0.0" + } + }, + "node_modules/@hono/node-server": { + "version": "1.19.9", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.9.tgz", + "integrity": "sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==", + "license": "MIT", + "engines": { + "node": ">=18.14.1" + }, + "peerDependencies": { + "hono": "^4" + } + }, + "node_modules/@huggingface/jinja": { + "version": "0.5.5", + "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.5.tgz", + "integrity": "sha512-xRlzazC+QZwr6z4ixEqYHo9fgwhTZ3xNSdljlKfUFGZSdlvt166DljRELFUfFytlYOYvo3vTisA/AFOuOAzFQQ==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/@huggingface/transformers": { + "version": "3.8.1", + "resolved": "https://registry.npmjs.org/@huggingface/transformers/-/transformers-3.8.1.tgz", + "integrity": "sha512-tsTk4zVjImqdqjS8/AOZg2yNLd1z9S5v+7oUPpXaasDRwEDhB+xnglK1k5cad26lL5/ZIaeREgWWy0bs9y9pPA==", + "license": "Apache-2.0", + "dependencies": { + "@huggingface/jinja": "^0.5.3", + "onnxruntime-node": "1.21.0", + "onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4", + "sharp": "^0.34.1" + } + }, + "node_modules/@img/colour": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz", + "integrity": "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/@img/sharp-darwin-arm64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.5.tgz", + "integrity": 
"sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==", + "cpu": [ + "arm64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-darwin-arm64": "1.2.4" + } + }, + "node_modules/@img/sharp-darwin-x64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.5.tgz", + "integrity": "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==", + "cpu": [ + "x64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-darwin-x64": "1.2.4" + } + }, + "node_modules/@img/sharp-libvips-darwin-arm64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.4.tgz", + "integrity": "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==", + "cpu": [ + "arm64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "darwin" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-darwin-x64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.4.tgz", + "integrity": "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==", + "cpu": [ + "x64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "darwin" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-arm": { + 
"version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.4.tgz", + "integrity": "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==", + "cpu": [ + "arm" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-arm64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.4.tgz", + "integrity": "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==", + "cpu": [ + "arm64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-ppc64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.4.tgz", + "integrity": "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==", + "cpu": [ + "ppc64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-riscv64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-riscv64/-/sharp-libvips-linux-riscv64-1.2.4.tgz", + "integrity": "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==", + "cpu": [ + "riscv64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-s390x": { + "version": "1.2.4", + "resolved": 
"https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.4.tgz", + "integrity": "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==", + "cpu": [ + "s390x" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-x64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.4.tgz", + "integrity": "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==", + "cpu": [ + "x64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linuxmusl-arm64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.4.tgz", + "integrity": "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==", + "cpu": [ + "arm64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linuxmusl-x64": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.4.tgz", + "integrity": "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==", + "cpu": [ + "x64" + ], + "license": "LGPL-3.0-or-later", + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-linux-arm": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.5.tgz", + "integrity": 
"sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==", + "cpu": [ + "arm" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-arm": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-arm64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.5.tgz", + "integrity": "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==", + "cpu": [ + "arm64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-arm64": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-ppc64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.5.tgz", + "integrity": "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==", + "cpu": [ + "ppc64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-ppc64": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-riscv64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-riscv64/-/sharp-linux-riscv64-0.34.5.tgz", + "integrity": "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==", + "cpu": [ + "riscv64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || 
^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-riscv64": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-s390x": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.5.tgz", + "integrity": "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==", + "cpu": [ + "s390x" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-s390x": "1.2.4" + } + }, + "node_modules/@img/sharp-linux-x64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.5.tgz", + "integrity": "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==", + "cpu": [ + "x64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-x64": "1.2.4" + } + }, + "node_modules/@img/sharp-linuxmusl-arm64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.5.tgz", + "integrity": "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==", + "cpu": [ + "arm64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" + } + }, + "node_modules/@img/sharp-linuxmusl-x64": { + "version": 
"0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.5.tgz", + "integrity": "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==", + "cpu": [ + "x64" + ], + "license": "Apache-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linuxmusl-x64": "1.2.4" + } + }, + "node_modules/@img/sharp-wasm32": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.5.tgz", + "integrity": "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==", "cpu": [ - "x64" + "wasm32" ], - "dev": true, - "license": "MIT", + "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT", "optional": true, - "os": [ - "sunos" - ], + "dependencies": { + "@emnapi/runtime": "^1.7.0" + }, "engines": { - "node": ">=18" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" } }, - "node_modules/@esbuild/win32-arm64": { - "version": "0.27.3", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.3.tgz", - "integrity": "sha512-B2t59lWWYrbRDw/tjiWOuzSsFh1Y/E95ofKz7rIVYSQkUYBjfSgf6oeYPNWHToFRr2zx52JKApIcAS/D5TUBnA==", + "node_modules/@img/sharp-win32-arm64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.5.tgz", + "integrity": "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==", "cpu": [ "arm64" ], - "dev": true, - "license": "MIT", + "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ "win32" ], "engines": { - "node": ">=18" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" } }, - 
"node_modules/@esbuild/win32-ia32": { - "version": "0.27.3", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.3.tgz", - "integrity": "sha512-QLKSFeXNS8+tHW7tZpMtjlNb7HKau0QDpwm49u0vUp9y1WOF+PEzkU84y9GqYaAVW8aH8f3GcBck26jh54cX4Q==", + "node_modules/@img/sharp-win32-ia32": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.5.tgz", + "integrity": "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==", "cpu": [ "ia32" ], - "dev": true, - "license": "MIT", + "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ "win32" ], "engines": { - "node": ">=18" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" } }, - "node_modules/@esbuild/win32-x64": { - "version": "0.27.3", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.3.tgz", - "integrity": "sha512-4uJGhsxuptu3OcpVAzli+/gWusVGwZZHTlS63hh++ehExkVT8SgiEf7/uC/PclrPPkLhZqGgCTjd0VWLo6xMqA==", + "node_modules/@img/sharp-win32-x64": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.5.tgz", + "integrity": "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==", "cpu": [ "x64" ], - "dev": true, - "license": "MIT", + "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ "win32" ], "engines": { - "node": ">=18" - } - }, - "node_modules/@hapi/hoek": { - "version": "9.3.0", - "resolved": "https://registry.npmjs.org/@hapi/hoek/-/hoek-9.3.0.tgz", - "integrity": "sha512-/c6rf4UJlmHlC9b5BaNvzAcFv7HZ2QHaV0D4/HNlBdvFnvQq8RI4kYdhyPCl7Xj+oWvTWQ8ujhqS53LIgAe6KQ==", - "dev": true, - "license": "BSD-3-Clause" - }, - "node_modules/@hapi/topo": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/@hapi/topo/-/topo-5.1.0.tgz", - "integrity": 
"sha512-foQZKJig7Ob0BMAYBfcJk8d77QtOe7Wo4ox7ff1lQYoNNAb6jwcY1ncdoy2e9wQZzvNy7ODZCYJkK8kzmcAnAg==", - "dev": true, - "license": "BSD-3-Clause", - "dependencies": { - "@hapi/hoek": "^9.0.0" - } - }, - "node_modules/@hono/node-server": { - "version": "1.19.9", - "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.9.tgz", - "integrity": "sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==", - "license": "MIT", - "engines": { - "node": ">=18.14.1" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" }, - "peerDependencies": { - "hono": "^4" + "funding": { + "url": "https://opencollective.com/libvips" } }, "node_modules/@isaacs/cliui": { @@ -1587,7 +2084,6 @@ "version": "4.0.1", "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz", "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==", - "dev": true, "license": "ISC", "dependencies": { "minipass": "^7.0.4" @@ -2049,6 +2545,70 @@ "node": ">=18" } }, + "node_modules/@protobufjs/aspromise": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", + "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/base64": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", + "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/codegen": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", + "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/eventemitter": { + "version": 
"1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", + "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/fetch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", + "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.1", + "@protobufjs/inquire": "^1.1.0" + } + }, + "node_modules/@protobufjs/float": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", + "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/inquire": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", + "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/path": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", + "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/pool": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", + "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/utf8": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", + "integrity": 
"sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==", + "license": "BSD-3-Clause" + }, "node_modules/@rolldown/pluginutils": { "version": "1.0.0-rc.3", "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.3.tgz", @@ -2686,6 +3246,66 @@ "node": ">=14.0.0" } }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": { + "version": "1.7.1", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/wasi-threads": "1.1.0", + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": { + "version": "1.7.1", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": { + "version": "1.1.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": { + "version": "1.1.0", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/core": "^1.7.1", + "@emnapi/runtime": "^1.7.1", + "@tybys/wasm-util": "^0.10.1" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": { + "version": "0.10.1", + "dev": true, + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": { + "version": "2.8.1", + "dev": true, + "inBundle": true, + "license": "0BSD", + "optional": true + }, "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { "version": "4.1.18", "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.18.tgz", @@ -2875,7 +3495,6 @@ "version": "25.2.1", "resolved": 
"https://registry.npmjs.org/@types/node/-/node-25.2.1.tgz", "integrity": "sha512-CPrnr8voK8vC6eEtyRzvMpgp3VyVRhgclonE7qYi6P9sXwYb59ucfrnmFBTaP0yUi8Gk4yZg/LlTJULGxvTNsg==", - "dev": true, "license": "MIT", "dependencies": { "undici-types": "~7.16.0" @@ -3655,9 +4274,7 @@ "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz", "integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw==", "deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info.", - "dev": true, - "license": "MIT", - "optional": true + "license": "MIT" }, "node_modules/brace-expansion": { "version": "2.0.2", @@ -4410,9 +5027,7 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", - "dev": true, "license": "MIT", - "optional": true, "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", @@ -4429,9 +5044,7 @@ "version": "1.2.1", "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz", "integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==", - "dev": true, "license": "MIT", - "optional": true, "dependencies": { "define-data-property": "^1.0.1", "has-property-descriptors": "^1.0.0", @@ -4476,9 +5089,7 @@ "version": "2.1.0", "resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz", "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==", - "dev": true, - "license": "MIT", - "optional": true + "license": "MIT" }, "node_modules/dir-compare": { "version": "4.2.0", @@ -5097,9 +5708,7 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/es6-error/-/es6-error-4.1.1.tgz", "integrity": 
"sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg==", - "dev": true, - "license": "MIT", - "optional": true + "license": "MIT" }, "node_modules/esbuild": { "version": "0.27.3", @@ -5163,9 +5772,7 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", - "dev": true, "license": "MIT", - "optional": true, "engines": { "node": ">=10" }, @@ -5491,6 +6098,12 @@ "url": "https://opencollective.com/express" } }, + "node_modules/flatbuffers": { + "version": "25.9.23", + "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-25.9.23.tgz", + "integrity": "sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==", + "license": "Apache-2.0" + }, "node_modules/follow-redirects": { "version": "1.15.11", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", @@ -5734,9 +6347,7 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz", "integrity": "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q==", - "dev": true, "license": "BSD-3-Clause", - "optional": true, "dependencies": { "boolean": "^3.0.1", "es6-error": "^4.1.1", @@ -5753,9 +6364,7 @@ "version": "7.7.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", - "dev": true, "license": "ISC", - "optional": true, "bin": { "semver": "bin/semver.js" }, @@ -5767,9 +6376,7 @@ "version": "1.0.4", "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz", "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==", - "dev": true, "license": "MIT", - 
"optional": true, "dependencies": { "define-properties": "^1.2.1", "gopd": "^1.0.1" @@ -5825,6 +6432,12 @@ "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", "license": "ISC" }, + "node_modules/guid-typescript": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz", + "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==", + "license": "ISC" + }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -5839,9 +6452,7 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", - "dev": true, "license": "MIT", - "optional": true, "dependencies": { "es-define-property": "^1.0.0" }, @@ -6300,9 +6911,7 @@ "version": "5.0.1", "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==", - "dev": true, - "license": "ISC", - "optional": true + "license": "ISC" }, "node_modules/json5": { "version": "2.2.3", @@ -6641,6 +7250,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/long": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz", + "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==", + "license": "Apache-2.0" + }, "node_modules/lowercase-keys": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-2.0.0.tgz", @@ -6698,9 +7313,7 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/matcher/-/matcher-3.0.0.tgz", "integrity": 
"sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng==", - "dev": true, "license": "MIT", - "optional": true, "dependencies": { "escape-string-regexp": "^4.0.0" }, @@ -6823,7 +7436,6 @@ "version": "7.1.2", "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "dev": true, "license": "ISC", "engines": { "node": ">=16 || 14 >=14.17" @@ -6963,7 +7575,6 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz", "integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==", - "dev": true, "license": "MIT", "dependencies": { "minipass": "^7.1.2" @@ -7212,9 +7823,7 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", - "dev": true, "license": "MIT", - "optional": true, "engines": { "node": ">= 0.4" } @@ -7267,6 +7876,49 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/onnxruntime-common": { + "version": "1.21.0", + "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.21.0.tgz", + "integrity": "sha512-Q632iLLrtCAVOTO65dh2+mNbQir/QNTVBG3h/QdZBpns7mZ0RYbLRBgGABPbpU9351AgYy7SJf1WaeVwMrBFPQ==", + "license": "MIT" + }, + "node_modules/onnxruntime-node": { + "version": "1.21.0", + "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.21.0.tgz", + "integrity": "sha512-NeaCX6WW2L8cRCSqy3bInlo5ojjQqu2fD3D+9W5qb5irwxhEyWKXeH2vZ8W9r6VxaMPUan+4/7NDwZMtouZxEw==", + "hasInstallScript": true, + "license": "MIT", + "os": [ + "win32", + "darwin", + "linux" + ], + "dependencies": { + "global-agent": "^3.0.0", + "onnxruntime-common": "1.21.0", + "tar": "^7.0.1" + } + }, + "node_modules/onnxruntime-web": 
{ + "version": "1.22.0-dev.20250409-89f8206ba4", + "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.22.0-dev.20250409-89f8206ba4.tgz", + "integrity": "sha512-0uS76OPgH0hWCPrFKlL8kYVV7ckM7t/36HfbgoFw6Nd0CZVVbQC4PkrR8mBX8LtNUFZO25IQBqV2Hx2ho3FlbQ==", + "license": "MIT", + "dependencies": { + "flatbuffers": "^25.1.24", + "guid-typescript": "^1.0.9", + "long": "^5.2.3", + "onnxruntime-common": "1.22.0-dev.20250409-89f8206ba4", + "platform": "^1.3.6", + "protobufjs": "^7.2.4" + } + }, + "node_modules/onnxruntime-web/node_modules/onnxruntime-common": { + "version": "1.22.0-dev.20250409-89f8206ba4", + "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.22.0-dev.20250409-89f8206ba4.tgz", + "integrity": "sha512-vDJMkfCfb0b1A836rgHj+ORuZf4B4+cc2bASQtpeoJLueuFc5DuYwjIZUBrSvx/fO5IrLjLz+oTrB3pcGlhovQ==", + "license": "MIT" + }, "node_modules/ora": { "version": "5.4.1", "resolved": "https://registry.npmjs.org/ora/-/ora-5.4.1.tgz", @@ -7536,6 +8188,12 @@ "node": ">=16.20.0" } }, + "node_modules/platform": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz", + "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==", + "license": "MIT" + }, "node_modules/playwright": { "version": "1.58.2", "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz", @@ -7736,6 +8394,30 @@ "dev": true, "license": "ISC" }, + "node_modules/protobufjs": { + "version": "7.5.4", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz", + "integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==", + "hasInstallScript": true, + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.2", + "@protobufjs/base64": "^1.1.2", + "@protobufjs/codegen": "^2.0.4", + "@protobufjs/eventemitter": "^1.1.0", + "@protobufjs/fetch": "^1.1.0", + 
"@protobufjs/float": "^1.0.2", + "@protobufjs/inquire": "^1.1.0", + "@protobufjs/path": "^1.1.2", + "@protobufjs/pool": "^1.1.0", + "@protobufjs/utf8": "^1.1.0", + "@types/node": ">=13.7.0", + "long": "^5.0.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/proxy-addr": { "version": "2.0.7", "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", @@ -8075,9 +8757,7 @@ "version": "2.15.4", "resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz", "integrity": "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A==", - "dev": true, "license": "BSD-3-Clause", - "optional": true, "dependencies": { "boolean": "^3.0.1", "detect-node": "^2.0.4", @@ -8227,9 +8907,7 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz", "integrity": "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==", - "dev": true, - "license": "MIT", - "optional": true + "license": "MIT" }, "node_modules/send": { "version": "1.2.1", @@ -8286,9 +8964,7 @@ "version": "7.0.1", "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-7.0.1.tgz", "integrity": "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw==", - "dev": true, "license": "MIT", - "optional": true, "dependencies": { "type-fest": "^0.13.1" }, @@ -8324,6 +9000,62 @@ "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", "license": "ISC" }, + "node_modules/sharp": { + "version": "0.34.5", + "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz", + "integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==", + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "@img/colour": "^1.0.0", + "detect-libc": "^2.1.2", + "semver": "^7.7.3" + }, + "engines": { + "node": 
"^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-darwin-arm64": "0.34.5", + "@img/sharp-darwin-x64": "0.34.5", + "@img/sharp-libvips-darwin-arm64": "1.2.4", + "@img/sharp-libvips-darwin-x64": "1.2.4", + "@img/sharp-libvips-linux-arm": "1.2.4", + "@img/sharp-libvips-linux-arm64": "1.2.4", + "@img/sharp-libvips-linux-ppc64": "1.2.4", + "@img/sharp-libvips-linux-riscv64": "1.2.4", + "@img/sharp-libvips-linux-s390x": "1.2.4", + "@img/sharp-libvips-linux-x64": "1.2.4", + "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", + "@img/sharp-libvips-linuxmusl-x64": "1.2.4", + "@img/sharp-linux-arm": "0.34.5", + "@img/sharp-linux-arm64": "0.34.5", + "@img/sharp-linux-ppc64": "0.34.5", + "@img/sharp-linux-riscv64": "0.34.5", + "@img/sharp-linux-s390x": "0.34.5", + "@img/sharp-linux-x64": "0.34.5", + "@img/sharp-linuxmusl-arm64": "0.34.5", + "@img/sharp-linuxmusl-x64": "0.34.5", + "@img/sharp-wasm32": "0.34.5", + "@img/sharp-win32-arm64": "0.34.5", + "@img/sharp-win32-ia32": "0.34.5", + "@img/sharp-win32-x64": "0.34.5" + } + }, + "node_modules/sharp/node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -8619,9 +9351,7 @@ "version": "1.1.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==", - "dev": true, - "license": "BSD-3-Clause", - "optional": true + "license": "BSD-3-Clause" }, "node_modules/ssri": { "version": "12.0.0", @@ -8811,7 
+9541,6 @@ "version": "7.5.7", "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.7.tgz", "integrity": "sha512-fov56fJiRuThVFXD6o6/Q354S7pnWMJIVlDBYijsTNx6jKSE4pvrDTs6lUnmGvNyfJwFQQwWy3owKz1ucIhveQ==", - "dev": true, "license": "BlueOak-1.0.0", "dependencies": { "@isaacs/fs-minipass": "^4.0.0", @@ -8856,7 +9585,6 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz", "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==", - "dev": true, "license": "BlueOak-1.0.0", "engines": { "node": ">=18" @@ -8866,7 +9594,6 @@ "version": "5.0.0", "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz", "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==", - "dev": true, "license": "BlueOak-1.0.0", "engines": { "node": ">=18" @@ -9059,7 +9786,7 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "dev": true, + "devOptional": true, "license": "0BSD" }, "node_modules/tunnel-agent": { @@ -9087,9 +9814,7 @@ "version": "0.13.1", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz", "integrity": "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==", - "dev": true, "license": "(MIT OR CC0-1.0)", - "optional": true, "engines": { "node": ">=10" }, @@ -9154,7 +9879,6 @@ "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", - "dev": true, "license": "MIT" }, "node_modules/unique-filename": { diff --git a/package.json b/package.json index 6c04e1a..c03e664 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "papershelf", - "version": "1.2.1", 
+ "version": "2.0.0-beta.4", "productName": "PaperShelf", "description": "A native-feeling Mac desktop app for searching arXiv, organizing research papers, annotating PDFs, and chatting with your paper library using RAG.", "author": "Daniel Klevebring", @@ -13,9 +13,9 @@ "build:main": "tsc -p tsconfig.main.json", "build:renderer": "vite build", "build": "npm run build:main && npm run build:renderer", - "postinstall": "npx @electron/rebuild --force", + "postinstall": "npx @electron/rebuild --force --only better-sqlite3", "rebuild:node": "npm rebuild better-sqlite3", - "rebuild:electron": "npx @electron/rebuild --force", + "rebuild:electron": "npx @electron/rebuild --force --only better-sqlite3", "test": "npm run rebuild:node && (vitest run; TEST_EXIT=$?; npm run rebuild:electron; exit $TEST_EXIT)", "test:watch": "npm run rebuild:node && vitest", "test:e2e": "npm run rebuild:electron && npm run build && npx playwright test", @@ -28,6 +28,7 @@ "prepare": "husky" }, "dependencies": { + "@huggingface/transformers": "^3.8.1", "@modelcontextprotocol/sdk": "^1.26.0", "better-sqlite3": "^12.6.2", "electron-log": "^5.4.3", diff --git a/src/main/__tests__/database.test.ts b/src/main/__tests__/database.test.ts index 3841c57..6aa3a87 100644 --- a/src/main/__tests__/database.test.ts +++ b/src/main/__tests__/database.test.ts @@ -212,6 +212,10 @@ describe('getPapers', () => { const papers = getPapers({ view: 'all-papers' }); expect(papers).toHaveLength(2); + // Papers without embedding_status rows should default to 'pending' + for (const paper of papers) { + expect(paper.embeddingStatus).toBe('pending'); + } }); it('returns only favorites', () => { diff --git a/src/main/__tests__/embedding-worker.test.ts b/src/main/__tests__/embedding-worker.test.ts new file mode 100644 index 0000000..de20d52 --- /dev/null +++ b/src/main/__tests__/embedding-worker.test.ts @@ -0,0 +1,109 @@ +import { fork } from 'child_process'; +import fs from 'fs'; +import path from 'path'; +import { afterEach, 
describe, expect, it } from 'vitest'; + +const WORKER_SCRIPT = path.join(__dirname, '../../..', 'dist/main/main/services/embedding-worker.js'); +const workerExists = fs.existsSync(WORKER_SCRIPT); + +// These tests fork the compiled worker and require native modules (onnxruntime-node). +// They are skipped in CI where the worker isn't built and native deps aren't available. +describe.skipIf(!workerExists)('embedding-worker child process', () => { + const children: ReturnType[] = []; + + afterEach(() => { + for (const child of children) { + if (child.connected) { + child.kill(); + } + } + children.length = 0; + }); + + it('starts without crashing (sharp stub works)', async () => { + const child = fork(WORKER_SCRIPT, [], { + stdio: ['pipe', 'pipe', 'pipe', 'ipc'], + }); + children.push(child); + + const result = await new Promise<{ started: boolean; error?: string }>((resolve) => { + const timeout = setTimeout(() => { + resolve({ started: true }); + }, 3000); + + child.on('error', (err) => { + clearTimeout(timeout); + resolve({ started: false, error: err.message }); + }); + + child.on('exit', (code) => { + clearTimeout(timeout); + if (code !== 0 && code !== null) { + resolve({ started: false, error: `Worker exited with code ${code}` }); + } + }); + }); + + expect(result.started).toBe(true); + + child.send({ type: 'shutdown' }); + }); + + it('responds to shutdown message', async () => { + const child = fork(WORKER_SCRIPT, [], { + stdio: ['pipe', 'pipe', 'pipe', 'ipc'], + }); + children.push(child); + + // Wait for process to be ready + await new Promise((resolve) => setTimeout(resolve, 500)); + + const exitCode = await new Promise((resolve) => { + child.on('exit', (code) => resolve(code)); + child.send({ type: 'shutdown' }); + }); + + expect(exitCode).toBe(0); + }); + + it('sends error for unknown message types', async () => { + const child = fork(WORKER_SCRIPT, [], { + stdio: ['pipe', 'pipe', 'pipe', 'ipc'], + }); + children.push(child); + + // Wait for process to be 
ready + await new Promise((resolve) => setTimeout(resolve, 500)); + + const response = await new Promise<{ type: string; error?: string }>((resolve) => { + child.on('message', (msg) => resolve(msg as { type: string; error?: string })); + child.send({ type: 'invalid-type', id: 'test-1' }); + }); + + expect(response.type).toBe('error'); + expect(response.error).toContain('Unknown message type'); + + child.send({ type: 'shutdown' }); + }); + + it('collects stderr without sharp import errors', async () => { + const child = fork(WORKER_SCRIPT, [], { + stdio: ['pipe', 'pipe', 'pipe', 'ipc'], + }); + children.push(child); + + const stderrChunks: string[] = []; + child.stderr?.on('data', (data: Buffer) => { + stderrChunks.push(data.toString()); + }); + + // Wait for process to start and any immediate errors + await new Promise((resolve) => setTimeout(resolve, 1000)); + + const stderr = stderrChunks.join(''); + expect(stderr).not.toContain('Cannot find package'); + expect(stderr).not.toContain('sharp'); + + child.send({ type: 'shutdown' }); + }); +}); diff --git a/src/main/__tests__/indexing-service.test.ts b/src/main/__tests__/indexing-service.test.ts new file mode 100644 index 0000000..6d73325 --- /dev/null +++ b/src/main/__tests__/indexing-service.test.ts @@ -0,0 +1,290 @@ +import fs from 'fs'; +import os from 'os'; +import path from 'path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../paths', () => ({ + getDataDir: () => '', +})); + +vi.mock('../services/embedding-service', () => ({ + embedDocumentTexts: vi.fn(), + ensureModelLoaded: vi.fn().mockResolvedValue(undefined), +})); + +import { closeDatabase, initDatabase } from '../database'; +import { getDb } from '../db/connection'; +import { insertPaper } from '../db/papers'; +import { getEmbeddingStatusForPapers, getPapersNeedingEmbedding, setEmbeddingStatus } from '../db/vector-store'; +import { DataChangeEvent, eventEmitter } from '../event-emitter'; +import { 
embedDocumentTexts } from '../services/embedding-service'; +import { indexAllPapers, indexPaper } from '../services/indexing-service'; + +let dbPath: string; + +function makePaper(overrides: Partial[0]> = {}) { + return { + arxivId: null as string | null, + title: 'Test Paper', + authors: ['Author One'], + abstract: 'Test abstract about machine learning.', + publishedDate: '2024-01-01T00:00:00Z', + updatedDate: '2024-01-01T00:00:00Z', + categories: ['cs.AI'], + arxivUrl: '', + pdfUrl: '', + pdfPath: null, + fullText: null, + ...overrides, + }; +} + +function makeMockEmbedding(seed: number = 1): Float32Array { + const embedding = new Float32Array(256); + for (let i = 0; i < 256; i++) { + embedding[i] = Math.sin(seed * (i + 1)); + } + let norm = 0; + for (const v of embedding) norm += v * v; + norm = Math.sqrt(norm); + for (let i = 0; i < 256; i++) embedding[i] /= norm; + return embedding; +} + +beforeEach(() => { + dbPath = path.join(os.tmpdir(), `papershelf-idx-test-${Date.now()}.db`); + initDatabase(dbPath); + vi.clearAllMocks(); +}); + +afterEach(() => { + closeDatabase(); + eventEmitter.removeAllListeners(); + try { + fs.unlinkSync(dbPath); + fs.unlinkSync(`${dbPath}-wal`); + fs.unlinkSync(`${dbPath}-shm`); + } catch { + // cleanup best-effort + } +}); + +describe('indexPaper', () => { + it('indexes a paper and sets status to complete', async () => { + const paper = insertPaper(makePaper({ arxivId: '1', title: 'ML Paper', abstract: 'About ML.' 
})); + const db = getDb(); + const mockEmbed = vi.mocked(embedDocumentTexts); + mockEmbed.mockResolvedValue([makeMockEmbedding(1)]); + + await indexPaper(paper.id); + + const statusMap = getEmbeddingStatusForPapers(db, [paper.id]); + expect(statusMap.get(paper.id)).toBe('complete'); + + const chunks = db.prepare('SELECT COUNT(*) as count FROM paper_chunks WHERE paper_id = ?').get(paper.id) as { + count: number; + }; + expect(chunks.count).toBeGreaterThan(0); + }); + + it('sets status to failed when embedding throws', async () => { + const paper = insertPaper(makePaper({ arxivId: '2' })); + const db = getDb(); + const mockEmbed = vi.mocked(embedDocumentTexts); + mockEmbed.mockRejectedValue(new Error('Worker crashed')); + + await expect(indexPaper(paper.id)).rejects.toThrow('Worker crashed'); + + const statusMap = getEmbeddingStatusForPapers(db, [paper.id]); + expect(statusMap.get(paper.id)).toBe('failed'); + }); + + it('clears old chunks on re-index', async () => { + const paper = insertPaper(makePaper({ arxivId: '3', title: 'Reindex Me', abstract: 'Abstract.' 
})); + const db = getDb(); + const mockEmbed = vi.mocked(embedDocumentTexts); + mockEmbed.mockResolvedValue([makeMockEmbedding(1)]); + + await indexPaper(paper.id); + const firstCount = ( + db.prepare('SELECT COUNT(*) as count FROM paper_chunks WHERE paper_id = ?').get(paper.id) as { count: number } + ).count; + + // Re-index — old chunks should be deleted first + mockEmbed.mockResolvedValue([makeMockEmbedding(2)]); + await indexPaper(paper.id); + + const secondCount = ( + db.prepare('SELECT COUNT(*) as count FROM paper_chunks WHERE paper_id = ?').get(paper.id) as { count: number } + ).count; + expect(secondCount).toBe(firstCount); + }); + + it('skips paper that does not exist', async () => { + const mockEmbed = vi.mocked(embedDocumentTexts); + await indexPaper('nonexistent-id'); + expect(mockEmbed).not.toHaveBeenCalled(); + }); +}); + +describe('indexAllPapers', () => { + it('emits indexing → indexed → complete events for each paper', async () => { + insertPaper(makePaper({ arxivId: '10', title: 'Paper A', abstract: 'About A.' })); + insertPaper(makePaper({ arxivId: '11', title: 'Paper B', abstract: 'About B.' 
})); + + const mockEmbed = vi.mocked(embedDocumentTexts); + mockEmbed.mockResolvedValue([makeMockEmbedding(1)]); + + const events: { status: string; paperId: string; current: number; total: number }[] = []; + eventEmitter.on(DataChangeEvent.INDEXING_PROGRESS, (event) => { + events.push({ + status: event.status, + paperId: event.paperId, + current: event.current, + total: event.total, + }); + }); + + await indexAllPapers(); + + // Should have: indexing(A), indexed(A), indexing(B), indexed(B), complete + expect(events).toHaveLength(5); + expect(events[0].status).toBe('indexing'); + expect(events[0].current).toBe(1); + expect(events[0].total).toBe(2); + + expect(events[1].status).toBe('indexed'); + expect(events[1].current).toBe(1); + + expect(events[2].status).toBe('indexing'); + expect(events[2].current).toBe(2); + + expect(events[3].status).toBe('indexed'); + expect(events[3].current).toBe(2); + + expect(events[4].status).toBe('complete'); + expect(events[4].current).toBe(2); + expect(events[4].total).toBe(2); + }); + + it('emits error event for failed papers but continues to next paper', async () => { + const paperA = insertPaper(makePaper({ arxivId: '20', title: 'Paper A', abstract: 'About A.' })); + insertPaper(makePaper({ arxivId: '21', title: 'Paper B', abstract: 'About B.' 
})); + + const mockEmbed = vi.mocked(embedDocumentTexts); + // Fail consistently for paper A (odd calls), succeed for paper B (even calls) + let callCount = 0; + mockEmbed.mockImplementation(async () => { + callCount++; + if (callCount % 2 === 1) { + throw new Error('Embedding failed'); + } + return [makeMockEmbedding(1)]; + }); + + const events: { status: string; paperId: string; error?: string }[] = []; + eventEmitter.on(DataChangeEvent.INDEXING_PROGRESS, (event) => { + events.push({ status: event.status, paperId: event.paperId, error: event.error }); + }); + + await indexAllPapers(); + + // First round: Paper A fails, Paper B succeeds + const errorEvents = events.filter((e) => e.status === 'error'); + expect(errorEvents.length).toBeGreaterThanOrEqual(1); + expect(errorEvents[0].paperId).toBe(paperA.id); + expect(errorEvents[0].error).toBe('Embedding failed'); + + const indexedEvents = events.filter((e) => e.status === 'indexed'); + expect(indexedEvents.length).toBeGreaterThanOrEqual(1); + + const completeEvents = events.filter((e) => e.status === 'complete'); + expect(completeEvents.length).toBeGreaterThanOrEqual(1); + }); + + it('skips papers that are already indexed', async () => { + const paper1 = insertPaper(makePaper({ arxivId: '30', title: 'Already Done' })); + insertPaper(makePaper({ arxivId: '31', title: 'Needs Index', abstract: 'About something.' 
})); + const db = getDb(); + + setEmbeddingStatus(db, paper1.id, 'complete', undefined, 1); + + const mockEmbed = vi.mocked(embedDocumentTexts); + mockEmbed.mockResolvedValue([makeMockEmbedding(1)]); + + const events: { status: string }[] = []; + eventEmitter.on(DataChangeEvent.INDEXING_PROGRESS, (event) => { + events.push({ status: event.status }); + }); + + await indexAllPapers(); + + // Only 1 paper needs indexing, so: indexing, indexed, complete + expect(events).toHaveLength(3); + expect(mockEmbed).toHaveBeenCalledTimes(1); + }); + + it('is a no-op when no papers need indexing', async () => { + const paper = insertPaper(makePaper({ arxivId: '40' })); + const db = getDb(); + setEmbeddingStatus(db, paper.id, 'complete', undefined, 1); + + const mockEmbed = vi.mocked(embedDocumentTexts); + + const events: { status: string }[] = []; + eventEmitter.on(DataChangeEvent.INDEXING_PROGRESS, (event) => { + events.push({ status: event.status }); + }); + + await indexAllPapers(); + + expect(events).toHaveLength(0); + expect(mockEmbed).not.toHaveBeenCalled(); + }); + + it('follow-up does not retry failed papers (only picks up pending)', async () => { + const paper = insertPaper(makePaper({ arxivId: '45', title: 'Will Fail', abstract: 'About failure.' 
})); + const db = getDb(); + + const mockEmbed = vi.mocked(embedDocumentTexts); + mockEmbed.mockRejectedValue(new Error('Sharp not found')); + + const events: { status: string; paperId: string }[] = []; + eventEmitter.on(DataChangeEvent.INDEXING_PROGRESS, (event) => { + events.push({ status: event.status, paperId: event.paperId }); + }); + + await indexAllPapers(); + + // Paper should have failed + const statusMap = getEmbeddingStatusForPapers(db, [paper.id]); + expect(statusMap.get(paper.id)).toBe('failed'); + + // The follow-up should NOT have re-triggered (paper is failed, not pending) + // We expect exactly: indexing, error, complete — no second round + const indexingEvents = events.filter((e) => e.status === 'indexing'); + expect(indexingEvents).toHaveLength(1); + + const completeEvents = events.filter((e) => e.status === 'complete'); + expect(completeEvents).toHaveLength(1); + }); + + it('re-indexes failed papers on subsequent call', async () => { + const paper = insertPaper(makePaper({ arxivId: '50', title: 'Retry Me', abstract: 'About retry.' 
})); + const db = getDb(); + + setEmbeddingStatus(db, paper.id, 'failed', 'Previous error'); + + const needsEmbedding = getPapersNeedingEmbedding(db); + expect(needsEmbedding).toHaveLength(1); + expect(needsEmbedding[0].id).toBe(paper.id); + + const mockEmbed = vi.mocked(embedDocumentTexts); + mockEmbed.mockResolvedValue([makeMockEmbedding(1)]); + + await indexAllPapers(); + + const statusMap = getEmbeddingStatusForPapers(db, [paper.id]); + expect(statusMap.get(paper.id)).toBe('complete'); + }); +}); diff --git a/src/main/__tests__/vector-store.test.ts b/src/main/__tests__/vector-store.test.ts new file mode 100644 index 0000000..1599040 --- /dev/null +++ b/src/main/__tests__/vector-store.test.ts @@ -0,0 +1,249 @@ +import fs from 'fs'; +import os from 'os'; +import path from 'path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../paths', () => ({ + getDataDir: () => '', +})); + +import { closeDatabase, initDatabase } from '../database'; +import { getDb } from '../db/connection'; +import { insertPaper } from '../db/papers'; +import { + deleteChunksForPaper, + getEmbeddingStatusForPapers, + getIndexingStats, + getPapersNeedingEmbedding, + insertChunkWithEmbedding, + setEmbeddingStatus, + vectorSearch, +} from '../db/vector-store'; + +let dbPath: string; + +function makePaper(overrides: Partial[0]> = {}) { + return { + arxivId: '2401.00001' as string | null, + title: 'Test Paper', + authors: ['Author One'], + abstract: 'Test abstract', + publishedDate: '2024-01-01T00:00:00Z', + updatedDate: '2024-01-01T00:00:00Z', + categories: ['cs.AI'], + arxivUrl: 'https://arxiv.org/abs/2401.00001', + pdfUrl: 'https://arxiv.org/pdf/2401.00001', + pdfPath: null, + fullText: null, + ...overrides, + }; +} + +function makeMockEmbedding(seed: number = 1): Float32Array { + const embedding = new Float32Array(256); + for (let i = 0; i < 256; i++) { + embedding[i] = Math.sin(seed * (i + 1)); + } + // L2 normalize + let norm = 0; + for (const v of 
embedding) norm += v * v; + norm = Math.sqrt(norm); + for (let i = 0; i < 256; i++) embedding[i] /= norm; + return embedding; +} + +beforeEach(() => { + dbPath = path.join(os.tmpdir(), `papershelf-vec-test-${Date.now()}.db`); + initDatabase(dbPath); +}); + +afterEach(() => { + closeDatabase(); + try { + fs.unlinkSync(dbPath); + fs.unlinkSync(`${dbPath}-wal`); + fs.unlinkSync(`${dbPath}-shm`); + } catch { + // cleanup best-effort + } +}); + +describe('vector store', () => { + it('inserts and searches chunks', () => { + const paper = insertPaper(makePaper()); + const db = getDb(); + const embedding = makeMockEmbedding(1); + + insertChunkWithEmbedding( + db, + { + paperId: paper.id, + chunkType: 'title_abstract', + chunkIndex: 0, + chunkText: 'Test paper about AI', + tokenCount: 5, + }, + embedding, + ); + + const results = vectorSearch(db, embedding, 10); + expect(results).toHaveLength(1); + expect(results[0].paperId).toBe(paper.id); + expect(results[0].chunkType).toBe('title_abstract'); + expect(results[0].distance).toBeCloseTo(0, 1); + }); + + it('deletes chunks for a paper', () => { + const paper = insertPaper(makePaper()); + const db = getDb(); + + insertChunkWithEmbedding( + db, + { + paperId: paper.id, + chunkType: 'title_abstract', + chunkIndex: 0, + chunkText: 'Test', + tokenCount: 1, + }, + makeMockEmbedding(1), + ); + + insertChunkWithEmbedding( + db, + { + paperId: paper.id, + chunkType: 'body', + chunkIndex: 1, + chunkText: 'Body text', + tokenCount: 2, + }, + makeMockEmbedding(2), + ); + + deleteChunksForPaper(db, paper.id); + + const results = vectorSearch(db, makeMockEmbedding(1), 10); + expect(results).toHaveLength(0); + }); + + it('tracks embedding status', () => { + const paper = insertPaper(makePaper()); + const db = getDb(); + + setEmbeddingStatus(db, paper.id, 'indexing'); + let stats = getIndexingStats(db); + expect(stats.totalPapers).toBe(1); + expect(stats.indexed).toBe(0); + + setEmbeddingStatus(db, paper.id, 'complete', undefined, 3); + 
stats = getIndexingStats(db); + expect(stats.indexed).toBe(1); + expect(stats.pending).toBe(0); + }); + + it('tracks failed status', () => { + const paper = insertPaper(makePaper()); + const db = getDb(); + + setEmbeddingStatus(db, paper.id, 'failed', 'Model load error'); + const stats = getIndexingStats(db); + expect(stats.failed).toBe(1); + }); + + it('finds papers needing embedding', () => { + const paper1 = insertPaper(makePaper({ arxivId: '1' })); + const paper2 = insertPaper(makePaper({ arxivId: '2' })); + const db = getDb(); + + setEmbeddingStatus(db, paper1.id, 'complete', undefined, 1); + // paper2 has no status — should need embedding + + const needsEmbedding = getPapersNeedingEmbedding(db); + expect(needsEmbedding).toHaveLength(1); + expect(needsEmbedding[0].id).toBe(paper2.id); + }); + + it('finds failed papers for re-indexing', () => { + const paper = insertPaper(makePaper()); + const db = getDb(); + + setEmbeddingStatus(db, paper.id, 'failed', 'Error'); + + const needsEmbedding = getPapersNeedingEmbedding(db); + expect(needsEmbedding).toHaveLength(1); + expect(needsEmbedding[0].id).toBe(paper.id); + }); + + it('returns correct indexing stats', () => { + const db = getDb(); + insertPaper(makePaper({ arxivId: '1' })); + insertPaper(makePaper({ arxivId: '2' })); + insertPaper(makePaper({ arxivId: '3' })); + + const paper1 = insertPaper(makePaper({ arxivId: '4' })); + setEmbeddingStatus(db, paper1.id, 'complete', undefined, 2); + + const stats = getIndexingStats(db); + expect(stats.totalPapers).toBe(4); + expect(stats.indexed).toBe(1); + expect(stats.pending).toBe(3); + expect(stats.failed).toBe(0); + }); + + it('returns embedding status for multiple papers', () => { + const db = getDb(); + const paper1 = insertPaper(makePaper({ arxivId: '1' })); + const paper2 = insertPaper(makePaper({ arxivId: '2' })); + const paper3 = insertPaper(makePaper({ arxivId: '3' })); + + setEmbeddingStatus(db, paper1.id, 'complete', undefined, 3); + setEmbeddingStatus(db, 
paper2.id, 'failed', 'Some error'); + // paper3 has no status row + + const statusMap = getEmbeddingStatusForPapers(db, [paper1.id, paper2.id, paper3.id]); + expect(statusMap.get(paper1.id)).toBe('complete'); + expect(statusMap.get(paper2.id)).toBe('failed'); + expect(statusMap.has(paper3.id)).toBe(false); + }); + + it('returns empty map for empty paper ids', () => { + const db = getDb(); + const statusMap = getEmbeddingStatusForPapers(db, []); + expect(statusMap.size).toBe(0); + }); + + it('does not include papers with no embedding_status row', () => { + const db = getDb(); + const paper = insertPaper(makePaper()); + const statusMap = getEmbeddingStatusForPapers(db, [paper.id]); + expect(statusMap.has(paper.id)).toBe(false); + }); + + it('cascade deletes chunks when paper is deleted', () => { + const paper = insertPaper(makePaper()); + const db = getDb(); + + insertChunkWithEmbedding( + db, + { + paperId: paper.id, + chunkType: 'title_abstract', + chunkIndex: 0, + chunkText: 'Test', + tokenCount: 1, + }, + makeMockEmbedding(1), + ); + setEmbeddingStatus(db, paper.id, 'complete', undefined, 1); + + // Delete the paper — paper_chunks should cascade delete + db.prepare('DELETE FROM papers WHERE id = ?').run(paper.id); + + const chunkRows = db.prepare('SELECT COUNT(*) as count FROM paper_chunks').get() as { count: number }; + expect(chunkRows.count).toBe(0); + + const statusRows = db.prepare('SELECT COUNT(*) as count FROM embedding_status').get() as { count: number }; + expect(statusRows.count).toBe(0); + }); +}); diff --git a/src/main/database.ts b/src/main/database.ts index 789ffbd..74fdd52 100644 --- a/src/main/database.ts +++ b/src/main/database.ts @@ -1,6 +1,13 @@ // Re-export from split db modules for backward compatibility -export type { LibraryStats } from './db/index'; +export type { + ChunkInsertData, + EmbeddingStatusValue, + IndexingStats, + LibraryStats, + SemanticSearchResult, + VectorSearchResult, +} from './db/index'; export { addPaperToCollection, 
addTagToPaper, @@ -8,6 +15,7 @@ export { closeDatabase, createCollection, createTag, + deleteChunksForPaper, deleteCollection, deletePaper, deleteTag, @@ -16,27 +24,34 @@ export { getCollections, getCollectionsForPaper, getCollectionsForPapers, + getIndexingStats, + getIndexingStatsFromDb, getLibraryStats, getPaperByArxivId, getPaperById, getPapers, + getPapersNeedingEmbedding, getTagByName, getTags, getTagsForPaper, getTagsForPapers, getToolStats, getViewerState, + hybridSearch, initDatabase, + insertChunkWithEmbedding, insertPaper, logToolCall, removePaperFromCollection, removeTagFromPaper, saveViewerState, searchLibrary, + setEmbeddingStatus, toggleFavorite, updateCollection, updatePaperMetadata, updatePaperPdf, updatePaperPdfPath, updateTag, + vectorSearch, } from './db/index'; diff --git a/src/main/db/connection.ts b/src/main/db/connection.ts index 1dcbb4a..3825865 100644 --- a/src/main/db/connection.ts +++ b/src/main/db/connection.ts @@ -4,6 +4,7 @@ import path from 'path'; import type { Collection, LibraryPaper, PaperSource, Tag } from '../../shared/types'; import { getDataDir } from '../paths'; import { runMigrations } from './migrations'; +import { createVectorSchema } from './vector-store'; let db: Database.Database; @@ -19,6 +20,13 @@ export function initDatabase(customPath?: string): void { createSchema(); runMigrations(db); createIndexes(); + + // Initialize vector store tables + try { + createVectorSchema(db); + } catch (err) { + console.warn('Vector store initialization failed:', err instanceof Error ? 
err.message : err); + } } export function closeDatabase(): void { @@ -171,7 +179,12 @@ export interface PaperRow { created_at: string; } -export function rowToLibraryPaper(row: PaperRow, collections: Collection[] = [], tags: Tag[] = []): LibraryPaper { +export function rowToLibraryPaper( + row: PaperRow, + collections: Collection[] = [], + tags: Tag[] = [], + embeddingStatus?: LibraryPaper['embeddingStatus'], +): LibraryPaper { return { id: row.id, arxivId: row.arxiv_id, @@ -189,6 +202,7 @@ export function rowToLibraryPaper(row: PaperRow, collections: Collection[] = [], fullText: row.full_text, isFavorite: row.is_favorite === 1, createdAt: row.created_at, + embeddingStatus, collections, tags, }; diff --git a/src/main/db/hybrid-search.ts b/src/main/db/hybrid-search.ts new file mode 100644 index 0000000..73fd7f0 --- /dev/null +++ b/src/main/db/hybrid-search.ts @@ -0,0 +1,122 @@ +import type { LibraryPaper } from '../../shared/types'; +import { getCollectionsForPapers } from './collections'; +import { getDb, type PaperRow, rowToLibraryPaper } from './connection'; +import { getTagsForPapers } from './tags'; +import { vectorSearch } from './vector-store'; + +export interface SemanticSearchResult { + paper: LibraryPaper; + score: number; + matchType: 'hybrid' | 'keyword' | 'semantic'; +} + +const RRF_K = 60; + +function hydratePaperRows(rows: PaperRow[]): LibraryPaper[] { + if (rows.length === 0) return []; + const paperIds = rows.map((r) => r.id); + const collectionsMap = getCollectionsForPapers(paperIds); + const tagsMap = getTagsForPapers(paperIds); + return rows.map((r) => rowToLibraryPaper(r, collectionsMap.get(r.id) ?? [], tagsMap.get(r.id) ?? [])); +} + +export function hybridSearch(query: string, queryEmbedding: Float32Array, limit: number = 20): SemanticSearchResult[] { + const db = getDb(); + const fetchLimit = limit * 2; + + // 1. 
FTS5 keyword search + const ftsRows = db + .prepare(` + SELECT p.* FROM papers p + JOIN papers_fts fts ON p.rowid = fts.rowid + WHERE papers_fts MATCH ? + ORDER BY rank + LIMIT ? + `) + .all(query, fetchLimit) as PaperRow[]; + + // 2. Vector search — deduplicate by paper_id (keep best chunk per paper) + const vecResults = vectorSearch(db, queryEmbedding, fetchLimit); + const bestVecByPaper = new Map(); + for (const result of vecResults) { + const existing = bestVecByPaper.get(result.paperId); + if (existing === undefined || result.distance < existing) { + bestVecByPaper.set(result.paperId, result.distance); + } + } + + // Sort vector results by distance (ascending = best first) + const sortedVecPaperIds = [...bestVecByPaper.entries()].sort((a, b) => a[1] - b[1]).map(([paperId]) => paperId); + + // 3. Build RRF scores + const rrfScores = new Map(); + const matchTypes = new Map>(); + + // FTS ranked list + for (let rank = 0; rank < ftsRows.length; rank++) { + const paperId = ftsRows[rank].id; + const score = 1 / (RRF_K + rank + 1); + rrfScores.set(paperId, (rrfScores.get(paperId) ?? 0) + score); + if (!matchTypes.has(paperId)) matchTypes.set(paperId, new Set()); + matchTypes.get(paperId)!.add('keyword'); + } + + // Vector ranked list + for (let rank = 0; rank < sortedVecPaperIds.length; rank++) { + const paperId = sortedVecPaperIds[rank]; + const score = 1 / (RRF_K + rank + 1); + rrfScores.set(paperId, (rrfScores.get(paperId) ?? 0) + score); + if (!matchTypes.has(paperId)) matchTypes.set(paperId, new Set()); + matchTypes.get(paperId)!.add('semantic'); + } + + // 4. Sort by combined RRF score + const sortedPaperIds = [...rrfScores.entries()] + .sort((a, b) => b[1] - a[1]) + .slice(0, limit) + .map(([paperId, score]) => ({ paperId, score })); + + if (sortedPaperIds.length === 0) return []; + + // 5. 
Fetch full paper rows + const placeholders = sortedPaperIds.map(() => '?').join(','); + const paperRows = db + .prepare(`SELECT * FROM papers WHERE id IN (${placeholders})`) + .all(...sortedPaperIds.map((r) => r.paperId)) as PaperRow[]; + + const papersById = new Map(); + for (const row of paperRows) { + papersById.set(row.id, row); + } + + // Hydrate only the rows we need + const rowsToHydrate = sortedPaperIds + .map((r) => papersById.get(r.paperId)) + .filter((r): r is PaperRow => r !== undefined); + + const hydratedPapers = hydratePaperRows(rowsToHydrate); + const hydratedById = new Map(); + for (const paper of hydratedPapers) { + hydratedById.set(paper.id, paper); + } + + // 6. Build results in sorted order + return sortedPaperIds + .map(({ paperId, score }) => { + const paper = hydratedById.get(paperId); + if (!paper) return null; + + const types = matchTypes.get(paperId) ?? new Set(); + let matchType: 'hybrid' | 'keyword' | 'semantic'; + if (types.has('keyword') && types.has('semantic')) { + matchType = 'hybrid'; + } else if (types.has('keyword')) { + matchType = 'keyword'; + } else { + matchType = 'semantic'; + } + + return { paper, score, matchType }; + }) + .filter((r): r is SemanticSearchResult => r !== null); +} diff --git a/src/main/db/index.ts b/src/main/db/index.ts index 6fbde9d..c7234c2 100644 --- a/src/main/db/index.ts +++ b/src/main/db/index.ts @@ -10,6 +10,8 @@ export { updateCollection, } from './collections'; export { closeDatabase, initDatabase } from './connection'; +export type { SemanticSearchResult } from './hybrid-search'; +export { hybridSearch } from './hybrid-search'; export type { LibraryStats } from './papers'; export { checkPapersInLibrary, @@ -38,4 +40,14 @@ export { updateTag, } from './tags'; export { getToolStats, logToolCall } from './tool-stats'; +export type { ChunkInsertData, EmbeddingStatusValue, IndexingStats, VectorSearchResult } from './vector-store'; +export { + deleteChunksForPaper, + getIndexingStats, + 
getIndexingStatsFromDb, + getPapersNeedingEmbedding, + insertChunkWithEmbedding, + setEmbeddingStatus, + vectorSearch, +} from './vector-store'; export { getViewerState, saveViewerState } from './viewer-state'; diff --git a/src/main/db/papers.ts b/src/main/db/papers.ts index fd4b530..5242d35 100644 --- a/src/main/db/papers.ts +++ b/src/main/db/papers.ts @@ -3,6 +3,7 @@ import { DataChangeEvent, eventEmitter } from '../event-emitter'; import { getCollectionsForPaper, getCollectionsForPapers } from './collections'; import { generateId, getDb, type PaperRow, rowToLibraryPaper, serializeArray } from './connection'; import { getTagsForPaper, getTagsForPapers } from './tags'; +import { getEmbeddingStatusForPapers } from './vector-store'; function hydratePaperRows(rows: PaperRow[]): LibraryPaper[] { if (rows.length === 0) return []; @@ -10,8 +11,16 @@ function hydratePaperRows(rows: PaperRow[]): LibraryPaper[] { const paperIds = rows.map((row) => row.id); const collectionsMap = getCollectionsForPapers(paperIds); const tagsMap = getTagsForPapers(paperIds); - - return rows.map((row) => rowToLibraryPaper(row, collectionsMap.get(row.id) ?? [], tagsMap.get(row.id) ?? [])); + const embeddingStatusMap = getEmbeddingStatusForPapers(getDb(), paperIds); + + return rows.map((row) => + rowToLibraryPaper( + row, + collectionsMap.get(row.id) ?? [], + tagsMap.get(row.id) ?? [], + embeddingStatusMap.get(row.id) ?? 
'pending', + ), + ); } export function insertPaper(paper: { diff --git a/src/main/db/vector-store.ts b/src/main/db/vector-store.ts new file mode 100644 index 0000000..6dcb025 --- /dev/null +++ b/src/main/db/vector-store.ts @@ -0,0 +1,180 @@ +import type Database from 'better-sqlite3'; +import { generateId, getDb } from './connection'; + +const EMBEDDING_DIMS = 256; + +export function createVectorSchema(db: Database.Database): void { + db.exec(` + CREATE TABLE IF NOT EXISTS paper_chunks ( + id TEXT PRIMARY KEY, + paper_id TEXT NOT NULL REFERENCES papers(id) ON DELETE CASCADE, + chunk_type TEXT NOT NULL, + chunk_index INTEGER NOT NULL, + chunk_text TEXT NOT NULL, + token_count INTEGER, + embedding BLOB, + created_at TEXT NOT NULL DEFAULT (datetime('now')) + ); + + CREATE INDEX IF NOT EXISTS idx_paper_chunks_paper_id ON paper_chunks(paper_id); + + CREATE TABLE IF NOT EXISTS embedding_status ( + paper_id TEXT PRIMARY KEY REFERENCES papers(id) ON DELETE CASCADE, + status TEXT NOT NULL DEFAULT 'pending', + error_message TEXT, + chunk_count INTEGER DEFAULT 0, + updated_at TEXT NOT NULL DEFAULT (datetime('now')) + ); + `); +} + +export interface ChunkInsertData { + paperId: string; + chunkType: string; + chunkIndex: number; + chunkText: string; + tokenCount: number; +} + +export function insertChunkWithEmbedding( + db: Database.Database, + chunk: ChunkInsertData, + embedding: Float32Array, +): string { + const chunkId = generateId(); + const embeddingBlob = Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength); + + db.prepare(` + INSERT INTO paper_chunks (id, paper_id, chunk_type, chunk_index, chunk_text, token_count, embedding) + VALUES (?, ?, ?, ?, ?, ?, ?) 
+ `).run(chunkId, chunk.paperId, chunk.chunkType, chunk.chunkIndex, chunk.chunkText, chunk.tokenCount, embeddingBlob); + + return chunkId; +} + +export function deleteChunksForPaper(db: Database.Database, paperId: string): void { + db.prepare('DELETE FROM paper_chunks WHERE paper_id = ?').run(paperId); +} + +export type EmbeddingStatusValue = 'pending' | 'indexing' | 'complete' | 'failed'; + +export function setEmbeddingStatus( + db: Database.Database, + paperId: string, + status: EmbeddingStatusValue, + errorMessage?: string, + chunkCount?: number, +): void { + db.prepare(` + INSERT INTO embedding_status (paper_id, status, error_message, chunk_count, updated_at) + VALUES (?, ?, ?, ?, datetime('now')) + ON CONFLICT(paper_id) DO UPDATE SET + status = excluded.status, + error_message = excluded.error_message, + chunk_count = COALESCE(excluded.chunk_count, chunk_count), + updated_at = datetime('now') + `).run(paperId, status, errorMessage ?? null, chunkCount ?? 0); +} + +export interface VectorSearchResult { + chunkId: string; + paperId: string; + distance: number; + chunkText: string; + chunkType: string; +} + +function cosineSimilarity(a: Float32Array, b: Float32Array): number { + let dot = 0; + for (let i = 0; i < a.length; i++) { + dot += a[i] * b[i]; + } + // Vectors are already L2-normalized, so dot product = cosine similarity + return dot; +} + +export function vectorSearch(db: Database.Database, queryEmbedding: Float32Array, limit: number): VectorSearchResult[] { + const rows = db + .prepare('SELECT id, paper_id, chunk_text, chunk_type, embedding FROM paper_chunks WHERE embedding IS NOT NULL') + .all() as { + id: string; + paper_id: string; + chunk_text: string; + chunk_type: string; + embedding: Buffer; + }[]; + + const scored = rows.map((row) => { + const embeddingArray = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, EMBEDDING_DIMS); + const similarity = cosineSimilarity(queryEmbedding, embeddingArray); + // Convert similarity to distance 
(lower = more similar) for compatibility + const distance = 1 - similarity; + return { + chunkId: row.id, + paperId: row.paper_id, + distance, + chunkText: row.chunk_text, + chunkType: row.chunk_type, + }; + }); + + scored.sort((a, b) => a.distance - b.distance); + return scored.slice(0, limit); +} + +export function getEmbeddingStatusForPapers( + db: Database.Database, + paperIds: string[], +): Map { + if (paperIds.length === 0) return new Map(); + + const placeholders = paperIds.map(() => '?').join(','); + const rows = db + .prepare(`SELECT paper_id, status FROM embedding_status WHERE paper_id IN (${placeholders})`) + .all(...paperIds) as { paper_id: string; status: string }[]; + + const map = new Map(); + for (const row of rows) { + map.set(row.paper_id, row.status as EmbeddingStatusValue); + } + return map; +} + +export function getPapersNeedingEmbedding(db: Database.Database): { id: string; title: string }[] { + return db + .prepare(` + SELECT p.id, p.title FROM papers p + LEFT JOIN embedding_status es ON p.id = es.paper_id + WHERE es.paper_id IS NULL + OR es.status IN ('pending', 'failed') + `) + .all() as { id: string; title: string }[]; +} + +export interface IndexingStats { + totalPapers: number; + indexed: number; + pending: number; + failed: number; +} + +export function getIndexingStats(db: Database.Database): IndexingStats { + const totalPapers = (db.prepare('SELECT COUNT(*) as count FROM papers').get() as { count: number }).count; + + const indexed = ( + db.prepare("SELECT COUNT(*) as count FROM embedding_status WHERE status = 'complete'").get() as { count: number } + ).count; + + const failed = ( + db.prepare("SELECT COUNT(*) as count FROM embedding_status WHERE status = 'failed'").get() as { count: number } + ).count; + + const pending = totalPapers - indexed - failed; + + return { totalPapers, indexed, pending, failed }; +} + +// Convenience wrappers that use the default db connection +export function getIndexingStatsFromDb(): IndexingStats { + 
return getIndexingStats(getDb()); +} diff --git a/src/main/event-emitter.ts b/src/main/event-emitter.ts index 3ce12f7..83f7f26 100644 --- a/src/main/event-emitter.ts +++ b/src/main/event-emitter.ts @@ -7,6 +7,8 @@ export enum DataChangeEvent { ANNOTATIONS_CHANGED = 'annotations:changed', IMPORT_PROGRESS = 'import:progress', METADATA_RESOLUTION_PROGRESS = 'metadata:resolution-progress', + EMBEDDING_PROGRESS = 'embedding:progress', + INDEXING_PROGRESS = 'indexing:progress', } class AppEventEmitter extends EventEmitter {} diff --git a/src/main/index.ts b/src/main/index.ts index 723eefc..5056d79 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -109,6 +109,17 @@ import('electron').then(({ app, BrowserWindow, Menu }) => { }); } + // Trigger background semantic indexing after startup + setTimeout(async () => { + try { + const { resetStaleIndexingPapers, indexAllPapers } = await import('./services/indexing-service.js'); + resetStaleIndexingPapers(); + await indexAllPapers(); + } catch (err) { + console.warn('Background indexing failed:', err instanceof Error ? 
err.message : err); + } + }, 5000); + app.on('activate', () => { if (BrowserWindow.getAllWindows().length === 0) { createWindow(); @@ -116,6 +127,11 @@ import('electron').then(({ app, BrowserWindow, Menu }) => { }); }); + app.on('will-quit', async () => { + const { shutdownEmbeddingService } = await import('./services/embedding-service.js'); + shutdownEmbeddingService(); + }); + app.on('window-all-closed', () => { if (process.platform !== 'darwin') { app.quit(); diff --git a/src/main/ipc-handlers.ts b/src/main/ipc-handlers.ts index 1b7a76b..0ca2acc 100644 --- a/src/main/ipc-handlers.ts +++ b/src/main/ipc-handlers.ts @@ -26,7 +26,9 @@ import { import { getDisabledTools, getToolModes, setDisabledTools, setMcpServerEnabled, setToolMode } from './mcp/tool-config'; import { TOOL_METADATA } from './mcp/tools'; import { fetchAndCachePdf, getDefaultPapersDir } from './pdf-processor'; +import { embedQuery } from './services/embedding-service'; import { importLocalPdfs } from './services/import-pdf'; +import { indexAllPapers, indexPaper } from './services/indexing-service'; import { addHighlightAnnotation, addStickyNoteAnnotation, @@ -190,6 +192,32 @@ export function registerIpcHandlers(): void { } }); + // --- Semantic Search & Indexing --- + ipcMain.handle('search:semantic', async (_event, query: string) => { + try { + const queryEmbedding = await embedQuery(query); + return db.hybridSearch(query, queryEmbedding); + } catch { + // Fallback to FTS-only on embedding failure + const papers = db.searchLibrary(query); + return papers.map((paper) => ({ paper, score: 1, matchType: 'keyword' as const })); + } + }); + + ipcMain.handle('indexing:stats', () => { + return db.getIndexingStatsFromDb(); + }); + + ipcMain.handle('indexing:reindexAll', () => { + indexAllPapers().catch((err) => { + console.warn('Reindex all failed:', err); + }); + }); + + ipcMain.handle('indexing:reindexPaper', async (_event, paperId: string) => { + await indexPaper(paperId); + }); + // --- Collections --- 
ipcMain.handle('collections:list', () => { return db.getCollections(); @@ -520,4 +548,16 @@ export function registerIpcHandlers(): void { window.webContents.send('data:metadata-resolution-progress', progress); }); }); + + eventEmitter.on(DataChangeEvent.EMBEDDING_PROGRESS, (progress) => { + BrowserWindow.getAllWindows().forEach((window) => { + window.webContents.send('data:embedding-progress', progress); + }); + }); + + eventEmitter.on(DataChangeEvent.INDEXING_PROGRESS, (progress) => { + BrowserWindow.getAllWindows().forEach((window) => { + window.webContents.send('data:indexing-progress', progress); + }); + }); } diff --git a/src/main/mcp/tools/index.ts b/src/main/mcp/tools/index.ts index c24f1ce..7d5d903 100644 --- a/src/main/mcp/tools/index.ts +++ b/src/main/mcp/tools/index.ts @@ -12,7 +12,7 @@ export { resolveCollectionId, resolvePaperId, resolveTagId } from './resolvers'; export const TOOL_METADATA: { name: string; description: string }[] = [ { name: 'search_arxiv', description: 'Search arXiv for papers by keyword, author, title, or topic' }, - { name: 'search_library', description: 'Full-text search across saved papers with optional filters' }, + { name: 'search_library', description: 'Hybrid keyword + semantic search across saved papers with optional filters' }, { name: 'get_paper', description: 'Get detailed info about a paper by ID' }, { name: 'list_papers', description: 'List papers in the library with optional collection/tag filters' }, { name: 'save_paper', description: 'Save an arXiv paper to the library' }, diff --git a/src/main/mcp/tools/search-tools.ts b/src/main/mcp/tools/search-tools.ts index eb1ff13..3a238eb 100644 --- a/src/main/mcp/tools/search-tools.ts +++ b/src/main/mcp/tools/search-tools.ts @@ -3,6 +3,7 @@ import { z } from 'zod'; import { ARXIV_CATEGORIES } from '../../arxiv/categories'; import { searchArxiv } from '../../arxiv-client'; import * as db from '../../database'; +import { embedQuery } from '../../services/embedding-service'; 
import { formatPaper, resolveCollectionId, resolveTagId } from './resolvers'; export function registerSearchTools(server: McpServer): void { @@ -65,7 +66,7 @@ export function registerSearchTools(server: McpServer): void { 'search_library', { description: - 'Full-text search across papers saved in the PaperShelf library. Searches titles, abstracts, authors, and extracted PDF text. Optionally filter results by collection, tag, or favorites.', + 'Full-text search across papers saved in the PaperShelf library. Searches titles, abstracts, authors, and extracted PDF text. Optionally filter results by collection, tag, or favorites. Supports keyword, semantic (vector), or hybrid search modes.', inputSchema: { query: z .string() @@ -73,10 +74,29 @@ export function registerSearchTools(server: McpServer): void { collection: z.string().optional().describe('Filter by collection (ID or name)'), tag: z.string().optional().describe('Filter by tag (ID or name)'), favorites_only: z.boolean().optional().default(false).describe('Only return favorited papers'), + mode: z + .enum(['keyword', 'semantic', 'hybrid']) + .default('hybrid') + .describe('Search mode: keyword (FTS5), semantic (vector), or hybrid (both combined via RRF)'), }, }, - async ({ query, collection, tag, favorites_only }) => { - let papers = db.searchLibrary(query); + async ({ query, collection, tag, favorites_only, mode }) => { + let papers: db.SemanticSearchResult[] | null = null; + let plainPapers: ReturnType | null = null; + + if (mode === 'keyword') { + plainPapers = db.searchLibrary(query); + } else { + try { + const queryEmbedding = await embedQuery(query); + papers = db.hybridSearch(query, queryEmbedding); + } catch { + plainPapers = db.searchLibrary(query); + } + } + + // Normalize to LibraryPaper[] for filtering + let filteredPapers = papers ? papers.map((r) => r.paper) : (plainPapers ?? 
[]); if (collection) { const resolved = resolveCollectionId(collection); @@ -84,7 +104,7 @@ export function registerSearchTools(server: McpServer): void { return { content: [{ type: 'text' as const, text: `Collection not found: ${collection}` }] }; } const collectionId = resolved.id; - papers = papers.filter((p) => p.collections.some((c) => c.id === collectionId)); + filteredPapers = filteredPapers.filter((p) => p.collections.some((c) => c.id === collectionId)); } if (tag) { @@ -93,18 +113,18 @@ export function registerSearchTools(server: McpServer): void { return { content: [{ type: 'text' as const, text: `Tag not found: ${tag}` }] }; } const tagId = resolved.id; - papers = papers.filter((p) => p.tags.some((t) => t.id === tagId)); + filteredPapers = filteredPapers.filter((p) => p.tags.some((t) => t.id === tagId)); } if (favorites_only) { - papers = papers.filter((p) => p.isFavorite); + filteredPapers = filteredPapers.filter((p) => p.isFavorite); } - if (papers.length === 0) { + if (filteredPapers.length === 0) { return { content: [{ type: 'text' as const, text: 'No matching papers in library.' }] }; } - const text = papers.map((p, i) => `### ${i + 1}. ${formatPaper(p)}`).join('\n\n---\n\n'); + const text = filteredPapers.map((p, i) => `### ${i + 1}. 
${formatPaper(p)}`).join('\n\n---\n\n'); return { content: [{ type: 'text' as const, text }] }; }, ); diff --git a/src/main/preload.ts b/src/main/preload.ts index a62e975..a2f31af 100644 --- a/src/main/preload.ts +++ b/src/main/preload.ts @@ -24,6 +24,10 @@ const api: ElectronAPI = { toggleFavorite: (id) => ipcRenderer.invoke('papers:toggleFavorite', id), checkPapersInLibrary: (arxivIds) => ipcRenderer.invoke('papers:checkInLibrary', arxivIds), searchLibrary: (query) => ipcRenderer.invoke('papers:search', query), + semanticSearch: (query) => ipcRenderer.invoke('search:semantic', query), + getIndexingStats: () => ipcRenderer.invoke('indexing:stats'), + reindexAllPapers: () => ipcRenderer.invoke('indexing:reindexAll'), + reindexPaper: (paperId) => ipcRenderer.invoke('indexing:reindexPaper', paperId), importLocalPdfs: () => ipcRenderer.invoke('papers:importLocal'), importFiles: (filePaths) => ipcRenderer.invoke('papers:importFiles', filePaths), getPathForFile: (file) => webUtils.getPathForFile(file), @@ -115,6 +119,18 @@ const api: ElectronAPI = { return () => ipcRenderer.removeListener('data:metadata-resolution-progress', handler); }, + onEmbeddingProgress: (callback) => { + const handler = (_event: unknown, progress: Parameters[0]) => callback(progress); + ipcRenderer.on('data:embedding-progress', handler); + return () => ipcRenderer.removeListener('data:embedding-progress', handler); + }, + + onIndexingProgress: (callback) => { + const handler = (_event: unknown, progress: Parameters[0]) => callback(progress); + ipcRenderer.on('data:indexing-progress', handler); + return () => ipcRenderer.removeListener('data:indexing-progress', handler); + }, + // App Updates getAppVersion: () => ipcRenderer.invoke('app:getVersion'), checkForUpdates: () => ipcRenderer.invoke('updater:check'), diff --git a/src/main/services/chunker.ts b/src/main/services/chunker.ts new file mode 100644 index 0000000..3a3868a --- /dev/null +++ b/src/main/services/chunker.ts @@ -0,0 +1,98 @@ +export 
interface TextChunk {
  /** `title_abstract` for the leading title/abstract chunk, `body` for full-text chunks. */
  chunkType: 'title_abstract' | 'body';
  /** Position of the chunk within its paper; body chunks are numbered from 1. */
  chunkIndex: number;
  /** Trimmed chunk text. */
  text: string;
  /** Rough token count derived from the character length (see `estimateTokens`). */
  estimatedTokens: number;
}

// Heuristic used to convert between characters and tokens.
const CHARS_PER_TOKEN = 4;
const TARGET_CHUNK_TOKENS = 1500;
const TARGET_CHUNK_CHARS = TARGET_CHUNK_TOKENS * CHARS_PER_TOKEN;
// Consecutive body chunks share this fraction of the target chunk size.
const OVERLAP_FRACTION = 0.1;
const OVERLAP_CHARS = Math.round(TARGET_CHUNK_CHARS * OVERLAP_FRACTION);

/** Estimates the token count of `text` as `ceil(length / CHARS_PER_TOKEN)`. */
function estimateTokens(text: string): number {
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}

/**
 * Moves `index` back by one code unit when it falls between the two halves of a
 * UTF-16 surrogate pair, so that cutting `text` at the returned index never
 * produces a lone surrogate. Indices outside `(0, text.length)` are returned as-is.
 */
function alignToCodePoint(text: string, index: number): number {
  if (index <= 0 || index >= text.length) return index;
  const before = text.charCodeAt(index - 1);
  const after = text.charCodeAt(index);
  const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  return splitsPair ? index - 1 : index;
}

/**
 * Picks where to end a chunk within the first `maxLength` characters of `text`.
 *
 * Preference order: paragraph break, line break, sentence break (". "). A break
 * is only accepted when it lies in the second half of the window; otherwise the
 * text is cut at `maxLength`, backed off by one code unit if that cut would
 * split a surrogate pair.
 *
 * @returns The exclusive end index of the chunk, relative to the start of `text`.
 */
function findBreakPoint(text: string, maxLength: number): number {
  const slice = text.slice(0, maxLength);
  const minBreak = maxLength * 0.5;

  // Paragraph break
  const paragraphBreak = slice.lastIndexOf('\n\n');
  if (paragraphBreak > minBreak) return paragraphBreak + 2;

  // Line break
  const lineBreak = slice.lastIndexOf('\n');
  if (lineBreak > minBreak) return lineBreak + 1;

  // Sentence break
  const sentenceBreak = slice.lastIndexOf('. ');
  if (sentenceBreak > minBreak) return sentenceBreak + 2;

  // Hard cut — never between the halves of a surrogate pair
  return alignToCodePoint(text, maxLength);
}

/** Builds a body chunk from `rawText`, or returns `null` when it is blank after trimming. */
function toBodyChunk(rawText: string, chunkIndex: number): TextChunk | null {
  const chunkText = rawText.trim();
  if (chunkText.length === 0) return null;
  return {
    chunkType: 'body',
    chunkIndex,
    text: chunkText,
    estimatedTokens: estimateTokens(chunkText),
  };
}

/**
 * Splits `text` into overlapping body chunks of roughly `TARGET_CHUNK_CHARS`
 * characters each.
 *
 * Blank chunks are skipped and do not consume a chunk index. Once the remaining
 * text is at most 1.2x the target size it is emitted as a single final chunk
 * instead of being split again.
 *
 * @returns Body chunks in document order, with `chunkIndex` starting at 1.
 */
function splitBodyText(text: string): TextChunk[] {
  const chunks: TextChunk[] = [];
  let offset = 0;
  let chunkIndex = 1; // body chunks start at 1 (0 is title_abstract)

  while (offset < text.length) {
    const remaining = text.length - offset;

    if (remaining <= TARGET_CHUNK_CHARS * 1.2) {
      // Last chunk — take everything remaining
      const lastChunk = toBodyChunk(text.slice(offset), chunkIndex);
      if (lastChunk) chunks.push(lastChunk);
      break;
    }

    // Only the next TARGET_CHUNK_CHARS characters can contain the break; one extra
    // code unit is included so the hard cut can see whether it would split a pair.
    const window = text.slice(offset, offset + TARGET_CHUNK_CHARS + 1);
    const breakPoint = findBreakPoint(window, TARGET_CHUNK_CHARS);

    const chunk = toBodyChunk(text.slice(offset, offset + breakPoint), chunkIndex);
    if (chunk) {
      chunks.push(chunk);
      chunkIndex++;
    }

    // Advance with overlap, keeping the next chunk from starting on a low surrogate.
    // breakPoint is always well above OVERLAP_CHARS, so the offset strictly increases.
    offset = alignToCodePoint(text, offset + breakPoint - OVERLAP_CHARS);
  }

  return chunks;
}
+ +export function chunkPaper(title: string, abstract: string, fullText: string | null): TextChunk[] { + const chunks: TextChunk[] = []; + + // Chunk 0: title + abstract (always present) + const titleAbstractText = abstract ? `${title}\n\n${abstract}` : title; + chunks.push({ + chunkType: 'title_abstract', + chunkIndex: 0, + text: titleAbstractText, + estimatedTokens: estimateTokens(titleAbstractText), + }); + + // Body chunks from full text + if (fullText && fullText.trim().length > 0) { + const bodyChunks = splitBodyText(fullText.trim()); + chunks.push(...bodyChunks); + } + + return chunks; +} diff --git a/src/main/services/embedding-service.ts b/src/main/services/embedding-service.ts new file mode 100644 index 0000000..907b646 --- /dev/null +++ b/src/main/services/embedding-service.ts @@ -0,0 +1,284 @@ +import type { ChildProcess } from 'child_process'; +import { execSync, fork } from 'child_process'; +import crypto from 'crypto'; +import path from 'path'; +import { DataChangeEvent, eventEmitter } from '../event-emitter'; +import { getDataDir } from '../paths'; + +let child: ChildProcess | null = null; +let modelLoaded = false; +let loadingPromise: Promise | null = null; + +type PendingRequest = { + resolve: (value: unknown) => void; + reject: (reason: Error) => void; +}; + +const pendingRequests = new Map(); + +// Serial queue: ensures only one embedding request is in-flight at a time +let queueTail: Promise = Promise.resolve(); + +function enqueue(fn: () => Promise): Promise { + const result = queueTail.then(fn, fn); + queueTail = result.then( + () => {}, + () => {}, + ); + return result; +} + +function getModelCacheDir(): string { + return path.join(getDataDir(), 'models'); +} + +function getNodeBinaryPath(): string { + const candidates = ['/opt/homebrew/bin/node', '/usr/local/bin/node', '/usr/bin/node']; + + try { + const nodePath = execSync('which node', { encoding: 'utf-8' }).trim(); + if (nodePath) return nodePath; + } catch { + // which failed, try known 
paths + } + + for (const candidate of candidates) { + try { + execSync(`${candidate} --version`, { encoding: 'utf-8' }); + return candidate; + } catch { + // not found, try next + } + } + + throw new Error('Could not find system Node.js binary. Install Node.js to enable semantic search.'); +} + +function getWorkerScriptPath(): string { + const scriptPath = path.join(__dirname, 'embedding-worker.js'); + // In packaged app, the worker must be outside .asar + return scriptPath.replace('.asar', '.asar.unpacked'); +} + +function generateRequestId(): string { + return crypto.randomUUID(); +} + +function rejectAllPending(error: Error): void { + if (pendingRequests.size > 0) { + console.warn(`[embedding-service] Rejecting ${pendingRequests.size} pending requests: ${error.message}`); + } + for (const [id, request] of pendingRequests) { + request.reject(error); + pendingRequests.delete(id); + } +} + +function spawnChild(): ChildProcess { + const nodeBinary = getNodeBinaryPath(); + const workerScript = getWorkerScriptPath(); + + console.log(`[embedding-service] Spawning worker: ${nodeBinary} ${workerScript}`); + + const childProcess = fork(workerScript, [], { + execPath: nodeBinary, + stdio: ['pipe', 'pipe', 'pipe', 'ipc'], + env: { + ...process.env, + ELECTRON_RUN_AS_NODE: '1', + }, + }); + + childProcess.on('message', (message: { type: string; id?: string; [key: string]: unknown }) => { + switch (message.type) { + case 'progress': { + eventEmitter.emit(DataChangeEvent.EMBEDDING_PROGRESS, { + status: message.status, + progress: message.progress, + file: message.file, + error: message.error, + }); + break; + } + + case 'loaded': { + console.log('[embedding-service] Model loaded successfully'); + modelLoaded = true; + break; + } + + case 'embeddings': { + const request = pendingRequests.get(message.id!); + if (request) { + pendingRequests.delete(message.id!); + const rawEmbeddings = message.embeddings as number[][]; + console.log(`[embedding-service] Got 
${rawEmbeddings.length} embeddings for request ${message.id}`); + const result = rawEmbeddings.map((emb) => Float32Array.from(emb)); + request.resolve(result); + } else { + console.warn(`[embedding-service] No pending request for embeddings response ${message.id}`); + } + break; + } + + case 'embedding': { + const request = pendingRequests.get(message.id!); + if (request) { + pendingRequests.delete(message.id!); + request.resolve(Float32Array.from(message.embedding as number[])); + } else { + console.warn(`[embedding-service] No pending request for embedding response ${message.id}`); + } + break; + } + + case 'error': { + console.error(`[embedding-service] Worker error for request ${message.id}: ${message.error}`); + if (message.id) { + const request = pendingRequests.get(message.id); + if (request) { + pendingRequests.delete(message.id); + request.reject(new Error(message.error as string)); + } + } + break; + } + } + }); + + childProcess.on('error', (err) => { + console.error('Embedding worker process error:', err.message); + rejectAllPending(err); + resetState(); + }); + + childProcess.on('exit', (code, signal) => { + if (code !== 0 && code !== null) { + console.warn(`Embedding worker exited with code ${code}, signal ${signal}`); + rejectAllPending(new Error(`Embedding worker crashed (code ${code})`)); + } + resetState(); + }); + + childProcess.stdout?.on('data', (data: Buffer) => { + console.log('[embedding-worker stdout]', data.toString().trimEnd()); + }); + + childProcess.stderr?.on('data', (data: Buffer) => { + console.warn('[embedding-worker stderr]', data.toString().trimEnd()); + }); + + return childProcess; +} + +function resetState(): void { + child = null; + modelLoaded = false; + loadingPromise = null; +} + +function ensureChild(): ChildProcess { + if (!child || !child.connected) { + child = spawnChild(); + } + return child; +} + +export async function ensureModelLoaded(): Promise { + if (modelLoaded && child?.connected) return; + + if (loadingPromise) 
{ + console.log('[embedding-service] Waiting on existing loadingPromise'); + await loadingPromise; + return; + } + + console.log('[embedding-service] Starting model load via child process'); + + loadingPromise = new Promise((resolve, reject) => { + try { + const childProcess = ensureChild(); + + const onLoaded = (message: { type: string }) => { + if (message.type === 'loaded') { + childProcess.removeListener('message', onLoaded); + resolve(); + } else if (message.type === 'error' && !modelLoaded) { + childProcess.removeListener('message', onLoaded); + const errorMessage = (message as { error?: string }).error || 'Failed to load model'; + reject(new Error(errorMessage)); + } + }; + + childProcess.on('message', onLoaded); + childProcess.send({ type: 'init', cacheDir: getModelCacheDir() }); + } catch (err) { + resetState(); + reject(err); + } + }).catch((err) => { + resetState(); + eventEmitter.emit(DataChangeEvent.EMBEDDING_PROGRESS, { + status: 'error', + error: err instanceof Error ? err.message : 'Failed to load model', + }); + throw err; + }); + + await loadingPromise; +} + +export function isModelLoaded(): boolean { + return modelLoaded; +} + +function sendEmbedDocuments(texts: string[]): Promise { + const id = generateRequestId(); + const childProcess = ensureChild(); + console.log(`[embedding-service] embedDocumentTexts: ${texts.length} texts, request ${id}`); + + return new Promise((resolve, reject) => { + pendingRequests.set(id, { + resolve: resolve as (value: unknown) => void, + reject, + }); + childProcess.send({ type: 'embedDocuments', id, texts }); + }); +} + +function sendEmbedQuery(query: string): Promise { + const id = generateRequestId(); + const childProcess = ensureChild(); + + return new Promise((resolve, reject) => { + pendingRequests.set(id, { + resolve: resolve as (value: unknown) => void, + reject, + }); + childProcess.send({ type: 'embedQuery', id, query }); + }); +} + +export async function embedDocumentTexts(texts: string[]): Promise { + 
await ensureModelLoaded(); + return enqueue(() => sendEmbedDocuments(texts)); +} + +export async function embedQuery(query: string): Promise { + await ensureModelLoaded(); + return enqueue(() => sendEmbedQuery(query)); +} + +export function shutdownEmbeddingService(): void { + if (child?.connected) { + child.send({ type: 'shutdown' }); + setTimeout(() => { + if (child && !child.killed) { + child.kill(); + } + resetState(); + }, 2000); + } else { + resetState(); + } +} diff --git a/src/main/services/embedding-worker.ts b/src/main/services/embedding-worker.ts new file mode 100644 index 0000000..9dbf5be --- /dev/null +++ b/src/main/services/embedding-worker.ts @@ -0,0 +1,146 @@ +/** + * Standalone Node.js child process for running embedding inference. + * + * Spawned via child_process.fork() with the system Node binary to avoid + * SIGTRAP crashes from onnxruntime-node inside Electron's main process. + */ + +// Stub 'sharp' before loading @huggingface/transformers. +// Transformers imports sharp for image processing which we don't need for text embeddings. +// In the packaged app, sharp isn't available in the asar.unpacked path. +// We use require() below (not import()) so transformers resolves to the CJS build, +// which goes through Module._resolveFilename where we can intercept. 
+import Module from 'node:module'; + +// biome-ignore lint/suspicious/noExplicitAny: accessing private _resolveFilename API +const ModuleInternal = Module as any; +const _originalResolveFilename = ModuleInternal._resolveFilename; +ModuleInternal._resolveFilename = function (...args: unknown[]) { + if (args[0] === 'sharp') { + return 'sharp-stub'; + } + return _originalResolveFilename.apply(this, args); +}; +require.cache['sharp-stub'] = { + id: 'sharp-stub', + filename: 'sharp-stub', + loaded: true, + exports: {}, + children: [], + paths: [], + path: '', + parent: null, + isPreloading: false, + require: require, +} as unknown as NodeModule; + +const MODEL_ID = 'nomic-ai/nomic-embed-text-v1.5'; +const EMBEDDING_DIMS = 256; +const SEARCH_PREFIX = 'search_query: '; +const DOCUMENT_PREFIX = 'search_document: '; + +// biome-ignore lint/suspicious/noExplicitAny: pipeline type from dynamic import +let pipeline: any = null; +let cacheDir: string | null = null; + +function truncateAndNormalize(embedding: number[]): number[] { + const truncated = embedding.slice(0, EMBEDDING_DIMS); + let norm = 0; + for (const val of truncated) { + norm += val * val; + } + norm = Math.sqrt(norm); + + const result = new Array(EMBEDDING_DIMS); + for (let i = 0; i < EMBEDDING_DIMS; i++) { + result[i] = norm > 0 ? 
truncated[i] / norm : 0; + } + return result; +} + +async function loadPipeline(): Promise { + // Use require() so transformers resolves to the CJS build (where our sharp stub works) + // eslint-disable-next-line @typescript-eslint/no-var-requires + const transformers = require('@huggingface/transformers'); + + send({ type: 'progress', status: 'downloading', progress: 0 }); + + pipeline = await transformers.pipeline('feature-extraction', MODEL_ID, { + dtype: 'q8' as const, + cache_dir: cacheDir!, + progress_callback: (progress: { status: string; progress?: number; file?: string }) => { + if (progress.status === 'progress') { + send({ type: 'progress', status: 'downloading', progress: progress.progress, file: progress.file }); + } + }, + }); + + send({ type: 'progress', status: 'ready' }); +} + +function send(message: Record): void { + if (process.send) { + process.send(message); + } +} + +async function handleMessage(message: { + type: string; + id?: string; + cacheDir?: string; + texts?: string[]; + query?: string; +}): Promise { + try { + switch (message.type) { + case 'init': { + cacheDir = message.cacheDir!; + await loadPipeline(); + send({ type: 'loaded' }); + break; + } + + case 'embedDocuments': { + const BATCH_SIZE = 4; + const prefixedTexts = message.texts!.map((t: string) => `${DOCUMENT_PREFIX}${t}`); + const allEmbeddings: number[][] = []; + + for (let i = 0; i < prefixedTexts.length; i += BATCH_SIZE) { + const batch = prefixedTexts.slice(i, i + BATCH_SIZE); + const output = await pipeline(batch, { pooling: 'mean', normalize: true }); + const rawEmbeddings = output.tolist() as number[][]; + for (const emb of rawEmbeddings) { + allEmbeddings.push(truncateAndNormalize(emb)); + } + } + + send({ type: 'embeddings', id: message.id, embeddings: allEmbeddings }); + break; + } + + case 'embedQuery': { + const prefixedQuery = `${SEARCH_PREFIX}${message.query}`; + const output = await pipeline([prefixedQuery], { pooling: 'mean', normalize: true }); + const 
rawEmbeddings = output.tolist() as number[][]; + const embedding = truncateAndNormalize(rawEmbeddings[0]); + send({ type: 'embedding', id: message.id, embedding }); + break; + } + + case 'shutdown': { + process.exit(0); + break; + } + + default: + send({ type: 'error', id: message.id, error: `Unknown message type: ${message.type}` }); + } + } catch (err) { + const errorMessage = err instanceof Error ? err.message : 'Unknown error'; + send({ type: 'error', id: message.id, error: errorMessage }); + } +} + +process.on('message', (message: { type: string; id?: string; cacheDir?: string; texts?: string[]; query?: string }) => { + handleMessage(message); +}); diff --git a/src/main/services/import-pdf.ts b/src/main/services/import-pdf.ts index f8114f7..6436162 100644 --- a/src/main/services/import-pdf.ts +++ b/src/main/services/import-pdf.ts @@ -5,6 +5,7 @@ import type { ImportBatchResult, LibraryPaper } from '../../shared/types'; import { insertPaper } from '../db/papers'; import { DataChangeEvent, eventEmitter } from '../event-emitter'; import { extractText, getPapersDir } from '../pdf-processor'; +import { indexAllPapers } from './indexing-service'; function titleFromFilename(filename: string): string { return path.basename(filename, '.pdf').replace(/[_-]/g, ' ').replace(/\s+/g, ' ').trim(); @@ -64,5 +65,12 @@ export async function importLocalPdfs(filePaths: string[]): Promise 0) { + indexAllPapers().catch((err) => { + console.warn('Background indexing failed:', err); + }); + } + return { imported, failed, totalCount: filePaths.length }; } diff --git a/src/main/services/indexing-service.ts b/src/main/services/indexing-service.ts new file mode 100644 index 0000000..0ac3cea --- /dev/null +++ b/src/main/services/indexing-service.ts @@ -0,0 +1,174 @@ +import { getDb } from '../db/connection'; +import { + deleteChunksForPaper, + getPapersNeedingEmbedding, + insertChunkWithEmbedding, + setEmbeddingStatus, +} from '../db/vector-store'; +import { DataChangeEvent, eventEmitter 
} from '../event-emitter'; +import { chunkPaper } from './chunker'; +import { embedDocumentTexts } from './embedding-service'; + +let indexingInProgress = false; + +export function isIndexingInProgress(): boolean { + return indexingInProgress; +} + +/** + * Reset any papers stuck in 'indexing' state back to 'pending'. + * This happens when the worker crashes mid-indexing. + */ +export function resetStaleIndexingPapers(): void { + const db = getDb(); + const stale = db.prepare("SELECT paper_id FROM embedding_status WHERE status = 'indexing'").all() as { + paper_id: string; + }[]; + + if (stale.length > 0) { + console.log(`[indexing-service] Resetting ${stale.length} papers stuck in 'indexing' state`); + const reset = db.prepare( + "UPDATE embedding_status SET status = 'pending', error_message = NULL WHERE status = 'indexing'", + ); + reset.run(); + } +} + +export async function indexPaper(paperId: string): Promise { + const db = getDb(); + + const paper = db.prepare('SELECT id, title, abstract, full_text FROM papers WHERE id = ?').get(paperId) as + | { id: string; title: string; abstract: string; full_text: string | null } + | undefined; + + if (!paper) return; + + try { + setEmbeddingStatus(db, paperId, 'indexing'); + + // Delete existing chunks for re-indexing + deleteChunksForPaper(db, paperId); + + const chunks = chunkPaper(paper.title, paper.abstract, paper.full_text); + console.log(`[indexing-service] Paper ${paperId}: ${chunks.length} chunks from "${paper.title.slice(0, 60)}"`); + + if (chunks.length === 0) { + console.log(`[indexing-service] Paper ${paperId}: no chunks, marking complete`); + setEmbeddingStatus(db, paperId, 'complete', undefined, 0); + return; + } + + const chunkTexts = chunks.map((c) => c.text); + const embeddings = await embedDocumentTexts(chunkTexts); + console.log(`[indexing-service] Paper ${paperId}: got ${embeddings.length} embeddings`); + + // Insert all chunks + embeddings in a transaction + const insertAll = db.transaction(() => { + 
for (let i = 0; i < chunks.length; i++) { + insertChunkWithEmbedding( + db, + { + paperId, + chunkType: chunks[i].chunkType, + chunkIndex: chunks[i].chunkIndex, + chunkText: chunks[i].text, + tokenCount: chunks[i].estimatedTokens, + }, + embeddings[i], + ); + } + }); + + insertAll(); + setEmbeddingStatus(db, paperId, 'complete', undefined, chunks.length); + console.log(`[indexing-service] Paper ${paperId}: indexed successfully (${chunks.length} chunks)`); + } catch (err) { + const errorMessage = err instanceof Error ? err.message : 'Unknown error'; + console.error(`[indexing-service] Paper ${paperId}: failed — ${errorMessage}`); + setEmbeddingStatus(db, paperId, 'failed', errorMessage); + throw err; + } +} + +export async function indexAllPapers(): Promise { + if (indexingInProgress) return; + + indexingInProgress = true; + const db = getDb(); + + try { + const papersToIndex = getPapersNeedingEmbedding(db); + if (papersToIndex.length === 0) { + console.log('[indexing-service] No papers need indexing'); + return; + } + + console.log(`[indexing-service] Starting indexing of ${papersToIndex.length} papers`); + + for (let i = 0; i < papersToIndex.length; i++) { + const paper = papersToIndex[i]; + + console.log( + `[indexing-service] Indexing paper ${i + 1}/${papersToIndex.length}: ${paper.id} "${paper.title.slice(0, 60)}"`, + ); + + eventEmitter.emit(DataChangeEvent.INDEXING_PROGRESS, { + paperId: paper.id, + paperTitle: paper.title, + current: i + 1, + total: papersToIndex.length, + status: 'indexing', + }); + + try { + await indexPaper(paper.id); + eventEmitter.emit(DataChangeEvent.INDEXING_PROGRESS, { + paperId: paper.id, + paperTitle: paper.title, + current: i + 1, + total: papersToIndex.length, + status: 'indexed', + }); + } catch (err) { + console.warn(`[indexing-service] Failed to index paper ${paper.id}:`, err instanceof Error ? 
err.message : err); + eventEmitter.emit(DataChangeEvent.INDEXING_PROGRESS, { + paperId: paper.id, + paperTitle: paper.title, + current: i + 1, + total: papersToIndex.length, + status: 'error', + error: err instanceof Error ? err.message : 'Unknown error', + }); + // Continue with next paper + } + } + + console.log(`[indexing-service] Indexing complete (${papersToIndex.length} papers processed)`); + + eventEmitter.emit(DataChangeEvent.INDEXING_PROGRESS, { + paperId: '', + paperTitle: '', + current: papersToIndex.length, + total: papersToIndex.length, + status: 'complete', + }); + } finally { + indexingInProgress = false; + } + + // Papers may have been added while we were indexing — pick them up. + // Only check for pending (new) papers, not failed ones (those need explicit re-index). + const remaining = db + .prepare( + `SELECT p.id FROM papers p + LEFT JOIN embedding_status es ON p.id = es.paper_id + WHERE es.paper_id IS NULL OR es.status = 'pending'`, + ) + .all(); + if (remaining.length > 0) { + console.log(`[indexing-service] Follow-up: ${remaining.length} new papers to index`); + indexAllPapers().catch((err) => { + console.warn('[indexing-service] Follow-up indexing failed:', err); + }); + } +} diff --git a/src/main/services/save-paper.ts b/src/main/services/save-paper.ts index 05b3526..2b8a4c7 100644 --- a/src/main/services/save-paper.ts +++ b/src/main/services/save-paper.ts @@ -1,6 +1,7 @@ import type { ArxivPaper, LibraryPaper } from '../../shared/types'; import * as db from '../database'; import { downloadAndExtractPdf } from '../pdf-processor'; +import { indexAllPapers } from './indexing-service'; export interface SavePaperFromArxivResult { paper: LibraryPaper; @@ -42,5 +43,10 @@ export async function savePaperFromArxivPaper(paper: ArxivPaper): Promise { + console.warn('Background indexing failed:', err); + }); + return { paper: saved, alreadyExisted: false, pdfDownloaded: pdfPath !== null, textExtracted: fullText !== null }; } diff --git 
a/src/renderer/__tests__/indexing-status-updates.test.ts b/src/renderer/__tests__/indexing-status-updates.test.ts new file mode 100644 index 0000000..f32773a --- /dev/null +++ b/src/renderer/__tests__/indexing-status-updates.test.ts @@ -0,0 +1,293 @@ +import { describe, expect, it } from 'vitest'; +import type { IndexingProgress } from '../../shared/types'; + +/** + * Tests for the indexing progress → UI state logic used in LibraryList. + * This mirrors the onIndexingProgress handler and isQueuedForIndexing derivation + * without needing a DOM environment or React Testing Library. + */ + +interface IndexingUIState { + activelyIndexingPaperId: string | null; + indexingActive: boolean; +} + +function applyProgressEvent(state: IndexingUIState, progress: IndexingProgress): IndexingUIState { + if (progress.status === 'indexing') { + return { activelyIndexingPaperId: progress.paperId, indexingActive: true }; + } + if (progress.status === 'complete') { + return { activelyIndexingPaperId: null, indexingActive: false }; + } + // 'indexed' or 'error' — clear the actively indexing paper but keep queue active + return { activelyIndexingPaperId: null, indexingActive: state.indexingActive }; +} + +function isQueuedForIndexing(state: IndexingUIState, paperId: string, embeddingStatus: string | undefined): boolean { + return state.indexingActive && embeddingStatus !== 'complete' && paperId !== state.activelyIndexingPaperId; +} + +function isActivelyIndexing(state: IndexingUIState, paperId: string): boolean { + return paperId === state.activelyIndexingPaperId; +} + +const initialState: IndexingUIState = { activelyIndexingPaperId: null, indexingActive: false }; + +describe('indexing progress state machine', () => { + it('sets actively indexing paper on indexing event', () => { + const state = applyProgressEvent(initialState, { + paperId: 'paper-1', + paperTitle: 'Paper 1', + current: 1, + total: 3, + status: 'indexing', + }); + + expect(state.activelyIndexingPaperId).toBe('paper-1'); 
+ expect(state.indexingActive).toBe(true); + }); + + it('clears actively indexing paper on indexed event but keeps queue active', () => { + let state = applyProgressEvent(initialState, { + paperId: 'paper-1', + paperTitle: 'Paper 1', + current: 1, + total: 3, + status: 'indexing', + }); + + state = applyProgressEvent(state, { + paperId: 'paper-1', + paperTitle: 'Paper 1', + current: 1, + total: 3, + status: 'indexed', + }); + + expect(state.activelyIndexingPaperId).toBeNull(); + expect(state.indexingActive).toBe(true); + }); + + it('clears all state on complete event', () => { + let state = applyProgressEvent(initialState, { + paperId: 'paper-1', + paperTitle: 'Paper 1', + current: 1, + total: 1, + status: 'indexing', + }); + + state = applyProgressEvent(state, { + paperId: '', + paperTitle: '', + current: 1, + total: 1, + status: 'complete', + }); + + expect(state.activelyIndexingPaperId).toBeNull(); + expect(state.indexingActive).toBe(false); + }); + + it('clears actively indexing paper on error event but keeps queue active', () => { + let state = applyProgressEvent(initialState, { + paperId: 'paper-1', + paperTitle: 'Paper 1', + current: 1, + total: 2, + status: 'indexing', + }); + + state = applyProgressEvent(state, { + paperId: 'paper-1', + paperTitle: 'Paper 1', + current: 1, + total: 2, + status: 'error', + error: 'Something broke', + }); + + expect(state.activelyIndexingPaperId).toBeNull(); + expect(state.indexingActive).toBe(true); + }); + + it('transitions through full 3-paper indexing sequence', () => { + let state: IndexingUIState = { ...initialState }; + + // Paper A: indexing + state = applyProgressEvent(state, { + paperId: 'A', + paperTitle: 'A', + current: 1, + total: 3, + status: 'indexing', + }); + expect(state).toEqual({ activelyIndexingPaperId: 'A', indexingActive: true }); + + // Paper A: indexed + state = applyProgressEvent(state, { + paperId: 'A', + paperTitle: 'A', + current: 1, + total: 3, + status: 'indexed', + }); + 
expect(state).toEqual({ activelyIndexingPaperId: null, indexingActive: true }); + + // Paper B: indexing + state = applyProgressEvent(state, { + paperId: 'B', + paperTitle: 'B', + current: 2, + total: 3, + status: 'indexing', + }); + expect(state).toEqual({ activelyIndexingPaperId: 'B', indexingActive: true }); + + // Paper B: indexed + state = applyProgressEvent(state, { + paperId: 'B', + paperTitle: 'B', + current: 2, + total: 3, + status: 'indexed', + }); + expect(state).toEqual({ activelyIndexingPaperId: null, indexingActive: true }); + + // Paper C: indexing + state = applyProgressEvent(state, { + paperId: 'C', + paperTitle: 'C', + current: 3, + total: 3, + status: 'indexing', + }); + expect(state).toEqual({ activelyIndexingPaperId: 'C', indexingActive: true }); + + // Paper C: indexed + state = applyProgressEvent(state, { + paperId: 'C', + paperTitle: 'C', + current: 3, + total: 3, + status: 'indexed', + }); + expect(state).toEqual({ activelyIndexingPaperId: null, indexingActive: true }); + + // Complete + state = applyProgressEvent(state, { + paperId: '', + paperTitle: '', + current: 3, + total: 3, + status: 'complete', + }); + expect(state).toEqual({ activelyIndexingPaperId: null, indexingActive: false }); + }); +}); + +describe('isQueuedForIndexing derivation', () => { + it('returns false when indexing is not active', () => { + const state: IndexingUIState = { activelyIndexingPaperId: null, indexingActive: false }; + expect(isQueuedForIndexing(state, 'paper-1', 'pending')).toBe(false); + }); + + it('returns false for the paper currently being indexed', () => { + const state: IndexingUIState = { activelyIndexingPaperId: 'paper-1', indexingActive: true }; + expect(isQueuedForIndexing(state, 'paper-1', 'pending')).toBe(false); + }); + + it('returns false for papers already complete', () => { + const state: IndexingUIState = { activelyIndexingPaperId: 'paper-1', indexingActive: true }; + expect(isQueuedForIndexing(state, 'paper-2', 'complete')).toBe(false); + 
}); + + it('returns true for pending papers while another is indexing', () => { + const state: IndexingUIState = { activelyIndexingPaperId: 'paper-1', indexingActive: true }; + expect(isQueuedForIndexing(state, 'paper-2', 'pending')).toBe(true); + }); + + it('returns true for failed papers while indexing is active', () => { + const state: IndexingUIState = { activelyIndexingPaperId: 'paper-1', indexingActive: true }; + expect(isQueuedForIndexing(state, 'paper-3', 'failed')).toBe(true); + }); + + it('returns true for papers with undefined status while indexing is active', () => { + const state: IndexingUIState = { activelyIndexingPaperId: 'paper-1', indexingActive: true }; + expect(isQueuedForIndexing(state, 'paper-4', undefined)).toBe(true); + }); +}); + +describe('badge visual state derivation', () => { + // Mirrors EmbeddingStatusBadge logic + type BadgeVisual = 'blue-pulsing' | 'blue-hollow' | 'green' | 'red' | 'gray'; + + function getBadgeVisual( + embeddingStatus: 'pending' | 'indexing' | 'complete' | 'failed', + isActivelyIndexingProp: boolean, + isQueuedProp: boolean, + ): BadgeVisual { + if (isActivelyIndexingProp) return 'blue-pulsing'; + if (isQueuedProp) return 'blue-hollow'; + switch (embeddingStatus) { + case 'complete': + return 'green'; + case 'failed': + return 'red'; + case 'indexing': + return 'blue-pulsing'; + case 'pending': + return 'gray'; + } + } + + it('shows blue-pulsing from DB status even when event state is cleared (React batching)', () => { + // React batches rapid events: by render time, activelyIndexingPaperId may be null + // but loadPapers returned embeddingStatus: 'indexing' from the DB + expect(getBadgeVisual('indexing', false, false)).toBe('blue-pulsing'); + }); + + it('shows blue-pulsing from event override before DB refresh', () => { + expect(getBadgeVisual('pending', true, false)).toBe('blue-pulsing'); + }); + + it('shows blue-hollow for queued papers', () => { + expect(getBadgeVisual('pending', false, 
true)).toBe('blue-hollow'); + }); + + it('shows green for complete papers', () => { + expect(getBadgeVisual('complete', false, false)).toBe('green'); + }); + + it('shows red for failed papers', () => { + expect(getBadgeVisual('failed', false, false)).toBe('red'); + }); + + it('shows gray for pending papers', () => { + expect(getBadgeVisual('pending', false, false)).toBe('gray'); + }); + + it('event override takes priority over DB status', () => { + // isActivelyIndexing from event arrives before loadPapers updates embeddingStatus + expect(getBadgeVisual('pending', true, false)).toBe('blue-pulsing'); + // isQueued takes priority over pending + expect(getBadgeVisual('pending', false, true)).toBe('blue-hollow'); + }); +}); + +describe('isActivelyIndexing derivation', () => { + it('returns true for the paper being indexed', () => { + const state: IndexingUIState = { activelyIndexingPaperId: 'paper-1', indexingActive: true }; + expect(isActivelyIndexing(state, 'paper-1')).toBe(true); + }); + + it('returns false for other papers', () => { + const state: IndexingUIState = { activelyIndexingPaperId: 'paper-1', indexingActive: true }; + expect(isActivelyIndexing(state, 'paper-2')).toBe(false); + }); + + it('returns false when no paper is indexing', () => { + const state: IndexingUIState = { activelyIndexingPaperId: null, indexingActive: false }; + expect(isActivelyIndexing(state, 'paper-1')).toBe(false); + }); +}); diff --git a/src/renderer/components/IndexNewButton.tsx b/src/renderer/components/IndexNewButton.tsx new file mode 100644 index 0000000..f3b9d11 --- /dev/null +++ b/src/renderer/components/IndexNewButton.tsx @@ -0,0 +1,56 @@ +import { useCallback, useEffect, useState } from 'react'; +import type { IndexingStats } from '../../shared/types'; + +export function IndexNewButton() { + const [stats, setStats] = useState(null); + const [isIndexing, setIsIndexing] = useState(false); + + const refreshStats = useCallback(async () => { + try { + const result = await 
window.electronAPI.getIndexingStats(); + setStats(result); + } catch { + // Vector store may not be available + } + }, []); + + useEffect(() => { + refreshStats(); + + const unsubIndex = window.electronAPI.onIndexingProgress((progress) => { + if (progress.status === 'indexing') { + setIsIndexing(true); + } else if (progress.status === 'complete') { + setIsIndexing(false); + refreshStats(); + } else if (progress.status === 'indexed' || progress.status === 'error') { + refreshStats(); + } + }); + + return unsubIndex; + }, [refreshStats]); + + const handleClick = useCallback(async () => { + setIsIndexing(true); + await window.electronAPI.reindexAllPapers(); + }, []); + + const pending = (stats?.pending ?? 0) + (stats?.failed ?? 0); + if (pending === 0 && !isIndexing) return null; + + return ( + + ); +} diff --git a/src/renderer/components/LibraryList.tsx b/src/renderer/components/LibraryList.tsx index a65af16..cc5d06c 100644 --- a/src/renderer/components/LibraryList.tsx +++ b/src/renderer/components/LibraryList.tsx @@ -1,4 +1,4 @@ -import { useEffect } from 'react'; +import { useEffect, useState } from 'react'; import type { PaperFilter } from '../../shared/types'; import { usePaperStore } from '../stores/paperStore'; import { toast } from '../stores/toastStore'; @@ -9,6 +9,8 @@ export function LibraryList() { const { sidebarView, selectedCollectionId, selectedTagId, sortBy, sortOrder } = useUIStore(); const { papers, selectedLibraryPaper, setSelectedLibraryPaper, loadPapers, loading, addTagToPaper, tags } = usePaperStore(); + const [activelyIndexingPaperId, setActivelyIndexingPaperId] = useState(null); + const [indexingActive, setIndexingActive] = useState(false); useEffect(() => { const filter: PaperFilter = { view: sidebarView as PaperFilter['view'], sortBy, sortOrder }; @@ -20,10 +22,27 @@ export function LibraryList() { } loadPapers(filter); - const unsubscribe = window.electronAPI.onPapersChanged(() => { + const unsubPapers = 
window.electronAPI.onPapersChanged(() => { loadPapers(filter); }); - return unsubscribe; + const unsubIndexing = window.electronAPI.onIndexingProgress((progress) => { + if (progress.status === 'indexing') { + setActivelyIndexingPaperId(progress.paperId); + setIndexingActive(true); + } else if (progress.status === 'complete') { + setActivelyIndexingPaperId(null); + setIndexingActive(false); + loadPapers(filter); + } else { + // 'indexed' or 'error' — clear active paper, refresh to pick up new DB status + setActivelyIndexingPaperId(null); + loadPapers(filter); + } + }); + return () => { + unsubPapers(); + unsubIndexing(); + }; // eslint-disable-next-line react-hooks/exhaustive-deps }, [sidebarView, selectedCollectionId, selectedTagId, sortBy, sortOrder, loadPapers]); @@ -77,6 +96,11 @@ export function LibraryList() { categories={paper.categories} isSelected={selectedLibraryPaper?.id === paper.id} isFavorite={paper.isFavorite} + embeddingStatus={paper.embeddingStatus} + isActivelyIndexing={paper.id === activelyIndexingPaperId} + isQueuedForIndexing={ + indexingActive && paper.embeddingStatus !== 'complete' && paper.id !== activelyIndexingPaperId + } onClick={() => { setSelectedLibraryPaper(paper); useUIStore.getState().setFocusedPaperIndex(index); diff --git a/src/renderer/components/PaperList.tsx b/src/renderer/components/PaperList.tsx index 7594d7d..3c3dce0 100644 --- a/src/renderer/components/PaperList.tsx +++ b/src/renderer/components/PaperList.tsx @@ -1,6 +1,7 @@ import { useSearch } from '../hooks/useSearch'; import { usePaperStore } from '../stores/paperStore'; import { useUIStore } from '../stores/uiStore'; +import { IndexNewButton } from './IndexNewButton'; import { LibraryList } from './LibraryList'; import { LibrarySearchResults } from './LibrarySearchResults'; import { NavigationHints } from './NavigationHints'; @@ -66,6 +67,7 @@ export function PaperList({ width }: { width: number }) { {getViewTitle(sidebarView, collections, tags, selectedCollectionId, 
selectedTagId)} + void; rightSlot?: React.ReactNode; paperId?: string; @@ -30,6 +33,51 @@ interface PaperListItemProps { paperIndex?: number; } +function EmbeddingStatusBadge({ + status, + isActivelyIndexing, + isQueued, +}: { + status: 'pending' | 'indexing' | 'complete' | 'failed'; + isActivelyIndexing?: boolean; + isQueued?: boolean; +}) { + if (isActivelyIndexing) { + return ( + + ); + } + + if (isQueued) { + return ( + + ); + } + + switch (status) { + case 'complete': + return ( + + ); + case 'failed': + return ; + case 'indexing': + return ( + + ); + case 'pending': + return ( + + ); + } +} + export function PaperListItem({ title, authors, @@ -38,6 +86,9 @@ export function PaperListItem({ isSelected, isFavorite, inLibrary, + embeddingStatus, + isActivelyIndexing, + isQueuedForIndexing, onClick, rightSlot, paperId, @@ -96,6 +147,13 @@ export function PaperListItem({

{truncateAuthors(authors)}

{formatDate(date)} + {embeddingStatus && ( + + )} {inLibrary && ( In Library diff --git a/src/renderer/hooks/useSearch.ts b/src/renderer/hooks/useSearch.ts index fd304e9..74f65aa 100644 --- a/src/renderer/hooks/useSearch.ts +++ b/src/renderer/hooks/useSearch.ts @@ -35,7 +35,8 @@ export function useSearch() { const results = await window.electronAPI.searchArxiv(query); setState((prev) => ({ ...prev, results, libraryResults: [], loading: false })); } else { - const libraryResults = await window.electronAPI.searchLibrary(query); + const semanticResults = await window.electronAPI.semanticSearch(query); + const libraryResults = semanticResults.map((r) => r.paper); setState((prev) => ({ ...prev, libraryResults, results: [], loading: false })); } } catch (err) { diff --git a/src/renderer/stores/paperStore.ts b/src/renderer/stores/paperStore.ts index 3b823e9..bf0695e 100644 --- a/src/renderer/stores/paperStore.ts +++ b/src/renderer/stores/paperStore.ts @@ -107,7 +107,8 @@ export const usePaperStore = create((set, get) => ({ // --- Papers --- loadPapers: async (filter) => { - set({ loading: true }); + const isInitialLoad = get().papers.length === 0; + if (isInitialLoad) set({ loading: true }); const papers = await window.electronAPI.getPapers(filter); set({ papers, loading: false }); }, diff --git a/src/shared/types.ts b/src/shared/types.ts index 63746ad..9570d99 100644 --- a/src/shared/types.ts +++ b/src/shared/types.ts @@ -29,6 +29,7 @@ export interface LibraryPaper { fullText: string | null; isFavorite: boolean; createdAt: string; + embeddingStatus?: 'pending' | 'indexing' | 'complete' | 'failed'; collections: Collection[]; tags: Tag[]; } @@ -103,6 +104,37 @@ export interface ViewerState { scrollLeft: number; } +// --- Semantic Search --- + +export interface SemanticSearchResult { + paper: LibraryPaper; + score: number; + matchType: 'hybrid' | 'keyword' | 'semantic'; +} + +export interface IndexingProgress { + paperId: string; + paperTitle: string; + current: number; + 
total: number; + status: 'indexing' | 'indexed' | 'complete' | 'error'; + error?: string; +} + +export interface EmbeddingProgress { + status: 'downloading' | 'loading' | 'ready' | 'error'; + progress?: number; + file?: string; + error?: string; +} + +export interface IndexingStats { + totalPapers: number; + indexed: number; + pending: number; + failed: number; +} + export type SortBy = 'created_at' | 'published_date' | 'title' | 'first_author'; export type SortOrder = 'asc' | 'desc'; @@ -187,6 +219,10 @@ export interface ElectronAPI { toggleFavorite: (id: string) => Promise; checkPapersInLibrary: (arxivIds: string[]) => Promise; searchLibrary: (query: string) => Promise; + semanticSearch: (query: string) => Promise; + getIndexingStats: () => Promise; + reindexAllPapers: () => Promise; + reindexPaper: (paperId: string) => Promise; importLocalPdfs: () => Promise; importFiles: (filePaths: string[]) => Promise; getPathForFile: (file: File) => string; @@ -281,6 +317,8 @@ export interface ElectronAPI { onAnnotationsChanged: (callback: () => void) => () => void; onImportProgress: (callback: (progress: ImportProgress) => void) => () => void; onMetadataResolutionProgress: (callback: (progress: MetadataResolutionProgress) => void) => () => void; + onEmbeddingProgress: (callback: (progress: EmbeddingProgress) => void) => () => void; + onIndexingProgress: (callback: (progress: IndexingProgress) => void) => () => void; } declare global {