diff --git a/.changeset/busy-sheep-burn.md b/.changeset/busy-sheep-burn.md new file mode 100644 index 00000000..ff44e517 --- /dev/null +++ b/.changeset/busy-sheep-burn.md @@ -0,0 +1,5 @@ +--- +"evalite": minor +--- + +Added forceRerunTriggers to the config to match Vitest's version. diff --git a/apps/evalite-docs/src/content/docs/api/cli.mdx b/apps/evalite-docs/src/content/docs/api/cli.mdx index 905e2a48..da5df10a 100644 --- a/apps/evalite-docs/src/content/docs/api/cli.mdx +++ b/apps/evalite-docs/src/content/docs/api/cli.mdx @@ -74,6 +74,29 @@ evalite watch path/to/eval.eval.ts **Note:** `--outputPath` is not supported in watch mode. +#### Watching Additional Files + +By default, `evalite watch` only triggers reruns when your `*.eval.ts` files, or the modules Vitest detects as their dependencies, change. + +If your evals depend on other files that Vitest can't automatically detect (e.g., prompt templates, external data files, or CLI build outputs), you can configure extra watch globs in `evalite.config.ts`: + +```ts +// evalite.config.ts +import { defineConfig } from "evalite/config"; + +export default defineConfig({ + forceRerunTriggers: [ + "src/**/*.ts", // helper / model code + "prompts/**/*", // prompt templates + "data/**/*.json", // test data + ], +}); +``` + +These globs are passed through to Vitest's [`forceRerunTriggers`](https://vitest.dev/config/#forcereruntriggers) option, so any change to a matching file will trigger a full eval rerun. + +> **Note:** Globs are resolved relative to the directory where you run evalite (the Evalite cwd). 
+ **Examples:** ```bash diff --git a/apps/evalite-docs/src/content/docs/api/define-config.mdx b/apps/evalite-docs/src/content/docs/api/define-config.mdx index 99b3f5a0..8c786c16 100644 --- a/apps/evalite-docs/src/content/docs/api/define-config.mdx +++ b/apps/evalite-docs/src/content/docs/api/define-config.mdx @@ -18,6 +18,9 @@ defineConfig(config: { maxConcurrency?: number; trialCount?: number; setupFiles?: string[]; + cache?: boolean; + viteConfig?: ViteUserConfig; + forceRerunTriggers?: string[]; }): Evalite.Config ``` @@ -167,6 +170,57 @@ export default defineConfig({ **Note:** `.env` files are loaded automatically via `dotenv/config` - no need to configure them here. +### `cache` + +**Type:** `boolean` + +**Default:** `true` + +Enable or disable caching of AI SDK model outputs. See [Vercel AI SDK caching](/tips/vercel-ai-sdk#caching) for details. + +```typescript +export default defineConfig({ + cache: false, // Disable cache entirely +}); +``` + +### `viteConfig` + +**Type:** `ViteUserConfig` + +Pass-through Vite/Vitest configuration options. This allows you to import and use your existing `vite.config.ts` explicitly. + +```typescript +import { defineConfig } from "evalite/config"; +import viteConfig from "./vite.config.ts"; + +export default defineConfig({ + viteConfig: viteConfig, +}); +``` + +**Note:** `testTimeout`, `maxConcurrency`, and `setupFiles` must be configured at the root level of `evalite.config.ts`, not in `viteConfig.test`. + +### `forceRerunTriggers` + +**Type:** `string[]` + +**Default:** Vitest's default `forceRerunTriggers` + +Extra file globs that trigger eval reruns in watch mode. This maps onto Vitest's [`forceRerunTriggers`](https://vitest.dev/config/#forcereruntriggers) option. 
+ +```typescript +export default defineConfig({ + forceRerunTriggers: [ + "src/**/*.ts", // helper / model code + "prompts/**/*", // prompt templates + "data/**/*.json", // test data + ], +}); +``` + +Useful when your evals depend on files that Vitest can't automatically detect as dependencies (e.g., prompt templates, external data files). + ## Complete Example ```typescript @@ -196,6 +250,12 @@ export default defineConfig({ // Setup setupFiles: ["./test-setup.ts"], + + // Caching + cache: true, + + // Watch mode triggers + forceRerunTriggers: ["src/**/*.ts", "prompts/**/*"], }); ``` diff --git a/apps/evalite-docs/src/content/docs/api/run-evalite.mdx b/apps/evalite-docs/src/content/docs/api/run-evalite.mdx index b9f3912f..7d2d5a76 100644 --- a/apps/evalite-docs/src/content/docs/api/run-evalite.mdx +++ b/apps/evalite-docs/src/content/docs/api/run-evalite.mdx @@ -15,6 +15,7 @@ runEvalite(opts: { outputPath?: string; hideTable?: boolean; storage?: Evalite.Storage; + forceRerunTriggers?: string[]; }): Promise ``` @@ -140,6 +141,23 @@ await runEvalite({ See [Storage](/api/storage) for more details. +### `opts.forceRerunTriggers` + +**Type:** `string[]` (optional) + +Extra file globs that trigger eval reruns in watch mode. This overrides any `forceRerunTriggers` setting in `evalite.config.ts`. + +```typescript +await runEvalite({ + mode: "watch-for-file-changes", + forceRerunTriggers: ["src/**/*.ts", "prompts/**/*", "data/**/*.json"], +}); +``` + +This is useful when your evals depend on files that aren't automatically detected as dependencies (e.g., prompt templates, external data files). + +> **Tip:** `forceRerunTriggers` globs are evaluated relative to the `cwd` you pass (or `process.cwd()` if unspecified). 
+ ## Usage Examples ### Basic CI/CD Script diff --git a/apps/evalite-docs/src/content/docs/guides/configuration.mdx b/apps/evalite-docs/src/content/docs/guides/configuration.mdx index ca4769a1..7a6c1780 100644 --- a/apps/evalite-docs/src/content/docs/guides/configuration.mdx +++ b/apps/evalite-docs/src/content/docs/guides/configuration.mdx @@ -36,6 +36,7 @@ export default defineConfig({ - **`setupFiles`**: Array of file paths to run before tests (e.g., for loading environment variables). - **`cache`**: Enable or disable caching of AI SDK model outputs. Default is true. See [Vercel AI SDK](/tips/vercel-ai-sdk#caching) for details. - **`viteConfig`**: Pass through Vite/Vitest configuration options. This allows you to import and use your existing vite.config.ts explicitly. +- **`forceRerunTriggers`**: Extra file globs that trigger eval reruns in watch mode. This maps onto Vitest's `test.forceRerunTriggers` option (globs resolved relative to the directory where you run Evalite). See [Watching Additional Files](/api/cli#watching-additional-files). 
## Important Configuration Options diff --git a/packages/evalite-tests/tests/fixtures/config-watchfiles/evalite.config.ts b/packages/evalite-tests/tests/fixtures/config-watchfiles/evalite.config.ts new file mode 100644 index 00000000..a7975299 --- /dev/null +++ b/packages/evalite-tests/tests/fixtures/config-watchfiles/evalite.config.ts @@ -0,0 +1,5 @@ +import { defineConfig } from "evalite/config"; + +export default defineConfig({ + forceRerunTriggers: ["src/**/*.ts", "data/**/*.json"], +}); diff --git a/packages/evalite-tests/tests/fixtures/config-watchfiles/test.eval.ts b/packages/evalite-tests/tests/fixtures/config-watchfiles/test.eval.ts new file mode 100644 index 00000000..caeecfe3 --- /dev/null +++ b/packages/evalite-tests/tests/fixtures/config-watchfiles/test.eval.ts @@ -0,0 +1,7 @@ +import { evalite } from "evalite"; + +evalite("WatchFiles Config Test", { + data: () => [{ input: "hello", expected: "hello" }], + task: async (input) => input, + scorers: [], +}); diff --git a/packages/evalite-tests/tests/test-utils.ts b/packages/evalite-tests/tests/test-utils.ts index 6d955079..b1cff940 100644 --- a/packages/evalite-tests/tests/test-utils.ts +++ b/packages/evalite-tests/tests/test-utils.ts @@ -1,11 +1,12 @@ import { randomUUID } from "crypto"; +import type { Evalite } from "evalite"; +import { DB_LOCATION } from "evalite/backend-only-constants"; +import { createInMemoryStorage } from "evalite/in-memory-storage"; +import { runEvalite } from "evalite/runner"; import { cpSync, rmSync } from "fs"; import path from "path"; import { Writable } from "stream"; import stripAnsi from "strip-ansi"; -import type { Evalite } from "evalite"; -import { runEvalite } from "evalite/runner"; -import { createInMemoryStorage } from "evalite/in-memory-storage"; import type { Vitest } from "vitest/node"; const FIXTURES_DIR = path.join(import.meta.dirname, "./fixtures"); @@ -29,7 +30,7 @@ export const loadFixture = async ( const captured = captureStdout(); - let vitestInstance: 
Vitest | undefined = undefined; + let vitestInstance: Vitest | undefined; return { dir: dirPath, @@ -69,6 +70,7 @@ export const loadFixture = async ( * Enable cache for AI SDK model outputs. */ cacheEnabled?: boolean; + forceRerunTriggers?: string[]; }) => { const result = await runEvalite({ ...opts, diff --git a/packages/evalite-tests/tests/watch-files.test.ts b/packages/evalite-tests/tests/watch-files.test.ts new file mode 100644 index 00000000..237af4fa --- /dev/null +++ b/packages/evalite-tests/tests/watch-files.test.ts @@ -0,0 +1,57 @@ +import { expect, it } from "vitest"; +import { getSuitesAsRecordViaStorage, loadFixture } from "./test-utils.js"; + +it("forceRerunTriggers in evalite.config.ts should configure Vitest forceRerunTriggers", async () => { + await using fixture = await loadFixture("config-watchfiles"); + + const vitest = await fixture.run({ + mode: "run-once-and-exit", + }); + + // Verify the forceRerunTriggers includes our configured triggers + const forceRerunTriggers = vitest.config.forceRerunTriggers; + + expect(forceRerunTriggers).toContain("src/**/*.ts"); + expect(forceRerunTriggers).toContain("data/**/*.json"); + + const suites = await getSuitesAsRecordViaStorage(fixture.storage); + + // Should complete successfully + expect(suites["WatchFiles Config Test"]).toHaveLength(1); + expect(suites["WatchFiles Config Test"]?.[0]?.status).toBe("success"); +}); + +it("forceRerunTriggers passed to runEvalite should override evalite.config.ts", async () => { + await using fixture = await loadFixture("config-watchfiles"); + + // Override the config's forceRerunTriggers with different values + const vitest = await fixture.run({ + mode: "run-once-and-exit", + forceRerunTriggers: ["custom/**/*.md"], + }); + + const forceRerunTriggers = vitest.config.forceRerunTriggers; + + // Should contain the override value + expect(forceRerunTriggers).toContain("custom/**/*.md"); + + // Should NOT contain the config file values since we overrode them + 
 expect(forceRerunTriggers).not.toContain("src/**/*.ts"); + expect(forceRerunTriggers).not.toContain("data/**/*.json"); +}); + +it("empty forceRerunTriggers array should not add any extra triggers", async () => { + await using fixture = await loadFixture("config-watchfiles"); + + // Override with empty array - an explicit [] takes precedence over evalite.config.ts and is passed to Vitest as-is + const vitest = await fixture.run({ + mode: "run-once-and-exit", + forceRerunTriggers: [], + }); + + const forceRerunTriggers = vitest.config.forceRerunTriggers; + + // Should NOT contain the config file values since we overrode with empty array + expect(forceRerunTriggers).not.toContain("src/**/*.ts"); + expect(forceRerunTriggers).not.toContain("data/**/*.json"); +}); diff --git a/packages/evalite/package-lock.json b/packages/evalite/package-lock.json new file mode 100644 index 00000000..2df591ba --- /dev/null +++ b/packages/evalite/package-lock.json @@ -0,0 +1,721 @@ +{ + "name": "evalite", + "version": "0.19.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "evalite", + "version": "0.19.0", + "dependencies": { + "@ai-sdk/provider": "^2.0.0", + "@fastify/static": "^8.2.0", + "@fastify/websocket": "11.2.0", + "@stricli/auto-complete": "^1.2.0", + "@stricli/core": "^1.2.0", + "@vitest/runner": "^4.0.0", + "@vitest/utils": "^4.0.1", + "better-sqlite3": "^11.6.0", + "fastify": "^5.6.1", + "file-type": "^19.6.0", + "jiti": "^2.6.1", + "table": "^6.9.0", + "tinyrainbow": "^3.0.3" + }, + "bin": { + "evalite": "dist/bin.js" + }, + "devDependencies": { + "@types/better-sqlite3": "^7.6.13", + "@types/ws": "^8.18.1", + "ai": "^5.0.59", + "autoevals": "^0.0.131", + "unstorage": "^1.17.1" + } + }, + "../../node_modules/.pnpm/@ai-sdk+provider@2.0.0/node_modules/@ai-sdk/provider": { + "version": "2.0.0", + "license": "Apache-2.0", + "dependencies": { + "json-schema": "^0.4.0" + }, + "devDependencies": { + "@types/json-schema": "7.0.15", + "@types/node": "20.17.24", + "@vercel/ai-tsconfig": "0.0.0", + 
"tsup": "^8", + "typescript": "5.8.3" + }, + "engines": { + "node": ">=18" + } + }, + "../../node_modules/.pnpm/@fastify+static@8.2.0/node_modules/@fastify/static": { + "version": "8.2.0", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "@fastify/accept-negotiator": "^2.0.0", + "@fastify/send": "^4.0.0", + "content-disposition": "^0.5.4", + "fastify-plugin": "^5.0.0", + "fastq": "^1.17.1", + "glob": "^11.0.0" + }, + "devDependencies": { + "@fastify/compress": "^8.0.0", + "@fastify/pre-commit": "^2.1.0", + "@types/node": "^22.0.0", + "borp": "^0.20.0", + "c8": "^10.1.3", + "concat-stream": "^2.0.0", + "eslint": "^9.17.0", + "fastify": "^5.1.0", + "neostandard": "^0.12.0", + "pino": "^9.1.0", + "proxyquire": "^2.1.3", + "tsd": "^0.32.0" + } + }, + "../../node_modules/.pnpm/@fastify+websocket@11.2.0/node_modules/@fastify/websocket": { + "version": "11.2.0", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "duplexify": "^4.1.3", + "fastify-plugin": "^5.0.0", + "ws": "^8.16.0" + }, + "devDependencies": { + "@fastify/pre-commit": "^2.1.0", + "@fastify/type-provider-typebox": "^5.0.0", + "@types/node": "^24.0.9", + "@types/ws": "^8.5.10", + "c8": "^10.1.3", + "eslint": "^9.17.0", + "fastify": "^5.0.0", + "fastify-tsconfig": "^3.0.0", + "neostandard": "^0.12.0", + "split2": "^4.2.0", + "tsd": "^0.32.0" + } + }, + "../../node_modules/.pnpm/@stricli+auto-complete@1.2.0/node_modules/@stricli/auto-complete": { + "version": "1.2.0", + "license": "Apache-2.0", + "dependencies": { + "@stricli/core": "^1.2.0" + }, + "bin": { + "auto-complete": "dist/bin/cli.js" + }, + "devDependencies": { + "@typescript-eslint/eslint-plugin": "^8.2.0", + 
"@typescript-eslint/parser": "^8.2.0", + "eslint": "^8.57.0", + "eslint-plugin-import": "^2.26.0", + "eslint-plugin-prettier": "^5.0.0", + "prettier": "^3.0.0", + "tsup": "^6.7.0", + "typescript": "5.6.x" + } + }, + "../../node_modules/.pnpm/@stricli+core@1.2.0/node_modules/@stricli/core": { + "version": "1.2.0", + "license": "Apache-2.0", + "devDependencies": { + "@types/chai": "^4.3.11", + "@types/fs-extra": "^11.0.4", + "@types/mocha": "^10.0.6", + "@types/sinon": "^17.0.2", + "@typescript-eslint/eslint-plugin": "^8.2.0", + "@typescript-eslint/parser": "^8.2.0", + "c8": "^8.0.1", + "chai": "^4.3.10", + "eslint": "^8.57.0", + "eslint-plugin-header": "^3.1.1", + "eslint-plugin-import": "^2.29.1", + "eslint-plugin-prettier": "^5.1.3", + "fs-extra": "^11.2.0", + "mocha": "^10.2.0", + "prettier": "^3.2.5", + "sinon": "^17.0.1", + "tsup": "^8.0.1", + "tsx": "^4.8.2", + "typescript": "5.6.x" + } + }, + "../../node_modules/.pnpm/@types+better-sqlite3@7.6.13/node_modules/@types/better-sqlite3": { + "version": "7.6.13", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, + "../../node_modules/.pnpm/@types+ws@8.18.1/node_modules/@types/ws": { + "version": "8.18.1", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, + "../../node_modules/.pnpm/@vitest+runner@4.0.1/node_modules/@vitest/runner": { + "version": "4.0.1", + "license": "MIT", + "dependencies": { + "@vitest/utils": "4.0.1", + "pathe": "^2.0.3" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "../../node_modules/.pnpm/@vitest+utils@4.0.1/node_modules/@vitest/utils": { + "version": "4.0.1", + "license": "MIT", + "dependencies": { + "@vitest/pretty-format": "4.0.1", + "tinyrainbow": "^3.0.3" + }, + "devDependencies": { + "@jridgewell/trace-mapping": "0.3.31", + "@types/estree": "^1.0.8", + "diff-sequences": "^29.6.3", + "loupe": "^3.2.1", + "tinyhighlight": "^0.3.2" + }, + "funding": { + "url": 
"https://opencollective.com/vitest" + } + }, + "../../node_modules/.pnpm/ai@5.0.59_zod@3.25.76/node_modules/ai": { + "version": "5.0.59", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@ai-sdk/gateway": "1.0.32", + "@ai-sdk/provider": "2.0.0", + "@ai-sdk/provider-utils": "3.0.10", + "@opentelemetry/api": "1.9.0" + }, + "devDependencies": { + "@edge-runtime/vm": "^5.0.0", + "@types/json-schema": "7.0.15", + "@types/node": "20.17.24", + "@vercel/ai-tsconfig": "0.0.0", + "eslint": "8.57.1", + "eslint-config-vercel-ai": "0.0.0", + "tsup": "^7.2.0", + "typescript": "5.8.3", + "zod": "3.25.76" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "zod": "^3.25.76 || ^4.1.8" + } + }, + "../../node_modules/.pnpm/autoevals@0.0.131_ws@8.18.0/node_modules/autoevals": { + "version": "0.0.131", + "dev": true, + "license": "MIT", + "dependencies": { + "ajv": "^8.17.1", + "compute-cosine-similarity": "^1.1.0", + "js-levenshtein": "^1.1.6", + "js-yaml": "^4.1.0", + "linear-sum-assignment": "^1.0.7", + "mustache": "^4.2.0", + "openai": "^4.104.0", + "zod": "^3.25.76", + "zod-to-json-schema": "^3.24.6" + }, + "devDependencies": { + "@rollup/plugin-yaml": "^4.1.2", + "@types/js-levenshtein": "^1.1.3", + "@types/js-yaml": "^4.0.9", + "@types/mustache": "^4.2.6", + "@types/node": "^20.19.11", + "msw": "^2.10.5", + "tsup": "^8.5.0", + "tsx": "^3.14.0", + "typedoc": "^0.25.13", + "typedoc-plugin-markdown": "^3.17.1", + "typescript": "^5.9.2", + "vitest": "^2.1.9" + } + }, + "../../node_modules/.pnpm/better-sqlite3@11.6.0/node_modules/better-sqlite3": { + "version": "11.6.0", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "bindings": "^1.5.0", + "prebuild-install": "^7.1.1" + }, + "devDependencies": { + "chai": "^4.3.8", + "cli-color": "^2.0.3", + "fs-extra": "^11.1.1", + "mocha": "^10.2.0", + "nodemark": "^0.3.0", + "prebuild": "^13.0.0", + "sqlite": "^5.0.1", + "sqlite3": "^5.1.6" + } + }, + 
"../../node_modules/.pnpm/fastify@5.6.1/node_modules/fastify": { + "version": "5.6.1", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "@fastify/ajv-compiler": "^4.0.0", + "@fastify/error": "^4.0.0", + "@fastify/fast-json-stringify-compiler": "^5.0.0", + "@fastify/proxy-addr": "^5.0.0", + "abstract-logging": "^2.0.1", + "avvio": "^9.0.0", + "fast-json-stringify": "^6.0.0", + "find-my-way": "^9.0.0", + "light-my-request": "^6.0.0", + "pino": "^9.0.0", + "process-warning": "^5.0.0", + "rfdc": "^1.3.1", + "secure-json-parse": "^4.0.0", + "semver": "^7.6.0", + "toad-cache": "^3.7.0" + }, + "devDependencies": { + "@fastify/pre-commit": "^2.1.0", + "@jsumners/line-reporter": "^1.0.1", + "@sinclair/typebox": "^0.34.13", + "@sinonjs/fake-timers": "^11.2.2", + "@stylistic/eslint-plugin": "^5.1.0", + "@stylistic/eslint-plugin-js": "^4.1.0", + "@types/node": "^24.0.12", + "ajv": "^8.12.0", + "ajv-errors": "^3.0.0", + "ajv-formats": "^3.0.1", + "ajv-i18n": "^4.2.0", + "ajv-merge-patch": "^5.0.1", + "autocannon": "^8.0.0", + "borp": "^0.20.0", + "branch-comparer": "^1.1.0", + "concurrently": "^9.1.2", + "cross-env": "^10.0.0", + "eslint": "^9.0.0", + "fast-json-body": "^1.1.0", + "fastify-plugin": "^5.0.0", + "fluent-json-schema": "^6.0.0", + "h2url": "^0.2.0", + "http-errors": "^2.0.0", + "joi": "^17.12.3", + "json-schema-to-ts": "^3.0.1", + "JSONStream": "^1.3.5", + "markdownlint-cli2": "^0.18.1", + "neostandard": "^0.12.0", + "node-forge": "^1.3.1", + "proxyquire": "^2.1.3", + "split2": "^4.2.0", + "tsd": "^0.32.0", + "typescript": "~5.9.2", + "undici": "^7.11.0", + "vary": "^1.1.2", + "yup": "^1.4.0" + } + }, + "../../node_modules/.pnpm/file-type@19.6.0/node_modules/file-type": { + "version": "19.6.0", + "license": "MIT", + "dependencies": { + "get-stream": "^9.0.1", + "strtok3": "^9.0.1", + 
"token-types": "^6.0.0", + "uint8array-extras": "^1.3.0" + }, + "devDependencies": { + "@tokenizer/token": "^0.3.0", + "@types/node": "^20.10.7", + "ava": "^6.0.1", + "commonmark": "^0.30.0", + "noop-stream": "^1.0.0", + "tsd": "^0.30.3", + "xo": "^0.56.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sindresorhus/file-type?sponsor=1" + } + }, + "../../node_modules/.pnpm/jiti@2.6.1/node_modules/jiti": { + "version": "2.6.1", + "license": "MIT", + "bin": { + "jiti": "lib/jiti-cli.mjs" + }, + "devDependencies": { + "@babel/core": "^7.28.4", + "@babel/helper-module-imports": "^7.27.1", + "@babel/helper-module-transforms": "^7.28.3", + "@babel/helper-plugin-utils": "^7.27.1", + "@babel/helper-simple-access": "^7.27.1", + "@babel/plugin-proposal-decorators": "^7.28.0", + "@babel/plugin-syntax-class-properties": "^7.12.13", + "@babel/plugin-syntax-import-assertions": "^7.27.1", + "@babel/plugin-syntax-jsx": "^7.27.1", + "@babel/plugin-transform-export-namespace-from": "^7.27.1", + "@babel/plugin-transform-react-jsx": "^7.27.1", + "@babel/plugin-transform-typescript": "^7.28.0", + "@babel/preset-typescript": "^7.27.1", + "@babel/template": "^7.27.2", + "@babel/traverse": "^7.28.4", + "@babel/types": "^7.28.4", + "@rspack/cli": "^1.5.8", + "@rspack/core": "^1.5.8", + "@types/babel__core": "^7.20.5", + "@types/babel__helper-module-imports": "^7.18.3", + "@types/babel__helper-plugin-utils": "^7.10.3", + "@types/babel__template": "^7.4.4", + "@types/babel__traverse": "^7.28.0", + "@types/node": "^24.6.1", + "@vitest/coverage-v8": "^3.2.4", + "acorn": "^8.15.0", + "babel-plugin-parameter-decorator": "^1.0.16", + "changelogen": "^0.6.2", + "config": "^4.1.1", + "consola": "^3.4.2", + "defu": "^6.1.4", + "destr": "^2.0.5", + "escape-string-regexp": "^5.0.0", + "eslint": "^9.36.0", + "eslint-config-unjs": "^0.5.0", + "estree-walker": "^3.0.3", + "etag": "^1.8.1", + "fast-glob": "^3.3.3", + "is-installed-globally": "^1.0.0", + "mime": 
"^4.1.0", + "mlly": "^1.8.0", + "moment-timezone": "^0.6.0", + "nano-jsx": "^0.2.0", + "pathe": "^2.0.3", + "pkg-types": "^2.3.0", + "preact": "^10.27.2", + "preact-render-to-string": "^6.6.2", + "prettier": "^3.6.2", + "react": "^19.1.1", + "react-dom": "^19.1.1", + "reflect-metadata": "^0.2.2", + "solid-js": "^1.9.9", + "std-env": "^3.9.0", + "terser-webpack-plugin": "^5.3.14", + "tinyexec": "^1.0.1", + "ts-loader": "^9.5.4", + "typescript": "^5.9.3", + "vitest": "^3.2.4", + "vue": "^3.5.22", + "yoctocolors": "^2.1.2", + "zod": "^4.1.11" + } + }, + "../../node_modules/.pnpm/table@6.9.0/node_modules/table": { + "version": "6.9.0", + "license": "BSD-3-Clause", + "dependencies": { + "ajv": "^8.0.1", + "lodash.truncate": "^4.4.2", + "slice-ansi": "^4.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1" + }, + "devDependencies": { + "@types/chai": "^4.2.16", + "@types/lodash.mapvalues": "^4.6.6", + "@types/lodash.truncate": "^4.4.6", + "@types/mocha": "^9.0.0", + "@types/node": "^14.14.37", + "@types/sinon": "^10.0.0", + "@types/slice-ansi": "^4.0.0", + "ajv-cli": "^5.0.0", + "ajv-keywords": "^5.0.0", + "chai": "^4.2.0", + "chalk": "^4.1.0", + "coveralls": "^3.1.0", + "eslint": "^7.32.0", + "eslint-config-canonical": "^25.0.0", + "gitdown": "^3.1.4", + "husky": "^4.3.6", + "js-beautify": "^1.14.0", + "lodash.mapvalues": "^4.6.0", + "mkdirp": "^1.0.4", + "mocha": "^8.2.1", + "nyc": "^15.1.0", + "semantic-release": "^17.3.1", + "sinon": "^12.0.1", + "ts-node": "^9.1.1", + "typescript": "4.5.2" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "../../node_modules/.pnpm/tinyrainbow@3.0.3/node_modules/tinyrainbow": { + "version": "3.0.3", + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "../../node_modules/.pnpm/unstorage@1.17.1/node_modules/unstorage": { + "version": "1.17.1", + "dev": true, + "license": "MIT", + "dependencies": { + "anymatch": "^3.1.3", + "chokidar": "^4.0.3", + "destr": "^2.0.5", + "h3": "^1.15.4", + "lru-cache": "^10.4.3", + 
"node-fetch-native": "^1.6.7", + "ofetch": "^1.4.1", + "ufo": "^1.6.1" + }, + "devDependencies": { + "@azure/app-configuration": "^1.9.0", + "@azure/cosmos": "^4.5.0", + "@azure/data-tables": "^13.3.1", + "@azure/identity": "^4.11.1", + "@azure/keyvault-secrets": "^4.10.0", + "@azure/storage-blob": "^12.28.0", + "@capacitor/preferences": "^7.0.2", + "@cloudflare/workers-types": "^4.20250903.0", + "@deno/kv": "^0.12.0", + "@electric-sql/pglite": "^0.3.7", + "@libsql/client": "^0.15.14", + "@netlify/blobs": "^10.0.10", + "@planetscale/database": "^1.19.0", + "@types/deno": "^2.3.0", + "@types/ioredis-mock": "^8.2.6", + "@types/jsdom": "^21.1.7", + "@types/node": "^24.3.0", + "@upstash/redis": "^1.35.3", + "@vercel/blob": "^1.1.1", + "@vercel/functions": "^3.0.0", + "@vercel/kv": "^3.0.0", + "@vitest/coverage-v8": "^3.2.4", + "aws4fetch": "^1.0.20", + "azurite": "^3.35.0", + "better-sqlite3": "^12.2.0", + "changelogen": "^0.6.2", + "citty": "^0.1.6", + "db0": "^0.3.2", + "eslint": "^9.34.0", + "eslint-config-unjs": "^0.5.0", + "fake-indexeddb": "^6.2.2", + "get-port-please": "^3.2.0", + "idb-keyval": "^6.2.2", + "ioredis": "^5.7.0", + "ioredis-mock": "^8.9.0", + "jiti": "^2.5.1", + "jsdom": "^26.1.0", + "listhen": "^1.9.0", + "mitata": "^1.0.34", + "mlly": "^1.8.0", + "mongodb": "^6.19.0", + "mongodb-memory-server": "^10.2.0", + "prettier": "^3.6.2", + "scule": "^1.3.0", + "types-cloudflare-worker": "^1.2.0", + "typescript": "^5.9.2", + "unbuild": "^3.6.1", + "uploadthing": "^7.7.4", + "vite": "^7.1.4", + "vitest": "^3.2.4", + "wrangler": "^4.33.2" + }, + "peerDependencies": { + "@azure/app-configuration": "^1.8.0", + "@azure/cosmos": "^4.2.0", + "@azure/data-tables": "^13.3.0", + "@azure/identity": "^4.6.0", + "@azure/keyvault-secrets": "^4.9.0", + "@azure/storage-blob": "^12.26.0", + "@capacitor/preferences": "^6.0.3 || ^7.0.0", + "@deno/kv": ">=0.9.0", + "@netlify/blobs": "^6.5.0 || ^7.0.0 || ^8.1.0 || ^9.0.0 || ^10.0.0", + "@planetscale/database": "^1.19.0", + 
"@upstash/redis": "^1.34.3", + "@vercel/blob": ">=0.27.1", + "@vercel/functions": "^2.2.12 || ^3.0.0", + "@vercel/kv": "^1.0.1", + "aws4fetch": "^1.0.20", + "db0": ">=0.2.1", + "idb-keyval": "^6.2.1", + "ioredis": "^5.4.2", + "uploadthing": "^7.4.4" + }, + "peerDependenciesMeta": { + "@azure/app-configuration": { + "optional": true + }, + "@azure/cosmos": { + "optional": true + }, + "@azure/data-tables": { + "optional": true + }, + "@azure/identity": { + "optional": true + }, + "@azure/keyvault-secrets": { + "optional": true + }, + "@azure/storage-blob": { + "optional": true + }, + "@capacitor/preferences": { + "optional": true + }, + "@deno/kv": { + "optional": true + }, + "@netlify/blobs": { + "optional": true + }, + "@planetscale/database": { + "optional": true + }, + "@upstash/redis": { + "optional": true + }, + "@vercel/blob": { + "optional": true + }, + "@vercel/functions": { + "optional": true + }, + "@vercel/kv": { + "optional": true + }, + "aws4fetch": { + "optional": true + }, + "db0": { + "optional": true + }, + "idb-keyval": { + "optional": true + }, + "ioredis": { + "optional": true + }, + "uploadthing": { + "optional": true + } + } + }, + "node_modules/@ai-sdk/provider": { + "resolved": "../../node_modules/.pnpm/@ai-sdk+provider@2.0.0/node_modules/@ai-sdk/provider", + "link": true + }, + "node_modules/@fastify/static": { + "resolved": "../../node_modules/.pnpm/@fastify+static@8.2.0/node_modules/@fastify/static", + "link": true + }, + "node_modules/@fastify/websocket": { + "resolved": "../../node_modules/.pnpm/@fastify+websocket@11.2.0/node_modules/@fastify/websocket", + "link": true + }, + "node_modules/@stricli/auto-complete": { + "resolved": "../../node_modules/.pnpm/@stricli+auto-complete@1.2.0/node_modules/@stricli/auto-complete", + "link": true + }, + "node_modules/@stricli/core": { + "resolved": "../../node_modules/.pnpm/@stricli+core@1.2.0/node_modules/@stricli/core", + "link": true + }, + "node_modules/@types/better-sqlite3": { + "resolved": 
"../../node_modules/.pnpm/@types+better-sqlite3@7.6.13/node_modules/@types/better-sqlite3", + "link": true + }, + "node_modules/@types/ws": { + "resolved": "../../node_modules/.pnpm/@types+ws@8.18.1/node_modules/@types/ws", + "link": true + }, + "node_modules/@vitest/runner": { + "resolved": "../../node_modules/.pnpm/@vitest+runner@4.0.1/node_modules/@vitest/runner", + "link": true + }, + "node_modules/@vitest/utils": { + "resolved": "../../node_modules/.pnpm/@vitest+utils@4.0.1/node_modules/@vitest/utils", + "link": true + }, + "node_modules/ai": { + "resolved": "../../node_modules/.pnpm/ai@5.0.59_zod@3.25.76/node_modules/ai", + "link": true + }, + "node_modules/autoevals": { + "resolved": "../../node_modules/.pnpm/autoevals@0.0.131_ws@8.18.0/node_modules/autoevals", + "link": true + }, + "node_modules/better-sqlite3": { + "resolved": "../../node_modules/.pnpm/better-sqlite3@11.6.0/node_modules/better-sqlite3", + "link": true + }, + "node_modules/fastify": { + "resolved": "../../node_modules/.pnpm/fastify@5.6.1/node_modules/fastify", + "link": true + }, + "node_modules/file-type": { + "resolved": "../../node_modules/.pnpm/file-type@19.6.0/node_modules/file-type", + "link": true + }, + "node_modules/jiti": { + "resolved": "../../node_modules/.pnpm/jiti@2.6.1/node_modules/jiti", + "link": true + }, + "node_modules/table": { + "resolved": "../../node_modules/.pnpm/table@6.9.0/node_modules/table", + "link": true + }, + "node_modules/tinyrainbow": { + "resolved": "../../node_modules/.pnpm/tinyrainbow@3.0.3/node_modules/tinyrainbow", + "link": true + }, + "node_modules/unstorage": { + "resolved": "../../node_modules/.pnpm/unstorage@1.17.1/node_modules/unstorage", + "link": true + } + } +} diff --git a/packages/evalite/src/config.ts b/packages/evalite/src/config.ts index 5c382067..1419ec95 100644 --- a/packages/evalite/src/config.ts +++ b/packages/evalite/src/config.ts @@ -1,6 +1,6 @@ +import path from "node:path"; import { createJiti } from "jiti"; import { access } 
from "fs/promises"; -import path from "path"; import type { Evalite } from "./types.js"; /** @@ -17,7 +17,8 @@ import type { Evalite } from "./types.js"; * storage: () => createSqliteStorage("./custom.db"), * server: { port: 3001 }, * scoreThreshold: 80, - * hideTable: true + * hideTable: true, + * forceRerunTriggers: ["src/**\/*.ts", "prompts/**\/*"], * }) * ``` */ diff --git a/packages/evalite/src/run-evalite.ts b/packages/evalite/src/run-evalite.ts index ec0ef78f..37138462 100644 --- a/packages/evalite/src/run-evalite.ts +++ b/packages/evalite/src/run-evalite.ts @@ -1,6 +1,7 @@ import { mkdir, writeFile } from "fs/promises"; import path from "path"; -import { Writable } from "stream"; +import type { Writable } from "stream"; +import { configDefaults } from "vitest/config"; import { createVitest, registerConsoleShortcuts } from "vitest/node"; import getPort from "get-port"; import { FILES_LOCATION } from "./backend-only-constants.js"; @@ -173,6 +174,7 @@ const exportResultsToJSON = async (opts: { * @param opts.mode - Execution mode: "watch-for-file-changes", "run-once-and-exit", "run-once-and-serve", or "run-once" * @param opts.scoreThreshold - Optional score threshold (0-100) to fail the process if scores are below * @param opts.outputPath - Optional path to write test results in JSON format after completion + * @param opts.forceRerunTriggers - Optional extra file globs that trigger reruns in watch mode (overrides evalite.config.ts if provided) * * @example * ```typescript @@ -190,6 +192,12 @@ const exportResultsToJSON = async (opts: { * mode: "watch-for-file-changes" * }); * + * // Watch mode with extra file triggers + * await runEvalite({ + * mode: "watch-for-file-changes", + * forceRerunTriggers: ["src/**\/*.ts", "prompts/**\/*"] + * }); + * * // Run specific eval file with custom working directory * await runEvalite({ * path: "tests/my-eval.eval.ts", @@ -211,6 +219,11 @@ export const runEvalite = async (opts: { disableServer?: boolean; cacheEnabled?: 
boolean; cacheDebug?: boolean; + /** + * Extra file globs that should trigger reruns in watch mode. + * Overrides `forceRerunTriggers` from evalite.config.ts if provided. + */ + forceRerunTriggers?: string[]; }) => { const cwd = opts.cwd ?? process.cwd(); const filesLocation = path.join(cwd, FILES_LOCATION); @@ -257,6 +270,14 @@ export const runEvalite = async (opts: { // 2. Add setupFiles from evalite.config.ts const setupFiles = ["evalite/env-setup-file", ...(config?.setupFiles || [])]; + // Evalite-level "extra watch files": + // Node API (opts.forceRerunTriggers) takes precedence over evalite.config.ts. + // If opts.forceRerunTriggers is defined (even []), it wins. + const forceRerunTriggers = + (opts.forceRerunTriggers !== undefined + ? opts.forceRerunTriggers + : config?.forceRerunTriggers) ?? configDefaults.forceRerunTriggers; + const filters = opts.path ? [opts.path] : undefined; process.env.EVALITE_REPORT_TRACES = "true"; @@ -282,7 +303,7 @@ export const runEvalite = async (opts: { } } - let exitCode: number | undefined = undefined; + let exitCode: number | undefined; // Merge user's viteConfig with evalite defaults const mergedViteConfig: ViteUserConfig = { @@ -322,6 +343,7 @@ export const runEvalite = async (opts: { hideTable: hideTable, }), ], + forceRerunTriggers: forceRerunTriggers, root: cwd, include: ["**/*.eval.?(m)ts"], watch: opts.mode === "watch-for-file-changes", diff --git a/packages/evalite/src/types.ts b/packages/evalite/src/types.ts index 2de53593..29208e3c 100644 --- a/packages/evalite/src/types.ts +++ b/packages/evalite/src/types.ts @@ -154,6 +154,29 @@ export declare namespace Evalite { | "testNamePattern" >; }; + + /** + * Extra file globs that should trigger eval reruns in watch mode. + * + * This maps directly onto Vitest's `test.forceRerunTriggers` option + * (glob patterns, relative to the project root / Evalite cwd). 
+ * + * This is useful when your evals depend on files that Vitest can't + * automatically detect as dependencies (e.g., prompt templates, + * external data files, or CLI build outputs). + * + * @example + * ```ts + * export default defineConfig({ + * forceRerunTriggers: [ + * "src/**\/*.ts", // model / helper code + * "prompts/**\/*", // prompt templates + * "data/**\/*.json", // test data + * ] + * }) + * ``` + */ + forceRerunTriggers?: string[]; } export type RunType = "full" | "partial";