From 73df42676ba77b005121e9b89f51d4c4a2468a02 Mon Sep 17 00:00:00 2001 From: z8 <1234yamd@gmail.com> Date: Sat, 20 Dec 2025 09:58:28 +0200 Subject: [PATCH 1/5] Refactor evaluhealth package and update documentation - Renamed evalite to evaluhealth in various files and configurations. - Added new files and updated existing ones in the evaluhealth-docs directory. - Removed evalite-related files and configurations. - Updated package.json and README files to reflect the changes. - Added .gitignore entries for evaluhealth-specific files. This commit enhances the structure and clarity of the evaluhealth package, ensuring consistency across documentation and codebase. --- .changeset/config.json | 7 +- .changeset/tasty-parents-bathe.md | 4 +- .github/workflows/preview.yml | 2 +- .prettierignore | 6 +- CLAUDE.md | 94 +- CONTRIBUTING.md | 16 +- apps/evalite-ui/README.md | 15 - .../.gitignore | 0 .../CLAUDE.md | 0 .../README.md | 0 .../astro.config.mts | 20 +- .../package.json | 2 +- .../public/favicon.ico | Bin .../public/hero.webp | Bin .../public/og-image.jpg | Bin .../src/assets/houston.webp | Bin .../src/assets/logo-dark.svg | 0 .../src/assets/logo-light.svg | 0 .../src/components/Banner.astro | 6 +- .../src/content/config.ts | 0 .../src/content/docs/examples/ai-sdk.md | 16 +- .../src/content/docs/guides/ci.mdx | 38 +- .../src/content/docs/guides/cli.mdx | 28 +- .../src/content/docs/guides/configuration.mdx | 38 +- .../docs/guides/customizing-the-ui.mdx | 14 +- .../docs/guides/environment-variables.mdx | 6 +- .../src/content/docs/guides/multi-modal.mdx | 24 +- .../docs/guides/running-programmatically.mdx | 28 +- .../src/content/docs/guides/scorers.mdx | 22 +- .../src/content/docs/guides/skipping.mdx | 6 +- .../src/content/docs/guides/streams.md | 6 +- .../src/content/docs/guides/traces.mdx | 12 +- .../docs/guides/variant-comparison.mdx | 10 +- .../src/content/docs/index.mdx | 4 +- .../src/content/docs/quickstart.mdx | 22 +- .../src/content/docs/what-is-evaluhealth.mdx} | 16 +- .../src/env.d.ts | 0 .../tsconfig.json | 0 .../.eslintrc.cjs | 0 .../{evalite-ui => evaluhealth-ui}/.gitignore | 0 apps/evaluhealth-ui/README.md | 15 + .../app/components/display-input.tsx | 20 +- .../app/components/logo.tsx | 2 +- .../app/components/page-layout.tsx | 0 .../app/components/score.tsx | 4 +- .../app/components/ui/breadcrumb.tsx | 0 .../app/components/ui/button.tsx | 0 .../app/components/ui/chart.tsx | 0 .../app/components/ui/copy-button.tsx | 0 .../app/components/ui/input.tsx | 0 .../app/components/ui/line-chart.tsx | 0 .../app/components/ui/live-date.tsx | 0 .../app/components/ui/separator.tsx | 0 .../app/components/ui/sheet.tsx | 0 .../app/components/ui/sidebar.tsx | 0 .../app/components/ui/skeleton.tsx | 0 .../app/components/ui/table.tsx | 0 .../app/components/ui/tooltip.tsx | 0 .../app/data/queries.ts | 0 .../app/data/use-subscribe-to-socket.ts | 8 +- .../app/hooks/use-mobile.ts | 0 .../app/hooks/use-mobile.tsx | 0 .../app/hooks/use-server-state-utils.ts | 4 +- .../app/lib/utils.ts | 0 .../app/main.tsx | 2 +- .../app/routes/$.tsx | 2 +- .../app/routes/__root.tsx | 6 +- .../routes/eval.$name.result.$resultIndex.tsx | 6 +- .../app/routes/eval.$name.tsx | 18 +- .../{evalite-ui => evaluhealth-ui}/app/sdk.ts | 33 +- .../app/tailwind.css | 0 .../app/utils.test.ts | 0 .../app/utils.ts | 4 +- .../components.json | 0 .../eslint.config.js | 0 .../{evalite-ui => evaluhealth-ui}/index.html | 0 .../package.json | 6 +- .../public/assets/favicon.svg | 0 .../tsconfig.json | 0 .../tsr.config.json | 0 .../vite.config.ts | 0 package.json | 20 +- packages/evalite-tests/.gitignore | 2 - .../fixtures/config-precedence/vite.config.ts | 8 - packages/evalite/src/constants.ts | 2 - packages/evalite/src/index.ts | 4 - packages/evaluhealth-tests/.gitignore | 2 + .../CHANGELOG.md | 30 +- .../package.json | 6 +- .../tests/ai-sdk-traces.test.ts | 0 .../tests/basics.test.ts | 2 +- .../tests/columns-with-scores-traces.test.ts | 0 .../tests/columns.test.ts | 0 .../tests/config.test.ts | 6 +- .../tests/custom-scorer.test.ts | 2 +- .../tests/export-static.test.ts | 26 +- .../tests/failing.test.ts | 4 +- .../tests/files.test.ts | 18 +- .../ai-sdk-traces-stream/traces.eval.ts | 6 +- .../fixtures/ai-sdk-traces/traces.eval.ts | 6 +- .../tests/fixtures/basics}/basics.eval.ts | 4 +- .../columns-with-scores-traces.eval.ts | 6 +- .../tests/fixtures/columns/columns.eval.ts | 4 +- .../fixtures/config-includes}/basics.eval.ts | 4 +- .../fixtures/config-includes/vite.config.ts | 0 .../config-precedence/evaluhealth.config.ts} | 2 +- .../fixtures/config-precedence/test.eval.ts | 4 +- .../fixtures/config-precedence/vite.config.ts | 8 + .../config-setupfiles/evaluhealth.config.ts} | 2 +- .../tests/fixtures/config-setupfiles/setup.ts | 0 .../fixtures/config-setupfiles/test.eval.ts | 4 +- .../fixtures/custom-scorer/index.eval.ts | 4 +- .../evaluhealth-config}/basics.eval.ts | 4 +- .../evaluhealth-config/evaluhealth.config.ts} | 2 +- .../basics.eval.ts | 4 +- .../evaluhealth.config.ts} | 2 +- .../experimental_columns/files-4.eval.ts | 6 +- .../fixtures/experimental_columns/test.png | Bin .../tests/fixtures/export/export.eval.ts | 16 +- .../tests/fixtures/export/test.png | Bin .../failing-test-in-data/failing-test.eval.ts | 4 +- .../failing-test}/failing-test.eval.ts | 4 +- .../fixtures/failing-test/vite.config.ts | 0 .../tests/fixtures/files/files-1.eval.ts | 6 +- .../tests/fixtures/files/files-2.eval.ts | 6 +- .../tests/fixtures/files/files-3.eval.ts | 6 +- .../tests/fixtures/files/files-4.eval.ts | 6 +- .../tests/fixtures/files/test.png | Bin .../fixtures/long-text/long-text.eval.ts | 4 +- .../module-level-error.eval.ts | 4 +- .../module-level-error/vite.config.ts | 0 .../fixtures/much-data/much-data.eval.ts | 4 +- .../tests/fixtures/multi/multi-1.eval.ts | 4 +- .../tests/fixtures/multi/multi-2.eval.ts | 4 +- .../tests/fixtures/multi/multi-3.eval.ts | 6 +- .../fixtures/no-scorers/no-scorers.eval.ts | 4 +- .../non-serializable-data.eval.ts | 4 +- .../tests/fixtures/objects/objects.eval.ts | 4 +- .../only-flag-multiple.eval.ts | 4 +- .../only-flag-none/only-flag-none.eval.ts | 4 +- .../only-flag-single/only-flag-single.eval.ts | 4 +- .../only-flag-variants.eval.ts | 4 +- .../fixtures/paths/should-not-run.eval.ts | 4 +- .../tests/fixtures/paths/should-run.eval.ts | 4 +- .../polymorphic-data/polymorphic.eval.ts | 6 +- .../tests/fixtures/stream/file.txt | 0 .../tests/fixtures/stream/stream.eval.ts | 4 +- .../test-modifiers.eval.ts | 8 +- .../test-modifiers.eval.ts | 6 +- .../tests/fixtures/threshold/basics.eval.ts | 4 +- .../tests/fixtures/timeout/timeout.eval.ts | 4 +- .../tests/fixtures/timeout/vite.config.ts | 0 .../tests/fixtures/traces/traces.eval.ts | 6 +- .../trial-count-config/evaluhealth.config.ts} | 2 +- .../fixtures/trial-count-config/test.eval.ts | 4 +- .../evaluhealth.config.ts} | 2 +- .../trial-count-precedence/test.eval.ts | 4 +- .../fixtures/trial-count/trial-count.eval.ts | 4 +- .../tests/fixtures/variants/variants.eval.ts | 4 +- .../watch-mode-fail}/failing-test.eval.ts | 4 +- .../tests/long-text.test.ts | 0 .../tests/much-data.test.ts | 0 .../tests/multi.test.ts | 0 .../tests/no-scorers.test.ts | 0 .../tests/non-serializable-data.test.ts | 0 .../tests/objects.test.ts | 0 .../tests/only-flag.test.ts | 0 .../tests/output-path.test.ts | 2 +- .../tests/paths.test.ts | 0 .../tests/polymorphic-data.test.ts | 0 .../tests/stream.test.ts | 0 .../tests/test-modifiers.test.ts | 2 +- .../tests/test-utils.ts | 21 +- .../tests/threshold.test.ts | 0 .../tests/timeout.test.ts | 0 .../tests/traces.test.ts | 0 .../tests/trial-count.test.ts | 4 +- .../tests/variants.test.ts | 0 .../tests/watch-mode.test.ts | 0 .../tsconfig.json | 0 .../vitest.config.ts | 0 packages/{evalite => evaluhealth}/.gitignore | 2 +- .../{evalite => evaluhealth}/CHANGELOG.md | 82 +- .../{evalite => evaluhealth}/package.json | 14 +- packages/{evalite => evaluhealth}/readme.md | 26 +- .../{evalite => evaluhealth}/src/ai-sdk.ts | 0 .../src/backend-only-constants.ts | 0 packages/{evalite => evaluhealth}/src/bin.ts | 0 .../src/command.test.ts | 20 +- .../{evalite => evaluhealth}/src/command.ts | 20 +- .../{evalite => evaluhealth}/src/config.ts | 34 +- packages/evaluhealth/src/constants.ts | 2 + .../src/create-scorer.ts | 8 +- .../src/evaluhealth.ts} | 62 +- .../src/export-static.ts | 105 +- packages/evaluhealth/src/index.ts | 4 + .../{evalite => evaluhealth}/src/reporter.ts | 28 +- .../src/reporter/EvaluhealthRunner.ts} | 30 +- .../src/reporter/events.ts | 24 +- .../src/reporter/rendering.ts | 24 +- .../src/run-evaluhealth.ts} | 42 +- packages/{evalite => evaluhealth}/src/sdk.ts | 34 +- .../{evalite => evaluhealth}/src/server.ts | 24 +- .../src/storage/in-memory.ts | 74 +- .../src/storage/sqlite.ts | 100 +- .../src/storage/storage.test.ts | 2 +- .../src/storage/test-utils.ts | 6 +- .../src/storage/utils.ts | 4 +- .../{evalite => evaluhealth}/src/traces.ts | 10 +- .../{evalite => evaluhealth}/src/types.ts | 100 +- .../{evalite => evaluhealth}/src/utils.ts | 20 +- .../src/write-file-queue-local-storage.ts | 0 .../tsconfig-warning/index.d.ts | 8 +- .../tsconfig-warning/index.js | 4 +- .../{evalite => evaluhealth}/tsconfig.json | 0 packages/example/.gitignore | 4 +- packages/example/CHANGELOG.md | 18 +- ...valite.config.ts => evaluhealth.config.ts} | 2 +- packages/example/package.json | 4 +- .../example/src/content-generation.eval.ts | 6 +- packages/example/src/example.2.eval.ts | 6 +- packages/example/src/example.eval.ts | 6 +- packages/example/src/fail.eval.ts | 4 +- packages/example/src/files.eval.ts | 6 +- packages/example/src/issue-123.eval.ts | 4 +- packages/example/src/long.eval.ts | 4 +- packages/example/src/no-scorers.eval.ts | 4 +- packages/example/src/object.eval.ts | 4 +- packages/example/src/traces.eval.ts | 6 +- packages/example/src/trial-count.eval.ts | 4 +- packages/example/src/variants.eval.ts | 6 +- pnpm-lock.yaml | 1107 ++--------------- readme.md | 12 +- 233 files changed, 1142 insertions(+), 1975 deletions(-) delete mode 100644 apps/evalite-ui/README.md rename apps/{evalite-docs => evaluhealth-docs}/.gitignore (100%) rename apps/{evalite-docs => evaluhealth-docs}/CLAUDE.md (100%) rename apps/{evalite-docs => evaluhealth-docs}/README.md (100%) rename apps/{evalite-docs => evaluhealth-docs}/astro.config.mts (87%) rename apps/{evalite-docs => evaluhealth-docs}/package.json (91%) rename apps/{evalite-docs => evaluhealth-docs}/public/favicon.ico (100%) rename apps/{evalite-docs => evaluhealth-docs}/public/hero.webp (100%) rename apps/{evalite-docs => evaluhealth-docs}/public/og-image.jpg (100%) rename apps/{evalite-docs => evaluhealth-docs}/src/assets/houston.webp (100%) rename apps/{evalite-docs => evaluhealth-docs}/src/assets/logo-dark.svg (100%) rename apps/{evalite-docs => evaluhealth-docs}/src/assets/logo-light.svg (100%) rename apps/{evalite-docs => evaluhealth-docs}/src/components/Banner.astro (78%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/config.ts (100%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/examples/ai-sdk.md (85%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/ci.mdx (76%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/cli.mdx (70%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/configuration.mdx (64%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/customizing-the-ui.mdx (88%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/environment-variables.mdx (85%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/multi-modal.mdx (70%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/running-programmatically.mdx (76%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/scorers.mdx (93%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/skipping.mdx (82%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/streams.md (63%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/traces.mdx (82%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/variant-comparison.mdx (86%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/index.mdx (84%) rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/quickstart.mdx (75%) rename apps/{evalite-docs/src/content/docs/what-is-evalite.mdx => evaluhealth-docs/src/content/docs/what-is-evaluhealth.mdx} (76%) rename apps/{evalite-docs => evaluhealth-docs}/src/env.d.ts (100%) rename apps/{evalite-docs => evaluhealth-docs}/tsconfig.json (100%) rename apps/{evalite-ui => evaluhealth-ui}/.eslintrc.cjs (100%) rename apps/{evalite-ui => evaluhealth-ui}/.gitignore (100%) create mode 100644 apps/evaluhealth-ui/README.md rename apps/{evalite-ui => evaluhealth-ui}/app/components/display-input.tsx (94%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/logo.tsx (98%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/page-layout.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/score.tsx (96%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/breadcrumb.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/button.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/chart.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/copy-button.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/input.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/line-chart.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/live-date.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/separator.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/sheet.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/sidebar.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/skeleton.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/table.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/tooltip.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/data/queries.ts (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/data/use-subscribe-to-socket.ts (76%) rename apps/{evalite-ui => evaluhealth-ui}/app/hooks/use-mobile.ts (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/hooks/use-mobile.tsx (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/hooks/use-server-state-utils.ts (79%) rename apps/{evalite-ui => evaluhealth-ui}/app/lib/utils.ts (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/main.tsx (95%) rename apps/{evalite-ui => evaluhealth-ui}/app/routes/$.tsx (94%) rename apps/{evalite-ui => evaluhealth-ui}/app/routes/__root.tsx (97%) rename apps/{evalite-ui => evaluhealth-ui}/app/routes/eval.$name.result.$resultIndex.tsx (98%) rename apps/{evalite-ui => evaluhealth-ui}/app/routes/eval.$name.tsx (97%) rename apps/{evalite-ui => evaluhealth-ui}/app/sdk.ts (79%) rename apps/{evalite-ui => evaluhealth-ui}/app/tailwind.css (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/utils.test.ts (100%) rename apps/{evalite-ui => evaluhealth-ui}/app/utils.ts (85%) rename apps/{evalite-ui => evaluhealth-ui}/components.json (100%) rename apps/{evalite-ui => evaluhealth-ui}/eslint.config.js (100%) rename apps/{evalite-ui => evaluhealth-ui}/index.html (100%) rename apps/{evalite-ui => evaluhealth-ui}/package.json (92%) rename apps/{evalite-ui => evaluhealth-ui}/public/assets/favicon.svg (100%) rename apps/{evalite-ui => evaluhealth-ui}/tsconfig.json (100%) rename apps/{evalite-ui => evaluhealth-ui}/tsr.config.json (100%) rename apps/{evalite-ui => evaluhealth-ui}/vite.config.ts (100%) delete mode 100644 packages/evalite-tests/.gitignore delete mode 100644 packages/evalite-tests/tests/fixtures/config-precedence/vite.config.ts delete mode 100644 packages/evalite/src/constants.ts delete mode 100644 packages/evalite/src/index.ts create mode 100644 packages/evaluhealth-tests/.gitignore rename packages/{evalite-tests => evaluhealth-tests}/CHANGELOG.md (66%) rename packages/{evalite-tests => evaluhealth-tests}/package.json (73%) rename packages/{evalite-tests => evaluhealth-tests}/tests/ai-sdk-traces.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/basics.test.ts (97%) rename packages/{evalite-tests => evaluhealth-tests}/tests/columns-with-scores-traces.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/columns.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/config.test.ts (82%) rename packages/{evalite-tests => evaluhealth-tests}/tests/custom-scorer.test.ts (97%) rename packages/{evalite-tests => evaluhealth-tests}/tests/export-static.test.ts (90%) rename packages/{evalite-tests => evaluhealth-tests}/tests/failing.test.ts (95%) rename packages/{evalite-tests => evaluhealth-tests}/tests/files.test.ts (87%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/ai-sdk-traces-stream/traces.eval.ts (89%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/ai-sdk-traces/traces.eval.ts (88%) rename packages/{evalite-tests/tests/fixtures/config-includes => evaluhealth-tests/tests/fixtures/basics}/basics.eval.ts (84%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/columns-with-scores-traces/columns-with-scores-traces.eval.ts (94%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/columns/columns.eval.ts (89%) rename packages/{evalite-tests/tests/fixtures/basics => evaluhealth-tests/tests/fixtures/config-includes}/basics.eval.ts (84%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/config-includes/vite.config.ts (100%) rename packages/{evalite-tests/tests/fixtures/config-precedence/evalite.config.ts => evaluhealth-tests/tests/fixtures/config-precedence/evaluhealth.config.ts} (60%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/config-precedence/test.eval.ts (72%) create mode 100644 packages/evaluhealth-tests/tests/fixtures/config-precedence/vite.config.ts rename packages/{evalite-tests/tests/fixtures/config-setupfiles/evalite.config.ts => evaluhealth-tests/tests/fixtures/config-setupfiles/evaluhealth.config.ts} (56%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/config-setupfiles/setup.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/config-setupfiles/test.eval.ts (77%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/custom-scorer/index.eval.ts (85%) rename packages/{evalite-tests/tests/fixtures/evalite-config => evaluhealth-tests/tests/fixtures/evaluhealth-config}/basics.eval.ts (78%) rename packages/{evalite-tests/tests/fixtures/evalite-config/evalite.config.ts => evaluhealth-tests/tests/fixtures/evaluhealth-config/evaluhealth.config.ts} (71%) rename packages/{evalite-tests/tests/fixtures/evalite-timeout-config => evaluhealth-tests/tests/fixtures/evaluhealth-timeout-config}/basics.eval.ts (81%) rename packages/{evalite-tests/tests/fixtures/evalite-timeout-config/evalite.config.ts => evaluhealth-tests/tests/fixtures/evaluhealth-timeout-config/evaluhealth.config.ts} (51%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/experimental_columns/files-4.eval.ts (76%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/experimental_columns/test.png (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/export/export.eval.ts (57%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/export/test.png (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/failing-test-in-data/failing-test.eval.ts (77%) rename packages/{evalite-tests/tests/fixtures/watch-mode-fail => evaluhealth-tests/tests/fixtures/failing-test}/failing-test.eval.ts (81%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/failing-test/vite.config.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/files/files-1.eval.ts (70%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/files/files-2.eval.ts (74%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/files/files-3.eval.ts (76%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/files/files-4.eval.ts (76%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/files/test.png (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/long-text/long-text.eval.ts (94%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/module-level-error/module-level-error.eval.ts (73%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/module-level-error/vite.config.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/much-data/much-data.eval.ts (89%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/multi/multi-1.eval.ts (83%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/multi/multi-2.eval.ts (83%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/multi/multi-3.eval.ts (86%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/no-scorers/no-scorers.eval.ts (81%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/non-serializable-data/non-serializable-data.eval.ts (87%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/objects/objects.eval.ts (86%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/only-flag-multiple/only-flag-multiple.eval.ts (86%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/only-flag-none/only-flag-none.eval.ts (82%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/only-flag-single/only-flag-single.eval.ts (82%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/only-flag-variants/only-flag-variants.eval.ts (89%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/paths/should-not-run.eval.ts (83%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/paths/should-run.eval.ts (83%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/polymorphic-data/polymorphic.eval.ts (82%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/stream/file.txt (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/stream/stream.eval.ts (87%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/test-modifiers-regular/test-modifiers.eval.ts (81%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/test-modifiers-skipped/test-modifiers.eval.ts (82%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/threshold/basics.eval.ts (76%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/timeout/timeout.eval.ts (83%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/timeout/vite.config.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/traces/traces.eval.ts (81%) rename packages/{evalite-tests/tests/fixtures/trial-count-config/evalite.config.ts => evaluhealth-tests/tests/fixtures/trial-count-config/evaluhealth.config.ts} (50%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/trial-count-config/test.eval.ts (55%) rename packages/{evalite-tests/tests/fixtures/trial-count-precedence/evalite.config.ts => evaluhealth-tests/tests/fixtures/trial-count-precedence/evaluhealth.config.ts} (50%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/trial-count-precedence/test.eval.ts (67%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/trial-count/trial-count.eval.ts (78%) rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/variants/variants.eval.ts (87%) rename packages/{evalite-tests/tests/fixtures/failing-test => evaluhealth-tests/tests/fixtures/watch-mode-fail}/failing-test.eval.ts (81%) rename packages/{evalite-tests => evaluhealth-tests}/tests/long-text.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/much-data.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/multi.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/no-scorers.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/non-serializable-data.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/objects.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/only-flag.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/output-path.test.ts (98%) rename packages/{evalite-tests => evaluhealth-tests}/tests/paths.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/polymorphic-data.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/stream.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/test-modifiers.test.ts (93%) rename packages/{evalite-tests => evaluhealth-tests}/tests/test-utils.ts (87%) rename packages/{evalite-tests => evaluhealth-tests}/tests/threshold.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/timeout.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/traces.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/trial-count.test.ts (97%) rename packages/{evalite-tests => evaluhealth-tests}/tests/variants.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tests/watch-mode.test.ts (100%) rename packages/{evalite-tests => evaluhealth-tests}/tsconfig.json (100%) rename packages/{evalite-tests => evaluhealth-tests}/vitest.config.ts (100%) rename packages/{evalite => evaluhealth}/.gitignore (71%) rename packages/{evalite => evaluhealth}/CHANGELOG.md (78%) rename packages/{evalite => evaluhealth}/package.json (83%) rename packages/{evalite => evaluhealth}/readme.md (54%) rename packages/{evalite => evaluhealth}/src/ai-sdk.ts (100%) rename packages/{evalite => evaluhealth}/src/backend-only-constants.ts (100%) rename packages/{evalite => evaluhealth}/src/bin.ts (100%) rename packages/{evalite => evaluhealth}/src/command.test.ts (90%) rename packages/{evalite => evaluhealth}/src/command.ts (92%) rename packages/{evalite => evaluhealth}/src/config.ts (59%) create mode 100644 packages/evaluhealth/src/constants.ts rename packages/{evalite => evaluhealth}/src/create-scorer.ts (73%) rename packages/{evalite/src/evalite.ts => evaluhealth/src/evaluhealth.ts} (87%) rename packages/{evalite => evaluhealth}/src/export-static.ts (83%) create mode 100644 packages/evaluhealth/src/index.ts rename packages/{evalite => evaluhealth}/src/reporter.ts (93%) rename packages/{evalite/src/reporter/EvaliteRunner.ts => evaluhealth/src/reporter/EvaluhealthRunner.ts} (92%) rename packages/{evalite => evaluhealth}/src/reporter/events.ts (61%) rename packages/{evalite => evaluhealth}/src/reporter/rendering.ts (93%) rename packages/{evalite/src/run-evalite.ts => evaluhealth/src/run-evaluhealth.ts} (90%) rename packages/{evalite => evaluhealth}/src/sdk.ts (71%) rename packages/{evalite => evaluhealth}/src/server.ts (95%) rename packages/{evalite => evaluhealth}/src/storage/in-memory.ts (81%) rename packages/{evalite => evaluhealth}/src/storage/sqlite.ts (85%) rename packages/{evalite => evaluhealth}/src/storage/storage.test.ts (99%) rename packages/{evalite => evaluhealth}/src/storage/test-utils.ts (87%) rename packages/{evalite => evaluhealth}/src/storage/utils.ts (92%) rename packages/{evalite => evaluhealth}/src/traces.ts (66%) rename packages/{evalite => evaluhealth}/src/types.ts (86%) rename packages/{evalite => evaluhealth}/src/utils.ts (72%) rename packages/{evalite => evaluhealth}/src/write-file-queue-local-storage.ts (100%) rename packages/{evalite => evaluhealth}/tsconfig-warning/index.d.ts (79%) rename packages/{evalite => evaluhealth}/tsconfig-warning/index.js (88%) rename packages/{evalite => evaluhealth}/tsconfig.json (100%) rename packages/example/{evalite.config.ts => evaluhealth.config.ts} (57%) diff --git a/.changeset/config.json b/.changeset/config.json index 3d5c3b31..83119579 100644 --- a/.changeset/config.json +++ b/.changeset/config.json @@ -6,5 +6,10 @@ "linked": [], "access": "public", "baseBranch": "main", - "ignore": ["example", "evalite-tests", "evalite-docs", "evalite-ui"] + "ignore": [ + "example", + "evaluhealth-tests", + "evaluhealth-docs", + "evaluhealth-ui" + ] } diff --git a/.changeset/tasty-parents-bathe.md b/.changeset/tasty-parents-bathe.md index 720c7c61..bfeb2572 100644 --- a/.changeset/tasty-parents-bathe.md +++ b/.changeset/tasty-parents-bathe.md @@ -1,5 +1,5 @@ --- -"evalite-ui": patch +"evaluhealth-ui": patch --- -Use window.location.origin for WebSocket and BASE_URL to allow opening evalite running on remote server +Use window.location.origin for WebSocket and BASE_URL to allow opening evaluhealth running on remote server diff --git a/.github/workflows/preview.yml b/.github/workflows/preview.yml index f3be05f8..f199e037 100644 --- a/.github/workflows/preview.yml +++ b/.github/workflows/preview.yml @@ -24,4 +24,4 @@ jobs: - run: pnpm build - name: Publish preview releases - run: pnpx pkg-pr-new publish packages/evalite + run: pnpx pkg-pr-new publish packages/evaluhealth diff --git a/.prettierignore b/.prettierignore index 95e4ab7c..99fcce93 100644 --- a/.prettierignore +++ b/.prettierignore @@ -3,12 +3,12 @@ dist .turbo *.tsbuildinfo coverage -.evalite -evalite.db +.evaluhealth +evaluhealth.db pnpm-lock.yaml **/tests/playground/** build vite.config.ts.timestamp* *.d.ts *.gen.ts -**/evalite-export \ No newline at end of file +**/evaluhealth-export \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index c4bab832..f705ef56 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,15 +4,15 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Overview -Evalite is a TypeScript-native, local-first tool for testing LLM-powered apps built on Vitest. It allows developers to write evaluations (evals) as `.eval.ts` files that run like tests. +Evaluhealth is a TypeScript-native, local-first tool for testing LLM-powered apps built on Vitest. It allows developers to write evaluations (evals) as `.eval.ts` files that run like tests. ## Configuration -The primary configuration method is `evalite.config.ts`. While `vitest.config.ts` is still supported for backward compatibility, it is not documented and `evalite.config.ts` should be used for all configuration needs. +The primary configuration method is `evaluhealth.config.ts`. While `vitest.config.ts` is still supported for backward compatibility, it is not documented and `evaluhealth.config.ts` should be used for all configuration needs. ## Development Commands -**Development mode** (recommended for working on Evalite itself): +**Development mode** (recommended for working on Evaluhealth itself): ```bash pnpm run dev @@ -20,8 +20,8 @@ pnpm run dev This runs: -- TypeScript type checker on `evalite` package -- Tests in `evalite-tests` package +- TypeScript type checker on `evaluhealth` package +- Tests in `evaluhealth-tests` package - Live reload for both packages **Build all packages**: @@ -30,7 +30,7 @@ This runs: pnpm build ``` -This builds `evalite` package first, then `evalite-ui`, copying UI assets to `packages/evalite/dist/ui`. +This builds `evaluhealth` package first, then `evaluhealth-ui`, copying UI assets to `packages/evaluhealth/dist/ui`. **Run CI pipeline** (build, test, lint): @@ -42,20 +42,20 @@ pnpm ci ```bash pnpm run example -# Or: cd packages/example && pnpm evalite watch +# Or: cd packages/example && pnpm evaluhealth watch ``` **Run single package tests**: ```bash -cd packages/evalite && pnpm test -cd packages/evalite-tests && pnpm test +cd packages/evaluhealth && pnpm test +cd packages/evaluhealth-tests && pnpm test ``` **Lint a package**: ```bash -cd packages/evalite && pnpm lint +cd packages/evaluhealth && pnpm lint ``` ## Working with pnpm Filters @@ -67,64 +67,64 @@ When working on specific packages in this monorepo, **use pnpm's `--filter` flag **Build a specific package**: ```bash -pnpm --filter evalite build -pnpm --filter evalite-ui build +pnpm --filter evaluhealth build +pnpm --filter evaluhealth-ui build ``` **Run tests for a specific package**: ```bash -pnpm --filter evalite-tests test +pnpm --filter evaluhealth-tests test ``` **Run dev mode for a specific package**: ```bash -pnpm --filter evalite dev -pnpm --filter evalite-ui dev +pnpm --filter evaluhealth dev +pnpm --filter evaluhealth-ui dev ``` **Lint a specific package**: ```bash -pnpm --filter evalite lint -pnpm --filter evalite-tests lint +pnpm --filter evaluhealth lint +pnpm --filter evaluhealth-tests lint ``` ### Filter Patterns pnpm supports several filter patterns: -- `--filter evalite` - Run task for the `evalite` package only -- `--filter evalite...` - Run task for `evalite` and all its dependencies -- `--filter ...evalite` - Run task for `evalite` and all packages that depend on it +- `--filter evaluhealth` - Run task for the `evaluhealth` package only +- `--filter evaluhealth...` - Run task for `evaluhealth` and all its dependencies +- `--filter ...evaluhealth` - Run task for `evaluhealth` and all packages that depend on it - `--filter "./packages/*"` - Run task for all packages in the packages directory -- `--filter "!evalite"` - Run task for all packages except `evalite` +- `--filter "!evaluhealth"` - Run task for all packages except `evaluhealth` ### Examples for Common Workflows -**Working on the main evalite package**: +**Working on the main evaluhealth package**: ```bash -# Build evalite and watch for changes -pnpm --filter evalite dev +# Build evaluhealth and watch for changes +pnpm --filter evaluhealth dev # Run tests after making changes -pnpm --filter evalite test +pnpm --filter evaluhealth test ``` **Working on the UI**: ```bash -# Build evalite first, then start UI dev server -pnpm run build:evalite && pnpm --filter evalite-ui dev +# Build evaluhealth first, then start UI dev server +pnpm run build:evaluhealth && pnpm --filter evaluhealth-ui dev ``` **Working on integration tests**: ```bash -# Ensure evalite is built before running tests -pnpm run build && pnpm --filter evalite-tests test +# Ensure evaluhealth is built before running tests +pnpm run build && pnpm --filter evaluhealth-tests test ``` ### When to Use Filters @@ -138,7 +138,7 @@ pnpm run build && pnpm --filter evalite-tests test **Direct package commands are fine for**: - Quick one-off commands (like `pnpm install`) -- Running the evalite CLI itself (e.g., `cd packages/example && pnpm evalite watch`) +- Running the evaluhealth CLI itself (e.g., `cd packages/example && pnpm evaluhealth watch`) - When already in the package directory ## Architecture @@ -147,21 +147,21 @@ pnpm run build && pnpm --filter evalite-tests test This is a pnpm workspace: -- **`packages/evalite`**: Main package that users install. Exports the `evalite()` function, CLI binary (`evalite`), server, database layer, and utilities. Built with TypeScript. +- **`packages/evaluhealth`**: Main package that users install. Exports the `evaluhealth()` function, CLI binary (`evaluhealth`), server, database layer, and utilities. Built with TypeScript. -- **`packages/evalite-core`**: Shared core utilities (currently appears to be deprecated or minimal) +- **`packages/evaluhealth-core`**: Shared core utilities (currently appears to be deprecated or minimal) -- **`packages/evalite-tests`**: Integration tests for evalite functionality +- **`packages/evaluhealth-tests`**: Integration tests for evaluhealth functionality - **`packages/example`**: Example eval files demonstrating usage patterns (e.g., `example.eval.ts`, `traces.eval.ts`) -- **`apps/evalite-ui`**: React-based web UI that displays eval results. Built with Vite, TanStack Router, and Tailwind. Gets copied to `packages/evalite/dist/ui` during build via the `after-build` script. +- **`apps/evaluhealth-ui`**: React-based web UI that displays eval results. Built with Vite, TanStack Router, and Tailwind. Gets copied to `packages/evaluhealth/dist/ui` during build via the `after-build` script. -- **`apps/evalite-docs`**: Documentation site +- **`apps/evaluhealth-docs`**: Documentation site ### Core Concepts -**Eval files**: Files matching `*.eval.ts` (or `.eval.mts`) that contain `evalite()` calls. These define: +**Eval files**: Files matching `*.eval.ts` (or `.eval.mts`) that contain `evaluhealth()` calls. These define: - A dataset (via `data()` function returning input/expected pairs) - A task (the LLM interaction to test) @@ -170,37 +170,37 @@ This is a pnpm workspace: **Execution flow**: -1. The `evalite` CLI uses Vitest under the hood to discover and run `*.eval.ts` files +1. The `evaluhealth` CLI uses Vitest under the hood to discover and run `*.eval.ts` files 2. Each eval creates a Vitest `describe` block with concurrent `it` tests for each data point -3. Results are stored in a SQLite database (`evalite.db`) +3. Results are stored in a SQLite database (`evaluhealth.db`) 4. A Fastify server serves the UI and provides WebSocket updates during runs -5. Files (images, audio, etc.) are saved to `.evalite` directory +5. Files (images, audio, etc.) are saved to `.evaluhealth` directory **Key architecture points**: - Uses Vitest's `inject("cwd")` to get the working directory - Supports async iterables (streaming) from tasks via `executeTask()` -- Files in input/output/expected are automatically detected and saved using `createEvaliteFileIfNeeded()` +- Files in input/output/expected are automatically detected and saved using `createEvaluhealthFileIfNeeded()` - Traces can be reported via `reportTraceLocalStorage` for nested LLM calls -- Integrates with AI SDK via `evalite/ai-sdk` export (provides `traceAISDKModel()`) +- Integrates with AI SDK via `evaluhealth/ai-sdk` export (provides `traceAISDKModel()`) ### Database Layer -SQLite database (`evalite.db`) stores: +SQLite database (`evaluhealth.db`) stores: - Runs (full or partial) - Evals (distinct eval names with metadata) - Results (individual test case results with scores, traces, columns) - Scores and traces are stored as JSON -Key queries in `packages/evalite/src/db.ts`: +Key queries in `packages/evaluhealth/src/db.ts`: - `getEvals()`, `getResults()`, `getScores()`, `getTraces()` - `getMostRecentRun()`, `getPreviousCompletedEval()` ### Server & UI -The Fastify server in `packages/evalite/src/server.ts`: +The Fastify server in `packages/evaluhealth/src/server.ts`: - Serves the UI from `dist/ui/` - Provides REST API at `/api/*` (menu-items, server-state, evals, results, etc.) @@ -208,11 +208,11 @@ The Fastify server in `packages/evalite/src/server.ts`: ## Important Notes -**Linking for local development**: If you need to test the global `evalite` command locally: +**Linking for local development**: If you need to test the global `evaluhealth` command locally: ```bash pnpm build -cd packages/evalite && npm link +cd packages/evaluhealth && npm link ``` **Node version**: Requires Node.js >= 22 @@ -235,7 +235,7 @@ The format of the file should be: ```md --- -"evalite": patch +"evaluhealth": patch --- Description of the change. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9ebaa921..7f5aa203 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,4 @@ -# Contributing to Evalite +# Contributing to Evaluhealth ## Development Commands @@ -7,25 +7,25 @@ This monorepo uses pnpm workspaces (no Turbo). All scripts use `pnpm --filter` t **Available scripts:** ```bash -pnpm run dev # Build evalite + UI, then run tsc -w on evalite + vitest on evalite-tests -pnpm run example # Build, then run evalite watch + UI dev server (parallel) -pnpm run test # Build, then run tests on evalite + evalite-tests -pnpm run build # Build evalite, then evalite-ui +pnpm run dev # Build evaluhealth + UI, then run tsc -w on evaluhealth + vitest on evaluhealth-tests +pnpm run example # Build, then run evaluhealth watch + UI dev server (parallel) +pnpm run test # Build, then run tests on evaluhealth + evaluhealth-tests +pnpm run build # Build evaluhealth, then evaluhealth-ui pnpm run ci # Full CI: build, test, lint, check-format ``` **Individual package scripts:** ```bash -pnpm build:evalite # Build evalite package only -pnpm build:evalite-ui # Build UI and copy to evalite/dist/ui +pnpm build:evaluhealth # Build evaluhealth package only +pnpm build:evaluhealth-ui # Build UI and copy to evaluhealth/dist/ui ``` **Setup:** 1. Create `.env` in `packages/example` with `OPENAI_API_KEY=your-key` 2. Run `pnpm install` -3. For global `evalite` command: `pnpm build && cd packages/evalite && npm link` +3. For global `evaluhealth` command: `pnpm build && cd packages/evaluhealth && npm link` ## Styling Guidelines diff --git a/apps/evalite-ui/README.md b/apps/evalite-ui/README.md deleted file mode 100644 index e8f05ecf..00000000 --- a/apps/evalite-ui/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Evalite UI - -This is the UI for Evalite. It is built with React, Vite, Tanstack Router, and Tailwind CSS. - -## Development - -From the root of the monorepo, run: - -```bash -pnpm run dev -``` - -## Deployment - -The UI is copied into the `evalite` package during the `after-build` script in the root `package.json`. diff --git a/apps/evalite-docs/.gitignore b/apps/evaluhealth-docs/.gitignore similarity index 100% rename from apps/evalite-docs/.gitignore rename to apps/evaluhealth-docs/.gitignore diff --git a/apps/evalite-docs/CLAUDE.md b/apps/evaluhealth-docs/CLAUDE.md similarity index 100% rename from apps/evalite-docs/CLAUDE.md rename to apps/evaluhealth-docs/CLAUDE.md diff --git a/apps/evalite-docs/README.md b/apps/evaluhealth-docs/README.md similarity index 100% rename from apps/evalite-docs/README.md rename to apps/evaluhealth-docs/README.md diff --git a/apps/evalite-docs/astro.config.mts b/apps/evaluhealth-docs/astro.config.mts similarity index 87% rename from apps/evalite-docs/astro.config.mts rename to apps/evaluhealth-docs/astro.config.mts index 1ce51671..5dfbe76e 100644 --- a/apps/evalite-docs/astro.config.mts +++ b/apps/evaluhealth-docs/astro.config.mts @@ -5,28 +5,28 @@ import starlight from "@astrojs/starlight"; export default defineConfig({ integrations: [ starlight({ - title: "Evalite", + title: "Evaluhealth", favicon: "/favicon.ico", components: { Banner: "./src/components/Banner.astro", }, editLink: { baseUrl: - "https://github.com/mattpocock/evalite/edit/main/apps/evalite-docs", + "https://github.com/kernelius-hq/evaluhealth/edit/main/apps/evaluhealth-docs", }, head: [ { tag: "meta", attrs: { property: "og:url", - content: "https://evalite.dev", + content: "https://evalu.health", }, }, { tag: "meta", attrs: { property: "og:image", - content: "https://evalite.dev/og-image.jpg", + content: "https://evalu.health/og-image.jpg", }, }, { @@ -47,7 +47,7 @@ export default defineConfig({ tag: "meta", attrs: { property: "og:image:alt", - content: "Evalite Logo", + content: "Evaluhealth Logo", }, }, { @@ -61,7 +61,7 @@ export default defineConfig({ tag: "meta", attrs: { name: "twitter:image", - content: "https://evalite.dev/og-image.jpg", + content: "https://evalu.health/og-image.jpg", }, }, { @@ -82,8 +82,8 @@ export default defineConfig({ }, ], social: { - github: "https://github.com/mattpocock/evalite", - discord: "https://mattpocock.com/ai-discord", + github: "https://github.com/kernelius-hq/evaluhealth", + discord: "https://kernelius.com/discord", }, logo: { light: "./src/assets/logo-light.svg", @@ -94,8 +94,8 @@ export default defineConfig({ label: "Getting Started", items: [ { - label: "What Is Evalite?", - slug: "what-is-evalite", + label: "What Is Evaluhealth?", + slug: "what-is-evaluhealth", }, { label: "Quickstart", diff --git a/apps/evalite-docs/package.json b/apps/evaluhealth-docs/package.json similarity index 91% rename from apps/evalite-docs/package.json rename to apps/evaluhealth-docs/package.json index 448c8355..86ec77f9 100644 --- a/apps/evalite-docs/package.json +++ b/apps/evaluhealth-docs/package.json @@ -1,5 +1,5 @@ { - "name": "evalite-docs", + "name": "evaluhealth-docs", "type": "module", "private": true, "version": "0.0.1", diff --git a/apps/evalite-docs/public/favicon.ico b/apps/evaluhealth-docs/public/favicon.ico similarity index 100% rename from apps/evalite-docs/public/favicon.ico rename to apps/evaluhealth-docs/public/favicon.ico diff --git a/apps/evalite-docs/public/hero.webp b/apps/evaluhealth-docs/public/hero.webp similarity index 100% rename from apps/evalite-docs/public/hero.webp rename to apps/evaluhealth-docs/public/hero.webp diff --git a/apps/evalite-docs/public/og-image.jpg b/apps/evaluhealth-docs/public/og-image.jpg similarity index 100% rename from apps/evalite-docs/public/og-image.jpg rename to apps/evaluhealth-docs/public/og-image.jpg diff --git a/apps/evalite-docs/src/assets/houston.webp b/apps/evaluhealth-docs/src/assets/houston.webp similarity index 100% rename from apps/evalite-docs/src/assets/houston.webp rename to apps/evaluhealth-docs/src/assets/houston.webp diff --git a/apps/evalite-docs/src/assets/logo-dark.svg b/apps/evaluhealth-docs/src/assets/logo-dark.svg similarity index 100% rename from apps/evalite-docs/src/assets/logo-dark.svg rename to apps/evaluhealth-docs/src/assets/logo-dark.svg diff --git a/apps/evalite-docs/src/assets/logo-light.svg b/apps/evaluhealth-docs/src/assets/logo-light.svg similarity index 100% rename from apps/evalite-docs/src/assets/logo-light.svg rename to apps/evaluhealth-docs/src/assets/logo-light.svg diff --git a/apps/evalite-docs/src/components/Banner.astro b/apps/evaluhealth-docs/src/components/Banner.astro similarity index 78% rename from apps/evalite-docs/src/components/Banner.astro rename to apps/evaluhealth-docs/src/components/Banner.astro index 7a86e3e8..41ed8de8 100644 --- a/apps/evalite-docs/src/components/Banner.astro +++ b/apps/evaluhealth-docs/src/components/Banner.astro @@ -7,11 +7,11 @@ import type { Props } from "@astrojs/starlight/props"; role="banner" aria-label="v1 beta announcement" > - The beta version of Evalite v1 is now available! Install with pnpm add evalite@betapnpm add evaluhealth@beta • View beta docs → diff --git a/apps/evalite-docs/src/content/config.ts b/apps/evaluhealth-docs/src/content/config.ts similarity index 100% rename from apps/evalite-docs/src/content/config.ts rename to apps/evaluhealth-docs/src/content/config.ts diff --git a/apps/evalite-docs/src/content/docs/examples/ai-sdk.md b/apps/evaluhealth-docs/src/content/docs/examples/ai-sdk.md similarity index 85% rename from apps/evalite-docs/src/content/docs/examples/ai-sdk.md rename to apps/evaluhealth-docs/src/content/docs/examples/ai-sdk.md index 215e5ea0..9de3120a 100644 --- a/apps/evalite-docs/src/content/docs/examples/ai-sdk.md +++ b/apps/evaluhealth-docs/src/content/docs/examples/ai-sdk.md @@ -16,10 +16,10 @@ You can use the `traceAISDKModel` function to trace the calls to the AI SDK: import { openai } from "@ai-sdk/openai"; import { streamText } from "ai"; import { Factuality, Levenshtein } from "autoevals"; -import { evalite } from "evalite"; -import { traceAISDKModel } from "evalite/ai-sdk"; +import { evaluhealth } from "evaluhealth"; +import { traceAISDKModel } from "evaluhealth/ai-sdk"; -evalite("Test Capitals", { +evaluhealth("Test Capitals", { data: async () => [ { input: `What's the capital of France?`, @@ -50,9 +50,9 @@ evalite("Test Capitals", { ## Testing Whole Conversations -You can also pass messages to the `input` property of the eval. To get autocomplete, you can pass the `CoreMessage` type to the `evalite` function as a type argument. +You can also pass messages to the `input` property of the eval. To get autocomplete, you can pass the `CoreMessage` type to the `evaluhealth` function as a type argument. -The three type parameters for `evalite` are: +The three type parameters for `evaluhealth` are: - The type of the input - The type of the output @@ -64,10 +64,10 @@ The three type parameters for `evalite` are: import { openai } from "@ai-sdk/openai"; import { streamText, type CoreMessage } from "ai"; import { Levenshtein } from "autoevals"; -import { evalite } from "evalite"; -import { traceAISDKModel } from "evalite/ai-sdk"; +import { evaluhealth } from "evaluhealth"; +import { traceAISDKModel } from "evaluhealth/ai-sdk"; -evalite("Test Capitals", { +evaluhealth("Test Capitals", { data: async () => [ { input: [ diff --git a/apps/evalite-docs/src/content/docs/guides/ci.mdx b/apps/evaluhealth-docs/src/content/docs/guides/ci.mdx similarity index 76% rename from apps/evalite-docs/src/content/docs/guides/ci.mdx rename to apps/evaluhealth-docs/src/content/docs/guides/ci.mdx index a0ab56ec..4e6f1341 100644 --- a/apps/evalite-docs/src/content/docs/guides/ci.mdx +++ b/apps/evaluhealth-docs/src/content/docs/guides/ci.mdx @@ -2,7 +2,7 @@ title: CI/CD --- -Evalite integrates seamlessly into CI/CD pipelines, allowing you to validate LLM-powered features as part of your automated testing workflow. +Evaluhealth integrates seamlessly into CI/CD pipelines, allowing you to validate LLM-powered features as part of your automated testing workflow. ## Static UI Export @@ -11,29 +11,29 @@ Export eval results as a static HTML bundle for viewing in CI artifacts without ### Basic Usage ```bash -evalite export +evaluhealth export ``` -Exports latest full run to `./evalite-export` directory. +Exports latest full run to `./evaluhealth-export` directory. ### Options **Custom output directory:** ```bash -evalite export --output=./my-export +evaluhealth export --output=./my-export ``` **Export specific run:** ```bash -evalite export --run-id=123 +evaluhealth export --run-id=123 ``` **Custom base path for non-root hosting:** ```bash -evalite export --basePath=/evals-123 +evaluhealth export --basePath=/evals-123 ``` Use when hosting at subpaths (e.g., S3/CloudFront with path-based URLs). The base path must start with `/`. @@ -52,7 +52,7 @@ Generated bundle contains: **Local preview:** ```bash -npx serve -s ./evalite-export +npx serve -s ./evaluhealth-export ``` **Static hosting:** Upload to artifact.ci, S3, GitHub Pages, etc. @@ -81,15 +81,15 @@ jobs: - name: Run evaluations env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: npx evalite --threshold=70 + run: npx evaluhealth --threshold=70 - name: Export UI - run: npx evalite export --output=./ui-export + run: npx evaluhealth export --output=./ui-export - name: Upload static UI uses: actions/upload-artifact@v3 with: - name: evalite-ui + name: evaluhealth-ui path: ui-export ``` @@ -103,7 +103,7 @@ Deploy to S3/CloudFront with unique paths per run: - name: Export UI with base path run: | RUN_PATH="/evals-${{ github.run_id }}" - npx evalite export --basePath="$RUN_PATH" --output=./ui-export + npx evaluhealth export --basePath="$RUN_PATH" --output=./ui-export - name: Upload to S3 run: | @@ -115,11 +115,11 @@ To test locally with base path: ```bash # Export with base path -evalite export --basePath=/evals-123 +evaluhealth export --basePath=/evals-123 # Create matching directory structure mkdir -p /tmp/test/evals-123 -cp -r evalite-export/* /tmp/test/evals-123/ +cp -r evaluhealth-export/* /tmp/test/evals-123/ # Serve and visit http://localhost:3000/evals-123 npx serve /tmp/test @@ -127,10 +127,10 @@ npx serve /tmp/test ## Running on CI -Run Evalite in run-once mode (default): +Run Evaluhealth in run-once mode (default): ```bash -evalite +evaluhealth ``` Executes all evals and exits. @@ -140,7 +140,7 @@ Executes all evals and exits. Fail CI builds if scores fall below threshold: ```bash -evalite --threshold=70 +evaluhealth --threshold=70 ``` Exits with code 1 if average score < 70. @@ -150,7 +150,7 @@ Exits with code 1 if average score < 70. For programmatic analysis, export raw JSON: ```bash -evalite --outputPath=./results.json +evaluhealth --outputPath=./results.json ``` ### Export Format @@ -158,9 +158,9 @@ evalite --outputPath=./results.json Typed hierarchical structure: ```typescript -import type { Evalite } from "evalite"; +import type { Evaluhealth } from "evaluhealth"; -type Output = Evalite.Exported.Output; +type Output = Evaluhealth.Exported.Output; ``` Contains: diff --git a/apps/evalite-docs/src/content/docs/guides/cli.mdx b/apps/evaluhealth-docs/src/content/docs/guides/cli.mdx similarity index 70% rename from apps/evalite-docs/src/content/docs/guides/cli.mdx rename to apps/evaluhealth-docs/src/content/docs/guides/cli.mdx index b581c2a4..1cae6770 100644 --- a/apps/evalite-docs/src/content/docs/guides/cli.mdx +++ b/apps/evaluhealth-docs/src/content/docs/guides/cli.mdx @@ -4,10 +4,10 @@ title: CLI ## Watch Mode -You can run Evalite in watch mode by running `evalite watch`: +You can run Evaluhealth in watch mode by running `evaluhealth watch`: ```bash -evalite watch +evaluhealth watch ``` This will watch for changes to your `.eval.ts` files and re-run the evals when they change. @@ -21,7 +21,7 @@ This will watch for changes to your `.eval.ts` files and re-run the evals when t When debugging with `console.log`, the detailed table output can make it harder to see your logs. You can hide it with `--hideTable`: ```bash -evalite watch --hideTable +evaluhealth watch --hideTable ``` This keeps the score summary but removes the detailed results table from the CLI output. @@ -31,38 +31,38 @@ This keeps the score summary but removes the detailed results table from the CLI You can run evals once and serve the UI without re-running on file changes: ```bash -evalite serve +evaluhealth serve ``` This runs your evals once and keeps the UI server running at `http://localhost:3006`. Unlike watch mode, tests won't re-run when files change. Since evals can take a while to run, this can be a useful alternative to watch mode. -To re-run evals after making changes, restart `evalite serve`. +To re-run evals after making changes, restart `evaluhealth serve`. ## Running Specific Files You can run specific files by passing them as arguments: ```bash -evalite my-eval.eval.ts +evaluhealth my-eval.eval.ts ``` This also works for `watch` and `serve` modes: ```bash -evalite watch my-eval.eval.ts -evalite serve my-eval.eval.ts +evaluhealth watch my-eval.eval.ts +evaluhealth serve my-eval.eval.ts ``` ## Threshold -You can tell Evalite that your evals must pass a specific score by passing `--threshold`: +You can tell Evaluhealth that your evals must pass a specific score by passing `--threshold`: ```bash -evalite --threshold=50 # Score must be greater than or equal to 50 +evaluhealth --threshold=50 # Score must be greater than or equal to 50 -evalite watch --threshold=70 # Also works in watch mode +evaluhealth watch --threshold=70 # Also works in watch mode ``` This is useful for running on CI. If the score threshold is not met, it will fail the process. @@ -72,10 +72,10 @@ This is useful for running on CI. If the score threshold is not met, it will fai Export eval results as a static HTML bundle: ```bash -evalite export +evaluhealth export ``` -This exports the latest run to `./evalite-export` by default. +This exports the latest run to `./evaluhealth-export` by default. ### Options @@ -84,7 +84,7 @@ This exports the latest run to `./evalite-export` by default. - `--basePath` - Base path for non-root hosting (must start with `/`) ```bash -evalite export --basePath=/evals-123 --output=./my-export +evaluhealth export --basePath=/evals-123 --output=./my-export ``` See the [CI/CD guide](/guides/ci) for full documentation on exporting and viewing static UI bundles. diff --git a/apps/evalite-docs/src/content/docs/guides/configuration.mdx b/apps/evaluhealth-docs/src/content/docs/guides/configuration.mdx similarity index 64% rename from apps/evalite-docs/src/content/docs/guides/configuration.mdx rename to apps/evaluhealth-docs/src/content/docs/guides/configuration.mdx index 2c4ca11f..07f956f7 100644 --- a/apps/evalite-docs/src/content/docs/guides/configuration.mdx +++ b/apps/evaluhealth-docs/src/content/docs/guides/configuration.mdx @@ -4,15 +4,15 @@ title: Configuration import { Steps } from "@astrojs/starlight/components"; -Since **Evalite is based on Vitest**, you can configure eval behavior using Vitest's configuration options. Each data point in your eval becomes a separate Vitest test case, which means all Vitest configuration options work with Evalite. +Since **Evaluhealth is based on Vitest**, you can configure eval behavior using Vitest's configuration options. Each data point in your eval becomes a separate Vitest test case, which means all Vitest configuration options work with Evaluhealth. -## Evalite Configuration +## Evaluhealth Configuration -You can configure Evalite-specific options using `evalite.config.ts`: +You can configure Evaluhealth-specific options using `evaluhealth.config.ts`: ```ts -// evalite.config.ts -import { defineConfig } from "evalite/config"; +// evaluhealth.config.ts +import { defineConfig } from "evaluhealth/config"; export default defineConfig({ testTimeout: 60000, // 60 seconds @@ -31,7 +31,7 @@ export default defineConfig({ - **`maxConcurrency`**: Maximum number of test cases to run in parallel. Default is 5. - **`scoreThreshold`**: Minimum average score (0-100). Process exits with code 1 if average score falls below this threshold. - **`hideTable`**: Hide the results table in terminal output. Default is false. -- **`server.port`**: Port for the Evalite UI server. Default is 3006. +- **`server.port`**: Port for the Evaluhealth UI server. Default is 3006. - **`trialCount`**: Number of times to run each test case. Default is 1. Useful for measuring variance in non-deterministic evaluations. - **`setupFiles`**: Array of file paths to run before tests (e.g., for loading environment variables). @@ -41,11 +41,11 @@ export default defineConfig({ Control how many test cases run in parallel. Default is 5. -Configure in `evalite.config.ts`: +Configure in `evaluhealth.config.ts`: ```ts -// evalite.config.ts -import { defineConfig } from "evalite/config"; +// evaluhealth.config.ts +import { defineConfig } from "evaluhealth/config"; export default defineConfig({ maxConcurrency: 100, // Run up to 100 tests in parallel @@ -56,13 +56,13 @@ This is useful for optimizing performance and managing API rate limits. ### `testTimeout` -Set the maximum time (in milliseconds) a test can run before timing out. Default is 30000ms in Evalite. +Set the maximum time (in milliseconds) a test can run before timing out. Default is 30000ms in Evaluhealth. -Configure in `evalite.config.ts`: +Configure in `evaluhealth.config.ts`: ```ts -// evalite.config.ts -import { defineConfig } from "evalite/config"; +// evaluhealth.config.ts +import { defineConfig } from "evaluhealth/config"; export default defineConfig({ testTimeout: 60000, // 60 seconds @@ -73,21 +73,21 @@ export default defineConfig({ Run each test case multiple times to measure variance in non-deterministic evaluations. -Configure globally in `evalite.config.ts`: +Configure globally in `evaluhealth.config.ts`: ```ts -// evalite.config.ts -import { defineConfig } from "evalite/config"; +// evaluhealth.config.ts +import { defineConfig } from "evaluhealth/config"; export default defineConfig({ trialCount: 3, // Run each test case 3 times }); ``` -Or override per-eval in the `evalite()` call: +Or override per-eval in the `evaluhealth()` call: ```ts -evalite("Non-deterministic eval", { +evaluhealth("Non-deterministic eval", { data: () => [{ input: "Alice", expected: "Alice" }], task: async (input) => { // Non-deterministic task @@ -100,4 +100,4 @@ evalite("Non-deterministic eval", { }); ``` -Note: Per-eval `trialCount` overrides `evalite.config.ts` if both are present. +Note: Per-eval `trialCount` overrides `evaluhealth.config.ts` if both are present. diff --git a/apps/evalite-docs/src/content/docs/guides/customizing-the-ui.mdx b/apps/evaluhealth-docs/src/content/docs/guides/customizing-the-ui.mdx similarity index 88% rename from apps/evalite-docs/src/content/docs/guides/customizing-the-ui.mdx rename to apps/evaluhealth-docs/src/content/docs/guides/customizing-the-ui.mdx index 78ad14c6..a1a33bb0 100644 --- a/apps/evalite-docs/src/content/docs/guides/customizing-the-ui.mdx +++ b/apps/evaluhealth-docs/src/content/docs/guides/customizing-the-ui.mdx @@ -6,18 +6,18 @@ import { Aside } from "@astrojs/starlight/components"; ## Creating Custom Columns -By default, the Evalite UI renders the input, expected and output columns: +By default, the Evaluhealth UI renders the input, expected and output columns: | Input | Expected | Output | | ------------------------ | --------------------------- | ---------------- | | `input` passed to `data` | `expected` passed to `data` | Result of `task` | -You can customize the columns shown by the UI by passing a `columns` attribute to the `evalite` function: +You can customize the columns shown by the UI by passing a `columns` attribute to the `evaluhealth` function: ```ts -import { evalite } from "evalite"; +import { evaluhealth } from "evaluhealth"; -evalite("My Eval", { +evaluhealth("My Eval", { data: [{ input: { a: 1, b: 2, c: 3, theOnlyPropertyWeWantToShow: "Hello" } }], task: async (input) => { return input.theOnlyPropertyWeWantToShow + " World!"; @@ -49,10 +49,10 @@ This will show two columns: The `columns` function also receives the computed `scores` and `traces` arrays, allowing you to display scorer results and trace information: ```ts -import { evalite } from "evalite"; -import { reportTrace } from "evalite/traces"; +import { evaluhealth } from "evaluhealth"; +import { reportTrace } from "evaluhealth/traces"; -evalite("My Eval", { +evaluhealth("My Eval", { data: [{ input: "test", expected: "TEST" }], task: async (input) => { reportTrace({ diff --git a/apps/evalite-docs/src/content/docs/guides/environment-variables.mdx b/apps/evaluhealth-docs/src/content/docs/guides/environment-variables.mdx similarity index 85% rename from apps/evalite-docs/src/content/docs/guides/environment-variables.mdx rename to apps/evaluhealth-docs/src/content/docs/guides/environment-variables.mdx index 24e15b49..75c5f899 100644 --- a/apps/evalite-docs/src/content/docs/guides/environment-variables.mdx +++ b/apps/evaluhealth-docs/src/content/docs/guides/environment-variables.mdx @@ -30,12 +30,12 @@ To call your LLM from a third-party service, you'll likely need some environment pnpm add -D dotenv ``` -4. Add an `evalite.config.ts` file: +4. Add an `evaluhealth.config.ts` file: ```ts - // evalite.config.ts + // evaluhealth.config.ts - import { defineConfig } from "evalite/config"; + import { defineConfig } from "evaluhealth/config"; export default defineConfig({ setupFiles: ["dotenv/config"], diff --git a/apps/evalite-docs/src/content/docs/guides/multi-modal.mdx b/apps/evaluhealth-docs/src/content/docs/guides/multi-modal.mdx similarity index 70% rename from apps/evalite-docs/src/content/docs/guides/multi-modal.mdx rename to apps/evaluhealth-docs/src/content/docs/guides/multi-modal.mdx index 8f747424..ae3a5d1b 100644 --- a/apps/evalite-docs/src/content/docs/guides/multi-modal.mdx +++ b/apps/evaluhealth-docs/src/content/docs/guides/multi-modal.mdx @@ -4,11 +4,11 @@ title: Multi-Modal import { Aside } from "@astrojs/starlight/components"; -Evalite can handle not just text responses, but media like images, audio, and video. +Evaluhealth can handle not just text responses, but media like images, audio, and video. ## Files In Memory -A common way to work with media in Evalite is to read it into memory. +A common way to work with media in Evaluhealth is to read it into memory. ### What Are Files In Memory? @@ -36,15 +36,15 @@ It doesn't matter what the file extension is - when you read it into memory, it' just refer to them as `Uint8Array` objects. -### Evalite And Files In Memory +### Evaluhealth And Files In Memory -Evalite can automatically detect `Uint8Array` objects in your evals and handle them for you. +Evaluhealth can automatically detect `Uint8Array` objects in your evals and handle them for you. ```ts -import { evalite } from "evalite"; -import { reportTrace } from "evalite/traces"; +import { evaluhealth } from "evaluhealth"; +import { reportTrace } from "evaluhealth/traces"; -evalite("My Eval", { +evaluhealth("My Eval", { data: [ { // 1. In inputs... @@ -76,21 +76,21 @@ evalite("My Eval", { }); ``` -When Evalite finds a `Uint8Array`, it saves the file to a local cache, in `./node_modules/.evalite/files`. +When Evaluhealth finds a `Uint8Array`, it saves the file to a local cache, in `./node_modules/.evaluhealth/files`. Then in the UI, it'll reference that local file. ## Files On Disk -If you're working with files on disk, you can use the `EvaliteFile.fromPath` method to reference them: +If you're working with files on disk, you can use the `EvaluhealthFile.fromPath` method to reference them: ```ts -import { EvaliteFile, evalite } from "evalite"; +import { EvaluhealthFile, evaluhealth } from "evaluhealth"; -evalite("My Eval", { +evaluhealth("My Eval", { data: [ { - input: EvaliteFile.fromPath("path/to/file.jpg"), + input: EvaluhealthFile.fromPath("path/to/file.jpg"), }, ], task: async (input) => { diff --git a/apps/evalite-docs/src/content/docs/guides/running-programmatically.mdx b/apps/evaluhealth-docs/src/content/docs/guides/running-programmatically.mdx similarity index 76% rename from apps/evalite-docs/src/content/docs/guides/running-programmatically.mdx rename to apps/evaluhealth-docs/src/content/docs/guides/running-programmatically.mdx index 53252ea1..7371654b 100644 --- a/apps/evalite-docs/src/content/docs/guides/running-programmatically.mdx +++ b/apps/evaluhealth-docs/src/content/docs/guides/running-programmatically.mdx @@ -2,16 +2,16 @@ title: Running Programmatically --- -You can run Evalite programmatically using the Node API. This is useful when you want to integrate Evalite into your own scripts, CI/CD pipelines, or custom tooling. +You can run Evaluhealth programmatically using the Node API. This is useful when you want to integrate Evaluhealth into your own scripts, CI/CD pipelines, or custom tooling. ## Basic Usage -Import the `runEvalite` function from `evalite/runner`: +Import the `runEvaluhealth` function from `evaluhealth/runner`: ```typescript -import { runEvalite } from "evalite/runner"; +import { runEvaluhealth } from "evaluhealth/runner"; -await runEvalite({ +await runEvaluhealth({ mode: "run-once-and-exit", }); ``` @@ -25,17 +25,17 @@ That's it! The `path` and `cwd` parameters are optional and default to running a This mode runs all evals once and exits. It's ideal for CI/CD pipelines: ```typescript -await runEvalite({ +await runEvaluhealth({ mode: "run-once-and-exit", }); ``` ### Watch Mode -This mode watches for file changes and re-runs evals automatically. It also starts the Evalite UI server: +This mode watches for file changes and re-runs evals automatically. It also starts the Evaluhealth UI server: ```typescript -await runEvalite({ +await runEvaluhealth({ mode: "watch-for-file-changes", }); ``` @@ -51,7 +51,7 @@ await runEvalite({ Optional path filter to run specific eval files. Defaults to `undefined` (runs all evals): ```typescript -await runEvalite({ +await runEvaluhealth({ path: "my-eval.eval.ts", mode: "run-once-and-exit", }); @@ -62,7 +62,7 @@ await runEvalite({ The working directory to run evals from. Defaults to `process.cwd()`: ```typescript -await runEvalite({ +await runEvaluhealth({ cwd: "/path/to/my/project", mode: "run-once-and-exit", }); @@ -73,7 +73,7 @@ await runEvalite({ Set a minimum score threshold (0-100). If the average score falls below this threshold, the process will exit with a non-zero exit code: ```typescript -await runEvalite({ +await runEvaluhealth({ mode: "run-once-and-exit", scoreThreshold: 80, // Fail if score is below 80 }); @@ -86,7 +86,7 @@ This is particularly useful for CI/CD pipelines where you want to fail the build Export the results to a JSON file after the run completes: ```typescript -await runEvalite({ +await runEvaluhealth({ mode: "run-once-and-exit", outputPath: "./results.json", }); @@ -99,14 +99,14 @@ The exported JSON file contains the complete run data including all evals, resul Here's a complete example that combines multiple options: ```typescript -import { runEvalite } from "evalite/runner"; +import { runEvaluhealth } from "evaluhealth/runner"; async function runEvals() { try { - await runEvalite({ + await runEvaluhealth({ mode: "run-once-and-exit", scoreThreshold: 75, // Fail if average score < 75 - outputPath: "./evalite-results.json", // Export results + outputPath: "./evaluhealth-results.json", // Export results }); console.log("All evals passed!"); } catch (error) { diff --git a/apps/evalite-docs/src/content/docs/guides/scorers.mdx b/apps/evaluhealth-docs/src/content/docs/guides/scorers.mdx similarity index 93% rename from apps/evalite-docs/src/content/docs/guides/scorers.mdx rename to apps/evaluhealth-docs/src/content/docs/guides/scorers.mdx index 5ff16b80..806a2b81 100644 --- a/apps/evalite-docs/src/content/docs/guides/scorers.mdx +++ b/apps/evaluhealth-docs/src/content/docs/guides/scorers.mdx @@ -13,9 +13,9 @@ Scorers are used to score the output of your LLM call. If you don't need your scorer to be reusable, you can define it inline. ```ts -import { evalite } from "evalite"; +import { evaluhealth } from "evaluhealth"; -evalite("My Eval", { +evaluhealth("My Eval", { data: [{ input: "Hello" }], task: async (input) => { return input + " World!"; @@ -37,7 +37,7 @@ evalite("My Eval", { If you have a scorer you want to use across multiple files, you can use `createScorer` to create a reusable scorer. ```ts -import { createScorer } from "evalite"; +import { createScorer } from "evaluhealth"; const containsParis = createScorer({ name: "Contains Paris", @@ -47,7 +47,7 @@ const containsParis = createScorer({ }, }); -evalite("My Eval", { +evaluhealth("My Eval", { data: [{ input: "Hello" }], task: async (input) => { return input + " World!"; @@ -56,14 +56,14 @@ evalite("My Eval", { }); ``` -The `name` and `description` of the scorer will be displayed in the Evalite UI. +The `name` and `description` of the scorer will be displayed in the Evaluhealth UI. ## Score Properties The `score` function receives three properties on the object passed: ```ts -import { createScorer } from "evalite"; +import { createScorer } from "evaluhealth"; const containsParis = createScorer({ name: "Contains Paris", @@ -80,7 +80,7 @@ const containsParis = createScorer({ These are typed using the three type arguments passed to `createScorer`: ```ts -import { createScorer } from "evalite"; +import { createScorer } from "evaluhealth"; const containsParis = createScorer< string, // Type of 'input' @@ -103,7 +103,7 @@ If `expected` is omitted, it will be inferred from the type of `output`. You can provide metadata along with your custom scorer: ```ts -import { createScorer } from "evalite"; +import { createScorer } from "evaluhealth"; const containsParis = createScorer({ name: "Contains Paris", @@ -119,7 +119,7 @@ const containsParis = createScorer({ }); ``` -This will be visible along with the score in the Evalite UI. +This will be visible along with the score in the Evaluhealth UI.