From 73df42676ba77b005121e9b89f51d4c4a2468a02 Mon Sep 17 00:00:00 2001
From: z8 <1234yamd@gmail.com>
Date: Sat, 20 Dec 2025 09:58:28 +0200
Subject: [PATCH 1/5] Rename evalite to evaluhealth across packages, apps, and documentation
- Renamed evalite to evaluhealth in various files and configurations.
- Moved apps/evalite-docs to apps/evaluhealth-docs and updated its content to use the new name.
- Removed evalite-related files and configurations.
- Updated package.json and README files to reflect the changes.
- Added .gitignore entries for evaluhealth-specific files.
This commit enhances the structure and clarity of the evaluhealth package, ensuring consistency across documentation and codebase.
---
.changeset/config.json | 7 +-
.changeset/tasty-parents-bathe.md | 4 +-
.github/workflows/preview.yml | 2 +-
.prettierignore | 6 +-
CLAUDE.md | 94 +-
CONTRIBUTING.md | 16 +-
apps/evalite-ui/README.md | 15 -
.../.gitignore | 0
.../CLAUDE.md | 0
.../README.md | 0
.../astro.config.mts | 20 +-
.../package.json | 2 +-
.../public/favicon.ico | Bin
.../public/hero.webp | Bin
.../public/og-image.jpg | Bin
.../src/assets/houston.webp | Bin
.../src/assets/logo-dark.svg | 0
.../src/assets/logo-light.svg | 0
.../src/components/Banner.astro | 6 +-
.../src/content/config.ts | 0
.../src/content/docs/examples/ai-sdk.md | 16 +-
.../src/content/docs/guides/ci.mdx | 38 +-
.../src/content/docs/guides/cli.mdx | 28 +-
.../src/content/docs/guides/configuration.mdx | 38 +-
.../docs/guides/customizing-the-ui.mdx | 14 +-
.../docs/guides/environment-variables.mdx | 6 +-
.../src/content/docs/guides/multi-modal.mdx | 24 +-
.../docs/guides/running-programmatically.mdx | 28 +-
.../src/content/docs/guides/scorers.mdx | 22 +-
.../src/content/docs/guides/skipping.mdx | 6 +-
.../src/content/docs/guides/streams.md | 6 +-
.../src/content/docs/guides/traces.mdx | 12 +-
.../docs/guides/variant-comparison.mdx | 10 +-
.../src/content/docs/index.mdx | 4 +-
.../src/content/docs/quickstart.mdx | 22 +-
.../src/content/docs/what-is-evaluhealth.mdx} | 16 +-
.../src/env.d.ts | 0
.../tsconfig.json | 0
.../.eslintrc.cjs | 0
.../{evalite-ui => evaluhealth-ui}/.gitignore | 0
apps/evaluhealth-ui/README.md | 15 +
.../app/components/display-input.tsx | 20 +-
.../app/components/logo.tsx | 2 +-
.../app/components/page-layout.tsx | 0
.../app/components/score.tsx | 4 +-
.../app/components/ui/breadcrumb.tsx | 0
.../app/components/ui/button.tsx | 0
.../app/components/ui/chart.tsx | 0
.../app/components/ui/copy-button.tsx | 0
.../app/components/ui/input.tsx | 0
.../app/components/ui/line-chart.tsx | 0
.../app/components/ui/live-date.tsx | 0
.../app/components/ui/separator.tsx | 0
.../app/components/ui/sheet.tsx | 0
.../app/components/ui/sidebar.tsx | 0
.../app/components/ui/skeleton.tsx | 0
.../app/components/ui/table.tsx | 0
.../app/components/ui/tooltip.tsx | 0
.../app/data/queries.ts | 0
.../app/data/use-subscribe-to-socket.ts | 8 +-
.../app/hooks/use-mobile.ts | 0
.../app/hooks/use-mobile.tsx | 0
.../app/hooks/use-server-state-utils.ts | 4 +-
.../app/lib/utils.ts | 0
.../app/main.tsx | 2 +-
.../app/routes/$.tsx | 2 +-
.../app/routes/__root.tsx | 6 +-
.../routes/eval.$name.result.$resultIndex.tsx | 6 +-
.../app/routes/eval.$name.tsx | 18 +-
.../{evalite-ui => evaluhealth-ui}/app/sdk.ts | 33 +-
.../app/tailwind.css | 0
.../app/utils.test.ts | 0
.../app/utils.ts | 4 +-
.../components.json | 0
.../eslint.config.js | 0
.../{evalite-ui => evaluhealth-ui}/index.html | 0
.../package.json | 6 +-
.../public/assets/favicon.svg | 0
.../tsconfig.json | 0
.../tsr.config.json | 0
.../vite.config.ts | 0
package.json | 20 +-
packages/evalite-tests/.gitignore | 2 -
.../fixtures/config-precedence/vite.config.ts | 8 -
packages/evalite/src/constants.ts | 2 -
packages/evalite/src/index.ts | 4 -
packages/evaluhealth-tests/.gitignore | 2 +
.../CHANGELOG.md | 30 +-
.../package.json | 6 +-
.../tests/ai-sdk-traces.test.ts | 0
.../tests/basics.test.ts | 2 +-
.../tests/columns-with-scores-traces.test.ts | 0
.../tests/columns.test.ts | 0
.../tests/config.test.ts | 6 +-
.../tests/custom-scorer.test.ts | 2 +-
.../tests/export-static.test.ts | 26 +-
.../tests/failing.test.ts | 4 +-
.../tests/files.test.ts | 18 +-
.../ai-sdk-traces-stream/traces.eval.ts | 6 +-
.../fixtures/ai-sdk-traces/traces.eval.ts | 6 +-
.../tests/fixtures/basics}/basics.eval.ts | 4 +-
.../columns-with-scores-traces.eval.ts | 6 +-
.../tests/fixtures/columns/columns.eval.ts | 4 +-
.../fixtures/config-includes}/basics.eval.ts | 4 +-
.../fixtures/config-includes/vite.config.ts | 0
.../config-precedence/evaluhealth.config.ts} | 2 +-
.../fixtures/config-precedence/test.eval.ts | 4 +-
.../fixtures/config-precedence/vite.config.ts | 8 +
.../config-setupfiles/evaluhealth.config.ts} | 2 +-
.../tests/fixtures/config-setupfiles/setup.ts | 0
.../fixtures/config-setupfiles/test.eval.ts | 4 +-
.../fixtures/custom-scorer/index.eval.ts | 4 +-
.../evaluhealth-config}/basics.eval.ts | 4 +-
.../evaluhealth-config/evaluhealth.config.ts} | 2 +-
.../basics.eval.ts | 4 +-
.../evaluhealth.config.ts} | 2 +-
.../experimental_columns/files-4.eval.ts | 6 +-
.../fixtures/experimental_columns/test.png | Bin
.../tests/fixtures/export/export.eval.ts | 16 +-
.../tests/fixtures/export/test.png | Bin
.../failing-test-in-data/failing-test.eval.ts | 4 +-
.../failing-test}/failing-test.eval.ts | 4 +-
.../fixtures/failing-test/vite.config.ts | 0
.../tests/fixtures/files/files-1.eval.ts | 6 +-
.../tests/fixtures/files/files-2.eval.ts | 6 +-
.../tests/fixtures/files/files-3.eval.ts | 6 +-
.../tests/fixtures/files/files-4.eval.ts | 6 +-
.../tests/fixtures/files/test.png | Bin
.../fixtures/long-text/long-text.eval.ts | 4 +-
.../module-level-error.eval.ts | 4 +-
.../module-level-error/vite.config.ts | 0
.../fixtures/much-data/much-data.eval.ts | 4 +-
.../tests/fixtures/multi/multi-1.eval.ts | 4 +-
.../tests/fixtures/multi/multi-2.eval.ts | 4 +-
.../tests/fixtures/multi/multi-3.eval.ts | 6 +-
.../fixtures/no-scorers/no-scorers.eval.ts | 4 +-
.../non-serializable-data.eval.ts | 4 +-
.../tests/fixtures/objects/objects.eval.ts | 4 +-
.../only-flag-multiple.eval.ts | 4 +-
.../only-flag-none/only-flag-none.eval.ts | 4 +-
.../only-flag-single/only-flag-single.eval.ts | 4 +-
.../only-flag-variants.eval.ts | 4 +-
.../fixtures/paths/should-not-run.eval.ts | 4 +-
.../tests/fixtures/paths/should-run.eval.ts | 4 +-
.../polymorphic-data/polymorphic.eval.ts | 6 +-
.../tests/fixtures/stream/file.txt | 0
.../tests/fixtures/stream/stream.eval.ts | 4 +-
.../test-modifiers.eval.ts | 8 +-
.../test-modifiers.eval.ts | 6 +-
.../tests/fixtures/threshold/basics.eval.ts | 4 +-
.../tests/fixtures/timeout/timeout.eval.ts | 4 +-
.../tests/fixtures/timeout/vite.config.ts | 0
.../tests/fixtures/traces/traces.eval.ts | 6 +-
.../trial-count-config/evaluhealth.config.ts} | 2 +-
.../fixtures/trial-count-config/test.eval.ts | 4 +-
.../evaluhealth.config.ts} | 2 +-
.../trial-count-precedence/test.eval.ts | 4 +-
.../fixtures/trial-count/trial-count.eval.ts | 4 +-
.../tests/fixtures/variants/variants.eval.ts | 4 +-
.../watch-mode-fail}/failing-test.eval.ts | 4 +-
.../tests/long-text.test.ts | 0
.../tests/much-data.test.ts | 0
.../tests/multi.test.ts | 0
.../tests/no-scorers.test.ts | 0
.../tests/non-serializable-data.test.ts | 0
.../tests/objects.test.ts | 0
.../tests/only-flag.test.ts | 0
.../tests/output-path.test.ts | 2 +-
.../tests/paths.test.ts | 0
.../tests/polymorphic-data.test.ts | 0
.../tests/stream.test.ts | 0
.../tests/test-modifiers.test.ts | 2 +-
.../tests/test-utils.ts | 21 +-
.../tests/threshold.test.ts | 0
.../tests/timeout.test.ts | 0
.../tests/traces.test.ts | 0
.../tests/trial-count.test.ts | 4 +-
.../tests/variants.test.ts | 0
.../tests/watch-mode.test.ts | 0
.../tsconfig.json | 0
.../vitest.config.ts | 0
packages/{evalite => evaluhealth}/.gitignore | 2 +-
.../{evalite => evaluhealth}/CHANGELOG.md | 82 +-
.../{evalite => evaluhealth}/package.json | 14 +-
packages/{evalite => evaluhealth}/readme.md | 26 +-
.../{evalite => evaluhealth}/src/ai-sdk.ts | 0
.../src/backend-only-constants.ts | 0
packages/{evalite => evaluhealth}/src/bin.ts | 0
.../src/command.test.ts | 20 +-
.../{evalite => evaluhealth}/src/command.ts | 20 +-
.../{evalite => evaluhealth}/src/config.ts | 34 +-
packages/evaluhealth/src/constants.ts | 2 +
.../src/create-scorer.ts | 8 +-
.../src/evaluhealth.ts} | 62 +-
.../src/export-static.ts | 105 +-
packages/evaluhealth/src/index.ts | 4 +
.../{evalite => evaluhealth}/src/reporter.ts | 28 +-
.../src/reporter/EvaluhealthRunner.ts} | 30 +-
.../src/reporter/events.ts | 24 +-
.../src/reporter/rendering.ts | 24 +-
.../src/run-evaluhealth.ts} | 42 +-
packages/{evalite => evaluhealth}/src/sdk.ts | 34 +-
.../{evalite => evaluhealth}/src/server.ts | 24 +-
.../src/storage/in-memory.ts | 74 +-
.../src/storage/sqlite.ts | 100 +-
.../src/storage/storage.test.ts | 2 +-
.../src/storage/test-utils.ts | 6 +-
.../src/storage/utils.ts | 4 +-
.../{evalite => evaluhealth}/src/traces.ts | 10 +-
.../{evalite => evaluhealth}/src/types.ts | 100 +-
.../{evalite => evaluhealth}/src/utils.ts | 20 +-
.../src/write-file-queue-local-storage.ts | 0
.../tsconfig-warning/index.d.ts | 8 +-
.../tsconfig-warning/index.js | 4 +-
.../{evalite => evaluhealth}/tsconfig.json | 0
packages/example/.gitignore | 4 +-
packages/example/CHANGELOG.md | 18 +-
...valite.config.ts => evaluhealth.config.ts} | 2 +-
packages/example/package.json | 4 +-
.../example/src/content-generation.eval.ts | 6 +-
packages/example/src/example.2.eval.ts | 6 +-
packages/example/src/example.eval.ts | 6 +-
packages/example/src/fail.eval.ts | 4 +-
packages/example/src/files.eval.ts | 6 +-
packages/example/src/issue-123.eval.ts | 4 +-
packages/example/src/long.eval.ts | 4 +-
packages/example/src/no-scorers.eval.ts | 4 +-
packages/example/src/object.eval.ts | 4 +-
packages/example/src/traces.eval.ts | 6 +-
packages/example/src/trial-count.eval.ts | 4 +-
packages/example/src/variants.eval.ts | 6 +-
pnpm-lock.yaml | 1107 ++---------------
readme.md | 12 +-
233 files changed, 1142 insertions(+), 1975 deletions(-)
delete mode 100644 apps/evalite-ui/README.md
rename apps/{evalite-docs => evaluhealth-docs}/.gitignore (100%)
rename apps/{evalite-docs => evaluhealth-docs}/CLAUDE.md (100%)
rename apps/{evalite-docs => evaluhealth-docs}/README.md (100%)
rename apps/{evalite-docs => evaluhealth-docs}/astro.config.mts (87%)
rename apps/{evalite-docs => evaluhealth-docs}/package.json (91%)
rename apps/{evalite-docs => evaluhealth-docs}/public/favicon.ico (100%)
rename apps/{evalite-docs => evaluhealth-docs}/public/hero.webp (100%)
rename apps/{evalite-docs => evaluhealth-docs}/public/og-image.jpg (100%)
rename apps/{evalite-docs => evaluhealth-docs}/src/assets/houston.webp (100%)
rename apps/{evalite-docs => evaluhealth-docs}/src/assets/logo-dark.svg (100%)
rename apps/{evalite-docs => evaluhealth-docs}/src/assets/logo-light.svg (100%)
rename apps/{evalite-docs => evaluhealth-docs}/src/components/Banner.astro (78%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/config.ts (100%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/examples/ai-sdk.md (85%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/ci.mdx (76%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/cli.mdx (70%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/configuration.mdx (64%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/customizing-the-ui.mdx (88%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/environment-variables.mdx (85%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/multi-modal.mdx (70%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/running-programmatically.mdx (76%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/scorers.mdx (93%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/skipping.mdx (82%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/streams.md (63%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/traces.mdx (82%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/guides/variant-comparison.mdx (86%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/index.mdx (84%)
rename apps/{evalite-docs => evaluhealth-docs}/src/content/docs/quickstart.mdx (75%)
rename apps/{evalite-docs/src/content/docs/what-is-evalite.mdx => evaluhealth-docs/src/content/docs/what-is-evaluhealth.mdx} (76%)
rename apps/{evalite-docs => evaluhealth-docs}/src/env.d.ts (100%)
rename apps/{evalite-docs => evaluhealth-docs}/tsconfig.json (100%)
rename apps/{evalite-ui => evaluhealth-ui}/.eslintrc.cjs (100%)
rename apps/{evalite-ui => evaluhealth-ui}/.gitignore (100%)
create mode 100644 apps/evaluhealth-ui/README.md
rename apps/{evalite-ui => evaluhealth-ui}/app/components/display-input.tsx (94%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/logo.tsx (98%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/page-layout.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/score.tsx (96%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/breadcrumb.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/button.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/chart.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/copy-button.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/input.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/line-chart.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/live-date.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/separator.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/sheet.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/sidebar.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/skeleton.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/table.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/components/ui/tooltip.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/data/queries.ts (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/data/use-subscribe-to-socket.ts (76%)
rename apps/{evalite-ui => evaluhealth-ui}/app/hooks/use-mobile.ts (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/hooks/use-mobile.tsx (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/hooks/use-server-state-utils.ts (79%)
rename apps/{evalite-ui => evaluhealth-ui}/app/lib/utils.ts (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/main.tsx (95%)
rename apps/{evalite-ui => evaluhealth-ui}/app/routes/$.tsx (94%)
rename apps/{evalite-ui => evaluhealth-ui}/app/routes/__root.tsx (97%)
rename apps/{evalite-ui => evaluhealth-ui}/app/routes/eval.$name.result.$resultIndex.tsx (98%)
rename apps/{evalite-ui => evaluhealth-ui}/app/routes/eval.$name.tsx (97%)
rename apps/{evalite-ui => evaluhealth-ui}/app/sdk.ts (79%)
rename apps/{evalite-ui => evaluhealth-ui}/app/tailwind.css (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/utils.test.ts (100%)
rename apps/{evalite-ui => evaluhealth-ui}/app/utils.ts (85%)
rename apps/{evalite-ui => evaluhealth-ui}/components.json (100%)
rename apps/{evalite-ui => evaluhealth-ui}/eslint.config.js (100%)
rename apps/{evalite-ui => evaluhealth-ui}/index.html (100%)
rename apps/{evalite-ui => evaluhealth-ui}/package.json (92%)
rename apps/{evalite-ui => evaluhealth-ui}/public/assets/favicon.svg (100%)
rename apps/{evalite-ui => evaluhealth-ui}/tsconfig.json (100%)
rename apps/{evalite-ui => evaluhealth-ui}/tsr.config.json (100%)
rename apps/{evalite-ui => evaluhealth-ui}/vite.config.ts (100%)
delete mode 100644 packages/evalite-tests/.gitignore
delete mode 100644 packages/evalite-tests/tests/fixtures/config-precedence/vite.config.ts
delete mode 100644 packages/evalite/src/constants.ts
delete mode 100644 packages/evalite/src/index.ts
create mode 100644 packages/evaluhealth-tests/.gitignore
rename packages/{evalite-tests => evaluhealth-tests}/CHANGELOG.md (66%)
rename packages/{evalite-tests => evaluhealth-tests}/package.json (73%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/ai-sdk-traces.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/basics.test.ts (97%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/columns-with-scores-traces.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/columns.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/config.test.ts (82%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/custom-scorer.test.ts (97%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/export-static.test.ts (90%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/failing.test.ts (95%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/files.test.ts (87%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/ai-sdk-traces-stream/traces.eval.ts (89%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/ai-sdk-traces/traces.eval.ts (88%)
rename packages/{evalite-tests/tests/fixtures/config-includes => evaluhealth-tests/tests/fixtures/basics}/basics.eval.ts (84%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/columns-with-scores-traces/columns-with-scores-traces.eval.ts (94%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/columns/columns.eval.ts (89%)
rename packages/{evalite-tests/tests/fixtures/basics => evaluhealth-tests/tests/fixtures/config-includes}/basics.eval.ts (84%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/config-includes/vite.config.ts (100%)
rename packages/{evalite-tests/tests/fixtures/config-precedence/evalite.config.ts => evaluhealth-tests/tests/fixtures/config-precedence/evaluhealth.config.ts} (60%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/config-precedence/test.eval.ts (72%)
create mode 100644 packages/evaluhealth-tests/tests/fixtures/config-precedence/vite.config.ts
rename packages/{evalite-tests/tests/fixtures/config-setupfiles/evalite.config.ts => evaluhealth-tests/tests/fixtures/config-setupfiles/evaluhealth.config.ts} (56%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/config-setupfiles/setup.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/config-setupfiles/test.eval.ts (77%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/custom-scorer/index.eval.ts (85%)
rename packages/{evalite-tests/tests/fixtures/evalite-config => evaluhealth-tests/tests/fixtures/evaluhealth-config}/basics.eval.ts (78%)
rename packages/{evalite-tests/tests/fixtures/evalite-config/evalite.config.ts => evaluhealth-tests/tests/fixtures/evaluhealth-config/evaluhealth.config.ts} (71%)
rename packages/{evalite-tests/tests/fixtures/evalite-timeout-config => evaluhealth-tests/tests/fixtures/evaluhealth-timeout-config}/basics.eval.ts (81%)
rename packages/{evalite-tests/tests/fixtures/evalite-timeout-config/evalite.config.ts => evaluhealth-tests/tests/fixtures/evaluhealth-timeout-config/evaluhealth.config.ts} (51%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/experimental_columns/files-4.eval.ts (76%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/experimental_columns/test.png (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/export/export.eval.ts (57%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/export/test.png (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/failing-test-in-data/failing-test.eval.ts (77%)
rename packages/{evalite-tests/tests/fixtures/watch-mode-fail => evaluhealth-tests/tests/fixtures/failing-test}/failing-test.eval.ts (81%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/failing-test/vite.config.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/files/files-1.eval.ts (70%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/files/files-2.eval.ts (74%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/files/files-3.eval.ts (76%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/files/files-4.eval.ts (76%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/files/test.png (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/long-text/long-text.eval.ts (94%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/module-level-error/module-level-error.eval.ts (73%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/module-level-error/vite.config.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/much-data/much-data.eval.ts (89%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/multi/multi-1.eval.ts (83%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/multi/multi-2.eval.ts (83%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/multi/multi-3.eval.ts (86%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/no-scorers/no-scorers.eval.ts (81%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/non-serializable-data/non-serializable-data.eval.ts (87%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/objects/objects.eval.ts (86%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/only-flag-multiple/only-flag-multiple.eval.ts (86%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/only-flag-none/only-flag-none.eval.ts (82%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/only-flag-single/only-flag-single.eval.ts (82%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/only-flag-variants/only-flag-variants.eval.ts (89%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/paths/should-not-run.eval.ts (83%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/paths/should-run.eval.ts (83%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/polymorphic-data/polymorphic.eval.ts (82%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/stream/file.txt (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/stream/stream.eval.ts (87%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/test-modifiers-regular/test-modifiers.eval.ts (81%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/test-modifiers-skipped/test-modifiers.eval.ts (82%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/threshold/basics.eval.ts (76%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/timeout/timeout.eval.ts (83%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/timeout/vite.config.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/traces/traces.eval.ts (81%)
rename packages/{evalite-tests/tests/fixtures/trial-count-config/evalite.config.ts => evaluhealth-tests/tests/fixtures/trial-count-config/evaluhealth.config.ts} (50%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/trial-count-config/test.eval.ts (55%)
rename packages/{evalite-tests/tests/fixtures/trial-count-precedence/evalite.config.ts => evaluhealth-tests/tests/fixtures/trial-count-precedence/evaluhealth.config.ts} (50%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/trial-count-precedence/test.eval.ts (67%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/trial-count/trial-count.eval.ts (78%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/fixtures/variants/variants.eval.ts (87%)
rename packages/{evalite-tests/tests/fixtures/failing-test => evaluhealth-tests/tests/fixtures/watch-mode-fail}/failing-test.eval.ts (81%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/long-text.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/much-data.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/multi.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/no-scorers.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/non-serializable-data.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/objects.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/only-flag.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/output-path.test.ts (98%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/paths.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/polymorphic-data.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/stream.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/test-modifiers.test.ts (93%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/test-utils.ts (87%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/threshold.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/timeout.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/traces.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/trial-count.test.ts (97%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/variants.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tests/watch-mode.test.ts (100%)
rename packages/{evalite-tests => evaluhealth-tests}/tsconfig.json (100%)
rename packages/{evalite-tests => evaluhealth-tests}/vitest.config.ts (100%)
rename packages/{evalite => evaluhealth}/.gitignore (71%)
rename packages/{evalite => evaluhealth}/CHANGELOG.md (78%)
rename packages/{evalite => evaluhealth}/package.json (83%)
rename packages/{evalite => evaluhealth}/readme.md (54%)
rename packages/{evalite => evaluhealth}/src/ai-sdk.ts (100%)
rename packages/{evalite => evaluhealth}/src/backend-only-constants.ts (100%)
rename packages/{evalite => evaluhealth}/src/bin.ts (100%)
rename packages/{evalite => evaluhealth}/src/command.test.ts (90%)
rename packages/{evalite => evaluhealth}/src/command.ts (92%)
rename packages/{evalite => evaluhealth}/src/config.ts (59%)
create mode 100644 packages/evaluhealth/src/constants.ts
rename packages/{evalite => evaluhealth}/src/create-scorer.ts (73%)
rename packages/{evalite/src/evalite.ts => evaluhealth/src/evaluhealth.ts} (87%)
rename packages/{evalite => evaluhealth}/src/export-static.ts (83%)
create mode 100644 packages/evaluhealth/src/index.ts
rename packages/{evalite => evaluhealth}/src/reporter.ts (93%)
rename packages/{evalite/src/reporter/EvaliteRunner.ts => evaluhealth/src/reporter/EvaluhealthRunner.ts} (92%)
rename packages/{evalite => evaluhealth}/src/reporter/events.ts (61%)
rename packages/{evalite => evaluhealth}/src/reporter/rendering.ts (93%)
rename packages/{evalite/src/run-evalite.ts => evaluhealth/src/run-evaluhealth.ts} (90%)
rename packages/{evalite => evaluhealth}/src/sdk.ts (71%)
rename packages/{evalite => evaluhealth}/src/server.ts (95%)
rename packages/{evalite => evaluhealth}/src/storage/in-memory.ts (81%)
rename packages/{evalite => evaluhealth}/src/storage/sqlite.ts (85%)
rename packages/{evalite => evaluhealth}/src/storage/storage.test.ts (99%)
rename packages/{evalite => evaluhealth}/src/storage/test-utils.ts (87%)
rename packages/{evalite => evaluhealth}/src/storage/utils.ts (92%)
rename packages/{evalite => evaluhealth}/src/traces.ts (66%)
rename packages/{evalite => evaluhealth}/src/types.ts (86%)
rename packages/{evalite => evaluhealth}/src/utils.ts (72%)
rename packages/{evalite => evaluhealth}/src/write-file-queue-local-storage.ts (100%)
rename packages/{evalite => evaluhealth}/tsconfig-warning/index.d.ts (79%)
rename packages/{evalite => evaluhealth}/tsconfig-warning/index.js (88%)
rename packages/{evalite => evaluhealth}/tsconfig.json (100%)
rename packages/example/{evalite.config.ts => evaluhealth.config.ts} (57%)
diff --git a/.changeset/config.json b/.changeset/config.json
index 3d5c3b31..83119579 100644
--- a/.changeset/config.json
+++ b/.changeset/config.json
@@ -6,5 +6,10 @@
"linked": [],
"access": "public",
"baseBranch": "main",
- "ignore": ["example", "evalite-tests", "evalite-docs", "evalite-ui"]
+ "ignore": [
+ "example",
+ "evaluhealth-tests",
+ "evaluhealth-docs",
+ "evaluhealth-ui"
+ ]
}
diff --git a/.changeset/tasty-parents-bathe.md b/.changeset/tasty-parents-bathe.md
index 720c7c61..bfeb2572 100644
--- a/.changeset/tasty-parents-bathe.md
+++ b/.changeset/tasty-parents-bathe.md
@@ -1,5 +1,5 @@
---
-"evalite-ui": patch
+"evaluhealth-ui": patch
---
-Use window.location.origin for WebSocket and BASE_URL to allow opening evalite running on remote server
+Use window.location.origin for WebSocket and BASE_URL to allow opening evaluhealth running on remote server
diff --git a/.github/workflows/preview.yml b/.github/workflows/preview.yml
index f3be05f8..f199e037 100644
--- a/.github/workflows/preview.yml
+++ b/.github/workflows/preview.yml
@@ -24,4 +24,4 @@ jobs:
- run: pnpm build
- name: Publish preview releases
- run: pnpx pkg-pr-new publish packages/evalite
+ run: pnpx pkg-pr-new publish packages/evaluhealth
diff --git a/.prettierignore b/.prettierignore
index 95e4ab7c..99fcce93 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -3,12 +3,12 @@ dist
.turbo
*.tsbuildinfo
coverage
-.evalite
-evalite.db
+.evaluhealth
+evaluhealth.db
pnpm-lock.yaml
**/tests/playground/**
build
vite.config.ts.timestamp*
*.d.ts
*.gen.ts
-**/evalite-export
\ No newline at end of file
+**/evaluhealth-export
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
index c4bab832..f705ef56 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,15 +4,15 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## Overview
-Evalite is a TypeScript-native, local-first tool for testing LLM-powered apps built on Vitest. It allows developers to write evaluations (evals) as `.eval.ts` files that run like tests.
+Evaluhealth is a TypeScript-native, local-first tool for testing LLM-powered apps built on Vitest. It allows developers to write evaluations (evals) as `.eval.ts` files that run like tests.
## Configuration
-The primary configuration method is `evalite.config.ts`. While `vitest.config.ts` is still supported for backward compatibility, it is not documented and `evalite.config.ts` should be used for all configuration needs.
+The primary configuration method is `evaluhealth.config.ts`. While `vitest.config.ts` is still supported for backward compatibility, it is not documented and `evaluhealth.config.ts` should be used for all configuration needs.
## Development Commands
-**Development mode** (recommended for working on Evalite itself):
+**Development mode** (recommended for working on Evaluhealth itself):
```bash
pnpm run dev
@@ -20,8 +20,8 @@ pnpm run dev
This runs:
-- TypeScript type checker on `evalite` package
-- Tests in `evalite-tests` package
+- TypeScript type checker on `evaluhealth` package
+- Tests in `evaluhealth-tests` package
- Live reload for both packages
**Build all packages**:
@@ -30,7 +30,7 @@ This runs:
pnpm build
```
-This builds `evalite` package first, then `evalite-ui`, copying UI assets to `packages/evalite/dist/ui`.
+This builds `evaluhealth` package first, then `evaluhealth-ui`, copying UI assets to `packages/evaluhealth/dist/ui`.
**Run CI pipeline** (build, test, lint):
@@ -42,20 +42,20 @@ pnpm ci
```bash
pnpm run example
-# Or: cd packages/example && pnpm evalite watch
+# Or: cd packages/example && pnpm evaluhealth watch
```
**Run single package tests**:
```bash
-cd packages/evalite && pnpm test
-cd packages/evalite-tests && pnpm test
+cd packages/evaluhealth && pnpm test
+cd packages/evaluhealth-tests && pnpm test
```
**Lint a package**:
```bash
-cd packages/evalite && pnpm lint
+cd packages/evaluhealth && pnpm lint
```
## Working with pnpm Filters
@@ -67,64 +67,64 @@ When working on specific packages in this monorepo, **use pnpm's `--filter` flag
**Build a specific package**:
```bash
-pnpm --filter evalite build
-pnpm --filter evalite-ui build
+pnpm --filter evaluhealth build
+pnpm --filter evaluhealth-ui build
```
**Run tests for a specific package**:
```bash
-pnpm --filter evalite-tests test
+pnpm --filter evaluhealth-tests test
```
**Run dev mode for a specific package**:
```bash
-pnpm --filter evalite dev
-pnpm --filter evalite-ui dev
+pnpm --filter evaluhealth dev
+pnpm --filter evaluhealth-ui dev
```
**Lint a specific package**:
```bash
-pnpm --filter evalite lint
-pnpm --filter evalite-tests lint
+pnpm --filter evaluhealth lint
+pnpm --filter evaluhealth-tests lint
```
### Filter Patterns
pnpm supports several filter patterns:
-- `--filter evalite` - Run task for the `evalite` package only
-- `--filter evalite...` - Run task for `evalite` and all its dependencies
-- `--filter ...evalite` - Run task for `evalite` and all packages that depend on it
+- `--filter evaluhealth` - Run task for the `evaluhealth` package only
+- `--filter evaluhealth...` - Run task for `evaluhealth` and all its dependencies
+- `--filter ...evaluhealth` - Run task for `evaluhealth` and all packages that depend on it
- `--filter "./packages/*"` - Run task for all packages in the packages directory
-- `--filter "!evalite"` - Run task for all packages except `evalite`
+- `--filter "!evaluhealth"` - Run task for all packages except `evaluhealth`
### Examples for Common Workflows
-**Working on the main evalite package**:
+**Working on the main evaluhealth package**:
```bash
-# Build evalite and watch for changes
-pnpm --filter evalite dev
+# Build evaluhealth and watch for changes
+pnpm --filter evaluhealth dev
# Run tests after making changes
-pnpm --filter evalite test
+pnpm --filter evaluhealth test
```
**Working on the UI**:
```bash
-# Build evalite first, then start UI dev server
-pnpm run build:evalite && pnpm --filter evalite-ui dev
+# Build evaluhealth first, then start UI dev server
+pnpm run build:evaluhealth && pnpm --filter evaluhealth-ui dev
```
**Working on integration tests**:
```bash
-# Ensure evalite is built before running tests
-pnpm run build && pnpm --filter evalite-tests test
+# Ensure evaluhealth is built before running tests
+pnpm run build && pnpm --filter evaluhealth-tests test
```
### When to Use Filters
@@ -138,7 +138,7 @@ pnpm run build && pnpm --filter evalite-tests test
**Direct package commands are fine for**:
- Quick one-off commands (like `pnpm install`)
-- Running the evalite CLI itself (e.g., `cd packages/example && pnpm evalite watch`)
+- Running the evaluhealth CLI itself (e.g., `cd packages/example && pnpm evaluhealth watch`)
- When already in the package directory
## Architecture
@@ -147,21 +147,21 @@ pnpm run build && pnpm --filter evalite-tests test
This is a pnpm workspace:
-- **`packages/evalite`**: Main package that users install. Exports the `evalite()` function, CLI binary (`evalite`), server, database layer, and utilities. Built with TypeScript.
+- **`packages/evaluhealth`**: Main package that users install. Exports the `evaluhealth()` function, CLI binary (`evaluhealth`), server, database layer, and utilities. Built with TypeScript.
-- **`packages/evalite-core`**: Shared core utilities (currently appears to be deprecated or minimal)
+- **`packages/evaluhealth-core`**: Shared core utilities (currently appears to be deprecated or minimal)
-- **`packages/evalite-tests`**: Integration tests for evalite functionality
+- **`packages/evaluhealth-tests`**: Integration tests for evaluhealth functionality
- **`packages/example`**: Example eval files demonstrating usage patterns (e.g., `example.eval.ts`, `traces.eval.ts`)
-- **`apps/evalite-ui`**: React-based web UI that displays eval results. Built with Vite, TanStack Router, and Tailwind. Gets copied to `packages/evalite/dist/ui` during build via the `after-build` script.
+- **`apps/evaluhealth-ui`**: React-based web UI that displays eval results. Built with Vite, TanStack Router, and Tailwind. Gets copied to `packages/evaluhealth/dist/ui` during build via the `after-build` script.
-- **`apps/evalite-docs`**: Documentation site
+- **`apps/evaluhealth-docs`**: Documentation site
### Core Concepts
-**Eval files**: Files matching `*.eval.ts` (or `.eval.mts`) that contain `evalite()` calls. These define:
+**Eval files**: Files matching `*.eval.ts` (or `.eval.mts`) that contain `evaluhealth()` calls. These define:
- A dataset (via `data()` function returning input/expected pairs)
- A task (the LLM interaction to test)
@@ -170,37 +170,37 @@ This is a pnpm workspace:
**Execution flow**:
-1. The `evalite` CLI uses Vitest under the hood to discover and run `*.eval.ts` files
+1. The `evaluhealth` CLI uses Vitest under the hood to discover and run `*.eval.ts` files
2. Each eval creates a Vitest `describe` block with concurrent `it` tests for each data point
-3. Results are stored in a SQLite database (`evalite.db`)
+3. Results are stored in a SQLite database (`evaluhealth.db`)
4. A Fastify server serves the UI and provides WebSocket updates during runs
-5. Files (images, audio, etc.) are saved to `.evalite` directory
+5. Files (images, audio, etc.) are saved to the `.evaluhealth` directory
**Key architecture points**:
- Uses Vitest's `inject("cwd")` to get the working directory
- Supports async iterables (streaming) from tasks via `executeTask()`
-- Files in input/output/expected are automatically detected and saved using `createEvaliteFileIfNeeded()`
+- Files in input/output/expected are automatically detected and saved using `createEvaluhealthFileIfNeeded()`
- Traces can be reported via `reportTraceLocalStorage` for nested LLM calls
-- Integrates with AI SDK via `evalite/ai-sdk` export (provides `traceAISDKModel()`)
+- Integrates with AI SDK via `evaluhealth/ai-sdk` export (provides `traceAISDKModel()`)
### Database Layer
-SQLite database (`evalite.db`) stores:
+SQLite database (`evaluhealth.db`) stores:
- Runs (full or partial)
- Evals (distinct eval names with metadata)
- Results (individual test case results with scores, traces, columns)
- Scores and traces are stored as JSON
-Key queries in `packages/evalite/src/db.ts`:
+Key queries in `packages/evaluhealth/src/db.ts`:
- `getEvals()`, `getResults()`, `getScores()`, `getTraces()`
- `getMostRecentRun()`, `getPreviousCompletedEval()`
### Server & UI
-The Fastify server in `packages/evalite/src/server.ts`:
+The Fastify server in `packages/evaluhealth/src/server.ts`:
- Serves the UI from `dist/ui/`
- Provides REST API at `/api/*` (menu-items, server-state, evals, results, etc.)
@@ -208,11 +208,11 @@ The Fastify server in `packages/evalite/src/server.ts`:
## Important Notes
-**Linking for local development**: If you need to test the global `evalite` command locally:
+**Linking for local development**: If you need to test the global `evaluhealth` command locally:
```bash
pnpm build
-cd packages/evalite && npm link
+cd packages/evaluhealth && npm link
```
**Node version**: Requires Node.js >= 22
@@ -235,7 +235,7 @@ The format of the file should be:
```md
---
-"evalite": patch
+"evaluhealth": patch
---
Description of the change.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9ebaa921..7f5aa203 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# Contributing to Evalite
+# Contributing to Evaluhealth
## Development Commands
@@ -7,25 +7,25 @@ This monorepo uses pnpm workspaces (no Turbo). All scripts use `pnpm --filter` t
**Available scripts:**
```bash
-pnpm run dev # Build evalite + UI, then run tsc -w on evalite + vitest on evalite-tests
-pnpm run example # Build, then run evalite watch + UI dev server (parallel)
-pnpm run test # Build, then run tests on evalite + evalite-tests
-pnpm run build # Build evalite, then evalite-ui
+pnpm run dev # Build evaluhealth + UI, then run tsc -w on evaluhealth + vitest on evaluhealth-tests
+pnpm run example # Build, then run evaluhealth watch + UI dev server (parallel)
+pnpm run test # Build, then run tests on evaluhealth + evaluhealth-tests
+pnpm run build # Build evaluhealth, then evaluhealth-ui
pnpm run ci # Full CI: build, test, lint, check-format
```
**Individual package scripts:**
```bash
-pnpm build:evalite # Build evalite package only
-pnpm build:evalite-ui # Build UI and copy to evalite/dist/ui
+pnpm build:evaluhealth # Build evaluhealth package only
+pnpm build:evaluhealth-ui # Build UI and copy to evaluhealth/dist/ui
```
**Setup:**
1. Create `.env` in `packages/example` with `OPENAI_API_KEY=your-key`
2. Run `pnpm install`
-3. For global `evalite` command: `pnpm build && cd packages/evalite && npm link`
+3. For global `evaluhealth` command: `pnpm build && cd packages/evaluhealth && npm link`
## Styling Guidelines
diff --git a/apps/evalite-ui/README.md b/apps/evalite-ui/README.md
deleted file mode 100644
index e8f05ecf..00000000
--- a/apps/evalite-ui/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Evalite UI
-
-This is the UI for Evalite. It is built with React, Vite, Tanstack Router, and Tailwind CSS.
-
-## Development
-
-From the root of the monorepo, run:
-
-```bash
-pnpm run dev
-```
-
-## Deployment
-
-The UI is copied into the `evalite` package during the `after-build` script in the root `package.json`.
diff --git a/apps/evalite-docs/.gitignore b/apps/evaluhealth-docs/.gitignore
similarity index 100%
rename from apps/evalite-docs/.gitignore
rename to apps/evaluhealth-docs/.gitignore
diff --git a/apps/evalite-docs/CLAUDE.md b/apps/evaluhealth-docs/CLAUDE.md
similarity index 100%
rename from apps/evalite-docs/CLAUDE.md
rename to apps/evaluhealth-docs/CLAUDE.md
diff --git a/apps/evalite-docs/README.md b/apps/evaluhealth-docs/README.md
similarity index 100%
rename from apps/evalite-docs/README.md
rename to apps/evaluhealth-docs/README.md
diff --git a/apps/evalite-docs/astro.config.mts b/apps/evaluhealth-docs/astro.config.mts
similarity index 87%
rename from apps/evalite-docs/astro.config.mts
rename to apps/evaluhealth-docs/astro.config.mts
index 1ce51671..5dfbe76e 100644
--- a/apps/evalite-docs/astro.config.mts
+++ b/apps/evaluhealth-docs/astro.config.mts
@@ -5,28 +5,28 @@ import starlight from "@astrojs/starlight";
export default defineConfig({
integrations: [
starlight({
- title: "Evalite",
+ title: "Evaluhealth",
favicon: "/favicon.ico",
components: {
Banner: "./src/components/Banner.astro",
},
editLink: {
baseUrl:
- "https://github.com/mattpocock/evalite/edit/main/apps/evalite-docs",
+ "https://github.com/kernelius-hq/evaluhealth/edit/main/apps/evaluhealth-docs",
},
head: [
{
tag: "meta",
attrs: {
property: "og:url",
- content: "https://evalite.dev",
+ content: "https://evalu.health",
},
},
{
tag: "meta",
attrs: {
property: "og:image",
- content: "https://evalite.dev/og-image.jpg",
+ content: "https://evalu.health/og-image.jpg",
},
},
{
@@ -47,7 +47,7 @@ export default defineConfig({
tag: "meta",
attrs: {
property: "og:image:alt",
- content: "Evalite Logo",
+ content: "Evaluhealth Logo",
},
},
{
@@ -61,7 +61,7 @@ export default defineConfig({
tag: "meta",
attrs: {
name: "twitter:image",
- content: "https://evalite.dev/og-image.jpg",
+ content: "https://evalu.health/og-image.jpg",
},
},
{
@@ -82,8 +82,8 @@ export default defineConfig({
},
],
social: {
- github: "https://github.com/mattpocock/evalite",
- discord: "https://mattpocock.com/ai-discord",
+ github: "https://github.com/kernelius-hq/evaluhealth",
+ discord: "https://kernelius.com/discord",
},
logo: {
light: "./src/assets/logo-light.svg",
@@ -94,8 +94,8 @@ export default defineConfig({
label: "Getting Started",
items: [
{
- label: "What Is Evalite?",
- slug: "what-is-evalite",
+ label: "What Is Evaluhealth?",
+ slug: "what-is-evaluhealth",
},
{
label: "Quickstart",
diff --git a/apps/evalite-docs/package.json b/apps/evaluhealth-docs/package.json
similarity index 91%
rename from apps/evalite-docs/package.json
rename to apps/evaluhealth-docs/package.json
index 448c8355..86ec77f9 100644
--- a/apps/evalite-docs/package.json
+++ b/apps/evaluhealth-docs/package.json
@@ -1,5 +1,5 @@
{
- "name": "evalite-docs",
+ "name": "evaluhealth-docs",
"type": "module",
"private": true,
"version": "0.0.1",
diff --git a/apps/evalite-docs/public/favicon.ico b/apps/evaluhealth-docs/public/favicon.ico
similarity index 100%
rename from apps/evalite-docs/public/favicon.ico
rename to apps/evaluhealth-docs/public/favicon.ico
diff --git a/apps/evalite-docs/public/hero.webp b/apps/evaluhealth-docs/public/hero.webp
similarity index 100%
rename from apps/evalite-docs/public/hero.webp
rename to apps/evaluhealth-docs/public/hero.webp
diff --git a/apps/evalite-docs/public/og-image.jpg b/apps/evaluhealth-docs/public/og-image.jpg
similarity index 100%
rename from apps/evalite-docs/public/og-image.jpg
rename to apps/evaluhealth-docs/public/og-image.jpg
diff --git a/apps/evalite-docs/src/assets/houston.webp b/apps/evaluhealth-docs/src/assets/houston.webp
similarity index 100%
rename from apps/evalite-docs/src/assets/houston.webp
rename to apps/evaluhealth-docs/src/assets/houston.webp
diff --git a/apps/evalite-docs/src/assets/logo-dark.svg b/apps/evaluhealth-docs/src/assets/logo-dark.svg
similarity index 100%
rename from apps/evalite-docs/src/assets/logo-dark.svg
rename to apps/evaluhealth-docs/src/assets/logo-dark.svg
diff --git a/apps/evalite-docs/src/assets/logo-light.svg b/apps/evaluhealth-docs/src/assets/logo-light.svg
similarity index 100%
rename from apps/evalite-docs/src/assets/logo-light.svg
rename to apps/evaluhealth-docs/src/assets/logo-light.svg
diff --git a/apps/evalite-docs/src/components/Banner.astro b/apps/evaluhealth-docs/src/components/Banner.astro
similarity index 78%
rename from apps/evalite-docs/src/components/Banner.astro
rename to apps/evaluhealth-docs/src/components/Banner.astro
index 7a86e3e8..41ed8de8 100644
--- a/apps/evalite-docs/src/components/Banner.astro
+++ b/apps/evaluhealth-docs/src/components/Banner.astro
@@ -7,11 +7,11 @@ import type { Props } from "@astrojs/starlight/props";
role="banner"
aria-label="v1 beta announcement"
>
- The beta version of Evalite v1 is now available! Install with pnpm add evalite@beta • View beta docs →
+ The beta version of Evaluhealth v1 is now available! Install with pnpm add evaluhealth@beta • View beta docs →
diff --git a/apps/evalite-docs/src/content/config.ts b/apps/evaluhealth-docs/src/content/config.ts
similarity index 100%
rename from apps/evalite-docs/src/content/config.ts
rename to apps/evaluhealth-docs/src/content/config.ts
diff --git a/apps/evalite-docs/src/content/docs/examples/ai-sdk.md b/apps/evaluhealth-docs/src/content/docs/examples/ai-sdk.md
similarity index 85%
rename from apps/evalite-docs/src/content/docs/examples/ai-sdk.md
rename to apps/evaluhealth-docs/src/content/docs/examples/ai-sdk.md
index 215e5ea0..9de3120a 100644
--- a/apps/evalite-docs/src/content/docs/examples/ai-sdk.md
+++ b/apps/evaluhealth-docs/src/content/docs/examples/ai-sdk.md
@@ -16,10 +16,10 @@ You can use the `traceAISDKModel` function to trace the calls to the AI SDK:
import { openai } from "@ai-sdk/openai";
import { streamText } from "ai";
import { Factuality, Levenshtein } from "autoevals";
-import { evalite } from "evalite";
-import { traceAISDKModel } from "evalite/ai-sdk";
+import { evaluhealth } from "evaluhealth";
+import { traceAISDKModel } from "evaluhealth/ai-sdk";
-evalite("Test Capitals", {
+evaluhealth("Test Capitals", {
data: async () => [
{
input: `What's the capital of France?`,
@@ -50,9 +50,9 @@ evalite("Test Capitals", {
## Testing Whole Conversations
-You can also pass messages to the `input` property of the eval. To get autocomplete, you can pass the `CoreMessage` type to the `evalite` function as a type argument.
+You can also pass messages to the `input` property of the eval. To get autocomplete, you can pass the `CoreMessage` type to the `evaluhealth` function as a type argument.
-The three type parameters for `evalite` are:
+The three type parameters for `evaluhealth` are:
- The type of the input
- The type of the output
@@ -64,10 +64,10 @@ The three type parameters for `evalite` are:
import { openai } from "@ai-sdk/openai";
import { streamText, type CoreMessage } from "ai";
import { Levenshtein } from "autoevals";
-import { evalite } from "evalite";
-import { traceAISDKModel } from "evalite/ai-sdk";
+import { evaluhealth } from "evaluhealth";
+import { traceAISDKModel } from "evaluhealth/ai-sdk";
-evalite("Test Capitals", {
+evaluhealth("Test Capitals", {
data: async () => [
{
input: [
diff --git a/apps/evalite-docs/src/content/docs/guides/ci.mdx b/apps/evaluhealth-docs/src/content/docs/guides/ci.mdx
similarity index 76%
rename from apps/evalite-docs/src/content/docs/guides/ci.mdx
rename to apps/evaluhealth-docs/src/content/docs/guides/ci.mdx
index a0ab56ec..4e6f1341 100644
--- a/apps/evalite-docs/src/content/docs/guides/ci.mdx
+++ b/apps/evaluhealth-docs/src/content/docs/guides/ci.mdx
@@ -2,7 +2,7 @@
title: CI/CD
---
-Evalite integrates seamlessly into CI/CD pipelines, allowing you to validate LLM-powered features as part of your automated testing workflow.
+Evaluhealth integrates seamlessly into CI/CD pipelines, allowing you to validate LLM-powered features as part of your automated testing workflow.
## Static UI Export
@@ -11,29 +11,29 @@ Export eval results as a static HTML bundle for viewing in CI artifacts without
### Basic Usage
```bash
-evalite export
+evaluhealth export
```
-Exports latest full run to `./evalite-export` directory.
+Exports latest full run to `./evaluhealth-export` directory.
### Options
**Custom output directory:**
```bash
-evalite export --output=./my-export
+evaluhealth export --output=./my-export
```
**Export specific run:**
```bash
-evalite export --run-id=123
+evaluhealth export --run-id=123
```
**Custom base path for non-root hosting:**
```bash
-evalite export --basePath=/evals-123
+evaluhealth export --basePath=/evals-123
```
Use when hosting at subpaths (e.g., S3/CloudFront with path-based URLs). The base path must start with `/`.
@@ -52,7 +52,7 @@ Generated bundle contains:
**Local preview:**
```bash
-npx serve -s ./evalite-export
+npx serve -s ./evaluhealth-export
```
**Static hosting:** Upload to artifact.ci, S3, GitHub Pages, etc.
@@ -81,15 +81,15 @@ jobs:
- name: Run evaluations
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
- run: npx evalite --threshold=70
+ run: npx evaluhealth --threshold=70
- name: Export UI
- run: npx evalite export --output=./ui-export
+ run: npx evaluhealth export --output=./ui-export
- name: Upload static UI
uses: actions/upload-artifact@v3
with:
- name: evalite-ui
+ name: evaluhealth-ui
path: ui-export
```
@@ -103,7 +103,7 @@ Deploy to S3/CloudFront with unique paths per run:
- name: Export UI with base path
run: |
RUN_PATH="/evals-${{ github.run_id }}"
- npx evalite export --basePath="$RUN_PATH" --output=./ui-export
+ npx evaluhealth export --basePath="$RUN_PATH" --output=./ui-export
- name: Upload to S3
run: |
@@ -115,11 +115,11 @@ To test locally with base path:
```bash
# Export with base path
-evalite export --basePath=/evals-123
+evaluhealth export --basePath=/evals-123
# Create matching directory structure
mkdir -p /tmp/test/evals-123
-cp -r evalite-export/* /tmp/test/evals-123/
+cp -r evaluhealth-export/* /tmp/test/evals-123/
# Serve and visit http://localhost:3000/evals-123
npx serve /tmp/test
@@ -127,10 +127,10 @@ npx serve /tmp/test
## Running on CI
-Run Evalite in run-once mode (default):
+Run Evaluhealth in run-once mode (default):
```bash
-evalite
+evaluhealth
```
Executes all evals and exits.
@@ -140,7 +140,7 @@ Executes all evals and exits.
Fail CI builds if scores fall below threshold:
```bash
-evalite --threshold=70
+evaluhealth --threshold=70
```
Exits with code 1 if average score < 70.
@@ -150,7 +150,7 @@ Exits with code 1 if average score < 70.
For programmatic analysis, export raw JSON:
```bash
-evalite --outputPath=./results.json
+evaluhealth --outputPath=./results.json
```
### Export Format
@@ -158,9 +158,9 @@ evalite --outputPath=./results.json
Typed hierarchical structure:
```typescript
-import type { Evalite } from "evalite";
+import type { Evaluhealth } from "evaluhealth";
-type Output = Evalite.Exported.Output;
+type Output = Evaluhealth.Exported.Output;
```
Contains:
diff --git a/apps/evalite-docs/src/content/docs/guides/cli.mdx b/apps/evaluhealth-docs/src/content/docs/guides/cli.mdx
similarity index 70%
rename from apps/evalite-docs/src/content/docs/guides/cli.mdx
rename to apps/evaluhealth-docs/src/content/docs/guides/cli.mdx
index b581c2a4..1cae6770 100644
--- a/apps/evalite-docs/src/content/docs/guides/cli.mdx
+++ b/apps/evaluhealth-docs/src/content/docs/guides/cli.mdx
@@ -4,10 +4,10 @@ title: CLI
## Watch Mode
-You can run Evalite in watch mode by running `evalite watch`:
+You can run Evaluhealth in watch mode by running `evaluhealth watch`:
```bash
-evalite watch
+evaluhealth watch
```
This will watch for changes to your `.eval.ts` files and re-run the evals when they change.
@@ -21,7 +21,7 @@ This will watch for changes to your `.eval.ts` files and re-run the evals when t
When debugging with `console.log`, the detailed table output can make it harder to see your logs. You can hide it with `--hideTable`:
```bash
-evalite watch --hideTable
+evaluhealth watch --hideTable
```
This keeps the score summary but removes the detailed results table from the CLI output.
@@ -31,38 +31,38 @@ This keeps the score summary but removes the detailed results table from the CLI
You can run evals once and serve the UI without re-running on file changes:
```bash
-evalite serve
+evaluhealth serve
```
This runs your evals once and keeps the UI server running at `http://localhost:3006`. Unlike watch mode, tests won't re-run when files change.
Since evals can take a while to run, this can be a useful alternative to watch mode.
-To re-run evals after making changes, restart `evalite serve`.
+To re-run evals after making changes, restart `evaluhealth serve`.
## Running Specific Files
You can run specific files by passing them as arguments:
```bash
-evalite my-eval.eval.ts
+evaluhealth my-eval.eval.ts
```
This also works for `watch` and `serve` modes:
```bash
-evalite watch my-eval.eval.ts
-evalite serve my-eval.eval.ts
+evaluhealth watch my-eval.eval.ts
+evaluhealth serve my-eval.eval.ts
```
## Threshold
-You can tell Evalite that your evals must pass a specific score by passing `--threshold`:
+You can tell Evaluhealth that your evals must pass a specific score by passing `--threshold`:
```bash
-evalite --threshold=50 # Score must be greater than or equal to 50
+evaluhealth --threshold=50 # Score must be greater than or equal to 50
-evalite watch --threshold=70 # Also works in watch mode
+evaluhealth watch --threshold=70 # Also works in watch mode
```
This is useful for running on CI. If the score threshold is not met, it will fail the process.
@@ -72,10 +72,10 @@ This is useful for running on CI. If the score threshold is not met, it will fai
Export eval results as a static HTML bundle:
```bash
-evalite export
+evaluhealth export
```
-This exports the latest run to `./evalite-export` by default.
+This exports the latest run to `./evaluhealth-export` by default.
### Options
@@ -84,7 +84,7 @@ This exports the latest run to `./evalite-export` by default.
- `--basePath` - Base path for non-root hosting (must start with `/`)
```bash
-evalite export --basePath=/evals-123 --output=./my-export
+evaluhealth export --basePath=/evals-123 --output=./my-export
```
See the [CI/CD guide](/guides/ci) for full documentation on exporting and viewing static UI bundles.
diff --git a/apps/evalite-docs/src/content/docs/guides/configuration.mdx b/apps/evaluhealth-docs/src/content/docs/guides/configuration.mdx
similarity index 64%
rename from apps/evalite-docs/src/content/docs/guides/configuration.mdx
rename to apps/evaluhealth-docs/src/content/docs/guides/configuration.mdx
index 2c4ca11f..07f956f7 100644
--- a/apps/evalite-docs/src/content/docs/guides/configuration.mdx
+++ b/apps/evaluhealth-docs/src/content/docs/guides/configuration.mdx
@@ -4,15 +4,15 @@ title: Configuration
import { Steps } from "@astrojs/starlight/components";
-Since **Evalite is based on Vitest**, you can configure eval behavior using Vitest's configuration options. Each data point in your eval becomes a separate Vitest test case, which means all Vitest configuration options work with Evalite.
+Since **Evaluhealth is based on Vitest**, you can configure eval behavior using Vitest's configuration options. Each data point in your eval becomes a separate Vitest test case, which means all Vitest configuration options work with Evaluhealth.
-## Evalite Configuration
+## Evaluhealth Configuration
-You can configure Evalite-specific options using `evalite.config.ts`:
+You can configure Evaluhealth-specific options using `evaluhealth.config.ts`:
```ts
-// evalite.config.ts
-import { defineConfig } from "evalite/config";
+// evaluhealth.config.ts
+import { defineConfig } from "evaluhealth/config";
export default defineConfig({
testTimeout: 60000, // 60 seconds
@@ -31,7 +31,7 @@ export default defineConfig({
- **`maxConcurrency`**: Maximum number of test cases to run in parallel. Default is 5.
- **`scoreThreshold`**: Minimum average score (0-100). Process exits with code 1 if average score falls below this threshold.
- **`hideTable`**: Hide the results table in terminal output. Default is false.
-- **`server.port`**: Port for the Evalite UI server. Default is 3006.
+- **`server.port`**: Port for the Evaluhealth UI server. Default is 3006.
- **`trialCount`**: Number of times to run each test case. Default is 1. Useful for measuring variance in non-deterministic evaluations.
- **`setupFiles`**: Array of file paths to run before tests (e.g., for loading environment variables).
@@ -41,11 +41,11 @@ export default defineConfig({
Control how many test cases run in parallel. Default is 5.
-Configure in `evalite.config.ts`:
+Configure in `evaluhealth.config.ts`:
```ts
-// evalite.config.ts
-import { defineConfig } from "evalite/config";
+// evaluhealth.config.ts
+import { defineConfig } from "evaluhealth/config";
export default defineConfig({
maxConcurrency: 100, // Run up to 100 tests in parallel
@@ -56,13 +56,13 @@ This is useful for optimizing performance and managing API rate limits.
### `testTimeout`
-Set the maximum time (in milliseconds) a test can run before timing out. Default is 30000ms in Evalite.
+Set the maximum time (in milliseconds) a test can run before timing out. Default is 30000ms in Evaluhealth.
-Configure in `evalite.config.ts`:
+Configure in `evaluhealth.config.ts`:
```ts
-// evalite.config.ts
-import { defineConfig } from "evalite/config";
+// evaluhealth.config.ts
+import { defineConfig } from "evaluhealth/config";
export default defineConfig({
testTimeout: 60000, // 60 seconds
@@ -73,21 +73,21 @@ export default defineConfig({
Run each test case multiple times to measure variance in non-deterministic evaluations.
-Configure globally in `evalite.config.ts`:
+Configure globally in `evaluhealth.config.ts`:
```ts
-// evalite.config.ts
-import { defineConfig } from "evalite/config";
+// evaluhealth.config.ts
+import { defineConfig } from "evaluhealth/config";
export default defineConfig({
trialCount: 3, // Run each test case 3 times
});
```
-Or override per-eval in the `evalite()` call:
+Or override per-eval in the `evaluhealth()` call:
```ts
-evalite("Non-deterministic eval", {
+evaluhealth("Non-deterministic eval", {
data: () => [{ input: "Alice", expected: "Alice" }],
task: async (input) => {
// Non-deterministic task
@@ -100,4 +100,4 @@ evalite("Non-deterministic eval", {
});
```
-Note: Per-eval `trialCount` overrides `evalite.config.ts` if both are present.
+Note: Per-eval `trialCount` overrides `evaluhealth.config.ts` if both are present.
diff --git a/apps/evalite-docs/src/content/docs/guides/customizing-the-ui.mdx b/apps/evaluhealth-docs/src/content/docs/guides/customizing-the-ui.mdx
similarity index 88%
rename from apps/evalite-docs/src/content/docs/guides/customizing-the-ui.mdx
rename to apps/evaluhealth-docs/src/content/docs/guides/customizing-the-ui.mdx
index 78ad14c6..a1a33bb0 100644
--- a/apps/evalite-docs/src/content/docs/guides/customizing-the-ui.mdx
+++ b/apps/evaluhealth-docs/src/content/docs/guides/customizing-the-ui.mdx
@@ -6,18 +6,18 @@ import { Aside } from "@astrojs/starlight/components";
## Creating Custom Columns
-By default, the Evalite UI renders the input, expected and output columns:
+By default, the Evaluhealth UI renders the input, expected and output columns:
| Input | Expected | Output |
| ------------------------ | --------------------------- | ---------------- |
| `input` passed to `data` | `expected` passed to `data` | Result of `task` |
-You can customize the columns shown by the UI by passing a `columns` attribute to the `evalite` function:
+You can customize the columns shown by the UI by passing a `columns` attribute to the `evaluhealth` function:
```ts
-import { evalite } from "evalite";
+import { evaluhealth } from "evaluhealth";
-evalite("My Eval", {
+evaluhealth("My Eval", {
data: [{ input: { a: 1, b: 2, c: 3, theOnlyPropertyWeWantToShow: "Hello" } }],
task: async (input) => {
return input.theOnlyPropertyWeWantToShow + " World!";
@@ -49,10 +49,10 @@ This will show two columns:
The `columns` function also receives the computed `scores` and `traces` arrays, allowing you to display scorer results and trace information:
```ts
-import { evalite } from "evalite";
-import { reportTrace } from "evalite/traces";
+import { evaluhealth } from "evaluhealth";
+import { reportTrace } from "evaluhealth/traces";
-evalite("My Eval", {
+evaluhealth("My Eval", {
data: [{ input: "test", expected: "TEST" }],
task: async (input) => {
reportTrace({
diff --git a/apps/evalite-docs/src/content/docs/guides/environment-variables.mdx b/apps/evaluhealth-docs/src/content/docs/guides/environment-variables.mdx
similarity index 85%
rename from apps/evalite-docs/src/content/docs/guides/environment-variables.mdx
rename to apps/evaluhealth-docs/src/content/docs/guides/environment-variables.mdx
index 24e15b49..75c5f899 100644
--- a/apps/evalite-docs/src/content/docs/guides/environment-variables.mdx
+++ b/apps/evaluhealth-docs/src/content/docs/guides/environment-variables.mdx
@@ -30,12 +30,12 @@ To call your LLM from a third-party service, you'll likely need some environment
pnpm add -D dotenv
```
-4. Add an `evalite.config.ts` file:
+4. Add an `evaluhealth.config.ts` file:
```ts
- // evalite.config.ts
+ // evaluhealth.config.ts
- import { defineConfig } from "evalite/config";
+ import { defineConfig } from "evaluhealth/config";
export default defineConfig({
setupFiles: ["dotenv/config"],
diff --git a/apps/evalite-docs/src/content/docs/guides/multi-modal.mdx b/apps/evaluhealth-docs/src/content/docs/guides/multi-modal.mdx
similarity index 70%
rename from apps/evalite-docs/src/content/docs/guides/multi-modal.mdx
rename to apps/evaluhealth-docs/src/content/docs/guides/multi-modal.mdx
index 8f747424..ae3a5d1b 100644
--- a/apps/evalite-docs/src/content/docs/guides/multi-modal.mdx
+++ b/apps/evaluhealth-docs/src/content/docs/guides/multi-modal.mdx
@@ -4,11 +4,11 @@ title: Multi-Modal
import { Aside } from "@astrojs/starlight/components";
-Evalite can handle not just text responses, but media like images, audio, and video.
+Evaluhealth can handle not just text responses, but media like images, audio, and video.
## Files In Memory
-A common way to work with media in Evalite is to read it into memory.
+A common way to work with media in Evaluhealth is to read it into memory.
### What Are Files In Memory?
@@ -36,15 +36,15 @@ It doesn't matter what the file extension is - when you read it into memory, it'
just refer to them as `Uint8Array` objects.
-### Evalite And Files In Memory
+### Evaluhealth And Files In Memory
-Evalite can automatically detect `Uint8Array` objects in your evals and handle them for you.
+Evaluhealth can automatically detect `Uint8Array` objects in your evals and handle them for you.
```ts
-import { evalite } from "evalite";
-import { reportTrace } from "evalite/traces";
+import { evaluhealth } from "evaluhealth";
+import { reportTrace } from "evaluhealth/traces";
-evalite("My Eval", {
+evaluhealth("My Eval", {
data: [
{
// 1. In inputs...
@@ -76,21 +76,21 @@ evalite("My Eval", {
});
```
-When Evalite finds a `Uint8Array`, it saves the file to a local cache, in `./node_modules/.evalite/files`.
+When Evaluhealth finds a `Uint8Array`, it saves the file to a local cache at `./node_modules/.evaluhealth/files`.
Then in the UI, it'll reference that local file.
## Files On Disk
-If you're working with files on disk, you can use the `EvaliteFile.fromPath` method to reference them:
+If you're working with files on disk, you can use the `EvaluhealthFile.fromPath` method to reference them:
```ts
-import { EvaliteFile, evalite } from "evalite";
+import { EvaluhealthFile, evaluhealth } from "evaluhealth";
-evalite("My Eval", {
+evaluhealth("My Eval", {
data: [
{
- input: EvaliteFile.fromPath("path/to/file.jpg"),
+ input: EvaluhealthFile.fromPath("path/to/file.jpg"),
},
],
task: async (input) => {
diff --git a/apps/evalite-docs/src/content/docs/guides/running-programmatically.mdx b/apps/evaluhealth-docs/src/content/docs/guides/running-programmatically.mdx
similarity index 76%
rename from apps/evalite-docs/src/content/docs/guides/running-programmatically.mdx
rename to apps/evaluhealth-docs/src/content/docs/guides/running-programmatically.mdx
index 53252ea1..7371654b 100644
--- a/apps/evalite-docs/src/content/docs/guides/running-programmatically.mdx
+++ b/apps/evaluhealth-docs/src/content/docs/guides/running-programmatically.mdx
@@ -2,16 +2,16 @@
title: Running Programmatically
---
-You can run Evalite programmatically using the Node API. This is useful when you want to integrate Evalite into your own scripts, CI/CD pipelines, or custom tooling.
+You can run Evaluhealth programmatically using the Node API. This is useful when you want to integrate Evaluhealth into your own scripts, CI/CD pipelines, or custom tooling.
## Basic Usage
-Import the `runEvalite` function from `evalite/runner`:
+Import the `runEvaluhealth` function from `evaluhealth/runner`:
```typescript
-import { runEvalite } from "evalite/runner";
+import { runEvaluhealth } from "evaluhealth/runner";
-await runEvalite({
+await runEvaluhealth({
mode: "run-once-and-exit",
});
```
@@ -25,17 +25,17 @@ That's it! The `path` and `cwd` parameters are optional and default to running a
This mode runs all evals once and exits. It's ideal for CI/CD pipelines:
```typescript
-await runEvalite({
+await runEvaluhealth({
mode: "run-once-and-exit",
});
```
### Watch Mode
-This mode watches for file changes and re-runs evals automatically. It also starts the Evalite UI server:
+This mode watches for file changes and re-runs evals automatically. It also starts the Evaluhealth UI server:
```typescript
-await runEvalite({
+await runEvaluhealth({
mode: "watch-for-file-changes",
});
```
@@ -51,7 +51,7 @@ await runEvalite({
Optional path filter to run specific eval files. Defaults to `undefined` (runs all evals):
```typescript
-await runEvalite({
+await runEvaluhealth({
path: "my-eval.eval.ts",
mode: "run-once-and-exit",
});
@@ -62,7 +62,7 @@ await runEvalite({
The working directory to run evals from. Defaults to `process.cwd()`:
```typescript
-await runEvalite({
+await runEvaluhealth({
cwd: "/path/to/my/project",
mode: "run-once-and-exit",
});
@@ -73,7 +73,7 @@ await runEvalite({
Set a minimum score threshold (0-100). If the average score falls below this threshold, the process will exit with a non-zero exit code:
```typescript
-await runEvalite({
+await runEvaluhealth({
mode: "run-once-and-exit",
scoreThreshold: 80, // Fail if score is below 80
});
@@ -86,7 +86,7 @@ This is particularly useful for CI/CD pipelines where you want to fail the build
Export the results to a JSON file after the run completes:
```typescript
-await runEvalite({
+await runEvaluhealth({
mode: "run-once-and-exit",
outputPath: "./results.json",
});
@@ -99,14 +99,14 @@ The exported JSON file contains the complete run data including all evals, resul
Here's a complete example that combines multiple options:
```typescript
-import { runEvalite } from "evalite/runner";
+import { runEvaluhealth } from "evaluhealth/runner";
async function runEvals() {
try {
- await runEvalite({
+ await runEvaluhealth({
mode: "run-once-and-exit",
scoreThreshold: 75, // Fail if average score < 75
- outputPath: "./evalite-results.json", // Export results
+ outputPath: "./evaluhealth-results.json", // Export results
});
console.log("All evals passed!");
} catch (error) {
diff --git a/apps/evalite-docs/src/content/docs/guides/scorers.mdx b/apps/evaluhealth-docs/src/content/docs/guides/scorers.mdx
similarity index 93%
rename from apps/evalite-docs/src/content/docs/guides/scorers.mdx
rename to apps/evaluhealth-docs/src/content/docs/guides/scorers.mdx
index 5ff16b80..806a2b81 100644
--- a/apps/evalite-docs/src/content/docs/guides/scorers.mdx
+++ b/apps/evaluhealth-docs/src/content/docs/guides/scorers.mdx
@@ -13,9 +13,9 @@ Scorers are used to score the output of your LLM call.
If you don't need your scorer to be reusable, you can define it inline.
```ts
-import { evalite } from "evalite";
+import { evaluhealth } from "evaluhealth";
-evalite("My Eval", {
+evaluhealth("My Eval", {
data: [{ input: "Hello" }],
task: async (input) => {
return input + " World!";
@@ -37,7 +37,7 @@ evalite("My Eval", {
If you have a scorer you want to use across multiple files, you can use `createScorer` to create a reusable scorer.
```ts
-import { createScorer } from "evalite";
+import { createScorer } from "evaluhealth";
const containsParis = createScorer({
name: "Contains Paris",
@@ -47,7 +47,7 @@ const containsParis = createScorer({
},
});
-evalite("My Eval", {
+evaluhealth("My Eval", {
data: [{ input: "Hello" }],
task: async (input) => {
return input + " World!";
@@ -56,14 +56,14 @@ evalite("My Eval", {
});
```
-The `name` and `description` of the scorer will be displayed in the Evalite UI.
+The `name` and `description` of the scorer will be displayed in the Evaluhealth UI.
## Score Properties
The `score` function receives three properties on the object passed:
```ts
-import { createScorer } from "evalite";
+import { createScorer } from "evaluhealth";
const containsParis = createScorer({
name: "Contains Paris",
@@ -80,7 +80,7 @@ const containsParis = createScorer({
These are typed using the three type arguments passed to `createScorer`:
```ts
-import { createScorer } from "evalite";
+import { createScorer } from "evaluhealth";
const containsParis = createScorer<
string, // Type of 'input'
@@ -103,7 +103,7 @@ If `expected` is omitted, it will be inferred from the type of `output`.
You can provide metadata along with your custom scorer:
```ts
-import { createScorer } from "evalite";
+import { createScorer } from "evaluhealth";
const containsParis = createScorer({
name: "Contains Paris",
@@ -119,7 +119,7 @@ const containsParis = createScorer({
});
```
-This will be visible along with the score in the Evalite UI.
+This metadata will be visible alongside the score in the Evaluhealth UI.