From 4b62f2127fe12182ad3e3b1816f13d710b733062 Mon Sep 17 00:00:00 2001 From: Pedro Fontana Date: Fri, 19 Dec 2025 10:03:35 -0800 Subject: [PATCH 1/4] Modify main_metric_name --- shared/eval/src/harness.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/shared/eval/src/harness.rs b/shared/eval/src/harness.rs index 23b814b69..1d37401d4 100644 --- a/shared/eval/src/harness.rs +++ b/shared/eval/src/harness.rs @@ -35,12 +35,7 @@ const TASKS_WITH_ACC_NORM: [&str; 6] = [ PIQA::name(), ]; -const TASKS_WITH_ACC_UNCOND: [&str; 4] = [ - ArcChallenge::name(), - ArcEasy::name(), - MMLUCF::name(), - PIQA::name(), -]; +const TASKS_WITH_ACC_UNCOND: [&str; 3] = [ArcChallenge::name(), ArcEasy::name(), MMLUCF::name()]; pub enum TaskType { LogLikelihood(Box), @@ -810,7 +805,14 @@ impl PreparedTask { } pub fn main_metric_name(&self) -> &str { - if TASKS_WITH_ACC_NORM.contains(&self.name()) { + let name = self.name(); + if name == ArcChallenge::name() || name == MMLUCF::name() { + "acc_uncond" + } else if name == MMLUPro::name() + || name == Hellaswag::name() + || name == OpenbookQA::name() + || name == PIQA::name() + { "acc_norm" } else { "acc" From 20a326bcf51232a0278f556883c57460b58c49e5 Mon Sep 17 00:00:00 2001 From: Pedro Fontana Date: Fri, 19 Dec 2025 10:04:13 -0800 Subject: [PATCH 2/4] use 5 fewshots in client evals --- shared/client/src/cli.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/shared/client/src/cli.rs b/shared/client/src/cli.rs index 268ea753a..a0bbb4634 100644 --- a/shared/client/src/cli.rs +++ b/shared/client/src/cli.rs @@ -278,12 +278,8 @@ impl TrainArgs { let result: Result> = eval_tasks .split(",") .map(|eval_task| { - let fewshot = match eval_task { - "mmlu_pro" => 5, - _ => 0, - }; tasktype_from_name(eval_task) - .map(|task_type| psyche_eval::Task::new(task_type, fewshot, eval_seed)) + .map(|task_type| psyche_eval::Task::new(task_type, 5, eval_seed)) }) .collect(); result From c2ba13e362bb2d0299f3137875a0a3216c98c1d9 Mon Sep 17 00:00:00 2001 From: Pedro Fontana Date: Mon, 22 Dec 2025 07:26:46 -0800 Subject: [PATCH 3/4] website readme fix --- website/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/README.md b/website/README.md index 76b552011..d971e2bee 100644 --- a/website/README.md +++ b/website/README.md @@ -49,7 +49,7 @@ Optional: ### running with the backend pointed to localnet -1. `cd backend`, `pnpm dev-local` in another terminal. This will build the WASM for deserializing the onchain state, build the IDL for interacting with the contracts, and start the backend. +1. `cd backend`, `pnpm dev-localnet` in another terminal. This will build the WASM for deserializing the onchain state, build the IDL for interacting with the contracts, and start the backend. 2. `cd frontend`, `pnpm dev` in another terminal. ### running with the backend pointing to a non-localnet setup From 59da10433625074bc185bca04f2ef53bbb9153f3 Mon Sep 17 00:00:00 2001 From: Pedro Fontana Date: Mon, 22 Dec 2025 13:34:54 -0300 Subject: [PATCH 4/4] Update README.md --- website/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/README.md b/website/README.md index d971e2bee..5025a9610 100644 --- a/website/README.md +++ b/website/README.md @@ -49,8 +49,8 @@ Optional: ### running with the backend pointed to localnet -1. `cd backend`, `pnpm dev-localnet` in another terminal. This will build the WASM for deserializing the onchain state, build the IDL for interacting with the contracts, and start the backend. -2. `cd frontend`, `pnpm dev` in another terminal. +1. `cd backend`, `pnpm dev-local` in another terminal. This will build the WASM for deserializing the onchain state, build the IDL for interacting with the contracts, and start the backend. +2. `cd frontend`, `pnpm dev-localnet` in another terminal. ### running with the backend pointing to a non-localnet setup