diff --git a/shared/client/src/cli.rs b/shared/client/src/cli.rs index 268ea753a..a0bbb4634 100644 --- a/shared/client/src/cli.rs +++ b/shared/client/src/cli.rs @@ -278,12 +278,8 @@ impl TrainArgs { let result: Result> = eval_tasks .split(",") .map(|eval_task| { - let fewshot = match eval_task { - "mmlu_pro" => 5, - _ => 0, - }; tasktype_from_name(eval_task) - .map(|task_type| psyche_eval::Task::new(task_type, fewshot, eval_seed)) + .map(|task_type| psyche_eval::Task::new(task_type, 5, eval_seed)) }) .collect(); result diff --git a/shared/eval/src/harness.rs b/shared/eval/src/harness.rs index 836cdd728..987ed29fc 100644 --- a/shared/eval/src/harness.rs +++ b/shared/eval/src/harness.rs @@ -35,12 +35,7 @@ const TASKS_WITH_ACC_NORM: [&str; 6] = [ PIQA::name(), ]; -const TASKS_WITH_ACC_UNCOND: [&str; 4] = [ - ArcChallenge::name(), - ArcEasy::name(), - MMLUCF::name(), - PIQA::name(), -]; +const TASKS_WITH_ACC_UNCOND: [&str; 3] = [ArcChallenge::name(), ArcEasy::name(), MMLUCF::name()]; pub enum TaskType { LogLikelihood(Box), @@ -827,7 +822,14 @@ impl PreparedTask { } pub fn main_metric_name(&self) -> &str { - if TASKS_WITH_ACC_NORM.contains(&self.name()) { + let name = self.name(); + if name == ArcChallenge::name() || name == MMLUCF::name() { + "acc_uncond" + } else if name == MMLUPro::name() + || name == Hellaswag::name() + || name == OpenbookQA::name() + || name == PIQA::name() + { "acc_norm" } else { "acc" diff --git a/website/README.md b/website/README.md index 50bbd5615..6c02128f6 100644 --- a/website/README.md +++ b/website/README.md @@ -50,7 +50,7 @@ Optional: ### running with the backend pointed to localnet 1. `cd backend`, `pnpm dev-local` in another terminal. This will build the WASM for deserializing the onchain state, build the IDL for interacting with the contracts, and start the backend. -2. `cd frontend`, `pnpm dev` in another terminal. +2. `cd frontend`, `pnpm dev-localnet` in another terminal. ### running with the backend pointing to a non-localnet setup