Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions shared/client/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,12 +278,8 @@ impl TrainArgs {
let result: Result<Vec<psyche_eval::Task>> = eval_tasks
.split(",")
.map(|eval_task| {
let fewshot = match eval_task {
"mmlu_pro" => 5,
_ => 0,
};
tasktype_from_name(eval_task)
.map(|task_type| psyche_eval::Task::new(task_type, fewshot, eval_seed))
.map(|task_type| psyche_eval::Task::new(task_type, 5, eval_seed))
})
.collect();
result
Expand Down
16 changes: 9 additions & 7 deletions shared/eval/src/harness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,7 @@ const TASKS_WITH_ACC_NORM: [&str; 6] = [
PIQA::name(),
];

const TASKS_WITH_ACC_UNCOND: [&str; 4] = [
ArcChallenge::name(),
ArcEasy::name(),
MMLUCF::name(),
PIQA::name(),
];
const TASKS_WITH_ACC_UNCOND: [&str; 3] = [ArcChallenge::name(), ArcEasy::name(), MMLUCF::name()];

pub enum TaskType {
LogLikelihood(Box<dyn LogLikelihoodTask>),
Expand Down Expand Up @@ -827,7 +822,14 @@ impl PreparedTask {
}

pub fn main_metric_name(&self) -> &str {
if TASKS_WITH_ACC_NORM.contains(&self.name()) {
let name = self.name();
if name == ArcChallenge::name() || name == MMLUCF::name() {
"acc_uncond"
} else if name == MMLUPro::name()
|| name == Hellaswag::name()
|| name == OpenbookQA::name()
|| name == PIQA::name()
{
"acc_norm"
} else {
"acc"
Expand Down
2 changes: 1 addition & 1 deletion website/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ Optional:
### running with the backend pointed to localnet

1. `cd backend`, `pnpm dev-local` in another terminal. This will build the WASM for deserializing the onchain state, build the IDL for interacting with the contracts, and start the backend.
2. `cd frontend`, `pnpm dev` in another terminal.
2. `cd frontend`, `pnpm dev-localnet` in another terminal.

### running with the backend pointing to a non-localnet setup

Expand Down