Skip to content

Commit 7162a39

Browse files
committed
feat: fetch task definitions from HF repo (workspace.yaml + tests/), remove auto_install hack
/evaluate now downloads workspace.yaml with install_config and test scripts from the HF repo tasks/ directory instead of using parquet rows with auto-generated install commands.
1 parent 2963325 commit 7162a39

File tree

3 files changed

+153
-67
lines changed

3 files changed

+153
-67
lines changed

src/handlers.rs

Lines changed: 34 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,7 +1150,7 @@ async fn evaluate_with_stored_agent(
11501150
.get("task_ids")
11511151
.and_then(|v| serde_json::from_value(v.clone()).ok())
11521152
.unwrap_or_default();
1153-
let split = body
1153+
let _split = body
11541154
.get("split")
11551155
.and_then(|v| v.as_str())
11561156
.unwrap_or("train")
@@ -1174,61 +1174,54 @@ async fn evaluate_with_stored_agent(
11741174
})?
11751175
};
11761176

1177-
// Fetch dataset from HuggingFace
1177+
// Download task files from HF repo (workspace.yaml, tests/*.sh, etc.)
11781178
let hf_client = crate::swe_forge::client::HuggingFaceClient::new().map_err(|e| {
11791179
(
11801180
StatusCode::INTERNAL_SERVER_ERROR,
11811181
Json(serde_json::json!({"error": format!("HF client error: {}", e)})),
11821182
)
11831183
})?;
11841184

1185-
let dataset_config = crate::swe_forge::types::DatasetConfig {
1186-
dataset_id: "CortexLM/swe-forge".to_string(),
1187-
split,
1188-
limit: 100,
1189-
offset: 0,
1190-
};
1185+
let dataset_id = "CortexLM/swe-forge";
1186+
let tasks_base = state.config.workspace_base.join("_hf_tasks");
1187+
let _ = tokio::fs::remove_dir_all(&tasks_base).await;
11911188

1192-
let dataset = hf_client
1193-
.fetch_dataset(&dataset_config)
1194-
.await
1195-
.map_err(|e| {
1196-
(
1197-
StatusCode::BAD_GATEWAY,
1198-
Json(serde_json::json!({"error": format!("Failed to fetch HF dataset: {}", e)})),
1199-
)
1200-
})?;
1189+
let mut hf_tasks: Vec<crate::task::SweForgeTask> = Vec::new();
1190+
let mut errors: Vec<String> = Vec::new();
12011191

1202-
let matched: Vec<&crate::swe_forge::types::DatasetEntry> = dataset
1203-
.entries
1204-
.iter()
1205-
.filter(|e| task_ids.contains(&e.instance_id))
1206-
.collect();
1192+
for task_id in &task_ids {
1193+
let task_dir = tasks_base.join(task_id.replace('/', "__"));
1194+
match hf_client
1195+
.download_task_files(dataset_id, task_id, &task_dir)
1196+
.await
1197+
{
1198+
Ok(()) => match crate::task::parse_task(&task_dir) {
1199+
Ok(mut task) => {
1200+
task.id = task_id.clone();
1201+
hf_tasks.push(task);
1202+
}
1203+
Err(e) => {
1204+
tracing::warn!("Failed to parse task {}: {}", task_id, e);
1205+
errors.push(format!("{}: parse error: {}", task_id, e));
1206+
}
1207+
},
1208+
Err(e) => {
1209+
tracing::warn!("Failed to download task {}: {}", task_id, e);
1210+
errors.push(format!("{}: download error: {}", task_id, e));
1211+
}
1212+
}
1213+
}
12071214

1208-
if matched.is_empty() {
1215+
if hf_tasks.is_empty() {
12091216
return Err((
12101217
StatusCode::NOT_FOUND,
1211-
Json(
1212-
serde_json::json!({"error": "No matching tasks found", "available": dataset.entries.len()}),
1213-
),
1218+
Json(serde_json::json!({
1219+
"error": "No valid tasks found",
1220+
"details": errors,
1221+
})),
12141222
));
12151223
}
12161224

1217-
// Build tasks from HF
1218-
let mut registry = crate::task::registry::TaskRegistry::new();
1219-
let hf_dataset = crate::swe_forge::types::HuggingFaceDataset {
1220-
dataset_id: dataset.dataset_id.clone(),
1221-
split: dataset.split.clone(),
1222-
entries: matched.into_iter().cloned().collect(),
1223-
total_count: dataset.total_count,
1224-
};
1225-
registry.load_from_huggingface(&hf_dataset).map_err(|e| {
1226-
(
1227-
StatusCode::INTERNAL_SERVER_ERROR,
1228-
Json(serde_json::json!({"error": format!("Failed to load tasks: {}", e)})),
1229-
)
1230-
})?;
1231-
12321225
// Extract agent code only (no tasks/ required - we use HF tasks)
12331226
let extract_dir = state.config.workspace_base.join("_extract_evaluate");
12341227
let _ = tokio::fs::remove_dir_all(&extract_dir).await;
@@ -1243,7 +1236,6 @@ async fn evaluate_with_stored_agent(
12431236
})?;
12441237
let _ = tokio::fs::remove_dir_all(&extract_dir).await;
12451238

1246-
let hf_tasks: Vec<crate::task::SweForgeTask> = registry.get_tasks().to_vec();
12471239
let final_archive = crate::task::ExtractedArchive {
12481240
tasks: hf_tasks,
12491241
agent_code,

src/swe_forge/client.rs

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
use anyhow::{Context, Result};
2-
use tracing::{debug, info};
2+
use std::path::Path;
3+
use tracing::{debug, info, warn};
34

45
use super::types::{DatasetConfig, DatasetEntry, HfRowsResponse, HuggingFaceDataset};
56

67
const HF_DATASET_VIEWER_BASE: &str = "https://datasets-server.huggingface.co/rows";
8+
const HF_REPO_BASE: &str = "https://huggingface.co";
79
const DEFAULT_TIMEOUT_SECS: u64 = 30;
810
const MAX_PAGE_SIZE: usize = 100;
911

@@ -97,6 +99,121 @@ impl HuggingFaceClient {
9799
))
98100
}
99101

102+
/// Download all task files for a given instance_id from the HF repo into a local directory.
103+
/// The directory will have workspace.yaml, prompt.md, tests/*.sh, etc.
104+
pub async fn download_task_files(
105+
&self,
106+
dataset_id: &str,
107+
instance_id: &str,
108+
dest_dir: &Path,
109+
) -> Result<()> {
110+
let tree_url = format!(
111+
"{}/api/datasets/{}/tree/main/tasks/{}",
112+
HF_REPO_BASE, dataset_id, instance_id
113+
);
114+
info!("Listing HF task files: {}", tree_url);
115+
116+
// List all files (including subdirectories)
117+
let files = self.list_tree_recursive(dataset_id, instance_id).await?;
118+
119+
if files.is_empty() {
120+
anyhow::bail!(
121+
"No files found for task {} in dataset {}",
122+
instance_id,
123+
dataset_id
124+
);
125+
}
126+
127+
tokio::fs::create_dir_all(dest_dir).await?;
128+
129+
for file_path in &files {
130+
let relative = file_path
131+
.strip_prefix(&format!("tasks/{}/", instance_id))
132+
.unwrap_or(file_path);
133+
let local_path = dest_dir.join(relative);
134+
135+
if let Some(parent) = local_path.parent() {
136+
tokio::fs::create_dir_all(parent).await?;
137+
}
138+
139+
let download_url = format!(
140+
"{}/datasets/{}/resolve/main/{}",
141+
HF_REPO_BASE, dataset_id, file_path
142+
);
143+
debug!("Downloading {} -> {}", download_url, local_path.display());
144+
145+
let resp = self
146+
.client
147+
.get(&download_url)
148+
.send()
149+
.await
150+
.with_context(|| format!("Failed to download {}", download_url))?;
151+
152+
if !resp.status().is_success() {
153+
warn!("Failed to download {}: HTTP {}", file_path, resp.status());
154+
continue;
155+
}
156+
157+
let bytes = resp.bytes().await?;
158+
tokio::fs::write(&local_path, &bytes).await?;
159+
}
160+
161+
info!(
162+
"Downloaded {} files for task {} to {}",
163+
files.len(),
164+
instance_id,
165+
dest_dir.display()
166+
);
167+
Ok(())
168+
}
169+
170+
async fn list_tree_recursive(
171+
&self,
172+
dataset_id: &str,
173+
instance_id: &str,
174+
) -> Result<Vec<String>> {
175+
let mut all_files = Vec::new();
176+
let mut dirs_to_visit = vec![format!("tasks/{}", instance_id)];
177+
178+
while let Some(dir_path) = dirs_to_visit.pop() {
179+
let url = format!(
180+
"{}/api/datasets/{}/tree/main/{}",
181+
HF_REPO_BASE, dataset_id, dir_path
182+
);
183+
let resp = self
184+
.client
185+
.get(&url)
186+
.send()
187+
.await
188+
.with_context(|| format!("Failed to list {}", url))?;
189+
190+
if !resp.status().is_success() {
191+
warn!(
192+
"Failed to list directory {}: HTTP {}",
193+
dir_path,
194+
resp.status()
195+
);
196+
continue;
197+
}
198+
199+
let entries: Vec<serde_json::Value> = resp.json().await?;
200+
for entry in entries {
201+
let entry_type = entry["type"].as_str().unwrap_or("");
202+
let entry_path = entry["path"].as_str().unwrap_or("");
203+
if entry_path.is_empty() {
204+
continue;
205+
}
206+
match entry_type {
207+
"file" => all_files.push(entry_path.to_string()),
208+
"directory" => dirs_to_visit.push(entry_path.to_string()),
209+
_ => {}
210+
}
211+
}
212+
}
213+
214+
Ok(all_files)
215+
}
216+
100217
async fn fetch_page(
101218
&self,
102219
dataset_id: &str,

src/task/registry.rs

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -131,27 +131,6 @@ fn parse_test_list(raw: &str) -> Vec<String> {
131131
.collect()
132132
}
133133

134-
/// Best-effort dependency-install commands for a repo of the given language.
///
/// Always returns `Some`; an unrecognized language falls back to trying both
/// the Python and Node package managers. Every command is suffixed with
/// `|| true` so a failed install never aborts workspace setup.
fn auto_install_commands(language: &str) -> Option<Vec<String>> {
    let commands: &[&str] = match language {
        "python" => &[
            "pip install -e '.[dev,test]' 2>/dev/null || pip install -e . 2>/dev/null || true",
            "pip install pytest 2>/dev/null || true",
        ],
        "javascript" | "typescript" => &[
            "npm install --legacy-peer-deps 2>/dev/null || yarn install --frozen-lockfile 2>/dev/null || true",
        ],
        "go" => &["go mod download 2>/dev/null || true"],
        "rust" => &["cargo fetch 2>/dev/null || true"],
        _ => &["pip install -e . 2>/dev/null || npm install 2>/dev/null || true"],
    };
    Some(commands.iter().map(|c| c.to_string()).collect())
}
154-
155134
fn convert_dataset_entry_to_task(entry: &DatasetEntry) -> Result<SweForgeTask> {
156135
let repo_url = build_repo_url(&entry.repo);
157136
let language = entry
@@ -168,13 +147,11 @@ fn convert_dataset_entry_to_task(entry: &DatasetEntry) -> Result<SweForgeTask> {
168147
.as_deref()
169148
.and_then(|s| serde_json::from_str(s).ok());
170149

171-
let install = auto_install_commands(&language);
172-
173150
let workspace = WorkspaceConfig {
174151
repo: repo_url,
175152
version: entry.version.clone().unwrap_or_default(),
176153
base_commit: Some(entry.base_commit.clone()),
177-
install,
154+
install: None,
178155
language: Some(language),
179156
fail_to_pass: f2p,
180157
pass_to_pass: p2p,

0 commit comments

Comments (0)