-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
59 lines (49 loc) · 1.92 KB
/
main.py
File metadata and controls
59 lines (49 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import yaml
import json
from pathlib import Path
from glob import glob
from core import discover_test_files, normalize_file, parse_test_file
def load_config(path="config.yaml"):
    """Load and return the pipeline configuration from a YAML file.

    Args:
        path: Path to the YAML config file (default: "config.yaml").

    Returns:
        The parsed configuration (typically a dict).
    """
    # Explicit encoding: without it, open() uses the platform default,
    # which breaks on non-ASCII config values under e.g. Windows cp1252.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
def expand_roots(repo_root, sub_roots):
    """Expand relative sub-paths and glob patterns into absolute directory paths.

    Each entry of *sub_roots* is joined onto *repo_root* and expanded with
    glob (recursive ``**`` supported); only matches that are existing
    directories are kept.

    Args:
        repo_root: Base directory of the repository.
        sub_roots: Iterable of relative paths / glob patterns under the root.

    Returns:
        List of matching directory paths (as strings), in glob order.
    """
    base = Path(repo_root)
    matched_dirs = []
    for pattern in sub_roots:
        hits = glob(str(base / pattern), recursive=True)
        matched_dirs.extend(hit for hit in hits if Path(hit).is_dir())
    return matched_dirs
def run_pipeline(config):
    """Run the discover -> normalize -> parse test pipeline for each repo.

    For each repo key ("tf", "pt") this discovers test files under the
    configured roots, normalizes the file records, parses individual tests
    out of each file, and writes each stage's output as JSONL under
    data/parsing/.

    Args:
        config: Parsed configuration with "repos", "test_roots",
            "include_globs" and "exclude_globs" keys.
    """
    out_dir = Path("data/parsing")
    # parents=True also creates the intermediate "data" directory.
    out_dir.mkdir(parents=True, exist_ok=True)

    # Glob filters are repo-independent; read them once.
    include = config["include_globs"]
    exclude = config["exclude_globs"]

    for repo_key in ["tf", "pt"]:
        repo_root = config["repos"][repo_key]
        sub_roots = config["test_roots"][repo_key]
        root_dirs = expand_roots(repo_root, sub_roots)

        # Step 1: Discover candidate test files.
        files = discover_test_files(root_dirs, include, exclude)
        _write_jsonl(out_dir / f"files_{repo_key}.jsonl", files)

        # Step 2: Normalize each discovered file record.
        normalized = [normalize_file(item) for item in files]
        _write_jsonl(out_dir / f"norm_{repo_key}.jsonl", normalized)

        # Step 3: Parse individual tests from each normalized file,
        # tagging each test with the file it came from.
        results = []
        for item in normalized:
            for test in parse_test_file(item["abs_path"]):
                test["file"] = item["rel_path"]
                results.append(test)
        _write_jsonl(out_dir / f"tests_{repo_key}.parsed.jsonl", results)


def _write_jsonl(path, records):
    """Write an iterable of JSON-serializable records to *path*, one per line."""
    with open(path, "w") as f:
        for record in records:
            f.write(json.dumps(record) + "\n")
if __name__ == "__main__":
    # Entry point: load the YAML config and run the full pipeline.
    run_pipeline(load_config("config.yaml"))