-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathengine_cli.py
More file actions
52 lines (43 loc) · 2.1 KB
/
engine_cli.py
File metadata and controls
52 lines (43 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from __future__ import annotations
import argparse
import json
from distill_similarity_checker_engine import EngineCompareOptions, SimilarityEngine
from preprocess import PreprocessOptions
def main() -> None:
parser = argparse.ArgumentParser(description="Distill Similarity Checker Engine CLI")
parser.add_argument("--file-a", required=True, help="Path to first .txt or .md file")
parser.add_argument("--file-b", required=True, help="Path to second .txt or .md file")
parser.add_argument("--mode", choices=["Strict Style", "Loose Style"], default="Strict Style")
parser.add_argument("--bootstrap-runs", type=int, default=100)
parser.add_argument("--ignore-urls", action="store_true")
parser.add_argument("--ignore-numbers", action="store_true")
parser.add_argument("--ignore-code-blocks", action="store_true")
parser.add_argument("--ignore-blockquotes", action="store_true")
parser.add_argument("--strip-headings", action="store_true")
parser.add_argument("--strip-markdown", action="store_true")
parser.add_argument("--log-jsonl", default="", help="Optional JSONL output path for appending results")
args = parser.parse_args()
preprocess = PreprocessOptions(
ignore_blockquotes=args.ignore_blockquotes,
ignore_code_blocks=args.ignore_code_blocks,
ignore_urls=args.ignore_urls,
ignore_numbers=args.ignore_numbers,
strip_headings=args.strip_headings,
strip_markdown=args.strip_markdown,
)
options = EngineCompareOptions(
mode=args.mode,
bootstrap_runs=args.bootstrap_runs,
preprocess=preprocess,
)
engine = SimilarityEngine(enable_cache=True, cache_size=128)
result = engine.compare_files(args.file_a, args.file_b, options=options)
score = result["final"]["score_1_1000"]
percent = result["final"]["percent"]
print(f"Style similarity: {score}/1000 ({percent:.2f}%)")
print(json.dumps(result, indent=2))
if args.log_jsonl:
engine.log_result(result, args.log_jsonl, append=True)
print(f"Logged result to: {args.log_jsonl}")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()