-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgit_diff_parse.py
More file actions
117 lines (94 loc) · 3.08 KB
/
git_diff_parse.py
File metadata and controls
117 lines (94 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
"""
Show a JSON-formatted diff of a file comparing the working tree vs the last commit.
Usage:
python git_diff_parse.py <file_path>
Example:
python git_diff_parse.py dbt_code/models/marts/marketing/marketing_channel_attribution.sql
"""
import difflib
import json
import subprocess
import sys
from pathlib import Path
def get_committed_content(file_path: str) -> str | None:
    """Return the content of ``file_path`` as recorded in the HEAD commit.

    Runs ``git show HEAD:<path>`` and returns its standard output decoded as
    text.

    A relative ``file_path`` is handed to git with a ``./`` prefix so that git
    resolves it against the current working directory, the same base used when
    the working-tree copy is read from disk. Without the prefix git resolves
    the path against the repository root, so the committed and working-tree
    versions could refer to different files when the script is started from a
    subdirectory. An absolute ``file_path`` is passed to git unchanged.

    Args:
        file_path: Path of the file to look up in HEAD.

    Returns:
        The committed text, or ``None`` when ``git show`` exits with a
        non-zero status (for example the path is not part of HEAD).
    """
    path = Path(file_path)
    if path.is_absolute():
        object_path = file_path
    else:
        # as_posix() keeps forward slashes, which is what git expects in
        # "<rev>:<path>" even on platforms whose native separator differs.
        object_path = f"./{path.as_posix()}"

    completed = subprocess.run(
        ["git", "show", f"HEAD:{object_path}"],
        capture_output=True,
        text=True,
    )
    if completed.returncode != 0:
        return None
    return completed.stdout
def get_working_content(file_path: str) -> str | None:
"""Get the file content from the working tree."""
p = Path(file_path)
if not p.exists():
return None
return p.read_text()
def build_diff_json(file_path: str, old_text: str, new_text: str) -> dict:
    """Build a JSON-serialisable, line-level diff between two texts.

    Both texts are split into lines and compared with
    ``difflib.SequenceMatcher``. Every non-equal opcode becomes one entry in
    ``changes``:

    * ``replace`` entries carry ``old_lines`` and ``new_lines``;
    * ``delete`` entries carry only ``old_lines``;
    * ``insert`` entries carry only ``new_lines``.

    Each ``old_lines`` / ``new_lines`` value is a dict with a 1-based
    ``start`` line, an inclusive ``end`` line and the affected ``content``.

    Lines are split on ``\\n``, ``\\r\\n`` and ``\\r`` only, so characters
    such as form feed or U+2028 stay inside their line and the reported line
    numbers are not shifted by them. A difference that consists solely of the
    final line terminator is not reported as a change.

    Args:
        file_path: Name stored under the ``file`` key of the result.
        old_text: Text of the old version.
        new_text: Text of the new version.

    Returns:
        A dict with the keys ``file``, ``old_total_lines``,
        ``new_total_lines``, ``total_changes`` and ``changes``.
    """
    old_lines = _split_lines(old_text)
    new_lines = _split_lines(new_text)
    matcher = difflib.SequenceMatcher(None, old_lines, new_lines)

    changes = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            continue
        entry = {"type": tag}
        # "replace" touches both sides; "delete" only the old side and
        # "insert" only the new side.
        if tag in ("replace", "delete"):
            entry["old_lines"] = _line_span(old_lines, i1, i2)
        if tag in ("replace", "insert"):
            entry["new_lines"] = _line_span(new_lines, j1, j2)
        changes.append(entry)

    return {
        "file": file_path,
        "old_total_lines": len(old_lines),
        "new_total_lines": len(new_lines),
        "total_changes": len(changes),
        "changes": changes,
    }


def _split_lines(text: str) -> list[str]:
    """Split text on newline conventions only (``\\n``, ``\\r\\n``, ``\\r``)."""
    # str.splitlines() would additionally break on form feeds, vertical tabs,
    # U+0085, U+2028 and similar characters.
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    # A terminated last line (or empty text) leaves one trailing empty piece.
    if lines[-1] == "":
        lines.pop()
    return lines


def _line_span(lines: list[str], start: int, stop: int) -> dict:
    """Describe the half-open, 0-based range [start, stop) as 1-based lines."""
    return {
        "start": start + 1,
        "end": stop,
        "content": lines[start:stop],
    }
def main():
    """Command-line entry point: print the HEAD vs. working-tree diff as JSON.

    Reads the file path from the first command-line argument. Without one,
    prints usage text and exits with status 1. If the file cannot be obtained
    from HEAD or from disk, prints a JSON object with an ``error`` key and
    exits with status 1. If both versions are identical, prints a compact
    JSON object with zero changes and exits with status 0. Otherwise prints
    the structured diff indented by two spaces.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python git_diff_parse.py <file_path>")
        print(
            "Example: python git_diff_parse.py "
            "dbt_code/models/marts/marketing/marketing_channel_attribution.sql"
        )
        sys.exit(1)

    target = cli_args[0]

    def abort(message: str) -> None:
        # Errors are reported as JSON on stdout, followed by a failing exit.
        print(json.dumps({"error": message}))
        sys.exit(1)

    committed = get_committed_content(target)
    if committed is None:
        abort(f"File not found in HEAD commit: {target}")

    on_disk = get_working_content(target)
    if on_disk is None:
        abort(f"File not found on disk: {target}")

    if committed == on_disk:
        unchanged = {"file": target, "total_changes": 0, "changes": []}
        print(json.dumps(unchanged))
        sys.exit(0)

    diff = build_diff_json(target, committed, on_disk)
    print(json.dumps(diff, indent=2))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()