-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuild.py
More file actions
131 lines (107 loc) · 3.82 KB
/
build.py
File metadata and controls
131 lines (107 loc) · 3.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python3
import json
from pathlib import Path
import subprocess
import yaml
import re
from datetime import datetime
import html # for escaping code content
# Filesystem layout; every path is anchored at the directory holding this script.
ROOT = Path(__file__).parent.resolve()
POSTS_DIR = ROOT.joinpath("posts")
TEMPLATE_FILE = ROOT.joinpath("src", "html", "tmpl.html")
META_FILE = ROOT.joinpath("meta.json")

# Matches a GitHub-style fenced code block. Group 1 is the optional language
# tag that directly follows the opening fence; group 2 is everything up to the
# next closing fence (DOTALL lets "." span newlines).
CODE_BLOCK_RE = re.compile(r"```([^\s`]+)?\n(.*?)```", flags=re.DOTALL)
def _normalize_date(value):
    """Return *value* as ``YYYY-MM-DD`` when it is, or parses as, an ISO date."""
    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d")
    try:
        return datetime.fromisoformat(str(value)).strftime("%Y-%m-%d")
    except ValueError:
        # Not an ISO date: keep the author's text rather than failing the build.
        return str(value)


def md_to_html(md_path: Path):
    """Render one Markdown post to a complete HTML page.

    The file must begin with a ``---``-delimited YAML frontmatter block.
    Fenced code blocks matched by ``CODE_BLOCK_RE`` are lifted out before the
    body is handed to pandoc and spliced back in afterwards as HTML-escaped
    ``<pre><code>`` elements, so pandoc never processes their content.

    Args:
        md_path: Path of the Markdown source file.

    Returns:
        A ``(final_html, fm, slug)`` tuple: the page produced by filling
        ``TEMPLATE_FILE``, the frontmatter mapping (with ``date`` normalised
        to ``YYYY-MM-DD`` when it parses as an ISO date), and the file stem.

    Raises:
        ValueError: If the frontmatter block is missing or is not a mapping.
        subprocess.CalledProcessError: If pandoc exits with a non-zero status.
    """
    content = md_path.read_text(encoding="utf-8")

    # Split frontmatter from the Markdown body.
    fm_match = re.match(r"^---\n(.*?)\n---\n(.*)$", content, re.S)
    if not fm_match:
        raise ValueError(f"No valid frontmatter in {md_path}")
    fm_raw, md_body = fm_match.groups()
    fm = yaml.safe_load(fm_raw)
    if not isinstance(fm, dict):
        # Empty frontmatter loads as None, a bare scalar as str, and so on.
        raise ValueError(f"Frontmatter in {md_path} is not a YAML mapping")

    if "date" in fm:
        fm["date"] = _normalize_date(fm["date"])

    # Slug from filename
    slug = md_path.stem

    # Swap each fenced code block for a placeholder; the rendered HTML for
    # placeholder N is stored at code_blocks[N].
    code_blocks = []

    def stash(m):
        # The language tag ends up inside an attribute value, so escape it too.
        lang = html.escape(m.group(1) or "")
        code_escaped = html.escape(m.group(2))
        code_blocks.append(
            f'<pre><code class="language-{lang} toolbar">{code_escaped}</code></pre>'
        )
        return f"[[[CODEBLOCK{len(code_blocks) - 1}]]]"

    md_body_safe = CODE_BLOCK_RE.sub(stash, md_body)

    # Convert the rest of the Markdown body with pandoc.
    # NOTE(review): "toc.html" is not anchored to ROOT; presumably pandoc is
    # expected to find it via the working directory - confirm.
    html_body = subprocess.check_output(
        [
            "pandoc",
            "--toc",
            "--template=toc.html",
            "--from", "gfm-tex_math_dollars",
            "--to", "html",
        ],
        input=md_body_safe.encode("utf-8"),
    ).decode("utf-8")

    def restore(m):
        index = int(m.group(1) or m.group(2))
        if index < len(code_blocks):
            return code_blocks[index]
        # Placeholder-looking text that we did not generate: leave untouched.
        return m.group(0)

    # Restore all code blocks in a single pass so text inside one restored
    # block is never mistaken for another placeholder. When the placeholder
    # is the sole content of a paragraph, drop the <p> wrapper as well:
    # <pre> is not valid inside <p>.
    html_body = re.sub(
        r"<p>\[\[\[CODEBLOCK([0-9]+)\]\]\]</p>|\[\[\[CODEBLOCK([0-9]+)\]\]\]",
        restore,
        html_body,
    )

    # Fill the page template. Title falls back to the slug and is escaped
    # because it is plain text going into HTML. Substitution is single-pass
    # so a literal "{{{title}}}" inside the post body is not rewritten.
    template = TEMPLATE_FILE.read_text(encoding="utf-8")
    fields = {
        "content": html_body,
        "title": html.escape(str(fm.get("title", slug))),
    }
    final_html = re.sub(
        r"\{\{\{(content|title)\}\}\}",
        lambda m: fields[m.group(1)],
        template,
    )
    return final_html, fm, slug
def build():
    """Render every post in ``POSTS_DIR`` and regenerate ``META_FILE``.

    Each ``posts/*.md`` file is rendered with ``md_to_html`` and written to
    ``ROOT/<category>/<slug>.html``. The metadata index is rebuilt from
    scratch on every run (an existing ``meta.json`` is overwritten, not
    merged) and saved sorted by date, newest first.

    Raises:
        ValueError: If a post's frontmatter has no ``category``.
    """
    # Keyed by slug so a later post with the same slug replaces the earlier one.
    entries = {}

    # glob() order is filesystem-dependent; sorting keeps posts that share a
    # date in the same relative order in meta.json on every run.
    for md_file in sorted(POSTS_DIR.glob("*.md")):
        final_html, fm, slug = md_to_html(md_file)

        category = fm.get("category")
        if category is None:
            raise ValueError(f"No category in frontmatter of {md_file}")
        # YAML may load the category as a non-string scalar (e.g. 2024).
        category = str(category)

        # Create the category directory on demand, including any parents
        # for a nested category such as "notes/python".
        category_dir = ROOT / category
        category_dir.mkdir(parents=True, exist_ok=True)
        (category_dir / f"{slug}.html").write_text(final_html, encoding="utf-8")

        entries.pop(slug, None)
        entries[slug] = {
            "date": fm.get("date", ""),
            "title": fm.get("title", slug),
            "slug": slug,
            # An empty "tags:" key loads as None; store an empty list instead.
            "tags": fm.get("tags") or [],
            "category": category,
        }

    metadata = sorted(
        entries.values(), key=lambda m: m.get("date", ""), reverse=True
    )

    # Save updated meta.json
    with open(META_FILE, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2, ensure_ascii=False)
# Script entry point: run the full site build only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    build()