forked from jgoldin-skillz/confluenceDumpWithPython
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpatch_sidebar.py
More file actions
236 lines (187 loc) · 7.92 KB
/
patch_sidebar.py
File metadata and controls
236 lines (187 loc) · 7.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Confluence Sidebar Patcher (Site-Dir Aware)
-------------------------------------------
Applies the structure from 'sidebar.md' (or 'sidebar_edit.md') to all HTML files.
Supports automatic root-unwrapping for Space exports via magic comments.
Usage:
python3 patch_sidebar.py --site-dir "./output/TIMESTAMP Space X"
"""
import os
import sys
import argparse
import re
import copy
from urllib.parse import unquote, urlparse
from bs4 import BeautifulSoup, Comment
class Node:
    """One entry of the sidebar tree.

    Attributes:
        title: Display text of the entry.
        page_id: Identifier used to build the "<page_id>.html" link, or
            None when the entry is not linked to a page.
        level: Nesting depth of the entry (0 by default).
        children: Child Node objects, in insertion order.
    """

    def __init__(self, title, page_id=None, level=0):
        self.title = title
        self.page_id = page_id
        self.level = level
        # A fresh list per instance, so nodes never share their children.
        self.children = []

    def __repr__(self):
        # Only the child count is shown to keep the repr of large trees short.
        return (
            f"{type(self).__name__}(title={self.title!r}, "
            f"page_id={self.page_id!r}, level={self.level!r}, "
            f"children={len(self.children)})"
        )
def parse_markdown_to_tree(md_content):
    """
    Parse a markdown bullet list into a Node tree.

    Line handling:
      * A line starting with ``<!--`` is never turned into a node. If it is a
        complete ``<!-- key: value -->`` comment whose text contains
        ``mode:``, the key/value pair is stored in the config dictionary
        (e.g. ``<!-- mode: space -->``).
      * ``- [Title](href)`` becomes a node whose page_id is the file name of
        the (URL-decoded) href path without its extension.
      * ``- Title`` becomes a node with page_id None.
      * Every other line is ignored.

    Nesting is derived from the whitespace in front of the dash: each tab and
    each pair of spaces counts as one level.

    Args:
        md_content: The markdown text as a single string.

    Returns:
        A tuple ``(root, config)``. ``root`` is a synthetic Node titled "root"
        at level -1 whose children are the top-level entries; ``config`` is a
        dict that always contains 'mode' (default 'default').
    """
    link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)')
    config_pattern = re.compile(r'<!--\s*(.*?)\s*-->')

    root = Node("root", level=-1)
    stack = [root]  # path from the root to the most recently added node
    config = {'mode': 'default'}

    for line in md_content.splitlines():
        stripped = line.strip()

        # Config comments (e.g. <!-- mode: space -->); comment lines are
        # consumed here whether or not they carry a setting.
        if stripped.startswith('<!--'):
            match = config_pattern.search(stripped)
            if match:
                content = match.group(1)
                if 'mode:' in content:
                    key, val = content.split(':', 1)
                    config[key.strip()] = val.strip()
            continue

        # Only list items are relevant (this also skips blank lines).
        if not stripped.startswith('-'):
            continue

        # Level calculation: 2 spaces or 1 tab = 1 level step.
        raw_indent = line[:line.find('-')]
        level = raw_indent.count('\t') + (raw_indent.count(' ') // 2)

        content = stripped[1:].strip()
        match = link_pattern.search(content)
        if match:
            title = match.group(1)
            href = match.group(2)
            try:
                path = unquote(urlparse(href).path)
            except ValueError:
                # urlparse rejects malformed URLs; keep the entry, drop the link.
                page_id = None
            else:
                filename = os.path.basename(path)
                page_id = os.path.splitext(filename)[0]
        else:
            title = content
            page_id = None

        node = Node(title, page_id, level)

        # Pop back to the nearest ancestor that is strictly shallower.
        while len(stack) > 1 and stack[-1].level >= level:
            stack.pop()
        stack[-1].children.append(node)
        stack.append(node)

    return root, config
def render_tree_to_html(root):
    """Render the Node tree to the HTML structure required by the CSS.

    Only the children of ``root`` are rendered; ``root`` itself produces no
    markup. Each node becomes an ``<li>``:

      * without children: ``<li class="leaf">`` holding either a link to
        "<page_id>.html" or, when page_id is empty/None, a ``<span>``;
      * with children: ``<li class="folder">`` holding a collapsible
        ``<details>`` whose ``<summary>`` is the (optionally linked) title
        and whose body is a nested ``<ul>``.

    Titles and link targets are HTML-escaped so that characters such as
    ``&``, ``<`` or ``"`` cannot break the generated markup.

    Args:
        root: Object with a ``children`` list; every descendant must expose
            ``title``, ``page_id`` and ``children``.

    Returns:
        The sidebar markup as a string, wrapped in
        ``<div class="sidebar-tree">``.
    """
    # Local import so this function does not rely on a module-level import.
    from html import escape

    def render_node(n):
        title = escape(str(n.title), quote=False)
        if n.page_id:
            # quote=True because the value is placed inside a quoted attribute.
            href = escape(f"{n.page_id}.html", quote=True)
            label = f'<a href="{href}">{title}</a>'
        elif n.children:
            # Unlinked folder: the bare title is the summary text.
            label = title
        else:
            label = f'<span>{title}</span>'

        if not n.children:
            return f'<li class="leaf">{label}</li>'

        inner_html = "".join(render_node(c) for c in n.children)
        # details/summary gives a collapsible folder without any script.
        return (
            f'<li class="folder"><details><summary>{label}</summary>'
            f'<ul>{inner_html}</ul></details></li>'
        )

    sidebar_content = "<ul>" + "".join(render_node(c) for c in root.children) + "</ul>"
    return f'<div class="sidebar-tree">{sidebar_content}</div>'
def apply_active_state(sidebar_soup, current_page_id):
    """Mark the current page in the sidebar and open the folders around it.

    First every existing 'active-page' class and every 'open' attribute on a
    <details> element is removed. Then the <a> returned by a lookup for
    href "<current_page_id>.html" (if any) receives the 'active-page' class
    and each <details> element among its ancestors gets an empty 'open'
    attribute.

    The soup is modified in place and is also returned.
    """
    # Reset any state left over from a previous use of this soup.
    stale_marks = sidebar_soup.find_all(attrs={"class": "active-page"})
    for marked in stale_marks:
        marked['class'].remove("active-page")

    for opened in sidebar_soup.find_all('details', attrs={"open": True}):
        del opened['open']

    link = sidebar_soup.find('a', href=f"{current_page_id}.html")
    if not link:
        # Page is not part of the sidebar: nothing to highlight.
        return sidebar_soup

    link['class'] = link.get('class', []) + ['active-page']

    # Walk up to the document root, expanding every enclosing folder.
    for ancestor in link.parents:
        if ancestor.name == 'details':
            ancestor['open'] = ''

    return sidebar_soup
def patch_page(file_path, sidebar_soup_template):
    """Replace the content of <aside id="sidebar"> in a single HTML file.

    The page id is read from <meta name="confluence-page-id">; when that tag
    or its content is missing, the file name without extension is used. A
    copy of the sidebar template gets its active state applied for that id
    and is inserted into the aside between two marker comments. The file is
    then overwritten with the modified document.

    Returns:
        True when the file was rewritten; False when the page has no sidebar
        aside or when any exception occurred (the error is printed).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as src:
            markup = src.read()
        soup = BeautifulSoup(markup, 'html.parser')

        meta_tag = soup.find('meta', attrs={'name': 'confluence-page-id'})
        if meta_tag and meta_tag.get('content'):
            page_id = meta_tag['content']
        else:
            stem, _ext = os.path.splitext(os.path.basename(file_path))
            page_id = stem

        aside = soup.find('aside', id='sidebar')
        if not aside:
            return False

        # Work on a copy so the shared template stays untouched.
        sidebar_instance = copy.copy(sidebar_soup_template)
        apply_active_state(sidebar_instance, page_id)

        # Take the nodes from <body> when the parser produced one, otherwise
        # from the top level of the copy.
        container = sidebar_instance.body if sidebar_instance.body else sidebar_instance

        aside.clear()
        pieces = [Comment(" CONFLUENCE-SIDEBAR-START ")]
        # Snapshot the children first: appending moves them out of the copy.
        pieces.extend(container.children)
        pieces.append(Comment(" CONFLUENCE-SIDEBAR-END "))
        for piece in pieces:
            aside.append(piece)

        with open(file_path, 'w', encoding='utf-8') as dst:
            dst.write(str(soup))
        return True
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return False
def main():
    """Command-line entry point.

    Picks the sidebar markdown file inside --site-dir (sidebar_orig.md with
    --restore-original, otherwise sidebar_edit.md if it exists, otherwise
    sidebar.md), parses it, optionally unwraps a single root entry, renders
    the tree to HTML and patches every .html file in <site-dir>/pages.
    Exits with status 1 when the pages directory or the source file is
    missing.
    """
    cli = argparse.ArgumentParser(description="Patch Sidebar")
    cli.add_argument('--site-dir', required=True, help="Base directory of the dump")
    cli.add_argument('--restore-original', action='store_true', help="Use sidebar_orig.md")
    cli.add_argument('--unwrap', action='store_true', help="Force unwrap of root node (promote children)")
    args = cli.parse_args()

    site_dir = args.site_dir
    pages_dir = os.path.join(site_dir, "pages")

    # Choose the markdown source.
    if args.restore_original:
        source_path = os.path.join(site_dir, "sidebar_orig.md")
        print("Mode: Restoring Original Sidebar")
    else:
        edit_path = os.path.join(site_dir, "sidebar_edit.md")
        orig_path = os.path.join(site_dir, "sidebar.md")
        if os.path.exists(edit_path):
            source_path = edit_path
            print("Mode: Applying Edited Sidebar (sidebar_edit.md)")
        else:
            source_path = orig_path
            print("Mode: Applying Standard Sidebar (sidebar.md)")

    # Validate inputs before doing any work.
    if not os.path.exists(pages_dir):
        print(f"Error: Pages dir not found: {pages_dir}")
        sys.exit(1)
    if not os.path.exists(source_path):
        print(f"Error: Source file not found: {source_path}")
        sys.exit(1)

    # 1. Parse
    print(f"Reading source: {source_path}")
    with open(source_path, 'r', encoding='utf-8') as f:
        markdown_text = f.read()
    root, config = parse_markdown_to_tree(markdown_text)

    # 2. Unwrap the root when requested by flag or by the magic comment
    #    'mode: space', but only if there is exactly one top-level entry.
    if args.unwrap or config.get('mode') == 'space':
        if len(root.children) != 1:
            print("Unwrap requested but root has multiple (or zero) children. Skipping unwrap.")
        else:
            only_child = root.children[0]
            print(f"Unwrapping root node: '{only_child.title}' -> Children promoted to top level.")
            # The level is left as is: rendering starts inside root.children.
            root = only_child

    # 3. Render
    sidebar_template = BeautifulSoup(render_tree_to_html(root), 'html.parser')

    # 4. Patch every page
    files = [name for name in os.listdir(pages_dir) if name.endswith('.html')]
    total = len(files)
    print(f"Patching {total} files...")

    count = 0
    for filename in files:
        if not patch_page(os.path.join(pages_dir, filename), sidebar_template):
            continue
        count += 1
        if count % 100 == 0:
            print(f" {count}...")

    print(f"Done. Patched {count}/{total} files.")


if __name__ == '__main__':
    main()