diff --git a/README.md b/README.md
index f32e1d2..a4904e7 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,12 @@ HTML-filer kan publiceras via:
- **Cloudflare R2**: Med `html-export-workflow.yml` (kräver R2-credentials)
- **GitHub Pages**: Med `github-pages-workflow.yml` (enklare setup, kräver aktiverad GitHub Pages)
+För att testa HTML-sajten lokalt:
+```bash
+python scripts/serve_html.py
+```
+Detta startar en lokal HTTP-server på `http://localhost:8000`. Du kan ange en annan port med `python scripts/serve_html.py 3000`.
+
### Vektor-format (för semantisk sökning)
- **`vector`**: Konverterar författningar till vektorembeddings för semantisk sökning och RAG-applikationer. Använder OpenAI:s text-embedding-3-large modell (3072 dimensioner) och stödjer lagring i PostgreSQL (pgvector), Elasticsearch eller JSON-fil.
diff --git a/README_EN.md b/README_EN.md
index 09c7ab7..8b31635 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -50,6 +50,12 @@ HTML files can be published via:
- **Cloudflare R2**: Using `html-export-workflow.yml` (requires R2 credentials)
- **GitHub Pages**: Using `github-pages-workflow.yml` (simpler setup, requires GitHub Pages enabled)
+To test the HTML site locally:
+```bash
+python scripts/serve_html.py
+```
+This starts a local HTTP server on `http://localhost:8000`. You can specify a different port with `python scripts/serve_html.py 3000`.
+
### Vector Format (for semantic search)
- **`vector`**: Converts legislation to vector embeddings for semantic search and RAG applications. Uses OpenAI's text-embedding-3-large model (3072 dimensions) and supports storage in PostgreSQL (pgvector), Elasticsearch, or JSON file.
diff --git a/exporters/html/html_export.py b/exporters/html/html_export.py
index cc88b38..41470a5 100644
--- a/exporters/html/html_export.py
+++ b/exporters/html/html_export.py
@@ -348,19 +348,25 @@ def convert_to_html(data: Dict[str, Any], apply_amendments: bool = False, up_to_
def make_links_relative(html_content: str) -> str:
"""
Strip base URL from links to make them relative for HTML export.
-
- Removes https://selex.se/eli from links to make them relative.
-
+
+ Removes base domain (e.g., https://selex.se/eli) from links to make them relative.
+ This works together with the HTML base element in the page head to resolve paths correctly.
+
Args:
html_content (str): HTML content with potentially absolute links
-
+
Returns:
str: HTML content with relative links
"""
- # Pattern to match https://selex.se/eli in href attributes
- pattern = r'href="https://selex\.se/eli(/[^"]*)"'
+ # Get base URL from environment (includes the /eli path prefix, no trailing slash)
+ base_url = os.getenv('INTERNAL_LINKS_BASE_URL', 'https://selex.se/eli')
+
+ # Pattern to match base_url in href attributes
+ # Escape special regex characters in base_url
+ escaped_base = re.escape(base_url)
+ pattern = rf'href="{escaped_base}(/[^"]*)"'
replacement = r'href="\1"'
-
+
return re.sub(pattern, replacement, html_content)
@@ -651,6 +657,7 @@ def create_html_head(title: str, beteckning: str, additional_styles: str = "", a
head = f"""
+
{html.escape(title)}{eli_metadata}
{navbar_script}
{base_styles}
diff --git a/formatters/apply_links.py b/formatters/apply_links.py
index d6e1967..153de0f 100644
--- a/formatters/apply_links.py
+++ b/formatters/apply_links.py
@@ -53,7 +53,7 @@ def apply_sfs_links(text: str) -> str:
# Matchar mönster som "2002:43", "1970:485", etc.
sfs_pattern = SFS_PATTERN
- # Använd alltid https://selex.se/eli som default om env variabel inte finns
+ # Använd base URL med /eli/ prefix (default: https://selex.se/eli)
base_url = os.getenv('INTERNAL_LINKS_BASE_URL', 'https://selex.se/eli')
# TODO: Slå upp SFS-beteckning mot JSON-fil för att verifiera giltighet
@@ -238,9 +238,9 @@ def apply_law_name_links(text: str) -> str:
if not law_names_data:
return text
- # Hämta bas-URL från miljövariabler - använd alltid https://selex.se/eli som default
+ # Hämta bas-URL från miljövariabler (med /eli/ prefix)
base_url = os.getenv('INTERNAL_LINKS_BASE_URL', 'https://selex.se/eli')
-
+
# Processar texten rad för rad för att undvika att länka rubriker
lines = text.split('\n')
processed_lines = []
@@ -257,22 +257,22 @@ def replace_law_name_reference(match):
paragraph_part = match.group(2).strip()
law_name = match.group(3).lower()
full_match = match.group(0)
-
+
# Leta upp lagnamnet i data
sfs_id = _lookup_law_name(law_name, law_names_data)
-
+
if not sfs_id:
print(f"Varning: Okänt lagnamn '{law_name}' i referens '{full_match}'")
return full_match # Returnera oförändrat om lagnamnet inte hittas
-
+
# Extrahera år och nummer från SFS-ID (format: "YYYY:NNN")
id_parts = sfs_id.split(':')
if len(id_parts) != 2:
print(f"Varning: Ogiltigt SFS-ID format '{sfs_id}' för lagnamn '{law_name}'")
return full_match
-
+
year, number = id_parts
-
+
# Skapa bas-URL
url = f"{base_url}/sfs/{year}/{number}"
diff --git a/scripts/serve_html.py b/scripts/serve_html.py
new file mode 100755
index 0000000..4245de9
--- /dev/null
+++ b/scripts/serve_html.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""
+Simple HTTP server to serve the HTML site locally for testing.
+
+Run from the project root with: python scripts/serve_html.py [port]
+Default port is 8000.
+"""
+
+import http.server
+import socketserver
+import sys
+import os
+from pathlib import Path
+
+DEFAULT_PORT = 8000
+
+
+class ReusableTCPServer(socketserver.TCPServer):
+    """TCPServer that can rebind its port right after a restart."""
+
+    # Without this, restarting the script shortly after Ctrl+C can fail with
+    # "Address already in use" while the old socket lingers in TIME_WAIT.
+    allow_reuse_address = True
+
+
+def parse_port(argv):
+    """Return the port requested in argv[1], or DEFAULT_PORT if absent.
+
+    Exits with status 1 when the argument is not an integer in 1-65535.
+    """
+    if len(argv) <= 1:
+        return DEFAULT_PORT
+
+    try:
+        port = int(argv[1])
+    except ValueError:
+        print(f"Error: Invalid port number '{argv[1]}'")
+        sys.exit(1)
+
+    if not 1 <= port <= 65535:
+        print(f"Error: Port must be between 1 and 65535, got {port}")
+        sys.exit(1)
+
+    return port
+
+
+def main():
+    """Serve output/html_site over HTTP until interrupted with Ctrl+C."""
+    port = parse_port(sys.argv)
+
+    # The site lives in output/html_site under the project root; this script
+    # sits in the scripts directory, so go up two levels from the script file.
+    html_dir = Path(__file__).resolve().parent.parent / "output" / "html_site"
+
+    if not html_dir.is_dir():
+        print(f"Error: HTML site directory not found at {html_dir}")
+        print("Run the HTML export first with:")
+        print(" python sfs_processor.py --formats html")
+        sys.exit(1)
+
+    # SimpleHTTPRequestHandler serves files relative to the working directory.
+    os.chdir(html_dir)
+
+    handler = http.server.SimpleHTTPRequestHandler
+
+    try:
+        # An empty host binds to all interfaces, so the site is also
+        # reachable from other machines on the network while this runs.
+        httpd = ReusableTCPServer(("", port), handler)
+    except OSError as err:
+        print(f"Error: Could not start server on port {port}: {err}")
+        sys.exit(1)
+
+    with httpd:
+        print(f"Serving HTML site at http://localhost:{port}")
+        print(f"Directory: {html_dir}")
+        print("Press Ctrl+C to stop")
+
+        try:
+            httpd.serve_forever()
+        except KeyboardInterrupt:
+            print("\nShutting down server...")
+
+
+if __name__ == "__main__":
+    main()