Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ HTML-filer kan publiceras via:
- **Cloudflare R2**: Med `html-export-workflow.yml` (kräver R2-credentials)
- **GitHub Pages**: Med `github-pages-workflow.yml` (enklare setup, kräver aktiverad GitHub Pages)

För att testa HTML-sajten lokalt:
```bash
python scripts/serve_html.py
```
Detta startar en lokal HTTP-server på `http://localhost:8000`. Du kan ange en annan port med `python scripts/serve_html.py 3000`.

### Vektor-format (för semantisk sökning)

- **`vector`**: Konverterar författningar till vektorembeddings för semantisk sökning och RAG-applikationer. Använder OpenAI:s text-embedding-3-large modell (3072 dimensioner) och stödjer lagring i PostgreSQL (pgvector), Elasticsearch eller JSON-fil.
Expand Down
6 changes: 6 additions & 0 deletions README_EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ HTML files can be published via:
- **Cloudflare R2**: Using `html-export-workflow.yml` (requires R2 credentials)
- **GitHub Pages**: Using `github-pages-workflow.yml` (simpler setup, requires GitHub Pages enabled)

To test the HTML site locally:
```bash
python scripts/serve_html.py
```
This starts a local HTTP server on `http://localhost:8000`. You can specify a different port with `python scripts/serve_html.py 3000`.

### Vector Format (for semantic search)

- **`vector`**: Converts legislation to vector embeddings for semantic search and RAG applications. Uses OpenAI's text-embedding-3-large model (3072 dimensions) and supports storage in PostgreSQL (pgvector), Elasticsearch, or JSON file.
Expand Down
21 changes: 14 additions & 7 deletions exporters/html/html_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,19 +348,25 @@ def convert_to_html(data: Dict[str, Any], apply_amendments: bool = False, up_to_
def make_links_relative(html_content: str) -> str:
"""
Strip base URL from links to make them relative for HTML export.

Removes https://selex.se/eli from links to make them relative.


Removes base domain (e.g., https://selex.se/eli) from links to make them relative.
This works with <base href="/eli/"> to resolve paths correctly.

Args:
html_content (str): HTML content with potentially absolute links

Returns:
str: HTML content with relative links
"""
# Pattern to match https://selex.se/eli in href attributes
pattern = r'href="https://selex\.se/eli(/[^"]*)"'
# Get base URL from environment (with /eli/)
base_url = os.getenv('INTERNAL_LINKS_BASE_URL', 'https://selex.se/eli')

# Pattern to match base_url in href attributes
# Escape special regex characters in base_url
escaped_base = re.escape(base_url)
pattern = rf'href="{escaped_base}(/[^"]*)"'
replacement = r'href="\1"'

return re.sub(pattern, replacement, html_content)


Expand Down Expand Up @@ -651,6 +657,7 @@ def create_html_head(title: str, beteckning: str, additional_styles: str = "", a
head = f"""<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<base href="/eli/">
<title>{html.escape(title)}</title>{eli_metadata}
{navbar_script}
{base_styles}
Expand Down
16 changes: 8 additions & 8 deletions formatters/apply_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def apply_sfs_links(text: str) -> str:
# Matchar mönster som "2002:43", "1970:485", etc.
sfs_pattern = SFS_PATTERN

# Använd alltid https://selex.se/eli som default om env variabel inte finns
# Använd base URL med /eli/ prefix (default: https://selex.se/eli)
base_url = os.getenv('INTERNAL_LINKS_BASE_URL', 'https://selex.se/eli')

# TODO: Slå upp SFS-beteckning mot JSON-fil för att verifiera giltighet
Expand Down Expand Up @@ -238,9 +238,9 @@ def apply_law_name_links(text: str) -> str:
if not law_names_data:
return text

# Hämta bas-URL från miljövariabler - använd alltid https://selex.se/eli som default
# Hämta bas-URL från miljövariabler (med /eli/ prefix)
base_url = os.getenv('INTERNAL_LINKS_BASE_URL', 'https://selex.se/eli')

# Processar texten rad för rad för att undvika att länka rubriker
lines = text.split('\n')
processed_lines = []
Expand All @@ -257,22 +257,22 @@ def replace_law_name_reference(match):
paragraph_part = match.group(2).strip()
law_name = match.group(3).lower()
full_match = match.group(0)

# Leta upp lagnamnet i data
sfs_id = _lookup_law_name(law_name, law_names_data)

if not sfs_id:
print(f"Varning: Okänt lagnamn '{law_name}' i referens '{full_match}'")
return full_match # Returnera oförändrat om lagnamnet inte hittas

# Extrahera år och nummer från SFS-ID (format: "YYYY:NNN")
id_parts = sfs_id.split(':')
if len(id_parts) != 2:
print(f"Varning: Ogiltigt SFS-ID format '{sfs_id}' för lagnamn '{law_name}'")
return full_match

year, number = id_parts

# Skapa bas-URL
url = f"{base_url}/sfs/{year}/{number}"

Expand Down
51 changes: 51 additions & 0 deletions scripts/serve_html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
"""
Simple HTTP server to serve the HTML site locally for testing.
Run with: python serve_html.py [port]
Default port is 8000.
"""

import http.server
import socketserver
import sys
import os
from pathlib import Path

def main():
# Default port
PORT = 8000

# Allow custom port from command line
if len(sys.argv) > 1:
try:
PORT = int(sys.argv[1])
except ValueError:
print(f"Error: Invalid port number '{sys.argv[1]}'")
sys.exit(1)

# Change to the HTML site directory (go up to project root first)
html_dir = Path(__file__).parent.parent / "output" / "html_site"

if not html_dir.exists():
print(f"Error: HTML site directory not found at {html_dir}")
print("Run the HTML export first with:")
print(" python sfs_processor.py --formats html")
sys.exit(1)

os.chdir(html_dir)

# Create server
Handler = http.server.SimpleHTTPRequestHandler

with socketserver.TCPServer(("", PORT), Handler) as httpd:
print(f"Serving HTML site at http://localhost:{PORT}")
print(f"Directory: {html_dir}")
print("Press Ctrl+C to stop")

try:
httpd.serve_forever()
except KeyboardInterrupt:
print("\nShutting down server...")

if __name__ == "__main__":
main()