Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
__pycache__/
*.pyc
.pytest_cache/
21 changes: 20 additions & 1 deletion init.md
Original file line number Diff line number Diff line change
@@ -1 +1,20 @@
Initializing the repo
# Stoplight Migrator

This repository now includes a Python-based CLI for converting Stoplight documentation to [Fern](https://buildwithfern.com/) docs. The tool reads a Stoplight project's table of contents and markdown pages, then writes the matching markdown files and adds the corresponding navigation entries to `docs.yml`.

## Usage

```bash
PYTHONPATH=src python -m stoplight_migrator.cli <source> --docs-yml docs.yml --docs-root docs
```

- `<source>` can be either a path to a local Stoplight export (a directory containing a `table_of_contents.json` file) or a hosted Stoplight docs URL.
- Generated markdown files are written to `<docs-root>/pages` by default.
- Use `--append-navigation` to append the generated navigation instead of replacing the existing section.
- Pass `--dry-run` to preview the updated `docs.yml` without writing files.

Run the test suite with:

```bash
pytest
```
10 changes: 10 additions & 0 deletions src/stoplight_migrator/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Utilities for migrating Stoplight documentation to Fern docs."""

from .migrator import StoplightMigrator
from .clients import StoplightDirectoryClient, StoplightHostedDocsClient

# Names re-exported at the package root; each one is imported above from
# the .migrator or .clients submodule.
__all__ = [
"StoplightMigrator",
"StoplightDirectoryClient",
"StoplightHostedDocsClient",
]
76 changes: 76 additions & 0 deletions src/stoplight_migrator/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""Command line interface for the Stoplight to Fern migrator."""

from __future__ import annotations

import argparse
from pathlib import Path
from typing import Optional

from .clients import StoplightDirectoryClient, StoplightHostedDocsClient
from .migrator import MigrationConfig, StoplightMigrator


def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the migrator command line.

    Returns:
        A parser with one positional ``source`` argument, three optional
        path options (``--docs-yml``, ``--docs-root``, ``--pages-dir``) and
        two boolean switches (``--append-navigation``, ``--dry-run``).
    """
    cli = argparse.ArgumentParser(description="Migrate Stoplight documentation to Fern docs")
    cli.add_argument(
        "source",
        help="Stoplight docs URL or path to a Stoplight export directory containing table_of_contents.json",
    )

    # (flag, destination attribute, default value, help text)
    path_options = (
        (
            "--docs-yml",
            "docs_yml",
            "docs.yml",
            "Path to the Fern docs.yml file (default: docs.yml)",
        ),
        (
            "--docs-root",
            "docs_root",
            "docs",
            "Root directory containing Fern docs content (default: docs)",
        ),
        (
            "--pages-dir",
            "pages_dir",
            None,
            "Directory to write generated markdown pages (default: <docs-root>/pages)",
        ),
    )
    for flag, destination, fallback, description in path_options:
        cli.add_argument(flag, dest=destination, default=fallback, help=description)

    # (flag, help text) for options that are simply on or off.
    switches = (
        (
            "--append-navigation",
            "Append generated navigation instead of replacing the existing navigation section",
        ),
        (
            "--dry-run",
            "Perform a dry run without writing any files",
        ),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    return cli


def main(argv: Optional[list[str]] = None) -> int:
    """Run the migration described by the command-line arguments.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read the
            process arguments.

    Returns:
        ``0`` once the migration has finished. Failures surface as
        exceptions raised by the client or the migrator.
    """
    options = build_parser().parse_args(argv)

    # A source that exists on disk is treated as a local export; anything
    # else is handed to the hosted-docs client as a URL.
    source_path = Path(options.source)
    if source_path.exists():
        client = StoplightDirectoryClient(source_path)
    else:
        client = StoplightHostedDocsClient(options.source)

    if options.pages_dir:
        pages_dir = Path(options.pages_dir)
    else:
        pages_dir = Path(options.docs_root) / "pages"

    StoplightMigrator(
        client=client,
        config=MigrationConfig(
            docs_yml_path=Path(options.docs_yml),
            pages_dir=pages_dir,
            overwrite_navigation=not options.append_navigation,
            dry_run=options.dry_run,
        ),
    ).migrate()
    return 0


if __name__ == "__main__":  # pragma: no cover - CLI entry point
    # Hand main()'s return value to the interpreter as the exit status.
    exit_status = main()
    raise SystemExit(exit_status)

274 changes: 274 additions & 0 deletions src/stoplight_migrator/clients.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
"""Clients for retrieving Stoplight documentation content."""

from __future__ import annotations

import glob
import json
import re
import urllib.parse
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Optional

from .toc import StoplightNode, TocParser


class StoplightClient:
    """Base interface shared by every source of Stoplight documentation.

    Both methods raise ``NotImplementedError`` here; concrete clients
    override them.
    """

    def load_tree(self) -> List[StoplightNode]:
        """Return the parsed table-of-contents nodes."""
        raise NotImplementedError()

    def get_markdown(self, node: StoplightNode) -> Optional[str]:
        """Return the markdown for ``node``, or ``None`` when there is none."""
        raise NotImplementedError()


@dataclass
class StoplightDirectoryClient(StoplightClient):
    """Reads Stoplight documentation from a local export directory.

    Raises:
        FileNotFoundError: On construction when ``root`` does not exist.
    """

    # Directory holding the export; coerced to ``Path`` in __post_init__.
    root: Path
    # File name of the table of contents searched for under ``root``.
    toc_filename: str = "table_of_contents.json"
    # Name of the directory expected to contain the markdown documents.
    documents_dirname: str = "documents"

    # Deliberately unannotated so the dataclass machinery does not turn it
    # into an instance field. Extensions are tried in this order.
    _MARKDOWN_EXTENSIONS = (".md", ".mdx", ".markdown")

    def __post_init__(self) -> None:
        """Normalize ``root`` to a ``Path`` and fail fast if it is missing."""
        self.root = Path(self.root)
        if not self.root.exists():
            raise FileNotFoundError(f"Stoplight export directory '{self.root}' does not exist")

    def _load_raw_tree(self) -> Iterable[dict]:
        """Load the table-of-contents JSON and return its list of entries.

        A top-level list is returned as-is. For a top-level object, the
        first of ``items``, ``contents`` or ``children`` holding a list is
        returned.

        Raises:
            FileNotFoundError: If no table-of-contents file can be located.
            ValueError: If the JSON has neither of the supported shapes.
        """
        toc_path = self._find_table_of_contents()
        with toc_path.open("r", encoding="utf-8") as file:
            data = json.load(file)
        if isinstance(data, dict):
            for key in ("items", "contents", "children"):
                if key in data and isinstance(data[key], list):
                    return data[key]
        if isinstance(data, list):
            return data
        raise ValueError(f"Unsupported table of contents format: {type(data)}")

    def _find_table_of_contents(self) -> Path:
        """Locate the table-of-contents file, preferring ``root`` itself.

        Falls back to a recursive search below ``root`` when the file is not
        directly inside it.

        Raises:
            FileNotFoundError: If no matching file exists anywhere under
                ``root``.
        """
        toc_path = self.root / self.toc_filename
        if toc_path.exists():
            return toc_path
        for candidate in self.root.rglob(self.toc_filename):
            if candidate.is_file():
                return candidate
        raise FileNotFoundError(
            f"Unable to locate a '{self.toc_filename}' file under {self.root}. "
            "Expected a Stoplight export directory containing Stoplight metadata."
        )

    def load_tree(self) -> List[StoplightNode]:
        """Parse the table of contents into nodes via ``TocParser``."""
        parser = TocParser(self._load_raw_tree())
        return parser.parse()

    def get_markdown(self, node: StoplightNode) -> Optional[str]:
        """Return the markdown for ``node``, or ``None`` if none is found.

        Lookup order: a file directly inside the documents directory, then
        a recursive search below ``root``, then markdown embedded in the
        node's raw data.
        """
        slug = node.slug or node.id or node.raw.get("slug")
        if not slug:
            return None
        documents_dir = self.root / self.documents_dirname
        for candidate in self._candidate_markdown_paths(documents_dir, slug):
            # is_file() rather than exists(): a directory that happens to be
            # named like a markdown file must not be passed to read_text().
            if candidate.is_file():
                return candidate.read_text(encoding="utf-8")
        fallback = self._find_markdown_file(slug)
        if fallback is not None:
            return fallback.read_text(encoding="utf-8")
        # Some Stoplight exports embed markdown inline under the node.
        return extract_markdown_from_node(node.raw)

    @staticmethod
    def _slug_to_basename(slug: str) -> str:
        """Flatten a slug into a file base name (``/`` becomes ``-``)."""
        return slug.replace("/", "-")

    @staticmethod
    def _candidate_markdown_paths(documents_dir: Path, slug: str) -> Iterable[Path]:
        """Yield the paths in ``documents_dir`` that could hold ``slug``."""
        base = StoplightDirectoryClient._slug_to_basename(slug)
        for extension in StoplightDirectoryClient._MARKDOWN_EXTENSIONS:
            yield documents_dir / f"{base}{extension}"

    def _find_markdown_file(self, slug: str) -> Optional[Path]:
        """Search below ``root`` for a markdown file matching ``slug``.

        Only regular files that sit beneath a directory named
        ``documents_dirname`` (relative to ``root``) are accepted.
        """
        # The slug comes from the export's metadata and is used as a glob
        # pattern, so wildcard characters in it must be matched literally.
        base = glob.escape(self._slug_to_basename(slug))
        for extension in self._MARKDOWN_EXTENSIONS:
            for candidate in self.root.rglob(f"{base}{extension}"):
                if not candidate.is_file():
                    continue
                # Inspect only the directories below root: the root's own
                # path components say nothing about the export layout.
                parent_dirs = candidate.relative_to(self.root).parts[:-1]
                if self.documents_dirname in parent_dirs:
                    return candidate
        return None


class StoplightHostedDocsClient(StoplightClient):
    """Fetches documentation from a hosted Stoplight Elements site."""

    def __init__(self, base_url: str) -> None:
        """Download and index the site found at ``base_url``.

        Raises:
            ValueError: If ``base_url`` is empty.
            RuntimeError: If the page data or the table of contents cannot
                be found.
        """
        if not base_url:
            raise ValueError("Base URL must be provided")
        self.base_url = base_url.rstrip("/")
        self._next_data: Optional[dict] = None
        self._nodes_by_id: Dict[str, dict] = {}
        self._toc: List[StoplightNode] = []
        self._load()

    def _load(self) -> None:
        """Fetch the page data, index its nodes and parse the table of contents."""
        payload = self._fetch_next_data()
        if payload is None:
            raise RuntimeError(
                "Unable to locate Next.js data from Stoplight documentation site."
            )
        self._next_data = payload
        self._nodes_by_id = collect_nodes_by_id(payload)

        toc_entries = find_table_of_contents(payload)
        if toc_entries is None:
            raise RuntimeError("Failed to find table of contents in Stoplight data")
        self._toc = TocParser(toc_entries).parse()

    def _fetch_next_data(self) -> Optional[dict]:
        """Return the ``__NEXT_DATA__`` JSON from the base page, or ``None``.

        The ``<script id="__NEXT_DATA__">`` tag is tried first; when it is
        absent or holds invalid JSON, a ``window.__NEXT_DATA__ = {...}``
        assignment is tried instead.
        """
        page = http_get(self.base_url)
        if page is None:
            return None

        script_tag = re.search(
            r'<script[^>]*id=["\']__NEXT_DATA__["\'][^>]*>(.*?)</script>',
            page,
            re.DOTALL,
        )
        if script_tag is not None:
            try:
                return json.loads(script_tag.group(1).strip())
            except json.JSONDecodeError:
                pass  # fall through to the assignment form

        assignment = re.search(r"window\.__NEXT_DATA__\s*=\s*(\{)", page)
        if assignment is None:
            return None
        object_text = _extract_json_object(page, assignment.start(1))
        if not object_text:
            return None
        try:
            return json.loads(object_text)
        except json.JSONDecodeError:
            return None

    def load_tree(self) -> List[StoplightNode]:
        """Return a shallow copy of the parsed table of contents."""
        return [*self._toc]

    def get_markdown(self, node: StoplightNode) -> Optional[str]:
        """Return the markdown for ``node``, or ``None`` if none is found.

        Lookup order: markdown embedded in the node, markdown on the
        indexed node sharing its ``id``/``targetId``, then an HTTP request
        for ``<base_url>/<slug>.md``.
        """
        payload = node.raw
        embedded = extract_markdown_from_node(payload)
        if embedded:
            return embedded

        lookup_id = payload.get("id") or payload.get("targetId")
        if lookup_id and lookup_id in self._nodes_by_id:
            linked = extract_markdown_from_node(self._nodes_by_id[lookup_id])
            if linked:
                return linked

        slug = node.slug or payload.get("slug")
        if not slug:
            return None
        page_url = urllib.parse.urljoin(self.base_url + "/", f"{slug}.md")
        fetched = http_get(page_url)
        # An empty body counts as "no markdown".
        return fetched if fetched else None


def http_get(url: str, timeout: Optional[float] = 30.0) -> Optional[str]:
    """Fetch ``url`` and return the decoded response body.

    Args:
        url: Address to request.
        timeout: Seconds to wait on blocking network operations before
            giving up. ``None`` disables the limit. A finite default keeps
            a stalled server from hanging the caller forever.

    Returns:
        The body decoded with the charset declared by the response (UTF-8
        when none is declared or the declared one is unknown), or ``None``
        when the request fails for any reason.
    """
    try:
        request = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(request, timeout=timeout) as response:
            charset = response.headers.get_content_charset() or "utf-8"
            body = response.read()
    except Exception:
        # Deliberate best effort: callers treat None as "not available" and
        # move on to their next strategy, so malformed URLs, network errors
        # and timeouts are all reported the same way.
        return None
    try:
        return body.decode(charset, errors="replace")
    except LookupError:
        # The server declared a charset Python does not know; keep the page
        # rather than discarding it.
        return body.decode("utf-8", errors="replace")


def collect_nodes_by_id(data: dict) -> Dict[str, dict]:
    """Index every node-like dict nested anywhere inside ``data`` by its id.

    A dict counts as a node when it has a truthy ``id`` and a truthy
    ``type`` or ``kind``. Dicts and lists are walked depth-first in
    document order, so a later node replaces an earlier one with the same
    id.
    """
    index: Dict[str, dict] = {}
    pending: List[object] = [data]
    while pending:
        current = pending.pop()
        if isinstance(current, dict):
            identifier = current.get("id")
            category = current.get("type") or current.get("kind")
            if identifier and category:
                index[identifier] = current
            children = list(current.values())
        elif isinstance(current, list):
            children = current
        else:
            continue
        # Push in reverse so the first child is the next one popped.
        pending.extend(reversed(children))
    return index


def find_table_of_contents(data: dict) -> Optional[Iterable[dict]]:
    """Return the first list in ``data`` that looks like a table of contents.

    Dicts and lists are walked depth-first. In each dict the keys
    ``tableOfContents``, ``toc``, ``tree``, ``items``, ``contents`` and
    ``children`` are checked, in that order, before descending into the
    dict's values.

    Returns:
        The first list accepted by ``_looks_like_toc``, or ``None`` when
        nothing qualifies.
    """
    toc_keys = ("tableOfContents", "toc", "tree", "items", "contents", "children")

    def matches(value: object) -> Iterable[Iterable[dict]]:
        # Lazily yields candidates in traversal order.
        if isinstance(value, dict):
            for key in toc_keys:
                candidate = value.get(key)
                if isinstance(candidate, list) and _looks_like_toc(candidate):
                    yield candidate
            for child in value.values():
                yield from matches(child)
        elif isinstance(value, list):
            for child in value:
                yield from matches(child)

    # Only the first match is ever used, so stop walking once it is found
    # instead of scanning the rest of the payload.
    return next(matches(data), None)


def _looks_like_toc(items: Iterable[dict]) -> bool:
sample = list(items)[:5]
if not sample:
return False
return all(isinstance(item, dict) and any(k in item for k in ("type", "kind", "title")) for item in sample)


def extract_markdown_from_node(node: dict) -> Optional[str]:
    """Return markdown embedded in ``node``, or ``None`` if there is none.

    The locations ``markdown``, ``data.markdown``, ``document.markdown``
    and ``body.markdown`` are tried in that order. A string found there is
    returned directly. A dict found there is searched for a string under
    ``content``, ``raw``, ``plain`` or ``text``, in that order.
    """
    locations = (
        ("markdown",),
        ("data", "markdown"),
        ("document", "markdown"),
        ("body", "markdown"),
    )
    text_keys = ("content", "raw", "plain", "text")

    for location in locations:
        found = node
        resolved = True
        for key in location:
            if isinstance(found, dict) and key in found:
                found = found[key]
            else:
                resolved = False
                break
        if not resolved:
            continue

        if isinstance(found, str):
            return found
        if isinstance(found, dict):
            for text_key in text_keys:
                text = found.get(text_key)
                if isinstance(text, str):
                    return text
    return None


def _extract_json_object(source: str, start: int) -> Optional[str]:
depth = 0
in_string = False
escape = False
for index in range(start, len(source)):
char = source[index]
if in_string:
if escape:
escape = False
elif char == "\\":
escape = True
elif char == '"':
in_string = False
else:
if char == '"':
in_string = True
elif char == "{":
depth += 1
elif char == "}":
depth -= 1
if depth == 0:
return source[start : index + 1]
return None

Loading