diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..75c6182 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +*.pyc +.pytest_cache/ diff --git a/init.md b/init.md index 2620500..ce69911 100644 --- a/init.md +++ b/init.md @@ -1 +1,20 @@ -Initializing the repo +# Stoplight Migrator + +This repository now includes a Python-based CLI for converting Stoplight documentation to [Fern](https://buildwithfern.com/) docs. The tool reads a Stoplight project's table of contents and markdown pages, then produces the matching markdown files and navigation entries inside `docs.yml`. + +## Usage + +```bash +PYTHONPATH=src python -m stoplight_migrator.cli <source> --docs-yml docs.yml --docs-root docs +``` + +- `<source>` can either be a path to a local Stoplight export (containing a `table_of_contents.json` file) or a hosted Stoplight docs URL. +- Generated markdown files are written to `<docs-root>/pages` by default. +- Use `--append-navigation` to append the generated navigation instead of replacing the existing section. +- Pass `--dry-run` to preview the updated `docs.yml` without writing files. 
def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the Stoplight-to-Fern migration CLI.

    Accepts one positional ``source`` (a Stoplight docs URL or a local export
    directory containing ``table_of_contents.json``) plus options controlling
    where the Fern ``docs.yml`` and the generated markdown pages are written.

    Returns:
        A configured :class:`argparse.ArgumentParser`.
    """
    parser = argparse.ArgumentParser(description="Migrate Stoplight documentation to Fern docs")
    parser.add_argument(
        "source",
        help="Stoplight docs URL or path to a Stoplight export directory containing table_of_contents.json",
    )
    parser.add_argument(
        "--docs-yml",
        dest="docs_yml",
        default="docs.yml",
        help="Path to the Fern docs.yml file (default: docs.yml)",
    )
    parser.add_argument(
        "--docs-root",
        dest="docs_root",
        default="docs",
        help="Root directory containing Fern docs content (default: docs)",
    )
    parser.add_argument(
        "--pages-dir",
        dest="pages_dir",
        default=None,
        # Fix: the "<docs-root>" placeholder had been stripped from this help
        # text (it read "default: /pages"), which misstated the default.
        help="Directory to write generated markdown pages (default: <docs-root>/pages)",
    )
    parser.add_argument(
        "--append-navigation",
        action="store_true",
        help="Append generated navigation instead of replacing the existing navigation section",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Perform a dry run without writing any files",
    )
    return parser
FileNotFoundError(f"Stoplight export directory '{self.root}' does not exist") + + def _load_raw_tree(self) -> Iterable[dict]: + toc_path = self._find_table_of_contents() + with toc_path.open("r", encoding="utf-8") as file: + data = json.load(file) + if isinstance(data, dict): + for key in ("items", "contents", "children"): + if key in data and isinstance(data[key], list): + return data[key] + if isinstance(data, list): + return data + raise ValueError(f"Unsupported table of contents format: {type(data)}") + + def _find_table_of_contents(self) -> Path: + toc_path = self.root / self.toc_filename + if toc_path.exists(): + return toc_path + for candidate in self.root.rglob(self.toc_filename): + if candidate.is_file(): + return candidate + raise FileNotFoundError( + f"Unable to locate a '{self.toc_filename}' file under {self.root}. " + "Expected a Stoplight export directory containing Stoplight metadata." + ) + + def load_tree(self) -> List[StoplightNode]: + parser = TocParser(self._load_raw_tree()) + return parser.parse() + + def get_markdown(self, node: StoplightNode) -> Optional[str]: + slug = node.slug or node.id or node.raw.get("slug") + if not slug: + return None + documents_dir = self.root / self.documents_dirname + for candidate in self._candidate_markdown_paths(documents_dir, slug): + if candidate.exists(): + return candidate.read_text(encoding="utf-8") + fallback = self._find_markdown_file(slug) + if fallback and fallback.exists(): + return fallback.read_text(encoding="utf-8") + # Some Stoplight exports embed markdown inline under the node. 
+ return extract_markdown_from_node(node.raw) + + @staticmethod + def _candidate_markdown_paths(documents_dir: Path, slug: str) -> Iterable[Path]: + base = slug.replace("/", "-") + extensions = (".md", ".mdx", ".markdown") + for extension in extensions: + yield documents_dir / f"{base}{extension}" + + def _find_markdown_file(self, slug: str) -> Optional[Path]: + base = slug.replace("/", "-") + extensions = (".md", ".mdx", ".markdown") + for extension in extensions: + pattern = f"{base}{extension}" + for candidate in self.root.rglob(pattern): + if self.documents_dirname in candidate.parts: + return candidate + return None + + +class StoplightHostedDocsClient(StoplightClient): + """Fetches documentation from a hosted Stoplight Elements site.""" + + def __init__(self, base_url: str) -> None: + if not base_url: + raise ValueError("Base URL must be provided") + self.base_url = base_url.rstrip("/") + self._next_data: Optional[dict] = None + self._nodes_by_id: Dict[str, dict] = {} + self._toc: List[StoplightNode] = [] + self._load() + + def _load(self) -> None: + next_data = self._fetch_next_data() + if next_data is None: + raise RuntimeError( + "Unable to locate Next.js data from Stoplight documentation site." 
def collect_nodes_by_id(data: dict) -> Dict[str, dict]:
    """Index every dict in *data* that looks like a Stoplight node.

    A "node" is any mapping carrying an ``id`` plus a ``type`` (or ``kind``).
    The whole structure is walked depth-first, descending into nested lists
    and mapping values; when the same id appears twice, the later one in
    traversal order wins.
    """
    index: Dict[str, dict] = {}

    def walk(value: object) -> None:
        if isinstance(value, list):
            for element in value:
                walk(element)
            return
        if not isinstance(value, dict):
            return
        identifier = value.get("id")
        if identifier and (value.get("type") or value.get("kind")):
            index[identifier] = value
        for element in value.values():
            walk(element)

    walk(data)
    return index
char == '"': + in_string = False + else: + if char == '"': + in_string = True + elif char == "{": + depth += 1 + elif char == "}": + depth -= 1 + if depth == 0: + return source[start : index + 1] + return None + diff --git a/src/stoplight_migrator/migrator.py b/src/stoplight_migrator/migrator.py new file mode 100644 index 0000000..d62703f --- /dev/null +++ b/src/stoplight_migrator/migrator.py @@ -0,0 +1,152 @@ +"""Core logic for migrating Stoplight documentation into Fern docs.""" + +from __future__ import annotations + +from collections import OrderedDict +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Optional + +from .clients import StoplightClient +from .simple_yaml import dump as dump_yaml +from .simple_yaml import load as load_yaml +from .toc import StoplightNode, slugify + + +@dataclass +class MigrationConfig: + docs_yml_path: Path + pages_dir: Path + overwrite_navigation: bool = True + dry_run: bool = False + + +class StoplightMigrator: + """Migrates Stoplight documentation to Fern docs.""" + + def __init__(self, client: StoplightClient, config: MigrationConfig) -> None: + self.client = client + self.config = config + self.slug_registry: Dict[str, int] = {} + self.pages_dir = config.pages_dir + self.pages_dir.mkdir(parents=True, exist_ok=True) + + def migrate(self) -> None: + nodes = self.client.load_tree() + navigation = [] + for node in nodes: + nav_item = self._convert_node(node) + if nav_item is not None: + navigation.append(nav_item) + docs_config = self._load_docs_yml() + if "navigation" not in docs_config or not self.config.overwrite_navigation: + existing = docs_config.get("navigation", []) + if not isinstance(existing, list): + existing = [] + navigation = existing + navigation + docs_config["navigation"] = navigation + if self.config.dry_run: + print(dump_yaml(docs_config)) + else: + self._write_docs_yml(docs_config) + + def _load_docs_yml(self) -> OrderedDict: + path = self.config.docs_yml_path + if 
def _convert_node(self, node: StoplightNode) -> Optional[OrderedDict]:
    """Convert one Stoplight TOC node into a Fern navigation entry.

    Markdown nodes become ``page`` entries; section nodes become ``section``
    entries built from their converted children.  Unknown node types with
    children are treated as sections when any child converts; otherwise a
    plain page entry is attempted.  Returns ``None`` when the node yields no
    content (e.g. an empty section, or a page with no markdown).
    """
    if node.is_markdown():
        return self._create_page_entry(node)
    if node.is_section():
        return self._build_section_entry(node)
    if node.children:
        entry = self._build_section_entry(node)
        if entry is not None:
            return entry
    # Fall through: leaf node of unknown type — try it as a page.
    return self._create_page_entry(node)

def _build_section_entry(self, node: StoplightNode) -> Optional[OrderedDict]:
    """Build a 'section' navigation entry from *node*'s children.

    Previously this logic was duplicated verbatim in two branches of
    ``_convert_node``.  Children are converted first so their slugs are
    registered before the section's own slug, preserving the original
    slug-uniqueness ordering.  Returns ``None`` when no child converts.
    """
    contents = [
        converted
        for converted in (self._convert_node(child) for child in node.children)
        if converted is not None
    ]
    if not contents:
        return None
    entry = OrderedDict()
    entry["section"] = node.title
    entry["slug"] = self._ensure_unique_slug(node.slug or slugify(node.title))
    entry["contents"] = contents
    return entry
f"slug: {slug}", + f"title: {self._format_front_matter_value(title)}", + "---", + "", + ] + body = markdown.strip() + if body: + front_matter_lines.append(body) + front_matter_lines.append("") + return "\n".join(front_matter_lines) + + @staticmethod + def _format_front_matter_value(value: str) -> str: + if not value: + return "''" + requires_quotes = any(ch in value for ch in "\n:" ) or value != value.strip() + if requires_quotes: + escaped = value.replace("\\", "\\\\").replace('"', '\\"') + return f'"{escaped}"' + return value + + def _ensure_unique_slug(self, slug: str) -> str: + slug = slugify(slug) + if slug not in self.slug_registry: + self.slug_registry[slug] = 1 + return slug + counter = self.slug_registry[slug] + 1 + base = slug + while f"{base}-{counter}" in self.slug_registry: + counter += 1 + unique_slug = f"{base}-{counter}" + self.slug_registry[base] = counter + self.slug_registry[unique_slug] = 1 + return unique_slug + diff --git a/src/stoplight_migrator/simple_yaml.py b/src/stoplight_migrator/simple_yaml.py new file mode 100644 index 0000000..15c20e6 --- /dev/null +++ b/src/stoplight_migrator/simple_yaml.py @@ -0,0 +1,229 @@ +"""A tiny YAML loader/dumper sufficient for Fern docs configuration files.""" + +from __future__ import annotations + +from collections import OrderedDict +from dataclasses import dataclass +from typing import Any, Iterable, List, Tuple + + +@dataclass +class _Line: + text: str + indent: int + + +class YamlError(RuntimeError): + """Raised when parsing of YAML input fails.""" + + +class SimpleYaml: + """Parses a limited subset of YAML that covers Fern docs configs.""" + + indent_size = 2 + + def __init__(self, lines: Iterable[str]): + self.lines: List[_Line] = [ + _Line(text=line.rstrip("\n"), indent=len(line) - len(line.lstrip(" "))) + for line in lines + ] + self.index = 0 + + def parse(self) -> Any: + value = self._parse_block(expected_indent=0) + self._skip_blank_lines() + if self.index != len(self.lines): + raise 
YamlError("Unexpected trailing content in YAML input") + return value + + def _peek(self) -> Tuple[int, _Line]: + while self.index < len(self.lines): + line = self.lines[self.index] + if not line.text.strip() or line.text.lstrip().startswith("#"): + self.index += 1 + continue + return self.index, line + return self.index, _Line(text="", indent=0) + + def _skip_blank_lines(self) -> None: + while self.index < len(self.lines): + text = self.lines[self.index].text.strip() + if text and not text.startswith("#"): + break + self.index += 1 + + def _parse_block(self, expected_indent: int) -> Any: + collection: Any = None + items_list: List[Any] = [] + mapping = OrderedDict() + + while True: + saved_index = self.index + index, line = self._peek() + if index >= len(self.lines): + break + if line.indent < expected_indent: + self.index = saved_index + break + if line.indent > expected_indent: + raise YamlError("Unexpected indentation") + text = line.text.strip() + if text == "-" or text.startswith("- "): + if collection not in (None, "list"): + raise YamlError("Cannot mix list and mapping items") + collection = "list" + self.index = index + 1 + value = self._parse_list_item(text[1:].lstrip(), expected_indent) + items_list.append(value) + else: + if collection not in (None, "dict"): + raise YamlError("Cannot mix list and mapping items") + collection = "dict" + self.index = index + 1 + key, has_value, value_or_marker = self._parse_key_value(text) + if not has_value: + value = self._parse_block(expected_indent + self.indent_size) + elif value_or_marker == "__BLOCK__": + value = self._parse_block(expected_indent + self.indent_size) + else: + value = value_or_marker + mapping[key] = value + if collection == "list": + return items_list + if collection == "dict": + return mapping + return OrderedDict() + + def _parse_list_item(self, text: str, parent_indent: int) -> Any: + if not text: + return self._parse_block(parent_indent + self.indent_size) + if text.endswith(":"): + key = 
text[:-1].strip() + value = self._parse_block(parent_indent + self.indent_size) + return OrderedDict([(key, value)]) + if ":" in text: + key, remainder = text.split(":", 1) + key = key.strip() + remainder = remainder.strip() + if remainder: + value = _parse_scalar(remainder) + else: + value = self._parse_block(parent_indent + self.indent_size) + mapping = OrderedDict([(key, value)]) + self._collect_additional_mapping_entries(mapping, parent_indent + self.indent_size) + return mapping + value = _parse_scalar(text.strip()) + self._collect_additional_mapping_entries(value, parent_indent + self.indent_size) + return value + + def _collect_additional_mapping_entries(self, current: Any, indent: int) -> None: + if not isinstance(current, OrderedDict): + saved_index = self.index + index, line = self._peek() + if index < len(self.lines) and line.indent >= indent and not line.text.strip().startswith("- "): + raise YamlError("Unexpected structure following scalar list item") + self.index = saved_index + return + while True: + saved_index = self.index + index, line = self._peek() + if index >= len(self.lines) or line.indent < indent: + break + if line.indent > indent: + raise YamlError("Unexpected indentation in mapping") + text = line.text.strip() + if text.startswith("- "): + break + self.index = index + 1 + key, has_value, value_or_marker = self._parse_key_value(text) + if not has_value: + value = self._parse_block(indent + self.indent_size) + elif value_or_marker == "__BLOCK__": + value = self._parse_block(indent + self.indent_size) + else: + value = value_or_marker + current[key] = value + + def _parse_key_value(self, text: str) -> Tuple[str, bool, Any]: + if ":" not in text: + raise YamlError(f"Invalid mapping entry: {text}") + key, remainder = text.split(":", 1) + key = key.strip() + remainder = remainder.strip() + if not remainder: + return key, False, None + if remainder == "|" or remainder == ">": + raise YamlError("Multiline scalars are not supported") + return key, 
True, _parse_scalar(remainder) + + +def load(text: str) -> Any: + parser = SimpleYaml(text.splitlines()) + return parser.parse() + + +def dump(value: Any) -> str: + return "\n".join(_dump_value(value, indent=0)).rstrip() + "\n" + + +def _dump_value(value: Any, indent: int) -> List[str]: + prefix = " " * indent + if isinstance(value, list): + lines: List[str] = [] + for item in value: + if isinstance(item, (list, OrderedDict)): + lines.append(f"{prefix}-") + lines.extend(_dump_value(item, indent + SimpleYaml.indent_size)) + else: + lines.append(f"{prefix}- {_format_scalar(item)}") + return lines + if isinstance(value, OrderedDict): + lines = [] + for key, item in value.items(): + if isinstance(item, (list, OrderedDict)): + lines.append(f"{prefix}{key}:") + lines.extend(_dump_value(item, indent + SimpleYaml.indent_size)) + else: + lines.append(f"{prefix}{key}: {_format_scalar(item)}") + return lines + return [f"{prefix}{_format_scalar(value)}"] + + +def _format_scalar(value: Any) -> str: + if isinstance(value, bool): + return "true" if value else "false" + if value is None: + return "null" + if isinstance(value, (int, float)): + return str(value) + text = str(value) + if not text: + return "''" + if any(ch in text for ch in "\n:#{}[]\"'@`") or text.strip() != text: + escaped = text.replace("\\", "\\\\").replace("\"", "\\\"") + return f'"{escaped}"' + return text + + +def _parse_scalar(value: str) -> Any: + if not value: + return "" + lowered = value.lower() + if lowered == "null": + return None + if lowered == "true": + return True + if lowered == "false": + return False + try: + if "." 
@dataclass
class StoplightNode:
    """One node of the Stoplight documentation tree.

    Fields:
        type: normalised node kind (e.g. "article" or "group").
        title: display title used for navigation.
        slug: URL slug, when the source data provided one.
        id: Stoplight node identifier, when available.
        raw: the original, untouched TOC dictionary for this node.
        children: parsed child nodes (empty for leaf pages).
    """

    type: str
    title: str
    slug: Optional[str]
    id: Optional[str]
    raw: dict
    children: List["StoplightNode"] = field(default_factory=list)

    def is_markdown(self) -> bool:
        """True when this node represents a renderable markdown page."""
        return self.type in ("article", "markdown", "page", "md")

    def is_section(self) -> bool:
        """True when this node groups other nodes rather than holding content."""
        return self.type in ("group", "section", "chapter", "node", "http_service")
# Matches every run of characters that may not appear in a slug.
_slug_pattern = re.compile(r"[^a-z0-9]+")


def slugify(value: str) -> str:
    """Normalise *value* into a URL-safe slug.

    Lowercases the input, collapses every run of non-alphanumeric
    characters into a single dash, and trims dashes from both ends.
    Falls back to "page" when nothing survives the normalisation.
    """
    collapsed = _slug_pattern.sub("-", value.strip().lower())
    return collapsed.strip("-") or "page"
diff --git a/tests/fixtures/sample_project/documents/error-handling.md b/tests/fixtures/sample_project/documents/error-handling.md new file mode 100644 index 0000000..d57d3b3 --- /dev/null +++ b/tests/fixtures/sample_project/documents/error-handling.md @@ -0,0 +1,3 @@ +# Error Handling + +Errors return JSON with a `code` and `message`. diff --git a/tests/fixtures/sample_project/documents/overview.md b/tests/fixtures/sample_project/documents/overview.md new file mode 100644 index 0000000..ce5e0ce --- /dev/null +++ b/tests/fixtures/sample_project/documents/overview.md @@ -0,0 +1,3 @@ +# Overview + +Welcome to the sample Stoplight project. This page introduces the basics. diff --git a/tests/fixtures/sample_project/table_of_contents.json b/tests/fixtures/sample_project/table_of_contents.json new file mode 100644 index 0000000..8d78a61 --- /dev/null +++ b/tests/fixtures/sample_project/table_of_contents.json @@ -0,0 +1,30 @@ +{ + "items": [ + { + "id": "group-intro", + "type": "group", + "title": "Introduction", + "slug": "introduction", + "items": [ + { + "id": "doc-overview", + "type": "article", + "title": "Overview", + "slug": "overview" + }, + { + "id": "doc-authentication", + "type": "article", + "title": "Authentication", + "slug": "authentication" + } + ] + }, + { + "id": "doc-errors", + "type": "article", + "title": "Error Handling", + "slug": "error-handling" + } + ] +} diff --git a/tests/test_clients.py b/tests/test_clients.py new file mode 100644 index 0000000..7433973 --- /dev/null +++ b/tests/test_clients.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import json +import tempfile +from pathlib import Path +from typing import Dict +from unittest import TestCase +from unittest.mock import patch + +from stoplight_migrator.clients import StoplightDirectoryClient, StoplightHostedDocsClient + + +class StoplightDirectoryClientTests(TestCase): + def test_finds_nested_table_of_contents_file(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + 
root = Path(tmp) + nested = root / "export" / "data" + nested.mkdir(parents=True) + toc_path = nested / "table_of_contents.json" + toc_path.write_text( + json.dumps( + [ + { + "id": "intro", + "title": "Introduction", + "type": "group", + "items": [ + {"id": "doc", "title": "Welcome", "type": "markdown", "slug": "welcome"} + ], + } + ] + ), + encoding="utf-8", + ) + documents = nested / "documents" + documents.mkdir() + (documents / "welcome.md").write_text("# Welcome", encoding="utf-8") + + client = StoplightDirectoryClient(root) + nodes = client.load_tree() + + self.assertEqual(len(nodes), 1) + child = nodes[0].children[0] + self.assertEqual(child.slug, "welcome") + markdown = client.get_markdown(child) + self.assertIn("# Welcome", markdown or "") + + +class StoplightHostedDocsClientTests(TestCase): + def _build_next_data(self) -> Dict[str, object]: + return { + "props": { + "pageProps": { + "tableOfContents": [ + {"id": "doc", "title": "Doc", "type": "markdown", "slug": "doc"} + ] + } + } + } + + @patch("stoplight_migrator.clients.http_get") + def test_parses_next_data_with_additional_script_attributes(self, mock_http_get) -> None: + base_url = "https://example.com/docs" + html = ( + '" + ) + + def side_effect(url: str) -> str | None: + if url == base_url: + return html + if url == f"{base_url}/doc.md": + return "# Doc" + return None + + mock_http_get.side_effect = side_effect + + client = StoplightHostedDocsClient(base_url) + nodes = client.load_tree() + + self.assertEqual(len(nodes), 1) + markdown = client.get_markdown(nodes[0]) + self.assertIn("# Doc", markdown or "") + + @patch("stoplight_migrator.clients.http_get") + def test_parses_window_assignment_fallback(self, mock_http_get) -> None: + base_url = "https://example.com/alt" + next_data = json.dumps(self._build_next_data()) + html = ( + "" + ) + + def side_effect(url: str) -> str | None: + if url == base_url: + return html + if url == f"{base_url}/doc.md": + return "# Doc" + return None + + 
mock_http_get.side_effect = side_effect + + client = StoplightHostedDocsClient(base_url) + nodes = client.load_tree() + + self.assertEqual(len(nodes), 1) + markdown = client.get_markdown(nodes[0]) + self.assertIn("# Doc", markdown or "") + diff --git a/tests/test_migrator.py b/tests/test_migrator.py new file mode 100644 index 0000000..1ae6570 --- /dev/null +++ b/tests/test_migrator.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +import shutil +import tempfile +import unittest +from pathlib import Path + +from stoplight_migrator.clients import StoplightDirectoryClient +from stoplight_migrator.migrator import MigrationConfig, StoplightMigrator +from stoplight_migrator.simple_yaml import load as load_yaml + + +class StoplightMigratorTests(unittest.TestCase): + def setUp(self) -> None: + self.temp_dir = Path(tempfile.mkdtemp()) + self.addCleanup(lambda: shutil.rmtree(self.temp_dir, ignore_errors=True)) + self.docs_yml = self.temp_dir / "docs.yml" + self.docs_yml.write_text( + "title: Sample Docs\nnavigation:\n - page: Existing\n path: docs/pages/existing.mdx\n", + encoding="utf-8", + ) + self.docs_root = self.temp_dir / "docs" + (self.docs_root / "pages").mkdir(parents=True, exist_ok=True) + + def test_migrator_generates_navigation_and_pages(self) -> None: + fixture_root = Path(__file__).parent / "fixtures" / "sample_project" + client = StoplightDirectoryClient(fixture_root) + config = MigrationConfig( + docs_yml_path=self.docs_yml, + pages_dir=self.docs_root / "pages", + overwrite_navigation=True, + ) + migrator = StoplightMigrator(client=client, config=config) + migrator.migrate() + + docs_config = load_yaml(self.docs_yml.read_text(encoding="utf-8")) + navigation = docs_config["navigation"] + self.assertEqual(len(navigation), 2) + introduction = navigation[0] + self.assertEqual(introduction["section"], "Introduction") + self.assertEqual(introduction["slug"], "introduction") + self.assertEqual(len(introduction["contents"]), 2) + overview = 
introduction["contents"][0] + self.assertEqual(overview["page"], "Overview") + self.assertEqual(overview["path"], "docs/pages/overview.mdx") + + error_page = navigation[1] + self.assertEqual(error_page["slug"], "error-handling") + + overview_file = self.docs_root / "pages" / "overview.mdx" + self.assertTrue(overview_file.exists()) + contents = overview_file.read_text(encoding="utf-8") + self.assertIn("slug: overview", contents) + self.assertIn("# Overview", contents) + + def test_append_navigation(self) -> None: + fixture_root = Path(__file__).parent / "fixtures" / "sample_project" + client = StoplightDirectoryClient(fixture_root) + config = MigrationConfig( + docs_yml_path=self.docs_yml, + pages_dir=self.docs_root / "pages", + overwrite_navigation=False, + ) + migrator = StoplightMigrator(client=client, config=config) + migrator.migrate() + + docs_config = load_yaml(self.docs_yml.read_text(encoding="utf-8")) + navigation = docs_config["navigation"] + self.assertEqual(len(navigation), 3) + self.assertEqual(navigation[0]["page"], "Existing") + self.assertEqual(navigation[1]["section"], "Introduction") + + +if __name__ == "__main__": # pragma: no cover + unittest.main() + diff --git a/tests/test_simple_yaml.py b/tests/test_simple_yaml.py new file mode 100644 index 0000000..0036ba4 --- /dev/null +++ b/tests/test_simple_yaml.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import unittest +from collections import OrderedDict + +from stoplight_migrator.simple_yaml import dump, load + + +class SimpleYamlTests(unittest.TestCase): + def test_round_trip(self) -> None: + original = OrderedDict( + [ + ("title", "Sample"), + ( + "navigation", + [ + OrderedDict( + [ + ("section", "Intro"), + ("contents", [OrderedDict([("page", "Welcome"), ("path", "docs/pages/welcome.mdx")])]), + ] + ) + ], + ), + ] + ) + text = dump(original) + loaded = load(text) + self.assertEqual(original, loaded) + + def test_parses_existing_docs_yml(self) -> None: + text = """title: 
Example\nlayout:\n tabs-placement: header\nnavigation:\n - page: Home\n path: docs/pages/home.mdx\n""" + data = load(text) + self.assertEqual(data["title"], "Example") + self.assertEqual(data["navigation"][0]["page"], "Home") + + +if __name__ == "__main__": # pragma: no cover + unittest.main() +