diff --git a/src/scribae/main.py b/src/scribae/main.py index 80a75ee..88107a2 100644 --- a/src/scribae/main.py +++ b/src/scribae/main.py @@ -5,6 +5,7 @@ from .brief_cli import brief_command from .idea_cli import idea_command from .meta_cli import meta_command +from .refine_cli import refine_command from .translate_cli import translate_command from .version_cli import version_command from .write_cli import write_command @@ -30,6 +31,7 @@ def app_callback() -> None: help="Generate a validated SEO brief (keywords, outline, FAQ, metadata) from a note.", )(brief_command) app.command("write", help="Draft an article from a note + SeoBrief JSON.")(write_command) +app.command("refine", help="Refine a draft using a validated SEO brief.")(refine_command) app.command("meta", help="Create publication metadata/frontmatter for a finished draft.")(meta_command) app.command("translate", help="Translate Markdown while preserving formatting (MT + post-edit).")(translate_command) app.command("version", help="Print the Scribae version.")(version_command) diff --git a/src/scribae/prompts/__init__.py b/src/scribae/prompts/__init__.py index 776329a..bebd115 100644 --- a/src/scribae/prompts/__init__.py +++ b/src/scribae/prompts/__init__.py @@ -5,4 +5,5 @@ "write", "idea", "meta", + "refine", ] diff --git a/src/scribae/prompts/refine.py b/src/scribae/prompts/refine.py new file mode 100644 index 0000000..2a5a293 --- /dev/null +++ b/src/scribae/prompts/refine.py @@ -0,0 +1,190 @@ +from __future__ import annotations + +import textwrap +from typing import Any + +from scribae.brief import SeoBrief +from scribae.project import ProjectConfig + +SYSTEM_PROMPT = textwrap.dedent( + """\ + You are a meticulous technical editor. + Output **Markdown only** (no frontmatter, no YAML, no HTML). + Refine the provided DRAFT section to better match the brief. + Preserve Markdown structure (lists, code blocks, tables) where possible. + Avoid inventing facts. Ground claims in the provided excerpts. + If evidence is required and missing, respond with exactly: "(no supporting evidence in the note)". + Do not add or rename section headings; the CLI will supply the heading. + """ +).strip() + + +def build_user_prompt( + *, + project: ProjectConfig, + brief: SeoBrief, + section_title: str, + draft_body: str, + note_snippets: str, + feedback: str | None, + evidence_mode: Any, + intensity: Any, + language: str, + apply_feedback: bool, + source_label: str, +) -> str: + """Render the structured user prompt for refining a section.""" + keywords = ", ".join(project["keywords"]) if project["keywords"] else "none" + snippets_block = note_snippets.strip() or "(no relevant excerpts)" + feedback_block = feedback.strip() if feedback else "(no feedback provided)" + evidence_rule = _format_evidence_rule(evidence_mode) + intensity_rules = _format_intensity_rules(intensity) + feedback_instruction = "Prioritize feedback items." if apply_feedback and feedback else "Use feedback if helpful." + + style_rules = "\n".join( + [ + "- Preserve Markdown structure where possible.", + "- Keep the original intent and key facts.", + "- Do not add new headings.", + "- Maintain the brief tone and audience.", + evidence_rule, + ] + ) + + template = textwrap.dedent( + """\ + [PROJECT CONTEXT] + Site: {site_name} ({domain}) + Audience: {audience} + Tone: {tone} + Language: {language} + Output directive: write this section in language code '{language}'. 
+ FocusKeywords: {keywords} + + [BRIEF CONTEXT] + H1: {h1} + Current Section: {section_title} + SearchIntent: {search_intent} + PrimaryKeyword: {primary_keyword} + SecondaryKeywords: {secondary_keywords} + + [CURRENT DRAFT] + {draft_body} + + [{source_label}] + {note_snippets} + + [FEEDBACK] + {feedback_block} + + [REFINEMENT CONTROLS] + Intensity: {intensity} + Feedback handling: {feedback_instruction} + + [STYLE RULES] + {style_rules} + + [OUTPUT] + Provide only the refined section body (no headings). + """ + ).strip() + + return template.format( + site_name=project["site_name"], + domain=project["domain"], + audience=project["audience"], + tone=project["tone"], + language=language, + keywords=keywords, + h1=brief.h1, + section_title=section_title, + search_intent=brief.search_intent, + primary_keyword=brief.primary_keyword, + secondary_keywords=", ".join(brief.secondary_keywords) if brief.secondary_keywords else "none", + draft_body=draft_body.strip() or "(empty draft section)", + source_label=source_label, + note_snippets=snippets_block, + feedback_block=feedback_block, + intensity=_coerce_enum(intensity), + feedback_instruction=feedback_instruction, + style_rules="\n".join([rule for rule in [*intensity_rules, *style_rules.splitlines()] if rule.strip()]), + ) + + +def build_changelog_prompt( + *, + brief: SeoBrief, + refined_titles: list[str], + feedback: str | None, + apply_feedback: bool, +) -> str: + """Build a prompt for a concise changelog summary.""" + refined_block = "\n".join(f"- {title}" for title in refined_titles) if refined_titles else "- (none)" + feedback_block = feedback.strip() if feedback else "(no feedback provided)" + feedback_instruction = ( + "Prioritize feedback items." if apply_feedback and feedback else "Summarize key improvements." + ) + + template = textwrap.dedent( + """\ + [TASK] + Summarize the refinements applied to the draft. + + [BRIEF TITLE] + {brief_title} + + [REFINED SECTIONS] + {refined_titles} + + [FEEDBACK] + {feedback_block} + + [INSTRUCTIONS] + - Write 3-7 bullet points. + - Be concise and concrete. + - Mention any feedback items addressed. + - Do not introduce new claims or content. + - {feedback_instruction} + """ + ).strip() + + return template.format( + brief_title=brief.title, + refined_titles=refined_block, + feedback_block=feedback_block, + feedback_instruction=feedback_instruction, + ) + + +def _format_evidence_rule(mode: Any) -> str: + mode_value = _coerce_enum(mode) + if mode_value == "off": + return "- Evidence citations are optional." + if mode_value == "required": + return "- Evidence is required; if missing write exactly: \"(no supporting evidence in the note)\"." + return "- Prefer citing supporting evidence where available." 
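+
+
+# Illustrative usage sketch (placeholders, not real project data): how a caller is
+# expected to drive build_user_prompt. Plain strings work for evidence_mode and
+# intensity because _coerce_enum accepts either enum members or strings.
+#
+#     prompt = build_user_prompt(
+#         project=project,              # ProjectConfig with site_name/domain/audience/tone/keywords
+#         brief=brief,                  # validated SeoBrief
+#         section_title="Logging Foundations",
+#         draft_body="Start with structured logs that use consistent identifiers.",
+#         note_snippets=snippets.text,  # from scribae.snippets.build_snippet_block
+#         feedback=None,
+#         evidence_mode="optional",
+#         intensity="medium",
+#         language="en",
+#         apply_feedback=False,
+#         source_label="NOTE EXCERPTS",
+#     )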
+ + +def _format_intensity_rules(intensity: Any) -> list[str]: + intensity_value = _coerce_enum(intensity) + if intensity_value == "minimal": + return [ + "- Make minimal edits: fix clarity, grammar, and obvious issues.", + "- Preserve original phrasing when possible.", + ] + if intensity_value == "strong": + return [ + "- Rewrite more aggressively for clarity and structure.", + "- Reorder sentences to improve flow while staying within the brief.", + ] + return [ + "- Improve clarity and flow with moderate rewriting.", + "- Tighten wording and remove redundancy.", + ] + + +def _coerce_enum(value: Any) -> str: + return str(getattr(value, "value", value)).strip().lower() + + +__all__ = ["SYSTEM_PROMPT", "build_user_prompt", "build_changelog_prompt"] diff --git a/src/scribae/refine.py b/src/scribae/refine.py new file mode 100644 index 0000000..b87752c --- /dev/null +++ b/src/scribae/refine.py @@ -0,0 +1,645 @@ +from __future__ import annotations + +import asyncio +import json +import re +from collections.abc import Callable, Sequence +from dataclasses import dataclass +from enum import Enum +from pathlib import Path + +from pydantic import ValidationError +from pydantic_ai import Agent +from pydantic_ai.settings import ModelSettings + +from .brief import SeoBrief +from .io_utils import NoteDetails, load_note +from .language import ( + LanguageMismatchError, + LanguageResolutionError, + ensure_language_output, + resolve_output_language, +) +from .llm import LLM_TIMEOUT_SECONDS, make_model +from .project import ProjectConfig +from .prompts.refine import SYSTEM_PROMPT, build_changelog_prompt, build_user_prompt +from .snippets import SnippetSelection, build_snippet_block + +Reporter = Callable[[str], None] | None + + +class RefiningError(Exception): + """Base class for refine command failures.""" + + exit_code = 1 + + def __init__(self, message: str, *, exit_code: int | None = None) -> None: + super().__init__(message) + if exit_code is not None: + self.exit_code = exit_code + + +class RefiningValidationError(RefiningError): + exit_code = 2 + + +class RefiningFileError(RefiningError): + exit_code = 3 + + +class RefiningLLMError(RefiningError): + exit_code = 4 + + +class EvidenceMode(str, Enum): + OFF = "off" + OPTIONAL = "optional" + REQUIRED = "required" + + @property + def is_required(self) -> bool: + return self is EvidenceMode.REQUIRED + + +class RefinementIntensity(str, Enum): + MINIMAL = "minimal" + MEDIUM = "medium" + STRONG = "strong" + + +@dataclass(frozen=True) +class DraftSection: + """Markdown section parsed from the draft.""" + + heading: str + title: str + body: str + anchor: str | None + + +@dataclass(frozen=True) +class DraftDocument: + """Parsed Markdown draft.""" + + preamble: str + sections: list[DraftSection] + + +@dataclass(frozen=True) +class RefinementContext: + """Artifacts required to refine a draft.""" + + draft_text: str + brief: SeoBrief + project: ProjectConfig + language: str + note: NoteDetails | None + feedback: str | None + + +@dataclass(frozen=True) +class RefinedSection: + """Output section details.""" + + index: int + title: str + heading: str + body: str + + +@dataclass(frozen=True) +class OutlineSection: + """Brief outline section metadata.""" + + title: str + index: int + + +def prepare_context( + *, + draft_path: Path, + brief_path: Path, + project: ProjectConfig, + language: str | None = None, + note_path: Path | None = None, + feedback_path: Path | None = None, + max_note_chars: int = 8000, + language_detector: Callable[[str], str] | None = None, + 
reporter: Reporter = None, +) -> RefinementContext: + """Load source artifacts needed for refinement.""" + if max_note_chars <= 0: + raise RefiningValidationError("--max-note-chars must be greater than zero.") + + try: + draft_text = draft_path.read_text(encoding="utf-8") + except FileNotFoundError as exc: + raise RefiningFileError(f"Draft file not found: {draft_path}") from exc + except OSError as exc: # pragma: no cover - surfaced by CLI + raise RefiningFileError(f"Unable to read draft: {exc}") from exc + + brief = _load_brief(brief_path) + + note = None + if note_path is not None: + try: + note = load_note(note_path, max_chars=max_note_chars) + except FileNotFoundError as exc: + raise RefiningFileError(str(exc)) from exc + except ValueError as exc: + raise RefiningFileError(str(exc)) from exc + except OSError as exc: # pragma: no cover - surfaced by CLI + raise RefiningFileError(f"Unable to read note: {exc}") from exc + + feedback = _load_feedback(feedback_path) if feedback_path else None + + _report(reporter, f"Loaded draft '{draft_path.name}' and brief '{brief.title}'.") + + language_source_text = note.body if note else draft_text + try: + language_resolution = resolve_output_language( + flag_language=language, + project_language=project.get("language"), + metadata=note.metadata if note else None, + text=language_source_text, + language_detector=language_detector, + ) + except LanguageResolutionError as exc: + raise RefiningValidationError(str(exc)) from exc + + _report( + reporter, + f"Resolved output language: {language_resolution.language} (source: {language_resolution.source})", + ) + + return RefinementContext( + draft_text=draft_text, + brief=brief, + project=project, + language=language_resolution.language, + note=note, + feedback=feedback, + ) + + +def outline_sections(brief: SeoBrief) -> list[OutlineSection]: + """Return OutlineSection objects for the brief outline.""" + if not brief.outline: + raise RefiningValidationError("Brief outline is empty.") + + sections = [ + OutlineSection(title=brief.outline[idx - 1].strip(), index=idx) + for idx in range(1, len(brief.outline) + 1) + if brief.outline[idx - 1].strip() + ] + + if not sections: + raise RefiningValidationError("No outline sections selected.") + return sections + + +def render_dry_run_prompt( + context: RefinementContext, + *, + intensity: RefinementIntensity, + evidence_mode: EvidenceMode, + section_range: tuple[int, int] | None, + apply_feedback: bool, + preserve_anchors: bool, +) -> str: + """Return the prompt for the first selected section.""" + draft = parse_draft(context.draft_text) + outline = outline_sections(context.brief) + refined_sections = _prepare_sections(draft, outline, preserve_anchors=preserve_anchors) + selected = _select_sections(refined_sections, section_range=section_range) + if not selected: + raise RefiningValidationError("No sections selected for refinement.") + + first = selected[0] + prompt, _ = build_prompt_for_section( + context, + section=first, + draft_body=_find_draft_body(draft, index=first.index), + evidence_mode=evidence_mode, + intensity=intensity, + apply_feedback=apply_feedback, + ) + return prompt + + +def refine_draft( + context: RefinementContext, + *, + model_name: str, + temperature: float, + intensity: RefinementIntensity, + evidence_mode: EvidenceMode, + section_range: tuple[int, int] | None = None, + apply_feedback: bool = False, + preserve_anchors: bool = False, + reporter: Reporter = None, + save_prompt_dir: Path | None = None, + changelog_path: Path | None = None, + 
language_detector: Callable[[str], str] | None = None, +) -> tuple[str, str | None]: + """Refine a draft and optionally return a changelog.""" + draft = parse_draft(context.draft_text) + outline = outline_sections(context.brief) + refined_sections = _prepare_sections(draft, outline, preserve_anchors=preserve_anchors) + + if save_prompt_dir is not None: + try: + save_prompt_dir.mkdir(parents=True, exist_ok=True) + except OSError as exc: + raise RefiningFileError(f"Unable to create prompt directory: {exc}") from exc + + selected_sections = _select_sections(refined_sections, section_range=section_range) + selected_indexes = {section.index for section in selected_sections} + + output_sections: list[RefinedSection] = [] + refined_titles: list[str] = [] + + for section in refined_sections: + draft_body = _find_draft_body(draft, index=section.index) + if section.index not in selected_indexes: + output_sections.append( + RefinedSection( + index=section.index, + title=section.title, + heading=section.heading, + body=draft_body, + ) + ) + continue + + prompt, snippets = build_prompt_for_section( + context, + section=section, + draft_body=draft_body, + evidence_mode=evidence_mode, + intensity=intensity, + apply_feedback=apply_feedback, + ) + _report(reporter, f"Refining section {section.index}: {section.title}") + + if evidence_mode.is_required and context.note is not None and snippets.matches == 0: + body = "(no supporting evidence in the note)" + else: + try: + body = ensure_language_output( + prompt=prompt, + expected_language=context.language, + invoke=lambda prompt: _invoke_model( + prompt, model_name=model_name, temperature=temperature + ), + extract_text=lambda text: text, + reporter=reporter, + language_detector=language_detector, + ) + except LanguageMismatchError as exc: + raise RefiningValidationError(str(exc)) from exc + except LanguageResolutionError as exc: + raise RefiningValidationError(str(exc)) from exc + + cleaned_body = _sanitize_section(str(body)) + output_sections.append( + RefinedSection( + index=section.index, + title=section.title, + heading=section.heading, + body=cleaned_body, + ) + ) + refined_titles.append(section.title) + + if save_prompt_dir is not None: + _save_section_artifacts(save_prompt_dir, section, prompt, cleaned_body) + + output_sections.extend(_append_unmapped_sections(draft, start_index=len(refined_sections))) + markdown = assemble_markdown(draft.preamble, output_sections) + + changelog_text = None + if changelog_path is not None: + changelog_text = generate_changelog( + context, + model_name=model_name, + temperature=temperature, + refined_titles=refined_titles, + apply_feedback=apply_feedback, + reporter=reporter, + ) + + return markdown, changelog_text + + +def parse_section_range(value: str) -> tuple[int, int]: + """Parse `--section N..M` specification.""" + match = re.fullmatch(r"(\d+)\.\.(\d+)", value.strip()) + if not match: + raise RefiningValidationError("--section must use the format N..M (e.g., 2..4).") + start, end = int(match.group(1)), int(match.group(2)) + if start <= 0 or end <= 0: + raise RefiningValidationError("Section numbers must be positive.") + if start > end: + raise RefiningValidationError("Section range start must be <= end.") + return start, end + + +def parse_draft(text: str) -> DraftDocument: + """Parse a Markdown draft into sections split by level-2 headings.""" + lines = text.splitlines() + preamble_lines: list[str] = [] + sections: list[DraftSection] = [] + current_heading: str | None = None + current_lines: list[str] = [] + 
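+    # Track fenced code blocks so that "## " lines inside a fence are not treated as section headings.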
in_code_fence = False + + for line in lines: + stripped = line.strip() + if stripped.startswith("```"): + in_code_fence = not in_code_fence + if not in_code_fence and line.startswith("## "): + if current_heading is None: + preamble_lines = current_lines + else: + sections.append(_build_section(current_heading, current_lines)) + current_heading = line.strip() + current_lines = [] + else: + current_lines.append(line) + + if current_heading is None: + preamble_lines = current_lines + else: + sections.append(_build_section(current_heading, current_lines)) + + preamble = "\n".join(preamble_lines).rstrip() + return DraftDocument(preamble=preamble, sections=sections) + + +def assemble_markdown(preamble: str, sections: Sequence[RefinedSection]) -> str: + """Assemble refined Markdown with the original preamble.""" + blocks: list[str] = [] + if preamble.strip(): + blocks.append(preamble.rstrip()) + for section in sections: + body = section.body.strip() + block = f"{section.heading}\n\n{body}".rstrip() + blocks.append(block) + return "\n\n".join(blocks).strip() + "\n" + + +def build_prompt_for_section( + context: RefinementContext, + *, + section: RefinedSection, + draft_body: str, + evidence_mode: EvidenceMode, + intensity: RefinementIntensity, + apply_feedback: bool, + max_snippet_chars: int = 1800, +) -> tuple[str, SnippetSelection]: + """Return the user prompt and snippet selection for a section.""" + source_text = context.note.body if context.note else draft_body + snippets = build_snippet_block( + source_text, + section_title=section.title, + primary_keyword=context.brief.primary_keyword, + secondary_keywords=context.brief.secondary_keywords, + max_chars=max_snippet_chars, + ) + prompt = build_user_prompt( + project=context.project, + brief=context.brief, + section_title=section.title, + draft_body=draft_body, + note_snippets=snippets.text, + feedback=context.feedback, + evidence_mode=evidence_mode, + intensity=intensity, + language=context.language, + apply_feedback=apply_feedback, + source_label="NOTE EXCERPTS" if context.note else "SOURCE EXCERPTS", + ) + return prompt, snippets + + +def generate_changelog( + context: RefinementContext, + *, + model_name: str, + temperature: float, + refined_titles: Sequence[str], + apply_feedback: bool, + reporter: Reporter, +) -> str: + """Generate a changelog summary for the refinement.""" + prompt = build_changelog_prompt( + brief=context.brief, + refined_titles=list(refined_titles), + feedback=context.feedback, + apply_feedback=apply_feedback, + ) + _report(reporter, "Generating changelog summary") + + try: + text = _invoke_model(prompt, model_name=model_name, temperature=temperature) + except RefiningLLMError: + raise + + return text.strip() or "(no changes summarized)" + + +def _load_brief(path: Path) -> SeoBrief: + try: + text = path.read_text(encoding="utf-8") + except FileNotFoundError as exc: + raise RefiningFileError(f"Brief JSON not found: {path}") from exc + except OSError as exc: # pragma: no cover - surfaced by CLI + raise RefiningFileError(f"Unable to read brief: {exc}") from exc + + try: + payload = json.loads(text) + except json.JSONDecodeError as exc: + raise RefiningValidationError(f"Brief file is not valid JSON: {exc}") from exc + + try: + brief = SeoBrief.model_validate(payload) + except ValidationError as exc: + raise RefiningValidationError(f"Brief JSON failed validation: {exc}") from exc + + return brief + + +def _load_feedback(path: Path) -> str: + try: + payload = path.read_text(encoding="utf-8") + except FileNotFoundError as exc: 
+        raise RefiningFileError(f"Feedback file not found: {path}") from exc
+    except OSError as exc:  # pragma: no cover - surfaced by CLI
+        raise RefiningFileError(f"Unable to read feedback: {exc}") from exc
+
+    try:
+        parsed = json.loads(payload)
+    except json.JSONDecodeError:
+        return payload.strip()
+
+    return json.dumps(parsed, indent=2, ensure_ascii=False).strip()
+
+
+def _prepare_sections(
+    draft: DraftDocument,
+    outline: Sequence[OutlineSection],
+    *,
+    preserve_anchors: bool,
+) -> list[RefinedSection]:
+    if len(draft.sections) < len(outline):
+        raise RefiningValidationError(
+            f"Draft has {len(draft.sections)} sections but brief expects {len(outline)} outline items."
+        )
+    refined_sections: list[RefinedSection] = []
+    for index, spec in enumerate(outline, start=1):
+        title = str(getattr(spec, "title", "")).strip()
+        if not title:
+            continue
+        draft_section = draft.sections[index - 1]
+        anchor = draft_section.anchor if preserve_anchors else None
+        heading = _compose_heading(title, anchor=anchor)
+        refined_sections.append(
+            RefinedSection(index=index, title=title, heading=heading, body=draft_section.body)
+        )
+    if not refined_sections:
+        raise RefiningValidationError("No outline sections selected.")
+    return refined_sections
+
+
+def _select_sections(
+    sections: Sequence[RefinedSection],
+    *,
+    section_range: tuple[int, int] | None,
+) -> list[RefinedSection]:
+    if section_range is None:
+        return list(sections)
+    start, end = section_range
+    total = len(sections)
+    if not (1 <= start <= total and 1 <= end <= total and start <= end):
+        raise RefiningValidationError(f"Section range {start}..{end} is invalid for {total} outline items.")
+    return [section for section in sections if start <= section.index <= end]
+
+
+def _append_unmapped_sections(draft: DraftDocument, *, start_index: int) -> list[RefinedSection]:
+    extra_sections: list[RefinedSection] = []
+    for offset, section in enumerate(draft.sections[start_index:], start=start_index + 1):
+        extra_sections.append(
+            RefinedSection(
+                index=offset,
+                title=section.title,
+                heading=section.heading,
+                body=section.body,
+            )
+        )
+    return extra_sections
+
+
+def _find_draft_body(draft: DraftDocument, *, index: int) -> str:
+    if index <= 0 or index > len(draft.sections):
+        return ""
+    return draft.sections[index - 1].body
+
+
+def _build_section(heading: str, lines: Sequence[str]) -> DraftSection:
+    title, anchor = _parse_heading(heading)
+    body = "\n".join(lines).rstrip()
+    return DraftSection(heading=heading, title=title, body=body, anchor=anchor)
+
+
+def _parse_heading(heading: str) -> tuple[str, str | None]:
+    text = heading.lstrip("#").strip()
+    match = re.match(r"^(?P<title>.+?)\s*(\{#.+\})\s*$", text)
+    if match:
+        title = match.group("title").strip()
+        anchor = match.group(2).strip()
+        return title, anchor
+    return text, None
+
+
+def _compose_heading(title: str, *, anchor: str | None) -> str:
+    if anchor:
+        return f"## {title} {anchor}".strip()
+    return f"## {title}".strip()
+
+
+def _invoke_model(prompt: str, *, model_name: str, temperature: float) -> str:
+    model_settings = ModelSettings(temperature=temperature)
+    model = make_model(model_name, model_settings=model_settings)
+    agent = Agent(model=model, instructions=SYSTEM_PROMPT)
+
+    async def _call() -> str:
+        run = await agent.run(prompt)
+        output = getattr(run, "output", "")
+        return str(output).strip()
+
+    try:
+        return asyncio.run(asyncio.wait_for(_call(), LLM_TIMEOUT_SECONDS))
+    except TimeoutError as exc:
+        raise RefiningLLMError(f"LLM request timed out after 
{int(LLM_TIMEOUT_SECONDS)} seconds.") from exc + except KeyboardInterrupt: + raise + except Exception as exc: # pragma: no cover - surfaced to CLI + raise RefiningLLMError(f"LLM request failed: {exc}") from exc + + +def _sanitize_section(body: str) -> str: + text = body.strip() + lines = text.splitlines() + cleaned_lines: list[str] = [] + heading_dropped = False + for line in lines: + stripped = line.strip() + if stripped.startswith("#") and not heading_dropped: + heading_dropped = True + stripped = stripped.lstrip("#").strip() + if not stripped: + continue + cleaned_lines.append(stripped) + cleaned = "\n".join(cleaned_lines).strip() + return cleaned or "(no content generated)" + + +def _save_section_artifacts(directory: Path, section: RefinedSection, prompt: str, response: str) -> None: + slug = _slugify(section.title) or f"section-{section.index}" + prompt_path = directory / f"{section.index:02d}-{slug}.prompt.txt" + response_path = directory / f"{section.index:02d}-{slug}.response.md" + payload = f"SYSTEM PROMPT:\n{SYSTEM_PROMPT}\n\nUSER PROMPT:\n{prompt}\n" + try: + prompt_path.write_text(payload, encoding="utf-8") + response_path.write_text(response.strip() + "\n", encoding="utf-8") + except OSError as exc: + raise RefiningFileError(f"Unable to save prompt artifacts: {exc}") from exc + + +def _slugify(value: str) -> str: + lowered = value.lower() + return re.sub(r"[^a-z0-9]+", "-", lowered).strip("-") + + +def _report(reporter: Reporter, message: str) -> None: + if reporter: + reporter(message) + + +__all__ = [ + "EvidenceMode", + "RefinementIntensity", + "RefiningError", + "RefiningValidationError", + "RefiningFileError", + "RefiningLLMError", + "prepare_context", + "parse_draft", + "assemble_markdown", + "outline_sections", + "parse_section_range", + "render_dry_run_prompt", + "refine_draft", +] diff --git a/src/scribae/refine_cli.py b/src/scribae/refine_cli.py new file mode 100644 index 0000000..9ca0425 --- /dev/null +++ b/src/scribae/refine_cli.py @@ -0,0 +1,247 @@ +from __future__ import annotations + +from pathlib import Path + +import typer + +from .llm import DEFAULT_MODEL_NAME +from .project import load_default_project, load_project +from .refine import ( + EvidenceMode, + RefinementIntensity, + RefiningError, + RefiningLLMError, + RefiningValidationError, + parse_section_range, + prepare_context, + refine_draft, + render_dry_run_prompt, +) + + +def refine_command( + draft: Path = typer.Option( # noqa: B008 + ..., + "--in", + help="Path to the Markdown draft to refine.", + ), + brief: Path = typer.Option( # noqa: B008 + ..., + "--brief", + "-b", + help="Path to the SeoBrief JSON output from `scribae brief`.", + ), + note: Path | None = typer.Option( # noqa: B008 + None, + "--note", + "-n", + help="Optional source note for grounding.", + ), + feedback: Path | None = typer.Option( # noqa: B008 + None, + "--feedback", + help="Optional feedback report (Markdown or JSON).", + ), + section: str | None = typer.Option( # noqa: B008 + None, + "--section", + help="Refine only a numbered subset of sections using N..M (1-indexed).", + ), + intensity: RefinementIntensity = typer.Option( # noqa: B008 + RefinementIntensity.MEDIUM, + "--intensity", + case_sensitive=False, + help="Rewrite intensity: minimal|medium|strong.", + ), + evidence: EvidenceMode = typer.Option( # noqa: B008 + EvidenceMode.OPTIONAL, + "--evidence", + case_sensitive=False, + help="Evidence requirements: off|optional|required.", + ), + project: str | None = typer.Option( # noqa: B008 + None, + "--project", + "-p", + 
help="Project name (loads <name>.yml/.yaml from current directory) or path to a project file.", + ), + language: str | None = typer.Option( # noqa: B008 + None, + "--language", + "-l", + help="Language code for the refined output (overrides project config).", + ), + model: str = typer.Option( # noqa: B008 + DEFAULT_MODEL_NAME, + "--model", + "-m", + help="Model name to request via OpenAI-compatible API.", + ), + temperature: float = typer.Option( # noqa: B008 + 0.2, + "--temperature", + min=0.0, + max=2.0, + help="Temperature for the LLM request.", + ), + dry_run: bool = typer.Option( # noqa: B008 + False, + "--dry-run", + help="Print the prompt and exit (no LLM call).", + ), + save_prompt: Path | None = typer.Option( # noqa: B008 + None, + "--save-prompt", + file_okay=False, + dir_okay=True, + help="Directory for saving prompt/response files.", + ), + apply_feedback: bool = typer.Option( # noqa: B008 + False, + "--apply-feedback", + help="Prioritize feedback items when refining.", + ), + changelog: Path | None = typer.Option( # noqa: B008 + None, + "--changelog", + help="Write a changelog summary to this file.", + ), + preserve_anchors: bool = typer.Option( # noqa: B008 + False, + "--preserve-anchors", + help="Preserve heading anchor IDs when rewriting section titles.", + ), + verbose: bool = typer.Option( # noqa: B008 + False, + "--verbose", + "-v", + help="Print progress information to stderr.", + ), + out: Path | None = typer.Option( # noqa: B008 + None, + "--out", + "-o", + help="Write Markdown output to this file (stdout if omitted).", + ), +) -> None: + """CLI handler for `scribae refine`.""" + if dry_run and (out is not None or save_prompt is not None or changelog is not None): + raise typer.BadParameter( + "--dry-run cannot be combined with --out/--save-prompt/--changelog.", + param_hint="--dry-run", + ) + + reporter = (lambda msg: typer.secho(msg, err=True)) if verbose else None + + if project: + try: + project_config = load_project(project) + except (FileNotFoundError, ValueError, OSError) as exc: + typer.secho(str(exc), err=True, fg=typer.colors.RED) + raise typer.Exit(5) from exc + else: + try: + project_config, project_source = load_default_project() + except (FileNotFoundError, ValueError, OSError) as exc: + typer.secho(str(exc), err=True, fg=typer.colors.RED) + raise typer.Exit(5) from exc + if not project_source: + typer.secho( + "No project provided; using default context (language=en, tone=neutral).", + err=True, + fg=typer.colors.YELLOW, + ) + + draft_path = draft.expanduser() + brief_path = brief.expanduser() + note_path = note.expanduser() if note else None + feedback_path = feedback.expanduser() if feedback else None + save_prompt_path = save_prompt.expanduser() if save_prompt else None + changelog_path = changelog.expanduser() if changelog else None + out_path = out.expanduser() if out else None + + try: + context = prepare_context( + draft_path=draft_path, + brief_path=brief_path, + project=project_config, + language=language, + note_path=note_path, + feedback_path=feedback_path, + reporter=reporter, + ) + except RefiningError as exc: + typer.secho(str(exc), err=True, fg=typer.colors.RED) + raise typer.Exit(exc.exit_code) from exc + + section_range = None + if section: + try: + section_range = parse_section_range(section) + except RefiningValidationError as exc: + typer.secho(str(exc), err=True, fg=typer.colors.RED) + raise typer.Exit(exc.exit_code) from exc + + if dry_run: + try: + prompt = render_dry_run_prompt( + context, + intensity=intensity, + 
evidence_mode=evidence, + section_range=section_range, + apply_feedback=apply_feedback, + preserve_anchors=preserve_anchors, + ) + except RefiningError as exc: + typer.secho(str(exc), err=True, fg=typer.colors.RED) + raise typer.Exit(exc.exit_code) from exc + typer.echo(prompt) + return + + try: + refined, changelog_text = refine_draft( + context, + model_name=model, + temperature=temperature, + intensity=intensity, + evidence_mode=evidence, + section_range=section_range, + apply_feedback=apply_feedback, + preserve_anchors=preserve_anchors, + reporter=reporter, + save_prompt_dir=save_prompt_path, + changelog_path=changelog_path, + ) + except KeyboardInterrupt: + typer.secho("Cancelled by user.", err=True, fg=typer.colors.YELLOW) + raise typer.Exit(130) from None + except RefiningLLMError as exc: + typer.secho(str(exc), err=True, fg=typer.colors.RED) + raise typer.Exit(exc.exit_code) from exc + except RefiningError as exc: + typer.secho(str(exc), err=True, fg=typer.colors.RED) + raise typer.Exit(exc.exit_code) from exc + + if changelog_path is not None: + changelog_path.parent.mkdir(parents=True, exist_ok=True) + try: + changelog_path.write_text((changelog_text or "").strip() + "\n", encoding="utf-8") + except OSError as exc: + typer.secho(f"Unable to write changelog: {exc}", err=True, fg=typer.colors.RED) + raise typer.Exit(3) from exc + typer.echo(f"Wrote changelog to {changelog_path}") + + if out_path is not None: + out_path.parent.mkdir(parents=True, exist_ok=True) + try: + out_path.write_text(refined, encoding="utf-8") + except OSError as exc: + typer.secho(f"Unable to write refined draft: {exc}", err=True, fg=typer.colors.RED) + raise typer.Exit(3) from exc + typer.echo(f"Wrote refined draft to {out_path}") + return + + typer.echo(refined, nl=False) + + +__all__ = ["refine_command"] diff --git a/tests/fixtures/draft.md b/tests/fixtures/draft.md new file mode 100644 index 0000000..0260982 --- /dev/null +++ b/tests/fixtures/draft.md @@ -0,0 +1,23 @@ +## Introduction to Observability + +Observability helps engineering teams understand how services behave. + +## Logging Foundations + +Start with structured logs that use consistent identifiers and rotation policies. + +## Tracing Distributed Services + +Distributed tracing connects API calls across systems and highlights latency hotspots. + +## Metrics and Capacity Planning + +Metrics track backlog, saturation, and queue retries for capacity forecasting. + +## AI Generated Status Reports + +Automation can summarize incidents, but it should stay grounded in validated telemetry. + +## Summary and Next Steps + +Capture lessons learned and share playbooks with new teams. 
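
The fixture above gives parse_draft six level-2 sections and no preamble. A minimal sketch of how the parser in src/scribae/refine.py is expected to split it (fixture path relative to a checkout with this patch applied):

```python
# Sketch: splitting tests/fixtures/draft.md with parse_draft.
from pathlib import Path

from scribae.refine import parse_draft

doc = parse_draft(Path("tests/fixtures/draft.md").read_text(encoding="utf-8"))
print(doc.preamble)                        # "" - the draft opens directly with a "## " heading
print(len(doc.sections))                   # 6
print([s.title for s in doc.sections[:2]])
# ['Introduction to Observability', 'Logging Foundations']
```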
diff --git a/tests/unit/refine_cli_test.py b/tests/unit/refine_cli_test.py new file mode 100644 index 0000000..7a81239 --- /dev/null +++ b/tests/unit/refine_cli_test.py @@ -0,0 +1,201 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scribae.main import app + +runner = CliRunner() + + +@pytest.fixture() +def fixtures_dir() -> Path: + return Path(__file__).resolve().parents[1] / "fixtures" + + +@pytest.fixture() +def note_path(fixtures_dir: Path) -> Path: + return fixtures_dir / "note_short.md" + + +@pytest.fixture() +def brief_path(fixtures_dir: Path) -> Path: + return fixtures_dir / "brief_valid.json" + + +@pytest.fixture() +def draft_path(fixtures_dir: Path) -> Path: + return fixtures_dir / "draft.md" + + +class RecordingLLM: + def __init__(self) -> None: + self.prompts: list[str] = [] + + def __call__(self, prompt: str, *, model_name: str, temperature: float) -> str: + self.prompts.append(prompt) + if "Summarize the refinements applied to the draft." in prompt: + return "- Tightened phrasing.\n- Applied feedback items." + section_title = "" + for line in prompt.splitlines(): + if line.startswith("Current Section:"): + section_title = line.split(":", 1)[1].strip() + break + return f"{section_title or 'Section'} refined." + + +@pytest.fixture() +def recording_llm(monkeypatch: pytest.MonkeyPatch) -> RecordingLLM: + recorder = RecordingLLM() + monkeypatch.setattr("scribae.refine._invoke_model", recorder) + return recorder + + +def test_refine_full_draft( + recording_llm: RecordingLLM, + draft_path: Path, + brief_path: Path, + note_path: Path, +) -> None: + result = runner.invoke( + app, + [ + "refine", + "--in", + str(draft_path), + "--brief", + str(brief_path), + "--note", + str(note_path), + ], + ) + + assert result.exit_code == 0, result.stderr + body = result.stdout.strip() + assert body.count("## ") == 6 + assert "## Introduction to Observability" in body + assert "Introduction to Observability refined." in body + assert "Summary and Next Steps refined." in body + assert len(recording_llm.prompts) == 6 + + +def test_evidence_required_skips_missing_section( + recording_llm: RecordingLLM, + draft_path: Path, + brief_path: Path, + note_path: Path, +) -> None: + result = runner.invoke( + app, + [ + "refine", + "--in", + str(draft_path), + "--brief", + str(brief_path), + "--note", + str(note_path), + "--evidence", + "required", + ], + ) + + assert result.exit_code == 0, result.stderr + body = result.stdout + assert "(no supporting evidence in the note)" in body + assert len(recording_llm.prompts) == 5 + + +def test_section_range_only_refines_subset( + recording_llm: RecordingLLM, + draft_path: Path, + brief_path: Path, + note_path: Path, +) -> None: + result = runner.invoke( + app, + [ + "refine", + "--in", + str(draft_path), + "--brief", + str(brief_path), + "--note", + str(note_path), + "--section", + "2..3", + ], + ) + + assert result.exit_code == 0, result.stderr + body = result.stdout.strip() + assert "Logging Foundations refined." in body + assert "Tracing Distributed Services refined." in body + assert "Observability helps engineering teams understand how services behave." in body + assert "Metrics track backlog, saturation, and queue retries for capacity forecasting." 
in body + assert len(recording_llm.prompts) == 2 + + +def test_dry_run_includes_feedback( + draft_path: Path, + brief_path: Path, + note_path: Path, + tmp_path: Path, +) -> None: + feedback_path = tmp_path / "feedback.json" + feedback_path.write_text('{"issue": "Tighten the intro"}', encoding="utf-8") + + result = runner.invoke( + app, + [ + "refine", + "--in", + str(draft_path), + "--brief", + str(brief_path), + "--note", + str(note_path), + "--feedback", + str(feedback_path), + "--apply-feedback", + "--dry-run", + ], + ) + + assert result.exit_code == 0 + prompt = result.stdout + assert "[FEEDBACK]" in prompt + assert "Tighten the intro" in prompt + assert "Feedback handling: Prioritize feedback items." in prompt + + +def test_changelog_written( + recording_llm: RecordingLLM, + draft_path: Path, + brief_path: Path, + note_path: Path, + tmp_path: Path, +) -> None: + changelog_path = tmp_path / "changelog.md" + + result = runner.invoke( + app, + [ + "refine", + "--in", + str(draft_path), + "--brief", + str(brief_path), + "--note", + str(note_path), + "--changelog", + str(changelog_path), + ], + ) + + assert result.exit_code == 0, result.stderr + assert changelog_path.exists() + changelog = changelog_path.read_text(encoding="utf-8") + assert "Applied feedback items" in changelog
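
For context on how these pieces compose outside the CLI, a hedged sketch of driving the pipeline programmatically; the file paths and project name are placeholders, and refine_draft makes real LLM calls through the configured OpenAI-compatible endpoint:

```python
# Sketch only: programmatic equivalent of `scribae refine` (paths and project name are placeholders).
from pathlib import Path

from scribae.llm import DEFAULT_MODEL_NAME
from scribae.project import load_project
from scribae.refine import EvidenceMode, RefinementIntensity, prepare_context, refine_draft

project = load_project("myproject")   # hypothetical <name>.yml project file in the current directory
context = prepare_context(
    draft_path=Path("draft.md"),
    brief_path=Path("brief.json"),
    project=project,
    note_path=Path("note.md"),        # optional grounding note
)
markdown, _changelog = refine_draft(
    context,
    model_name=DEFAULT_MODEL_NAME,
    temperature=0.2,
    intensity=RefinementIntensity.MEDIUM,
    evidence_mode=EvidenceMode.OPTIONAL,
)
Path("refined.md").write_text(markdown, encoding="utf-8")
```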