diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..e038605
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+data/Iiams.rmtree filter=lfs diff=lfs merge=lfs -text
diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml
index d265988..4e72e62 100644
--- a/.github/workflows/pr-tests.yml
+++ b/.github/workflows/pr-tests.yml
@@ -11,6 +11,8 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
+ with:
+ lfs: true
- name: Install uv
uses: astral-sh/setup-uv@v3
@@ -29,10 +31,10 @@ jobs:
uv run black --check .
- name: Run tests with coverage
- run: uv run pytest --cov=rmagent --cov-report=term-missing --cov-fail-under=80
+ run: uv run pytest --cov=rmagent --cov-report=term-missing --cov-fail-under=65
env:
# Set test environment variables
- RM_DATABASE_PATH: data/test.rmtree
+ RM_DATABASE_PATH: data/Iiams.rmtree
DEFAULT_LLM_PROVIDER: anthropic
LOG_LEVEL: WARNING
diff --git a/data/Iiams.rmtree b/data/Iiams.rmtree
new file mode 100644
index 0000000..00bb873
--- /dev/null
+++ b/data/Iiams.rmtree
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7163c4982fa83bb0aae58e894986dc5e4b6f86634cca4cd3aeaf2d8e01e1571a
+size 66928640
diff --git a/rmagent/agent/formatters.py b/rmagent/agent/formatters.py
index d2b88cc..f18b765 100644
--- a/rmagent/agent/formatters.py
+++ b/rmagent/agent/formatters.py
@@ -8,7 +8,6 @@
from __future__ import annotations
from rmagent.rmlib.parsers.date_parser import parse_rm_date
-from rmagent.rmlib.queries import QueryService
class GenealogyFormatters:
@@ -74,7 +73,7 @@ def format_events(events, event_citations: dict[int, list[int]] | None = None) -
# Add note if present (often contains full article transcriptions)
if note:
# Show "NOTE: " prefix only once, then indent subsequent lines
- note_lines = note.split('\n')
+ note_lines = note.split("\n")
for idx, note_line in enumerate(note_lines):
if note_line.strip():
if idx == 0:
@@ -233,9 +232,7 @@ def format_siblings(siblings) -> list[str]:
return lines
@staticmethod
- def format_early_life(
- person, parents, siblings, life_span: dict[str, int | None]
- ) -> str:
+ def format_early_life(person, parents, siblings, life_span: dict[str, int | None]) -> str:
"""Format early life narrative with birth order, parental ages, migration notes."""
person_name = GenealogyFormatters.format_person_name(person)
birth_year = life_span.get("birth_year")
@@ -322,16 +319,10 @@ def format_family_losses(life_span, parents, spouses, siblings, children) -> str
name = GenealogyFormatters.format_person_name(data)
losses.append(f"- {name} ({relation}) died in {death_year_value}.")
- return (
- "\n".join(losses)
- if losses
- else "No recorded family deaths occurred during the subject's lifetime."
- )
+ return "\n".join(losses) if losses else "No recorded family deaths occurred during the subject's lifetime."
@staticmethod
- def calculate_parent_age(
- parents, birth_year_key: str, child_birth_year: int | None
- ) -> int | None:
+ def calculate_parent_age(parents, birth_year_key: str, child_birth_year: int | None) -> int | None:
"""Calculate parent's age at child's birth."""
if not parents or child_birth_year is None:
return None
diff --git a/rmagent/agent/genealogy_agent.py b/rmagent/agent/genealogy_agent.py
index 0bf0890..6664c45 100644
--- a/rmagent/agent/genealogy_agent.py
+++ b/rmagent/agent/genealogy_agent.py
@@ -63,9 +63,7 @@ class GenealogyAgent:
# ---- Public API -----------------------------------------------------
- def generate_biography(
- self, person_id: int, style: str = "standard", max_tokens: int | None = None
- ) -> LLMResult:
+ def generate_biography(self, person_id: int, style: str = "standard", max_tokens: int | None = None) -> LLMResult:
"""Generate a narrative biography using the configured prompts/LLM."""
context = self._build_biography_context(person_id, style)
@@ -84,9 +82,7 @@ def _run_validator(db: RMDatabase | None) -> QualityReport:
return self._with_database(_run_validator)
- def ask(
- self, question: str, person_id: int | None = None, max_tokens: int | None = None
- ) -> LLMResult:
+ def ask(self, question: str, person_id: int | None = None, max_tokens: int | None = None) -> LLMResult:
"""Answer ad-hoc questions with light context and persistent memory."""
context = self._build_qa_context(question, person_id)
@@ -138,15 +134,11 @@ def _builder(db: RMDatabase | None) -> dict[str, str]:
life_span, parents, spouses, siblings, children
)
sibling_lines = GenealogyFormatters.format_siblings(siblings)
- sibling_summary = (
- "\n".join(sibling_lines) if sibling_lines else "No sibling records available."
- )
+ sibling_summary = "\n".join(sibling_lines) if sibling_lines else "No sibling records available."
# Extract person-level notes
person_notes = person.get("Note") or ""
- person_notes_formatted = (
- person_notes if person_notes else "No person-level notes available."
- )
+ person_notes_formatted = person_notes if person_notes else "No person-level notes available."
# Generate style-specific length guidance
length_guidance = self._get_length_guidance_for_style(style)
@@ -185,9 +177,7 @@ def _builder(db: RMDatabase | None) -> dict[str, str]:
snippets.append(GenealogyFormatters.format_family_overview(spouses, children, siblings))
snippets.append(GenealogyFormatters.format_early_life(person, parents, siblings, life_span))
- history_snippets = [
- f"Q: {turn.question}\nA: {turn.answer}" for turn in self._memory[-3:]
- ]
+ history_snippets = [f"Q: {turn.question}\nA: {turn.answer}" for turn in self._memory[-3:]]
snippets.extend(history_snippets)
return {
@@ -297,9 +287,7 @@ def _fetch_siblings(self, query: QueryService, parents: dict[str, str] | None, p
)
return siblings
- def _build_event_citations_map(
- self, query: QueryService, events: list[dict]
- ) -> dict[int, list[int]]:
+ def _build_event_citations_map(self, query: QueryService, events: list[dict]) -> dict[int, list[int]]:
"""
Build mapping of EventID -> list of CitationIDs for inline citation markers.
@@ -333,9 +321,7 @@ def _build_event_citations_map(
return event_citations_map
- def _collect_all_citations_for_person(
- self, query: QueryService, person_id: int
- ) -> list[dict]:
+ def _collect_all_citations_for_person(self, query: QueryService, person_id: int) -> list[dict]:
"""
Collect all citations for a person's events using QueryService.
Returns list of citation dicts with CitationID, SourceID, SourceName, CitationName, EventType.
diff --git a/rmagent/agent/llm_provider.py b/rmagent/agent/llm_provider.py
index 4f15b10..99ca3ed 100644
--- a/rmagent/agent/llm_provider.py
+++ b/rmagent/agent/llm_provider.py
@@ -86,9 +86,7 @@ def __init__(
self.model = model
self.default_max_tokens = default_max_tokens
self.retry_config = retry_config or RetryConfig()
- self.prompt_cost_per_1k, self.completion_cost_per_1k = (
- pricing_per_1k if pricing_per_1k else (0.0, 0.0)
- )
+ self.prompt_cost_per_1k, self.completion_cost_per_1k = pricing_per_1k if pricing_per_1k else (0.0, 0.0)
def generate(self, prompt: str, **kwargs: Any) -> LLMResult:
"""Invoke provider with retry semantics."""
@@ -135,9 +133,7 @@ def _with_cost(self, result: LLMResult) -> LLMResult:
def _invoke(self, prompt: str, **kwargs: Any) -> LLMResult:
"""Concrete providers implement this call."""
- def _log_debug(
- self, prompt: str, result: LLMResult, elapsed: float, kwargs: dict[str, Any]
- ) -> None:
+ def _log_debug(self, prompt: str, result: LLMResult, elapsed: float, kwargs: dict[str, Any]) -> None:
debug_logger = logging.getLogger("rmagent.llm_debug")
if not debug_logger.isEnabledFor(logging.DEBUG):
return
diff --git a/rmagent/agent/tools.py b/rmagent/agent/tools.py
index 4097508..9dc8970 100644
--- a/rmagent/agent/tools.py
+++ b/rmagent/agent/tools.py
@@ -78,10 +78,7 @@ def __init__(self, query_service: QueryService):
self.query_service = query_service
def run(self, person_id: int, generations: int = 3):
- return [
- dict(row)
- for row in self.query_service.get_direct_ancestors(person_id, generations=generations)
- ]
+ return [dict(row) for row in self.query_service.get_direct_ancestors(person_id, generations=generations)]
@dataclass
@@ -99,14 +96,8 @@ def run(self, person_a: int, person_b: int) -> dict[str, str | None]:
if person_a == person_b:
return {"relationship": "Same person"}
- ancestors_a = {
- row["PersonID"]: row
- for row in self.query_service.get_direct_ancestors(person_a, generations=5)
- }
- ancestors_b = {
- row["PersonID"]: row
- for row in self.query_service.get_direct_ancestors(person_b, generations=5)
- }
+ ancestors_a = {row["PersonID"]: row for row in self.query_service.get_direct_ancestors(person_a, generations=5)}
+ ancestors_b = {row["PersonID"]: row for row in self.query_service.get_direct_ancestors(person_b, generations=5)}
shared = set(ancestors_a).intersection(ancestors_b)
if not shared:
@@ -137,8 +128,7 @@ def run(self):
report = validator.run_all_checks()
return {
"totals_by_severity": {
- k.value if hasattr(k, "value") else str(k): v
- for k, v in report.totals_by_severity.items()
+ k.value if hasattr(k, "value") else str(k): v for k, v in report.totals_by_severity.items()
},
"totals_by_category": report.totals_by_category,
"issue_count": report.summary.get("issue_total", 0),
diff --git a/rmagent/cli/commands/bio.py b/rmagent/cli/commands/bio.py
index d8c7c75..96ebd68 100644
--- a/rmagent/cli/commands/bio.py
+++ b/rmagent/cli/commands/bio.py
@@ -99,7 +99,8 @@ def bio(
}[citation_style.lower()]
# Create generator and agent
- config = ctx.load_config()
+ # Skip LLM credential validation if using template-based generation
+ config = ctx.load_config(require_llm_credentials=not no_ai)
agent = (
None
if no_ai
diff --git a/rmagent/cli/commands/export.py b/rmagent/cli/commands/export.py
index 174fada..807fa54 100644
--- a/rmagent/cli/commands/export.py
+++ b/rmagent/cli/commands/export.py
@@ -87,7 +87,7 @@ def hugo(
}[bio_length.lower()]
# Create exporter
- config = ctx.load_config()
+ config = ctx.load_config(require_llm_credentials=False)
exporter = HugoExporter(
db=config.database.database_path,
extension_path=config.database.sqlite_extension_path,
@@ -100,9 +100,7 @@ def hugo(
# Get all person IDs
from rmagent.rmlib.database import RMDatabase
- with RMDatabase(
- config.database.database_path, extension_path=config.database.sqlite_extension_path
- ) as db:
+ with RMDatabase(config.database.database_path, extension_path=config.database.sqlite_extension_path) as db:
all_persons = db.query("SELECT PersonID FROM PersonTable")
person_ids = [p["PersonID"] for p in all_persons]
diff --git a/rmagent/cli/commands/person.py b/rmagent/cli/commands/person.py
index ca1f76b..2973901 100644
--- a/rmagent/cli/commands/person.py
+++ b/rmagent/cli/commands/person.py
@@ -46,9 +46,7 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool
raise click.Abort()
# Display person header
- name = (
- f"{_get_value(person_data, 'Given')} {_get_value(person_data, 'Surname')}".strip()
- )
+ name = f"{_get_value(person_data, 'Given')} {_get_value(person_data, 'Surname')}".strip()
birth_year = _get_value(person_data, "BirthYear", "?")
death_year = _get_value(person_data, "DeathYear", "?")
console.print(f"\n[bold]📋 Person: {name}[/bold] ({birth_year}–{death_year})")
@@ -68,9 +66,7 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool
from rmagent.rmlib.parsers.date_parser import parse_rm_date
date_str = _get_value(event, "Date")
- formatted_date = (
- parse_rm_date(date_str).format_display() if date_str else ""
- )
+ formatted_date = parse_rm_date(date_str).format_display() if date_str else ""
table.add_row(
formatted_date,
_get_value(event, "EventType"),
@@ -89,15 +85,13 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool
# Check for father
if _get_value(parents_row, "FatherID"):
father_name = (
- f"{_get_value(parents_row, 'FatherGiven')} "
- f"{_get_value(parents_row, 'FatherSurname')}"
+ f"{_get_value(parents_row, 'FatherGiven')} " f"{_get_value(parents_row, 'FatherSurname')}"
).strip()
console.print(f" • Father: {father_name}")
# Check for mother
if _get_value(parents_row, "MotherID"):
mother_name = (
- f"{_get_value(parents_row, 'MotherGiven')} "
- f"{_get_value(parents_row, 'MotherSurname')}"
+ f"{_get_value(parents_row, 'MotherGiven')} " f"{_get_value(parents_row, 'MotherSurname')}"
).strip()
console.print(f" • Mother: {mother_name}")
@@ -106,9 +100,7 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool
if spouses:
console.print("\n[bold]Spouses:[/bold]")
for spouse in spouses:
- spouse_name = (
- f"{_get_value(spouse, 'Given')} {_get_value(spouse, 'Surname')}".strip()
- )
+ spouse_name = f"{_get_value(spouse, 'Given')} {_get_value(spouse, 'Surname')}".strip()
console.print(f" • {spouse_name}")
# Get children
@@ -116,9 +108,7 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool
if children:
console.print("\n[bold]Children:[/bold]")
for child in children:
- child_name = (
- f"{_get_value(child, 'Given')} {_get_value(child, 'Surname')}".strip()
- )
+ child_name = f"{_get_value(child, 'Given')} {_get_value(child, 'Surname')}".strip()
console.print(f" • {child_name}")
# Show ancestors if requested
@@ -141,8 +131,7 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool
console.print("\n[bold]Descendants:[/bold] (4 generations)")
for descendant in descendant_rows:
descendant_name = (
- f"{_get_value(descendant, 'Given')} "
- f"{_get_value(descendant, 'Surname')}"
+ f"{_get_value(descendant, 'Given')} " f"{_get_value(descendant, 'Surname')}"
).strip()
gen = _get_value(descendant, "Generation", 1)
indent = " " * gen
diff --git a/rmagent/cli/commands/quality.py b/rmagent/cli/commands/quality.py
index a53b50e..3bc6435 100644
--- a/rmagent/cli/commands/quality.py
+++ b/rmagent/cli/commands/quality.py
@@ -112,7 +112,7 @@ def quality(
task = progress.add_task("Running data quality validation...", total=None)
# Create generator
- config = ctx.load_config()
+ config = ctx.load_config(require_llm_credentials=False)
generator = QualityReportGenerator(
db=config.database.database_path,
extension_path=config.database.sqlite_extension_path,
@@ -141,9 +141,7 @@ def quality(
console.print()
console.print(report_output)
else:
- console.print(
- "[yellow]Warning:[/yellow] HTML and CSV formats require --output option"
- )
+ console.print("[yellow]Warning:[/yellow] HTML and CSV formats require --output option")
except Exception as e:
console.print(f"\n[red]Error:[/red] {e}")
diff --git a/rmagent/cli/commands/search.py b/rmagent/cli/commands/search.py
index 06d371f..421c5ee 100644
--- a/rmagent/cli/commands/search.py
+++ b/rmagent/cli/commands/search.py
@@ -1,6 +1,7 @@
"""Search command - Search database by name or place."""
import re
+
import click
from rich.console import Console
from rich.table import Table
@@ -22,9 +23,7 @@ def _get_value(row, key, default=""):
def _get_surname_metaphone(db, surname: str) -> str | None:
"""Get Metaphone encoding for a surname from the database."""
# Query a sample name to get the Metaphone encoding
- result = db.query_one(
- "SELECT SurnameMP FROM NameTable WHERE Surname = ? COLLATE RMNOCASE LIMIT 1", (surname,)
- )
+ result = db.query_one("SELECT SurnameMP FROM NameTable WHERE Surname = ? COLLATE RMNOCASE LIMIT 1", (surname,))
return result["SurnameMP"] if result else None
@@ -49,9 +48,9 @@ def _parse_name_variations(name: str, all_variants: list[str]) -> list[str]:
return [name]
# Extract brackets and base name (everything before first bracket)
- bracket_pattern = r'\[([^\]]+)\]'
+ bracket_pattern = r"\[([^\]]+)\]"
brackets = re.findall(bracket_pattern, name)
- base_name = re.sub(bracket_pattern, '', name).strip()
+ base_name = re.sub(bracket_pattern, "", name).strip()
if not brackets:
return [name]
@@ -200,9 +199,7 @@ def search(
# Validate radius search options
if kilometers is not None and miles is not None:
- console.print(
- "[red]Error:[/red] Cannot specify both --kilometers and --miles. Choose one."
- )
+ console.print("[red]Error:[/red] Cannot specify both --kilometers and --miles. Choose one.")
raise click.Abort()
radius_km = None
@@ -221,9 +218,7 @@ def search(
radius_unit = "mi"
if radius_km is not None and not place:
- console.print(
- "[red]Error:[/red] Radius search requires --place to be specified"
- )
+ console.print("[red]Error:[/red] Radius search requires --place to be specified")
raise click.Abort()
with ctx.get_database() as db:
@@ -232,7 +227,7 @@ def search(
# Search by name
if name:
# Load config to get surname variants for [ALL] keyword
- config = load_app_config(configure_logger=False)
+ config = load_app_config(configure_logger=False, require_llm_credentials=False)
all_variants = config.search.surname_variants_all
# Parse name variations (supports [variant] and [ALL] syntax)
@@ -240,9 +235,7 @@ def search(
# Show which variations are being searched
if len(name_variations) > 1:
- console.print(
- f"[dim]Searching {len(name_variations)} name variations...[/dim]"
- )
+ console.print(f"[dim]Searching {len(name_variations)} name variations...[/dim]")
# Collect results from all variations
all_results = []
@@ -257,9 +250,7 @@ def search(
# Single word - could be surname or given name
# Try both
try:
- surname_results = queries.search_primary_names(
- surname=name_parts[0], limit=limit
- )
+ surname_results = queries.search_primary_names(surname=name_parts[0], limit=limit)
for r in surname_results:
if r["PersonID"] not in seen_person_ids:
all_results.append(r)
@@ -267,9 +258,7 @@ def search(
except ValueError:
pass
try:
- given_results = queries.search_primary_names(
- given=name_parts[0], limit=limit
- )
+ given_results = queries.search_primary_names(given=name_parts[0], limit=limit)
for r in given_results:
if r["PersonID"] not in seen_person_ids:
all_results.append(r)
@@ -313,15 +302,11 @@ def search(
if len(variation.strip().split()) > 1:
# Multi-word: Use word-based search (more precise)
# This finds people where ALL words appear across name fields
- variation_results = queries.search_names_by_words(
- search_text=variation, limit=limit
- )
+ variation_results = queries.search_names_by_words(search_text=variation, limit=limit)
else:
# Single word: Use flexible search
# This finds people where word appears in surname OR given name
- variation_results = queries.search_names_flexible(
- search_text=variation, limit=limit
- )
+ variation_results = queries.search_names_flexible(search_text=variation, limit=limit)
# Add unique results
for r in variation_results:
@@ -347,9 +332,7 @@ def search(
# Display name search results
if results:
- console.print(
- f"\n[bold]🔍 Found {len(results)} person(s) matching '{name}':[/bold]"
- )
+ console.print(f"\n[bold]🔍 Found {len(results)} person(s) matching '{name}':[/bold]")
console.print("─" * 60)
table = Table(show_header=True, header_style="bold cyan")
@@ -420,9 +403,7 @@ def search(
)
if radius_results:
- console.print(
- f"\n[bold]🌍 Found {len(radius_results)} place(s) within radius:[/bold]"
- )
+ console.print(f"\n[bold]🌍 Found {len(radius_results)} place(s) within radius:[/bold]")
table = Table(show_header=True, header_style="bold cyan")
table.add_column("ID", style="dim", width=8)
@@ -461,9 +442,7 @@ def search(
else:
# Standard place search (no radius)
if place_results:
- console.print(
- f"\n[bold]📍 Found {len(place_results)} place(s) matching '{place}':[/bold]"
- )
+ console.print(f"\n[bold]📍 Found {len(place_results)} place(s) matching '{place}':[/bold]")
console.print("─" * 60)
table = Table(show_header=True, header_style="bold cyan")
diff --git a/rmagent/cli/commands/timeline.py b/rmagent/cli/commands/timeline.py
index 22d62f4..4ad35eb 100644
--- a/rmagent/cli/commands/timeline.py
+++ b/rmagent/cli/commands/timeline.py
@@ -70,7 +70,7 @@ def timeline(
task = progress.add_task(f"Generating timeline for person {person_id}...", total=None)
# Create generator
- config = ctx.load_config()
+ config = ctx.load_config(require_llm_credentials=False)
generator = TimelineGenerator(
db=config.database.database_path,
extension_path=config.database.sqlite_extension_path,
diff --git a/rmagent/cli/main.py b/rmagent/cli/main.py
index e72045e..553bfdd 100644
--- a/rmagent/cli/main.py
+++ b/rmagent/cli/main.py
@@ -36,10 +36,14 @@ def __init__(
self.config = None
self.db = None
- def load_config(self):
- """Load application configuration."""
+ def load_config(self, require_llm_credentials: bool = True):
+ """Load application configuration.
+
+ Args:
+ require_llm_credentials: When True, validate LLM provider credentials. No effect once self.config is set.
+ """
if not self.config:
- self.config = load_app_config()
+ self.config = load_app_config(require_llm_credentials=require_llm_credentials)
# Override with CLI options if provided
if self.database_path:
self.config.database.database_path = self.database_path
@@ -50,7 +54,8 @@ def load_config(self):
def get_database(self) -> RMDatabase:
"""Get database connection (creates if needed)."""
if not self.db:
- config = self.load_config()
+ # Database access doesn't require LLM credentials
+ config = self.load_config(require_llm_credentials=False)
db_path = config.database.database_path
if not db_path:
raise click.UsageError(
@@ -154,11 +159,11 @@ def completion(shell: str):
# For fish
rmagent completion fish
"""
- shell_upper = shell.upper()
prog_name = "rmagent"
if shell == "zsh":
- click.echo(f"""# Add this to your ~/.zshrc:
+ click.echo(
+ f"""# Add this to your ~/.zshrc:
eval "$(_RMAGENT_COMPLETE=zsh_source {prog_name})"
# Or generate and save the completion script:
@@ -166,23 +171,29 @@ def completion(shell: str):
# Then add this to ~/.zshrc:
fpath=(~/.zfunc $fpath)
autoload -Uz compinit && compinit
-""")
+"""
+ )
elif shell == "bash":
- click.echo(f"""# Add this to your ~/.bashrc:
+ click.echo(
+ f"""# Add this to your ~/.bashrc:
eval "$(_RMAGENT_COMPLETE=bash_source {prog_name})"
# Or generate and save the completion script:
_RMAGENT_COMPLETE=bash_source {prog_name} > ~/.bash_completion.d/{prog_name}
# Then add this to ~/.bashrc:
source ~/.bash_completion.d/{prog_name}
-""")
+"""
+ )
elif shell == "fish":
- click.echo(f"""# Add this to ~/.config/fish/completions/{prog_name}.fish:
+ click.echo(
+ f"""# Add this to ~/.config/fish/completions/{prog_name}.fish:
_RMAGENT_COMPLETE=fish_source {prog_name} | source
# Or generate and save the completion script:
_RMAGENT_COMPLETE=fish_source {prog_name} > ~/.config/fish/completions/{prog_name}.fish
-""")
+"""
+ )
+
cli.add_command(person.person)
cli.add_command(bio.bio)
diff --git a/rmagent/config/config.py b/rmagent/config/config.py
index e7aa005..4036357 100644
--- a/rmagent/config/config.py
+++ b/rmagent/config/config.py
@@ -90,9 +90,7 @@ class LLMSettings(BaseModel):
def check_provider(cls, provider: str) -> str:
provider_lower = provider.lower()
if provider_lower not in cls.allowed_providers:
- raise ValueError(
- f"Unknown provider '{provider}'. Allowed: {sorted(cls.allowed_providers)}"
- )
+ raise ValueError(f"Unknown provider '{provider}'. Allowed: {sorted(cls.allowed_providers)}")
return provider_lower
def ensure_credentials(self) -> None:
@@ -173,9 +171,7 @@ class CitationSettings(BaseModel):
def check_style(cls, style: str) -> str:
style_lower = style.lower()
if style_lower not in cls.allowed_styles:
- raise ValueError(
- f"Invalid citation style '{style}'. Allowed: {sorted(cls.allowed_styles)}"
- )
+ raise ValueError(f"Invalid citation style '{style}'. Allowed: {sorted(cls.allowed_styles)}")
return style_lower
@@ -306,6 +302,7 @@ def load_app_config(
env_path: Path | None = None,
auto_create_dirs: bool = True,
configure_logger: bool = True,
+ require_llm_credentials: bool = True,
) -> AppConfig:
"""
Load application configuration.
@@ -314,6 +311,7 @@ def load_app_config(
env_path: Optional path to a .env file. Defaults to config/.env when not provided.
auto_create_dirs: When True, create output/export directories.
configure_logger: When True, configure global logging handlers.
+ require_llm_credentials: When True, validate LLM provider credentials and raise LLMError on failure.
"""
if env_path is None:
env_path = DEFAULT_ENV_PATH
@@ -336,9 +334,7 @@ def load_app_config(
media_root = _env("RM_MEDIA_ROOT_DIRECTORY")
database_settings = DatabaseSettings(
database_path=Path(_env("RM_DATABASE_PATH", "data/Iiams.rmtree")),
- sqlite_extension_path=Path(
- _env("SQLITE_ICU_EXTENSION", "./sqlite-extension/icu.dylib")
- ),
+ sqlite_extension_path=Path(_env("SQLITE_ICU_EXTENSION", "./sqlite-extension/icu.dylib")),
media_root_directory=Path(media_root) if media_root else None,
)
@@ -361,9 +357,7 @@ def load_app_config(
)
search_settings = SearchSettings(
- surname_variants_all=_env(
- "SURNAME_VARIANTS_ALL", "Iams,Iames,Iiams,Iiames,Ijams,Ijames,Imes,Eimes"
- ),
+ surname_variants_all=_env("SURNAME_VARIANTS_ALL", "Iams,Iames,Iiams,Iiames,Ijams,Ijames,Imes,Eimes"),
)
logging_settings = LoggingSettings(
@@ -391,10 +385,11 @@ def load_app_config(
if configure_logger:
configure_logging(config.logging)
- try:
- config.llm.ensure_credentials()
- except ValueError as exc:
- raise LLMError(str(exc)) from exc
+ if require_llm_credentials:
+ try:
+ config.llm.ensure_credentials()
+ except ValueError as exc:
+ raise LLMError(str(exc)) from exc
return config
diff --git a/rmagent/generators/biography.py b/rmagent/generators/biography.py
index 50b1913..b6a91f0 100644
--- a/rmagent/generators/biography.py
+++ b/rmagent/generators/biography.py
@@ -8,11 +8,11 @@
from __future__ import annotations
+import time
from dataclasses import dataclass, field
-from datetime import datetime, timezone
+from datetime import UTC, datetime
from enum import Enum
from pathlib import Path
-import time
from rmagent.agent.genealogy_agent import GenealogyAgent
from rmagent.rmlib.database import RMDatabase
@@ -141,7 +141,7 @@ class Biography:
sources: str
# Metadata
- generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc).astimezone())
+ generated_at: datetime = field(default_factory=lambda: datetime.now(UTC).astimezone())
word_count: int = 0
privacy_applied: bool = False
birth_year: int | None = None
@@ -153,17 +153,19 @@ class Biography:
def _calculate_word_count(self) -> int:
"""Calculate word count from all biography sections."""
- all_text = "\n".join([
- self.introduction,
- self.early_life,
- self.education,
- self.career,
- self.marriage_family,
- self.later_life,
- self.death_legacy,
- self.footnotes,
- self.sources,
- ])
+ all_text = "\n".join(
+ [
+ self.introduction,
+ self.early_life,
+ self.education,
+ self.career,
+ self.marriage_family,
+ self.later_life,
+ self.death_legacy,
+ self.footnotes,
+ self.sources,
+ ]
+ )
return len(all_text.split())
@staticmethod
@@ -198,26 +200,26 @@ def render_metadata(self) -> str:
tz_str = self.generated_at.strftime("%z")
tz_formatted = f"{tz_str[:3]}:{tz_str[3:]}" if tz_str else ""
date_str = self.generated_at.strftime("%Y-%m-%dT%H:%M:%S") + tz_formatted
- lines.append(f'Date: {date_str}')
+ lines.append(f"Date: {date_str}")
# Person ID
- lines.append(f'PersonID: {self.person_id}')
+ lines.append(f"PersonID: {self.person_id}")
# LLM Metadata (if available)
if self.llm_metadata:
- lines.append(f'TokensIn: {self._format_tokens(self.llm_metadata.prompt_tokens)}')
- lines.append(f'TokensOut: {self._format_tokens(self.llm_metadata.completion_tokens)}')
- lines.append(f'TotalTokens: {self._format_tokens(self.llm_metadata.total_tokens)}')
- lines.append(f'LLM: {self.llm_metadata.provider.capitalize()}')
- lines.append(f'Model: {self.llm_metadata.model}')
- lines.append(f'PromptTime: {self._format_duration(self.llm_metadata.prompt_time)}')
- lines.append(f'LLMTime: {self._format_duration(self.llm_metadata.llm_time)}')
+ lines.append(f"TokensIn: {self._format_tokens(self.llm_metadata.prompt_tokens)}")
+ lines.append(f"TokensOut: {self._format_tokens(self.llm_metadata.completion_tokens)}")
+ lines.append(f"TotalTokens: {self._format_tokens(self.llm_metadata.total_tokens)}")
+ lines.append(f"LLM: {self.llm_metadata.provider.capitalize()}")
+ lines.append(f"Model: {self.llm_metadata.model}")
+ lines.append(f"PromptTime: {self._format_duration(self.llm_metadata.prompt_time)}")
+ lines.append(f"LLMTime: {self._format_duration(self.llm_metadata.llm_time)}")
# Biography stats (calculate word count dynamically)
word_count = self._calculate_word_count()
- lines.append(f'Words: {word_count:,}')
- lines.append(f'Citations: {self.citation_count}')
- lines.append(f'Sources: {self.source_count}')
+ lines.append(f"Words: {word_count:,}")
+ lines.append(f"Citations: {self.citation_count}")
+ lines.append(f"Sources: {self.source_count}")
lines.append("---\n")
return "\n".join(lines)
@@ -243,7 +245,7 @@ def render_markdown(self, include_metadata: bool = True) -> str:
additional_images = []
if self.length != BiographyLength.SHORT and self.media_files:
for media in self.media_files:
- is_primary = media.get("IsPrimary", 0) == 1 if hasattr(media, 'get') else media["IsPrimary"] == 1
+ is_primary = media.get("IsPrimary", 0) == 1 if hasattr(media, "get") else media["IsPrimary"] == 1
if is_primary and primary_image is None:
primary_image = media
elif not is_primary:
@@ -256,9 +258,14 @@ def render_markdown(self, include_metadata: bool = True) -> str:
# Add primary portrait image with text wrapping (if available)
if primary_image:
from pathlib import Path
+
# Format the media path
- media_path = primary_image.get("MediaPath", "") if hasattr(primary_image, 'get') else primary_image["MediaPath"]
- media_file = primary_image.get("MediaFile", "") if hasattr(primary_image, 'get') else primary_image["MediaFile"]
+ if hasattr(primary_image, "get"):
+ media_path = primary_image.get("MediaPath", "")
+ media_file = primary_image.get("MediaFile", "")
+ else:
+ media_path = primary_image["MediaPath"]
+ media_file = primary_image["MediaFile"]
# Strip RootsMagic's ?\ or ?/ prefix if present
if media_path.startswith("?\\"):
@@ -329,9 +336,10 @@ def render_markdown(self, include_metadata: bool = True) -> str:
sections.append("## Photos\n")
for media in additional_images:
from pathlib import Path
+
# Format the media path
- media_path = media.get("MediaPath", "") if hasattr(media, 'get') else media["MediaPath"]
- media_file = media.get("MediaFile", "") if hasattr(media, 'get') else media["MediaFile"]
+ media_path = media.get("MediaPath", "") if hasattr(media, "get") else media["MediaPath"]
+ media_file = media.get("MediaFile", "") if hasattr(media, "get") else media["MediaFile"]
# Strip RootsMagic's ?\ or ?/ prefix if present
if media_path.startswith("?\\"):
@@ -545,9 +553,7 @@ def generate(
if use_ai and self.agent:
biography = self._generate_with_ai(context, length, citation_style, include_sources)
else:
- biography = self._generate_template_based(
- context, length, citation_style, include_sources
- )
+ biography = self._generate_template_based(context, length, citation_style, include_sources)
return biography
@@ -580,12 +586,8 @@ def _extract(db: RMDatabase) -> PersonContext:
is_living = age < 110
# Extract birth/death information
- birth_date_str, birth_place = self._extract_vital_info(
- db, person_id, fact_type_id=1
- ) # Birth
- death_date_str, death_place = self._extract_vital_info(
- db, person_id, fact_type_id=2
- ) # Death
+ birth_date_str, birth_place = self._extract_vital_info(db, person_id, fact_type_id=1) # Birth
+ death_date_str, death_place = self._extract_vital_info(db, person_id, fact_type_id=2) # Death
# Get relationships
parents = query.get_parents(person_id)
@@ -677,9 +679,7 @@ def _extract(db: RMDatabase) -> PersonContext:
else:
raise ValueError("No database provided")
- def _extract_vital_info(
- self, db: RMDatabase, person_id: int, fact_type_id: int
- ) -> tuple[str | None, str | None]:
+ def _extract_vital_info(self, db: RMDatabase, person_id: int, fact_type_id: int) -> tuple[str | None, str | None]:
"""Extract date and place for a vital event (birth/death)."""
query = QueryService(db)
vital_events = query.get_vital_events(person_id)
@@ -709,9 +709,7 @@ def _extract_vital_info(
return None, None
- def _categorize_events(
- self, db: RMDatabase, events: list[dict]
- ) -> tuple[list[EventContext], ...]:
+ def _categorize_events(self, db: RMDatabase, events: list[dict]) -> tuple[list[EventContext], ...]:
"""Categorize events into vital, education, occupation, military, residence, and other."""
vital = []
education = []
@@ -910,8 +908,8 @@ def _generate_with_ai(
# Extract LLM metadata from result
llm_metadata = None
- if hasattr(self.agent, 'llm_provider'):
- provider_name = self.agent.llm_provider.__class__.__name__.replace('Provider', '').lower()
+ if hasattr(self.agent, "llm_provider"):
+ provider_name = self.agent.llm_provider.__class__.__name__.replace("Provider", "").lower()
llm_metadata = LLMMetadata(
provider=provider_name,
model=result.model,
@@ -919,7 +917,7 @@ def _generate_with_ai(
completion_tokens=result.usage.completion_tokens,
total_tokens=result.usage.total_tokens,
prompt_time=total_time * 0.1, # Estimate ~10% for prompt building
- llm_time=total_time * 0.9, # Estimate ~90% for LLM
+ llm_time=total_time * 0.9, # Estimate ~90% for LLM
cost=result.cost,
)
@@ -932,9 +930,7 @@ def _generate_with_ai(
if citation_style == CitationStyle.FOOTNOTE:
# Process {cite:ID} markers in full response (preserves section headers)
- modified_text, footnotes, tracker = self._process_citations_in_text(
- response_text, context.all_citations
- )
+ modified_text, footnotes, tracker = self._process_citations_in_text(response_text, context.all_citations)
# Use modified text for section parsing
response_text = modified_text
@@ -1267,7 +1263,7 @@ def _strip_source_type_prefix(source_name: str) -> str:
for prefix in prefixes:
if source_name.startswith(prefix):
- return source_name[len(prefix):]
+ return source_name[len(prefix) :]
return source_name
@@ -1436,7 +1432,6 @@ def _generate_bibliography_from_fields(self, citation: dict) -> str:
First checks for pre-formatted Bibliography field, then constructs from individual fields.
Returns source name with WARNING only if all approaches fail.
"""
- source_id = _get_row_value(citation, "SourceID", 0)
source_name = _get_row_value(citation, "SourceName", "[Unknown Source]")
fields_blob = _get_row_value(citation, "SourceFields")
@@ -1535,9 +1530,7 @@ def _process_citations_in_text(
return modified_text, footnotes, tracker
- def _generate_footnotes_section(
- self, footnotes: list[tuple[int, CitationInfo]], tracker: CitationTracker
- ) -> str:
+ def _generate_footnotes_section(self, footnotes: list[tuple[int, CitationInfo]], tracker: CitationTracker) -> str:
"""
Generate footnotes section with numbered entries.
First citation per source uses full footnote, subsequent use short.
diff --git a/rmagent/generators/biography/citations.py b/rmagent/generators/biography/citations.py
index 3d1eb28..65346be 100644
--- a/rmagent/generators/biography/citations.py
+++ b/rmagent/generators/biography/citations.py
@@ -49,7 +49,7 @@ def strip_source_type_prefix(source_name: str) -> str:
for prefix in prefixes:
if source_name.startswith(prefix):
- return source_name[len(prefix):]
+ return source_name[len(prefix) :]
return source_name
@@ -162,7 +162,6 @@ def _generate_bibliography_from_fields(self, citation: dict) -> str:
First checks for pre-formatted Bibliography field, then constructs from individual fields.
Returns source name with WARNING only if all approaches fail.
"""
- source_id = get_row_value(citation, "SourceID", 0)
source_name = get_row_value(citation, "SourceName", "[Unknown Source]")
fields_blob = get_row_value(citation, "SourceFields")
@@ -259,9 +258,7 @@ def process_citations_in_text(
return modified_text, footnotes, tracker
- def generate_footnotes_section(
- self, footnotes: list[tuple[int, CitationInfo]], tracker: CitationTracker
- ) -> str:
+ def generate_footnotes_section(self, footnotes: list[tuple[int, CitationInfo]], tracker: CitationTracker) -> str:
"""
Generate footnotes section with numbered entries and 3-character indent.
First citation per source uses full footnote, subsequent use short.
diff --git a/rmagent/generators/biography/generator.py b/rmagent/generators/biography/generator.py
index 10905cf..deb4272 100644
--- a/rmagent/generators/biography/generator.py
+++ b/rmagent/generators/biography/generator.py
@@ -6,9 +6,9 @@
from __future__ import annotations
+import time
from datetime import datetime
from pathlib import Path
-import time
from rmagent.agent.genealogy_agent import GenealogyAgent
from rmagent.rmlib.database import RMDatabase
@@ -18,6 +18,7 @@
from rmagent.rmlib.parsers.place_parser import format_place_medium, format_place_short
from rmagent.rmlib.queries import QueryService
+from .citations import CitationProcessor
from .models import (
Biography,
BiographyLength,
@@ -27,7 +28,6 @@
PersonContext,
get_row_value,
)
-from .citations import CitationProcessor
from .templates import BiographyTemplates
@@ -141,9 +141,7 @@ def generate(
if use_ai and self.agent:
biography = self._generate_with_ai(context, length, citation_style, include_sources)
else:
- biography = self._generate_template_based(
- context, length, citation_style, include_sources
- )
+ biography = self._generate_template_based(context, length, citation_style, include_sources)
return biography
@@ -176,12 +174,8 @@ def _extract(db: RMDatabase) -> PersonContext:
is_living = age < 110
# Extract birth/death information
- birth_date_str, birth_place = self._extract_vital_info(
- db, person_id, fact_type_id=1
- ) # Birth
- death_date_str, death_place = self._extract_vital_info(
- db, person_id, fact_type_id=2
- ) # Death
+ birth_date_str, birth_place = self._extract_vital_info(db, person_id, fact_type_id=1) # Birth
+ death_date_str, death_place = self._extract_vital_info(db, person_id, fact_type_id=2) # Death
# Get relationships
parents = query.get_parents(person_id)
@@ -273,9 +267,7 @@ def _extract(db: RMDatabase) -> PersonContext:
else:
raise ValueError("No database provided")
- def _extract_vital_info(
- self, db: RMDatabase, person_id: int, fact_type_id: int
- ) -> tuple[str | None, str | None]:
+ def _extract_vital_info(self, db: RMDatabase, person_id: int, fact_type_id: int) -> tuple[str | None, str | None]:
"""Extract date and place for a vital event (birth/death)."""
query = QueryService(db)
vital_events = query.get_vital_events(person_id)
@@ -305,9 +297,7 @@ def _extract_vital_info(
return None, None
- def _categorize_events(
- self, db: RMDatabase, events: list[dict]
- ) -> tuple[list[EventContext], ...]:
+ def _categorize_events(self, db: RMDatabase, events: list[dict]) -> tuple[list[EventContext], ...]:
"""Categorize events into vital, education, occupation, military, residence, and other."""
vital = []
education = []
@@ -471,8 +461,8 @@ def _generate_with_ai(
# Extract LLM metadata from result
llm_metadata = None
- if hasattr(self.agent, 'llm_provider'):
- provider_name = self.agent.llm_provider.__class__.__name__.replace('Provider', '').lower()
+ if hasattr(self.agent, "llm_provider"):
+ provider_name = self.agent.llm_provider.__class__.__name__.replace("Provider", "").lower()
llm_metadata = LLMMetadata(
provider=provider_name,
model=result.model,
@@ -480,7 +470,7 @@ def _generate_with_ai(
completion_tokens=result.usage.completion_tokens,
total_tokens=result.usage.total_tokens,
prompt_time=total_time * 0.1, # Estimate ~10% for prompt building
- llm_time=total_time * 0.9, # Estimate ~90% for LLM
+ llm_time=total_time * 0.9, # Estimate ~90% for LLM
cost=result.cost,
)
diff --git a/rmagent/generators/biography/models.py b/rmagent/generators/biography/models.py
index f164435..bb8a277 100644
--- a/rmagent/generators/biography/models.py
+++ b/rmagent/generators/biography/models.py
@@ -7,8 +7,9 @@
from __future__ import annotations
from dataclasses import dataclass, field
-from datetime import datetime, timezone
+from datetime import UTC, datetime
from enum import Enum
+from pathlib import Path
class BiographyLength(str, Enum):
@@ -129,7 +130,7 @@ class Biography:
sources: str
# Metadata
- generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc).astimezone())
+ generated_at: datetime = field(default_factory=lambda: datetime.now(UTC).astimezone())
word_count: int = 0
privacy_applied: bool = False
birth_year: int | None = None
@@ -138,7 +139,7 @@ class Biography:
citation_count: int = 0
source_count: int = 0
media_files: list[dict] = field(default_factory=list) # Media files for images
- media_root_directory: "Path | None" = None # Root directory for media files (replaces ? in MediaPath)
+ media_root_directory: Path | None = None # Root directory for media files (replaces ? in MediaPath)
def calculate_word_count(self) -> int:
"""
@@ -146,21 +147,24 @@ def calculate_word_count(self) -> int:
Excludes front matter, footnotes, and sources sections.
"""
- all_text = "\n".join([
- self.introduction,
- self.early_life,
- self.education,
- self.career,
- self.marriage_family,
- self.later_life,
- self.death_legacy,
- ])
+ all_text = "\n".join(
+ [
+ self.introduction,
+ self.early_life,
+ self.education,
+ self.career,
+ self.marriage_family,
+ self.later_life,
+ self.death_legacy,
+ ]
+ )
return len(all_text.split())
def render_markdown(self, include_metadata: bool = True) -> str:
"""Render complete biography as Markdown with optional front matter."""
# Import here to avoid circular dependency
from .rendering import BiographyRenderer
+
renderer = BiographyRenderer(media_root_directory=self.media_root_directory)
return renderer.render_markdown(self, include_metadata)
@@ -168,6 +172,7 @@ def render_metadata(self) -> str:
"""Render Hugo-style front matter metadata."""
# Import here to avoid circular dependency
from .rendering import BiographyRenderer
+
renderer = BiographyRenderer(media_root_directory=self.media_root_directory)
return renderer.render_metadata(self)
diff --git a/rmagent/generators/biography/rendering.py b/rmagent/generators/biography/rendering.py
index ba4d9c7..c094c0d 100644
--- a/rmagent/generators/biography/rendering.py
+++ b/rmagent/generators/biography/rendering.py
@@ -55,26 +55,26 @@ def render_metadata(self, bio: Biography) -> str:
tz_str = bio.generated_at.strftime("%z")
tz_formatted = f"{tz_str[:3]}:{tz_str[3:]}" if tz_str else ""
date_str = bio.generated_at.strftime("%Y-%m-%dT%H:%M:%S") + tz_formatted
- lines.append(f'Date: {date_str}')
+ lines.append(f"Date: {date_str}")
# Person ID
- lines.append(f'PersonID: {bio.person_id}')
+ lines.append(f"PersonID: {bio.person_id}")
# LLM Metadata (if available)
if bio.llm_metadata:
- lines.append(f'TokensIn: {self.format_tokens(bio.llm_metadata.prompt_tokens)}')
- lines.append(f'TokensOut: {self.format_tokens(bio.llm_metadata.completion_tokens)}')
- lines.append(f'TotalTokens: {self.format_tokens(bio.llm_metadata.total_tokens)}')
- lines.append(f'LLM: {bio.llm_metadata.provider.capitalize()}')
- lines.append(f'Model: {bio.llm_metadata.model}')
- lines.append(f'PromptTime: {self.format_duration(bio.llm_metadata.prompt_time)}')
- lines.append(f'LLMTime: {self.format_duration(bio.llm_metadata.llm_time)}')
+ lines.append(f"TokensIn: {self.format_tokens(bio.llm_metadata.prompt_tokens)}")
+ lines.append(f"TokensOut: {self.format_tokens(bio.llm_metadata.completion_tokens)}")
+ lines.append(f"TotalTokens: {self.format_tokens(bio.llm_metadata.total_tokens)}")
+ lines.append(f"LLM: {bio.llm_metadata.provider.capitalize()}")
+ lines.append(f"Model: {bio.llm_metadata.model}")
+ lines.append(f"PromptTime: {self.format_duration(bio.llm_metadata.prompt_time)}")
+ lines.append(f"LLMTime: {self.format_duration(bio.llm_metadata.llm_time)}")
# Biography stats (calculate word count dynamically)
word_count = bio.calculate_word_count()
- lines.append(f'Words: {word_count:,}')
- lines.append(f'Citations: {bio.citation_count}')
- lines.append(f'Sources: {bio.source_count}')
+ lines.append(f"Words: {word_count:,}")
+ lines.append(f"Citations: {bio.citation_count}")
+ lines.append(f"Sources: {bio.source_count}")
lines.append("---\n")
return "\n".join(lines)
@@ -124,18 +124,28 @@ def render_markdown(self, bio: Biography, include_metadata: bool = True) -> str:
db_caption = primary_image["Caption"] if "Caption" in primary_image.keys() else ""
except (AttributeError, TypeError):
db_caption = ""
- caption = db_caption if db_caption else self._format_image_caption(bio.full_name, bio.birth_year, bio.death_year)
- alt_text = self._format_image_caption(bio.full_name, bio.birth_year, bio.death_year) # Always use name/dates for alt text
+ if db_caption:
+ caption = db_caption
+ else:
+ caption = self._format_image_caption(bio.full_name, bio.birth_year, bio.death_year)
+ # Always use name/dates for alt text
+ alt_text = self._format_image_caption(bio.full_name, bio.birth_year, bio.death_year)
sections.append('
')
sections.append('
')
- sections.append(f'

')
- sections.append(f'
{caption}
')
- sections.append('
')
+ sections.append(
+ f'

'
+ )
+ sections.append(
+ f'
{caption}
'
+ )
+ sections.append("
")
sections.append(' ')
- sections.append(f' {bio.introduction}')
- sections.append('
')
- sections.append('\n')
+ sections.append(f" {bio.introduction}")
+ sections.append(" ")
+ sections.append("\n")
else:
sections.append(bio.introduction)
diff --git a/rmagent/generators/biography/templates.py b/rmagent/generators/biography/templates.py
index cbdf0e9..f90eec1 100644
--- a/rmagent/generators/biography/templates.py
+++ b/rmagent/generators/biography/templates.py
@@ -6,10 +6,11 @@
from __future__ import annotations
-from .models import PersonContext, get_row_value
from rmagent.rmlib.parsers.date_parser import is_unknown_date, parse_rm_date
from rmagent.rmlib.parsers.name_parser import format_full_name
+from .models import PersonContext, get_row_value
+
class BiographyTemplates:
"""Generates biography sections using templates (no AI)."""
diff --git a/rmagent/generators/hugo_exporter.py b/rmagent/generators/hugo_exporter.py
index c6dd1ae..d25a366 100644
--- a/rmagent/generators/hugo_exporter.py
+++ b/rmagent/generators/hugo_exporter.py
@@ -529,9 +529,7 @@ def _build_index(db: RMDatabase) -> str:
lines.append(f"- [{person['name']}]({person['slug']}/){lifespan}")
lines.append("")
- lines.append(
- f"*{len(people)} biographies • Generated {datetime.now().strftime('%Y-%m-%d')}*"
- )
+ lines.append(f"*{len(people)} biographies • Generated {datetime.now().strftime('%Y-%m-%d')}*")
return "\n".join(lines)
diff --git a/rmagent/generators/quality_report.py b/rmagent/generators/quality_report.py
index da84822..9fa84eb 100644
--- a/rmagent/generators/quality_report.py
+++ b/rmagent/generators/quality_report.py
@@ -153,15 +153,11 @@ def _apply_filters(
# Apply category filter
if category_filter:
- filtered_issues = [
- issue for issue in filtered_issues if issue.category == category_filter
- ]
+ filtered_issues = [issue for issue in filtered_issues if issue.category == category_filter]
# Apply severity filter
if severity_filter:
- filtered_issues = [
- issue for issue in filtered_issues if issue.severity == severity_filter
- ]
+ filtered_issues = [issue for issue in filtered_issues if issue.severity == severity_filter]
# Recalculate totals for filtered issues
totals_by_severity = {
@@ -320,10 +316,7 @@ def _format_html(self, report: QualityReport) -> str:
" body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, "
"sans-serif; margin: 40px; }"
)
- lines.append(
- " h1 { color: #333; border-bottom: 2px solid #4CAF50; "
- "padding-bottom: 10px; }"
- )
+ lines.append(" h1 { color: #333; border-bottom: 2px solid #4CAF50; " "padding-bottom: 10px; }")
lines.append(" h2 { color: #555; margin-top: 30px; }")
lines.append(" h3 { color: #666; }")
lines.append(
@@ -338,10 +331,7 @@ def _format_html(self, report: QualityReport) -> str:
" .issue { background-color: #fff; border: 1px solid #ddd; padding: 15px; "
"margin: 15px 0; border-radius: 4px; }"
)
- lines.append(
- " .issue-header { font-weight: bold; font-size: 1.1em; "
- "margin-bottom: 10px; }"
- )
+ lines.append(" .issue-header { font-weight: bold; font-size: 1.1em; " "margin-bottom: 10px; }")
lines.append(" .metadata { color: #666; font-size: 0.9em; }")
lines.append(" .samples { margin-top: 10px; }")
lines.append(" .sample { margin: 5px 0; padding-left: 20px; }")
@@ -354,24 +344,16 @@ def _format_html(self, report: QualityReport) -> str:
# Content
lines.append(" Data Quality Report
")
- lines.append(
- f" Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
"
- )
+ lines.append(f" Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
")
# Summary
lines.append(" ")
lines.append("
Summary Statistics
")
lines.append("
")
lines.append(" | Metric | Count |
")
- lines.append(
- f" | Total People | {report.summary.get('total_people', 0):,} |
"
- )
- lines.append(
- f" | Total Events | {report.summary.get('total_events', 0):,} |
"
- )
- lines.append(
- f" | Total Sources | {report.summary.get('total_sources', 0):,} |
"
- )
+ lines.append(f" | Total People | {report.summary.get('total_people', 0):,} |
")
+ lines.append(f" | Total Events | {report.summary.get('total_events', 0):,} |
")
+ lines.append(f" | Total Sources | {report.summary.get('total_sources', 0):,} |
")
lines.append(
f" | Total Citations | {report.summary.get('total_citations', 0):,} |
"
)
@@ -409,9 +391,7 @@ def _format_html(self, report: QualityReport) -> str:
severity_issues = [issue for issue in report.issues if issue.severity == severity]
if severity_issues:
css_class = severity.value
- lines.append(
- f" {severity.value.capitalize()} Issues
"
- )
+ lines.append(f" {severity.value.capitalize()} Issues
")
for issue in severity_issues:
lines.append(" ")
@@ -423,9 +403,7 @@ def _format_html(self, report: QualityReport) -> str:
lines.append(f"
{issue.description}
")
if issue.samples:
- lines.append(
- "
Sample Issues:"
- )
+ lines.append(" Sample Issues:")
for sample in issue.samples[: self.sample_limit]:
sample_text = self._format_sample_html(sample)
lines.append(f" - {sample_text}
")
diff --git a/rmagent/generators/timeline.py b/rmagent/generators/timeline.py
index 1b3e22e..744defe 100644
--- a/rmagent/generators/timeline.py
+++ b/rmagent/generators/timeline.py
@@ -234,9 +234,7 @@ def _extract(db: RMDatabase) -> dict:
continue
# Build timeline event
- timeline_event = self._build_timeline_event(
- db, event, person_id, birth_year, group_by_phase
- )
+ timeline_event = self._build_timeline_event(db, event, person_id, birth_year, group_by_phase)
if timeline_event:
timeline_events.append(timeline_event)
@@ -286,9 +284,7 @@ def _build_timeline_event(
place_formatted = self._format_place_for_timeline(place_str)
# Build narrative text
- narrative = self._build_event_narrative(
- event_type_name, display_date, place_formatted, details
- )
+ narrative = self._build_event_narrative(event_type_name, display_date, place_formatted, details)
# Get media
media = self._get_event_media(db, event_id)
@@ -330,9 +326,7 @@ def _build_timeline_event(
return timeline_event
- def _parse_date_to_timelinejs(
- self, rm_date: str
- ) -> tuple[dict | None, dict | None, str | None]:
+ def _parse_date_to_timelinejs(self, rm_date: str) -> tuple[dict | None, dict | None, str | None]:
"""Parse RM11 date to TimelineJS3 format."""
# Check if date string is null/unknown (empty or starts with ".")
if not rm_date or rm_date.startswith("."):
@@ -425,11 +419,7 @@ def _get_event_type_name(self, db: RMDatabase, event_type_id: int) -> str:
"""Get event type name from FactTypeTable."""
cursor = db.execute("SELECT Name FROM FactTypeTable WHERE FactTypeID = ?", (event_type_id,))
row = cursor.fetchone()
- return (
- _get_row_value(row, "Name", f"Event {event_type_id}")
- if row
- else f"Event {event_type_id}"
- )
+ return _get_row_value(row, "Name", f"Event {event_type_id}") if row else f"Event {event_type_id}"
def _get_event_media(self, db: RMDatabase, event_id: int) -> dict | None:
"""Get primary media for an event."""
diff --git a/rmagent/rmlib/database.py b/rmagent/rmlib/database.py
index 7613c19..1c801b5 100644
--- a/rmagent/rmlib/database.py
+++ b/rmagent/rmlib/database.py
@@ -145,9 +145,7 @@ def _load_rmnocase_collation(self) -> None:
# - caseLevel=off: Ignore case differences
# - normalization=on: Normalize Unicode characters
self._conn.execute(
- "SELECT icu_load_collation("
- "'en_US@colStrength=primary;caseLevel=off;normalization=on',"
- "'RMNOCASE')"
+ "SELECT icu_load_collation(" "'en_US@colStrength=primary;caseLevel=off;normalization=on'," "'RMNOCASE')"
)
logger.debug("RMNOCASE collation registered successfully")
finally:
@@ -173,9 +171,7 @@ def connection(self) -> sqlite3.Connection:
DatabaseError: If no active connection
"""
if self._conn is None:
- raise DatabaseError(
- "No active connection - use 'with RMDatabase(...)' or call connect()"
- )
+ raise DatabaseError("No active connection - use 'with RMDatabase(...)' or call connect()")
return self._conn
def execute(self, query: str, params: tuple | None = None) -> sqlite3.Cursor:
diff --git a/rmagent/rmlib/models.py b/rmagent/rmlib/models.py
index 04f1c1f..4f9c720 100644
--- a/rmagent/rmlib/models.py
+++ b/rmagent/rmlib/models.py
@@ -115,28 +115,18 @@ class Person(RMBaseModel):
"""
person_id: int = Field(..., alias="PersonID", description="Unique person identifier")
- unique_id: str | None = Field(
- None, alias="UniqueID", description="36-character hexadecimal unique ID"
- )
+ unique_id: str | None = Field(None, alias="UniqueID", description="36-character hexadecimal unique ID")
sex: Sex = Field(..., alias="Sex", description="Person's sex/gender")
parent_id: int = Field(0, alias="ParentID", description="FamilyID of parents (0 = no parents)")
spouse_id: int = Field(0, alias="SpouseID", description="FamilyID of spouse (0 = no spouse)")
- color: int = Field(
- 0, alias="Color", ge=0, le=27, description="Color coding (0=None, 1-27=specific colors)"
- )
- relate1: int = Field(
- 0, ge=0, le=999, alias="Relate1", description="Generations to Most Recent Common Ancestor"
- )
- relate2: int = Field(
- 0, ge=0, alias="Relate2", description="Generations from reference person to MRCA"
- )
+ color: int = Field(0, alias="Color", ge=0, le=27, description="Color coding (0=None, 1-27=specific colors)")
+ relate1: int = Field(0, ge=0, le=999, alias="Relate1", description="Generations to Most Recent Common Ancestor")
+ relate2: int = Field(0, ge=0, alias="Relate2", description="Generations from reference person to MRCA")
flags: int = Field(0, ge=0, le=10, alias="Flags", description="Relationship prefix descriptor")
living: bool = Field(False, alias="Living", description="True if person is living")
is_private: int = Field(0, alias="IsPrivate", description="Privacy flag (not implemented)")
proof: int = Field(0, alias="Proof", description="Proof level (not implemented)")
- bookmark: int = Field(
- 0, alias="Bookmark", description="Bookmark flag (0=not bookmarked, 1=bookmarked)"
- )
+ bookmark: int = Field(0, alias="Bookmark", description="Bookmark flag (0=not bookmarked, 1=bookmarked)")
note: str | None = Field(None, alias="Note", description="User-defined notes")
@field_validator("sex", mode="before")
@@ -168,43 +158,25 @@ class Name(RMBaseModel):
surname: str | None = Field(None, alias="Surname", description="Surname/family name")
given: str | None = Field(None, alias="Given", description="Given/first name")
prefix: str | None = Field(None, alias="Prefix", description="Name prefix (Dr., Rev., etc.)")
- suffix: str | None = Field(
- None, alias="Suffix", description="Name suffix (Jr., Sr., III, etc.)"
- )
+ suffix: str | None = Field(None, alias="Suffix", description="Name suffix (Jr., Sr., III, etc.)")
nickname: str | None = Field(None, alias="Nickname", description="Nickname")
name_type: NameType = Field(NameType.NULL, alias="NameType", description="Type of name")
- date: str | None = Field(
- None, alias="Date", description="Date associated with this name (24-char encoded)"
- )
+ date: str | None = Field(None, alias="Date", description="Date associated with this name (24-char encoded)")
sort_date: int | None = Field(
None,
alias="SortDate",
description="Sortable date representation (9223372036854775807 = unknown)",
)
- is_primary: bool = Field(
- False, alias="IsPrimary", description="True if this is the primary name"
- )
+ is_primary: bool = Field(False, alias="IsPrimary", description="True if this is the primary name")
is_private: bool = Field(False, alias="IsPrivate", description="True if name is private")
- proof: ProofLevel = Field(
- ProofLevel.BLANK, alias="Proof", description="Evidence quality rating"
- )
+ proof: ProofLevel = Field(ProofLevel.BLANK, alias="Proof", description="Evidence quality rating")
sentence: str | None = Field(None, alias="Sentence", description="Custom sentence template")
note: str | None = Field(None, alias="Note", description="User-defined notes")
- birth_year: int | None = Field(
- None, alias="BirthYear", description="Year extracted from birth event"
- )
- death_year: int | None = Field(
- None, alias="DeathYear", description="Year extracted from death event"
- )
- surname_mp: str | None = Field(
- None, alias="SurnameMP", description="Metaphone encoding of surname"
- )
- given_mp: str | None = Field(
- None, alias="GivenMP", description="Metaphone encoding of given name"
- )
- nickname_mp: str | None = Field(
- None, alias="NicknameMP", description="Metaphone encoding of nickname"
- )
+ birth_year: int | None = Field(None, alias="BirthYear", description="Year extracted from birth event")
+ death_year: int | None = Field(None, alias="DeathYear", description="Year extracted from death event")
+ surname_mp: str | None = Field(None, alias="SurnameMP", description="Metaphone encoding of surname")
+ given_mp: str | None = Field(None, alias="GivenMP", description="Metaphone encoding of given name")
+ nickname_mp: str | None = Field(None, alias="NicknameMP", description="Metaphone encoding of nickname")
@field_validator("is_primary", "is_private", mode="before")
@classmethod
@@ -238,26 +210,18 @@ class Event(RMBaseModel):
event_id: int = Field(..., alias="EventID", description="Unique event identifier")
event_type: int = Field(..., alias="EventType", description="FactTypeID from FactTypeTable")
- owner_type: OwnerType = Field(
- ..., alias="OwnerType", description="Type of owner (person or family)"
- )
+ owner_type: OwnerType = Field(..., alias="OwnerType", description="Type of owner (person or family)")
owner_id: int = Field(..., alias="OwnerID", description="PersonID or FamilyID")
- family_id: int = Field(
- 0, alias="FamilyID", description="FamilyID for parent-related events (0 = not applicable)"
- )
+ family_id: int = Field(0, alias="FamilyID", description="FamilyID for parent-related events (0 = not applicable)")
place_id: int = Field(0, alias="PlaceID", description="PlaceID (0 = no place)")
site_id: int = Field(0, alias="SiteID", description="PlaceID of place details (0 = no details)")
date: str | None = Field(None, alias="Date", description="Date in 24-character encoded format")
- sort_date: int | None = Field(
- None, alias="SortDate", description="Sortable date representation"
- )
+ sort_date: int | None = Field(None, alias="SortDate", description="Sortable date representation")
is_primary: bool = Field(
False, alias="IsPrimary", description="True if this is primary event (suppresses conflicts)"
)
is_private: bool = Field(False, alias="IsPrivate", description="True if event is private")
- proof: ProofLevel = Field(
- ProofLevel.BLANK, alias="Proof", description="Evidence quality rating"
- )
+ proof: ProofLevel = Field(ProofLevel.BLANK, alias="Proof", description="Evidence quality rating")
status: int = Field(0, alias="Status", description="LDS status (0=default, 1-12=LDS statuses)")
sentence: str | None = Field(None, alias="Sentence", description="Custom sentence template")
details: str | None = Field(None, alias="Details", description="Event details/description")
@@ -280,24 +244,16 @@ class Place(RMBaseModel):
"""
place_id: int = Field(..., alias="PlaceID", description="Unique place identifier")
- place_type: PlaceType = Field(
- PlaceType.PLACE, alias="PlaceType", description="Type of place entry"
- )
- name: str | None = Field(
- None, alias="Name", description="Place name (comma-delimited hierarchy)"
- )
+ place_type: PlaceType = Field(PlaceType.PLACE, alias="PlaceType", description="Type of place entry")
+ name: str | None = Field(None, alias="Name", description="Place name (comma-delimited hierarchy)")
abbrev: str | None = Field(None, alias="Abbrev", description="Abbreviated place name")
normalized: str | None = Field(None, alias="Normalized", description="Standardized place name")
latitude: int = Field(0, alias="Latitude", description="Latitude (decimal degrees × 1e7)")
longitude: int = Field(0, alias="Longitude", description="Longitude (decimal degrees × 1e7)")
- lat_long_exact: bool = Field(
- False, alias="LatLongExact", description="True if coordinates are exact"
- )
+ lat_long_exact: bool = Field(False, alias="LatLongExact", description="True if coordinates are exact")
master_id: int = Field(0, alias="MasterID", description="PlaceID of master place (for details)")
note: str | None = Field(None, alias="Note", description="User-defined notes")
- reverse: str | None = Field(
- None, alias="Reverse", description="Reverse order of place hierarchy (for indexing)"
- )
+ reverse: str | None = Field(None, alias="Reverse", description="Reverse order of place hierarchy (for indexing)")
fs_id: int | None = Field(None, alias="fsID", description="FamilySearch place ID")
an_id: int | None = Field(None, alias="anID", description="Ancestry.com place ID")
@@ -338,9 +294,7 @@ class Source(RMBaseModel):
comments: str | None = Field(None, alias="Comments", description="Source comments")
is_private: bool = Field(False, alias="IsPrivate", description="True if source is private")
template_id: int = Field(0, alias="TemplateID", description="SourceTemplateID (0=free-form)")
- fields: bytes | None = Field(
- None, alias="Fields", description="XML BLOB with field values (UTF-8 with BOM)"
- )
+ fields: bytes | None = Field(None, alias="Fields", description="XML BLOB with field values (UTF-8 with BOM)")
@field_validator("is_private", mode="before")
@classmethod
@@ -364,18 +318,12 @@ class Citation(RMBaseModel):
actual_text: str | None = Field(None, alias="ActualText", description="Research note")
ref_number: str | None = Field(None, alias="RefNumber", description="Detail reference number")
footnote: str | None = Field(None, alias="Footnote", description="Custom footnote override")
- short_footnote: str | None = Field(
- None, alias="ShortFootnote", description="Custom short footnote override"
- )
- bibliography: str | None = Field(
- None, alias="Bibliography", description="Custom bibliography override"
- )
+ short_footnote: str | None = Field(None, alias="ShortFootnote", description="Custom short footnote override")
+ bibliography: str | None = Field(None, alias="Bibliography", description="Custom bibliography override")
fields: bytes | None = Field(
None, alias="Fields", description="XML BLOB with citation field values (UTF-8 with BOM)"
)
- citation_name: str | None = Field(
- None, alias="CitationName", description="Auto-generated or user-defined name"
- )
+ citation_name: str | None = Field(None, alias="CitationName", description="Auto-generated or user-defined name")
class Family(RMBaseModel):
@@ -392,21 +340,11 @@ class Family(RMBaseModel):
husb_order: int = Field(0, alias="HusbOrder", description="Spouse order (0=never rearranged)")
wife_order: int = Field(0, alias="WifeOrder", description="Spouse order (0=never rearranged)")
is_private: bool = Field(False, alias="IsPrivate", description="True if family is private")
- proof: ProofLevel = Field(
- ProofLevel.BLANK, alias="Proof", description="Evidence quality rating"
- )
- father_label: ParentLabel = Field(
- ParentLabel.FATHER, alias="FatherLabel", description="Label for father role"
- )
- mother_label: MotherLabel = Field(
- MotherLabel.MOTHER, alias="MotherLabel", description="Label for mother role"
- )
- father_label_str: str | None = Field(
- None, alias="FatherLabelStr", description="Custom label when FatherLabel=99"
- )
- mother_label_str: str | None = Field(
- None, alias="MotherLabelStr", description="Custom label when MotherLabel=99"
- )
+ proof: ProofLevel = Field(ProofLevel.BLANK, alias="Proof", description="Evidence quality rating")
+ father_label: ParentLabel = Field(ParentLabel.FATHER, alias="FatherLabel", description="Label for father role")
+ mother_label: MotherLabel = Field(MotherLabel.MOTHER, alias="MotherLabel", description="Label for mother role")
+ father_label_str: str | None = Field(None, alias="FatherLabelStr", description="Custom label when FatherLabel=99")
+ mother_label_str: str | None = Field(None, alias="MotherLabelStr", description="Custom label when MotherLabel=99")
note: str | None = Field(None, alias="Note", description="User-defined notes")
@field_validator("is_private", mode="before")
@@ -430,21 +368,15 @@ class FactType(RMBaseModel):
alias="FactTypeID",
description="Unique fact type identifier (<1000=built-in, ≥1000=custom)",
)
- owner_type: OwnerType = Field(
- ..., alias="OwnerType", description="Type of owner (person or family)"
- )
+ owner_type: OwnerType = Field(..., alias="OwnerType", description="Type of owner (person or family)")
name: str = Field(..., alias="Name", description="Fact type name")
abbrev: str | None = Field(None, alias="Abbrev", description="Abbreviation")
gedcom_tag: str | None = Field(None, alias="GedcomTag", description="GEDCOM tag")
- use_value: bool = Field(
- False, alias="UseValue", description="True if fact uses description field"
- )
+ use_value: bool = Field(False, alias="UseValue", description="True if fact uses description field")
use_date: bool = Field(True, alias="UseDate", description="True if fact uses date field")
use_place: bool = Field(True, alias="UsePlace", description="True if fact uses place field")
sentence: str | None = Field(None, alias="Sentence", description="Sentence template")
- flags: int = Field(
- 0, alias="Flags", description="6-bit position-coded flags for Include settings"
- )
+ flags: int = Field(0, alias="Flags", description="6-bit position-coded flags for Include settings")
@field_validator("use_value", "use_date", "use_place", mode="before")
@classmethod
diff --git a/rmagent/rmlib/parsers/blob_parser.py b/rmagent/rmlib/parsers/blob_parser.py
index b6bdd3c..84ed276 100644
--- a/rmagent/rmlib/parsers/blob_parser.py
+++ b/rmagent/rmlib/parsers/blob_parser.py
@@ -170,9 +170,7 @@ def parse_template_field_defs(blob_data: bytes | None) -> list[TemplateField]:
hint = hint_elem.text if hint_elem is not None else None
long_hint = long_hint_elem.text if long_hint_elem is not None else None
- citation_field = (
- citation_field_elem.text == "True" if citation_field_elem is not None else False
- )
+ citation_field = citation_field_elem.text == "True" if citation_field_elem is not None else False
field_defs.append(
TemplateField(
@@ -242,12 +240,7 @@ def is_freeform_source(fields: dict[str, str]) -> bool:
Returns:
True if this appears to be a free-form source
"""
- return (
- len(fields) == 3
- and "Footnote" in fields
- and "ShortFootnote" in fields
- and "Bibliography" in fields
- )
+ return len(fields) == 3 and "Footnote" in fields and "ShortFootnote" in fields and "Bibliography" in fields
def get_citation_level_fields(template_fields: list[TemplateField]) -> list[str]:
diff --git a/rmagent/rmlib/parsers/date_parser.py b/rmagent/rmlib/parsers/date_parser.py
index 3b85fb1..bdfd899 100644
--- a/rmagent/rmlib/parsers/date_parser.py
+++ b/rmagent/rmlib/parsers/date_parser.py
@@ -176,13 +176,7 @@ def to_datetime(self) -> datetime | None:
- Date is BC
- Date is a range
"""
- if (
- self.is_null
- or self.date_type == DateType.TEXT
- or self.is_partial
- or self.is_bc
- or self.is_range
- ):
+ if self.is_null or self.date_type == DateType.TEXT or self.is_partial or self.is_bc or self.is_range:
return None
try:
@@ -329,9 +323,7 @@ def parse_rm_date(date_str: str | None) -> RMDate:
year, month, day, is_bc, is_double_date, qualifier = _parse_date_components(date_str[2:13])
# Parse second date (for ranges)
- year2, month2, day2, is_bc2, is_double_date2, qualifier2 = _parse_date_components(
- date_str[13:24]
- )
+ year2, month2, day2, is_bc2, is_double_date2, qualifier2 = _parse_date_components(date_str[13:24])
return RMDate(
date_type=date_type,
diff --git a/rmagent/rmlib/parsers/name_parser.py b/rmagent/rmlib/parsers/name_parser.py
index c40ff0c..4e595af 100644
--- a/rmagent/rmlib/parsers/name_parser.py
+++ b/rmagent/rmlib/parsers/name_parser.py
@@ -284,9 +284,7 @@ def get_all_names(person_id: int, db_connection: sqlite3.Connection) -> list[Nam
return names
-def get_name_at_date(
- person_id: int, event_sort_date: int | None, db_connection: sqlite3.Connection
-) -> Name | None:
+def get_name_at_date(person_id: int, event_sort_date: int | None, db_connection: sqlite3.Connection) -> Name | None:
"""
Get appropriate name for a specific date (context-aware).
diff --git a/rmagent/rmlib/parsers/place_parser.py b/rmagent/rmlib/parsers/place_parser.py
index 5f3df14..d2497db 100644
--- a/rmagent/rmlib/parsers/place_parser.py
+++ b/rmagent/rmlib/parsers/place_parser.py
@@ -225,9 +225,7 @@ def format_place_medium(place_name: str | None) -> str:
return place_name
-def convert_coordinates(
- lat_int: int | None, lon_int: int | None
-) -> tuple[float | None, float | None]:
+def convert_coordinates(lat_int: int | None, lon_int: int | None) -> tuple[float | None, float | None]:
"""
Convert integer coordinates to decimal degrees.
diff --git a/rmagent/rmlib/prototype.py b/rmagent/rmlib/prototype.py
index 37db724..2fd060c 100644
--- a/rmagent/rmlib/prototype.py
+++ b/rmagent/rmlib/prototype.py
@@ -388,9 +388,7 @@ def format_family(person_id: int, query_service: QueryService) -> str:
if children:
lines.append(f"\nChildren ({len(children)}):")
for child in children:
- child_name = format_full_name(
- given=get_row_value(child, "Given"), surname=get_row_value(child, "Surname")
- )
+ child_name = format_full_name(given=get_row_value(child, "Given"), surname=get_row_value(child, "Surname"))
birth_year = get_row_value(child, "BirthYear", "")
year_str = f" (b. {birth_year})" if birth_year else ""
lines.append(f" - {child_name} (ID: {child['PersonID']}){year_str}")
diff --git a/rmagent/rmlib/quality.py b/rmagent/rmlib/quality.py
index 7af0057..c637c67 100644
--- a/rmagent/rmlib/quality.py
+++ b/rmagent/rmlib/quality.py
@@ -20,7 +20,7 @@
parse_source_fields,
parse_template_field_defs,
)
-from .parsers.date_parser import UNKNOWN_SORT_DATE, parse_rm_date
+from .parsers.date_parser import UNKNOWN_SORT_DATE
# Numeric constants
YEAR_SECONDS = 31557600
@@ -688,11 +688,7 @@ def _rule_4_3(self, rule: QualityRule) -> list[QualityIssue]:
continue
required = [field.name for field in template_fields if not field.citation_field]
- missing = [
- field_name
- for field_name in required
- if not actual_fields.get(field_name, "").strip()
- ]
+ missing = [field_name for field_name in required if not actual_fields.get(field_name, "").strip()]
if missing:
issues.append(
{
@@ -753,9 +749,7 @@ def _rule_5_1(self, rule: QualityRule) -> list[QualityIssue]:
AND LENGTH(CAST(ABS(CAST(SortDate AS INTEGER)) AS TEXT)) NOT IN (18, 19))
)
"""
- rows = self.db.query(
- sql, (UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE)
- )
+ rows = self.db.query(sql, (UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE))
if not rows:
return []
diff --git a/rmagent/rmlib/queries.py b/rmagent/rmlib/queries.py
index 991923d..70a546e 100644
--- a/rmagent/rmlib/queries.py
+++ b/rmagent/rmlib/queries.py
@@ -337,9 +337,7 @@ def get_unsourced_vital_events(
return self.db.query(sql, tuple(params))
# Pattern 13
- def find_places_by_name(
- self, pattern: str, limit: int = DEFAULT_RESULT_LIMIT, exact: bool = False
- ):
+ def find_places_by_name(self, pattern: str, limit: int = DEFAULT_RESULT_LIMIT, exact: bool = False):
"""
Find places by name with flexible or exact matching.
@@ -382,7 +380,7 @@ def find_places_by_name(
else:
# Flexible matching (original behavior)
# Split pattern by comma-space to get hierarchy parts
- parts = [p.strip() for p in pattern.split(',') if p.strip()]
+ parts = [p.strip() for p in pattern.split(",") if p.strip()]
if len(parts) == 1:
# Simple case: single search term
@@ -453,9 +451,7 @@ def find_places_within_radius(
center_lon = center["Longitude"] if center["Longitude"] is not None else 0
if not center_lat or not center_lon or center_lat == 0 or center_lon == 0:
- raise ValueError(
- f"Place '{center['Name']}' (ID {center_place_id}) has no GPS coordinates"
- )
+ raise ValueError(f"Place '{center['Name']}' (ID {center_place_id}) has no GPS coordinates")
# Convert integer coordinates to degrees
center_lat_deg = center_lat / 10_000_000.0
@@ -481,9 +477,7 @@ def find_places_within_radius(
place_lat_deg = place["Latitude"] / 10_000_000.0
place_lon_deg = place["Longitude"] / 10_000_000.0
- distance_km = _haversine_distance(
- center_lat_deg, center_lon_deg, place_lat_deg, place_lon_deg
- )
+ distance_km = _haversine_distance(center_lat_deg, center_lon_deg, place_lat_deg, place_lon_deg)
if distance_km <= radius_km:
results.append(
@@ -562,7 +556,7 @@ def _haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> f
import math
# Earth radius in kilometers
- R = 6371.0
+ earth_radius_km = 6371.0
# Convert degrees to radians
lat1_rad = math.radians(lat1)
@@ -571,11 +565,8 @@ def _haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> f
delta_lon = math.radians(lon2 - lon1)
# Haversine formula
- a = (
- math.sin(delta_lat / 2) ** 2
- + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon / 2) ** 2
- )
+ a = math.sin(delta_lat / 2) ** 2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon / 2) ** 2
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
- distance = R * c
+ distance = earth_radius_km * c
return distance
diff --git a/sqlite-extension/python_example.py b/sqlite-extension/python_example.py
index de3bab0..c717b89 100755
--- a/sqlite-extension/python_example.py
+++ b/sqlite-extension/python_example.py
@@ -49,9 +49,7 @@ def connect_rmtree(db_path, extension_path="./sqlite-extension/icu.dylib"):
# - caseLevel=off: Ignore case differences
# - normalization=on: Normalize Unicode characters
conn.execute(
- "SELECT icu_load_collation("
- "'en_US@colStrength=primary;caseLevel=off;normalization=on',"
- "'RMNOCASE')"
+ "SELECT icu_load_collation(" "'en_US@colStrength=primary;caseLevel=off;normalization=on'," "'RMNOCASE')"
)
finally:
# Disable extension loading (security best practice)
diff --git a/tests/integration/test_llm_providers.py b/tests/integration/test_llm_providers.py
index 0430307..42ccbc6 100644
--- a/tests/integration/test_llm_providers.py
+++ b/tests/integration/test_llm_providers.py
@@ -176,9 +176,7 @@ class TestProviderInterfaceCompliance:
),
(
OllamaProvider,
- lambda m: setattr(
- m, "generate", lambda **kw: {"response": "Text", "eval_count": 10}
- ),
+ lambda m: setattr(m, "generate", lambda **kw: {"response": "Text", "eval_count": 10}),
),
],
)
diff --git a/tests/integration/test_real_providers.py b/tests/integration/test_real_providers.py
index 4698482..4c183b3 100644
--- a/tests/integration/test_real_providers.py
+++ b/tests/integration/test_real_providers.py
@@ -24,6 +24,7 @@
if _env_path.exists():
load_dotenv(_env_path)
+
# Environment checks - detect placeholder vs real keys
def _is_real_key(key_value: str | None) -> bool:
"""Check if API key is real (not placeholder like sk-xxxxx)."""
@@ -68,9 +69,7 @@ def test_genealogy_specific_prompt(self):
assert result.usage.total_tokens > 0
# Check for genealogy keywords
text_lower = result.text.lower()
- assert any(
- word in text_lower for word in ["census", "vital", "records", "birth", "death", "marriage"]
- )
+ assert any(word in text_lower for word in ["census", "vital", "records", "birth", "death", "marriage"])
@pytest.mark.real_api
diff --git a/tests/unit/test_biography_generator.py b/tests/unit/test_biography_generator.py
index 39d4dc0..d1a376a 100644
--- a/tests/unit/test_biography_generator.py
+++ b/tests/unit/test_biography_generator.py
@@ -336,6 +336,7 @@ def test_apply_privacy_rules_for_living_person(self):
def test_generate_introduction(self):
"""Test generating introduction section."""
from rmagent.generators.biography import BiographyTemplates
+
templates = BiographyTemplates()
context = PersonContext(
@@ -370,6 +371,7 @@ def test_generate_introduction(self):
def test_generate_early_life(self):
"""Test generating early life section."""
from rmagent.generators.biography import BiographyTemplates
+
templates = BiographyTemplates()
# Test with siblings
@@ -399,6 +401,7 @@ def test_generate_early_life(self):
def test_format_sources_footnote_style(self):
"""Test formatting sources in footnote style."""
from rmagent.generators.biography import CitationProcessor
+
citation_processor = CitationProcessor()
context = PersonContext(
@@ -438,6 +441,7 @@ def test_format_sources_footnote_style(self):
def test_format_sources_parenthetical_style(self):
"""Test formatting sources in parenthetical style."""
from rmagent.generators.biography import CitationProcessor
+
citation_processor = CitationProcessor()
context = PersonContext(
@@ -470,6 +474,7 @@ def test_format_sources_parenthetical_style(self):
def test_parse_ai_response(self):
"""Test parsing AI-generated biography."""
from rmagent.generators.biography import BiographyTemplates
+
templates = BiographyTemplates()
ai_response = """
@@ -641,9 +646,7 @@ def test_categorize_events(self, real_db_path, extension_path):
}, # Residence
]
- vital, education, occupation, military, residence, other = generator._categorize_events(
- db, events
- )
+ vital, education, occupation, military, residence, other = generator._categorize_events(db, events)
assert len(vital) == 1
assert len(education) == 1
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index 74f5715..68e7f0c 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -87,9 +87,7 @@ def test_bio_with_invalid_length(self, runner, test_db_path):
def test_bio_no_ai_template_based(self, runner, test_db_path, tmp_path):
"""Test bio command with --no-ai flag (template-based generation)."""
output_file = tmp_path / "bio_test.md"
- result = runner.invoke(
- cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--output", str(output_file)]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--output", str(output_file)])
# Should succeed with template-based generation
assert result.exit_code == 0
assert output_file.exists()
@@ -119,26 +117,20 @@ def test_bio_length_variations(self, runner, test_db_path):
def test_bio_citation_styles(self, runner, test_db_path):
"""Test bio with different citation styles."""
for style in ["footnote", "parenthetical", "narrative"]:
- result = runner.invoke(
- cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--citation-style", style]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--citation-style", style])
assert result.exit_code == 0
def test_bio_with_file_output(self, runner, test_db_path, tmp_path):
"""Test bio with file output."""
output_file = tmp_path / "biography.md"
- result = runner.invoke(
- cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--output", str(output_file)]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--output", str(output_file)])
assert result.exit_code == 0
assert "Biography written to" in result.output
assert output_file.exists()
def test_bio_no_sources(self, runner, test_db_path):
"""Test bio with --no-sources flag."""
- result = runner.invoke(
- cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--no-sources"]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--no-sources"])
assert result.exit_code == 0
# Biography should not include sources section when --no-sources is used
# (We can't easily verify this without parsing output, but command should succeed)
@@ -165,9 +157,7 @@ def test_quality_with_invalid_format(self, runner):
def test_quality_basic(self, runner, test_db_path, tmp_path):
"""Test basic quality report generation."""
output_file = tmp_path / "quality.md"
- result = runner.invoke(
- cli, ["--database", test_db_path, "quality", "--output", str(output_file)]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "quality", "--output", str(output_file)])
assert result.exit_code == 0
assert output_file.exists()
assert "📊 Data Quality Summary" in result.output
@@ -397,9 +387,7 @@ def test_timeline_with_include_family(self, runner, test_db_path, tmp_path):
def test_timeline_invalid_format(self, runner, test_db_path):
"""Test timeline with invalid format option."""
- result = runner.invoke(
- cli, ["--database", test_db_path, "timeline", "1", "--format", "invalid"]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "timeline", "1", "--format", "invalid"])
assert result.exit_code != 0
@@ -573,9 +561,7 @@ def test_search_by_name(self, runner, test_db_path):
def test_search_by_full_name(self, runner, test_db_path):
"""Test search by full name (given and surname)."""
- result = runner.invoke(
- cli, ["--database", test_db_path, "search", "--name", "Michael Iams"]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "Michael Iams"])
assert result.exit_code == 0
def test_search_by_place(self, runner, test_db_path):
@@ -587,40 +573,30 @@ def test_search_by_place(self, runner, test_db_path):
def test_search_with_limit(self, runner, test_db_path):
"""Test search with custom limit."""
- result = runner.invoke(
- cli, ["--database", test_db_path, "search", "--name", "Smith", "--limit", "10"]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "Smith", "--limit", "10"])
assert result.exit_code == 0
def test_search_exact_mode(self, runner, test_db_path):
"""Test search with --exact flag (no phonetic matching)."""
- result = runner.invoke(
- cli, ["--database", test_db_path, "search", "--name", "Iams", "--exact"]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "Iams", "--exact"])
assert result.exit_code == 0
def test_search_name_and_place(self, runner, test_db_path):
"""Test search with both name and place criteria."""
- result = runner.invoke(
- cli, ["--database", test_db_path, "search", "--name", "Iams", "--place", "Maryland"]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "Iams", "--place", "Maryland"])
# Should show results for both searches
assert result.exit_code == 0
def test_search_with_surname_variation(self, runner, test_db_path):
"""Test search with surname variation syntax [variant]."""
- result = runner.invoke(
- cli, ["--database", test_db_path, "search", "--name", "John Iiams [Ijams]"]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "John Iiams [Ijams]"])
assert result.exit_code == 0
# Should show that it's searching multiple variations
assert "Searching 2 name variations" in result.output or "Found" in result.output
def test_search_with_multiple_variations(self, runner, test_db_path):
"""Test search with multiple surname variations."""
- result = runner.invoke(
- cli, ["--database", test_db_path, "search", "--name", "John Iams [Ijams] [Imes]"]
- )
+ result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "John Iams [Ijams] [Imes]"])
assert result.exit_code == 0
# Should search 3 variations (base + 2 variants)
assert "Searching 3 name variations" in result.output or "Found" in result.output
diff --git a/tests/unit/test_hugo_exporter.py b/tests/unit/test_hugo_exporter.py
index df3c770..a69260a 100644
--- a/tests/unit/test_hugo_exporter.py
+++ b/tests/unit/test_hugo_exporter.py
@@ -112,9 +112,7 @@ def test_export_person_raises_error_without_database(self, tmp_path):
with pytest.raises(ValueError, match="No database provided"):
exporter.export_person(person_id=1, output_dir=tmp_path)
- def test_export_person_raises_error_for_nonexistent_person(
- self, tmp_path, real_db_path, extension_path
- ):
+ def test_export_person_raises_error_for_nonexistent_person(self, tmp_path, real_db_path, extension_path):
"""Test that export_person raises ValueError for nonexistent person."""
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
@@ -276,9 +274,7 @@ def test_export_batch_with_index(self, tmp_path, real_db_path, extension_path):
assert "Family Biographies" in content
assert "---" in content # Has front matter
- def test_export_batch_handles_invalid_person_gracefully(
- self, tmp_path, real_db_path, extension_path
- ):
+ def test_export_batch_handles_invalid_person_gracefully(self, tmp_path, real_db_path, extension_path):
"""Test batch export continues when one person fails."""
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
@@ -351,9 +347,7 @@ def test_complete_hugo_export_workflow(self, tmp_path, real_db_path, extension_p
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
- exporter = HugoExporter(
- db=real_db_path, extension_path=extension_path, media_base_path="/media/"
- )
+ exporter = HugoExporter(db=real_db_path, extension_path=extension_path, media_base_path="/media/")
# Create Hugo directory structure
content_dir = tmp_path / "content" / "people"
@@ -403,9 +397,7 @@ def test_media_references_in_export(self, tmp_path, real_db_path, extension_path
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
- exporter = HugoExporter(
- db=real_db_path, extension_path=extension_path, media_base_path="/media/"
- )
+ exporter = HugoExporter(db=real_db_path, extension_path=extension_path, media_base_path="/media/")
result = exporter.export_person(
person_id=1,
diff --git a/tests/unit/test_llm_provider.py b/tests/unit/test_llm_provider.py
index 0722b95..bee920d 100644
--- a/tests/unit/test_llm_provider.py
+++ b/tests/unit/test_llm_provider.py
@@ -41,9 +41,7 @@ def _invoke(self, prompt: str, **kwargs):
return LLMResult(
text=text,
model=self.model,
- usage=TokenUsage(
- prompt_tokens=len(prompt.split()), completion_tokens=len(text.split())
- ),
+ usage=TokenUsage(prompt_tokens=len(prompt.split()), completion_tokens=len(text.split())),
)
@@ -64,9 +62,7 @@ def _invoke(self, prompt: str, **kwargs):
self.invocations += 1
if self.invocations < 2:
raise LLMError("temporary failure")
- return LLMResult(
- text="ok", model=self.model, usage=TokenUsage(prompt_tokens=1, completion_tokens=1)
- )
+ return LLMResult(text="ok", model=self.model, usage=TokenUsage(prompt_tokens=1, completion_tokens=1))
provider = FlakyProvider()
result = provider.generate("prompt")
diff --git a/tests/unit/test_name_parser.py b/tests/unit/test_name_parser.py
index febee4f..0eecdfe 100644
--- a/tests/unit/test_name_parser.py
+++ b/tests/unit/test_name_parser.py
@@ -179,9 +179,7 @@ def test_full_name_minimal(self):
def test_full_name_surname_only(self):
"""Test full name with surname only."""
- name = Name(
- name_id=1, person_id=1, is_primary=True, name_type=NameType.BIRTH, surname="Smith"
- )
+ name = Name(name_id=1, person_id=1, is_primary=True, name_type=NameType.BIRTH, surname="Smith")
assert name.full_name() == "Smith"
@@ -457,9 +455,7 @@ def test_format_minimal(self):
def test_format_no_nickname(self):
"""Test formatting without nickname."""
- full = format_full_name(
- surname="Smith", given="John", nickname="Jack", include_nickname=False
- )
+ full = format_full_name(surname="Smith", given="John", nickname="Jack", include_nickname=False)
assert full == "John Smith"
diff --git a/tests/unit/test_place_parser.py b/tests/unit/test_place_parser.py
index dfeed21..87db7b8 100644
--- a/tests/unit/test_place_parser.py
+++ b/tests/unit/test_place_parser.py
@@ -172,9 +172,7 @@ def test_get_level_2_state(self):
def test_get_level_3_country(self):
"""Test getting level 3 (country)."""
- assert (
- get_place_level("Baltimore, Baltimore, Maryland, United States", 3) == "United States"
- )
+ assert get_place_level("Baltimore, Baltimore, Maryland, United States", 3) == "United States"
def test_get_level_out_of_range(self):
"""Test getting level that doesn't exist."""
@@ -192,10 +190,7 @@ class TestGetPlaceShort:
def test_get_short_us_place_2_levels(self):
"""Test short form for US place (skips county)."""
- assert (
- get_place_short("Baltimore, Baltimore, Maryland, United States", 2)
- == "Baltimore, Maryland"
- )
+ assert get_place_short("Baltimore, Baltimore, Maryland, United States", 2) == "Baltimore, Maryland"
def test_get_short_international_place_2_levels(self):
"""Test short form for international place."""
@@ -217,18 +212,12 @@ class TestFormatPlaceShort:
def test_format_us_4_level(self):
"""Test formatting US 4-level place."""
- assert (
- format_place_short("Baltimore, Baltimore, Maryland, United States")
- == "Baltimore, Maryland"
- )
+ assert format_place_short("Baltimore, Baltimore, Maryland, United States") == "Baltimore, Maryland"
def test_format_us_3_level(self):
"""Test formatting US 3-level place."""
# 3-level place: City, State, Country - format returns City, Country (level 0 and 2)
- assert (
- format_place_short("Abbeville, South Carolina, United States")
- == "Abbeville, United States"
- )
+ assert format_place_short("Abbeville, South Carolina, United States") == "Abbeville, United States"
def test_format_international_4_level(self):
"""Test formatting international 4-level place."""
@@ -249,10 +238,7 @@ class TestFormatPlaceMedium:
def test_format_medium_4_level(self):
"""Test medium format for 4-level place."""
- assert (
- format_place_medium("Baltimore, Baltimore, Maryland, United States")
- == "Baltimore, Baltimore, Maryland"
- )
+ assert format_place_medium("Baltimore, Baltimore, Maryland, United States") == "Baltimore, Baltimore, Maryland"
def test_format_medium_3_level(self):
"""Test medium format for 3-level place."""
diff --git a/tests/unit/test_quality.py b/tests/unit/test_quality.py
index d9a3b69..d3b4c53 100644
--- a/tests/unit/test_quality.py
+++ b/tests/unit/test_quality.py
@@ -7,11 +7,8 @@
from __future__ import annotations
-from collections.abc import Iterable
from pathlib import Path
-import pytest
-
# Ensure repository root is available on sys.path when running with pytest -o addopts=''
PROJECT_ROOT = Path(__file__).resolve().parents[2]
import sys
diff --git a/tests/unit/test_quality_report.py b/tests/unit/test_quality_report.py
index 427ff7e..5f34aae 100644
--- a/tests/unit/test_quality_report.py
+++ b/tests/unit/test_quality_report.py
@@ -261,9 +261,7 @@ def test_generate_raises_error_without_database(self):
with pytest.raises(ValueError, match="No database provided"):
generator.generate(format=ReportFormat.MARKDOWN)
- def test_generate_markdown_with_mock_validation(
- self, real_db_path, extension_path, mock_quality_report
- ):
+ def test_generate_markdown_with_mock_validation(self, real_db_path, extension_path, mock_quality_report):
"""Test generate with mocked validation."""
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
@@ -278,9 +276,7 @@ def test_generate_markdown_with_mock_validation(
assert "Total People:** 10,000" in report
assert "Total Issues Found:** 185" in report
- def test_generate_html_with_mock_validation(
- self, real_db_path, extension_path, mock_quality_report
- ):
+ def test_generate_html_with_mock_validation(self, real_db_path, extension_path, mock_quality_report):
"""Test HTML generation with mocked validation."""
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
@@ -293,9 +289,7 @@ def test_generate_html_with_mock_validation(
assert "" in report
assert "Data Quality Report
" in report
- def test_generate_csv_with_mock_validation(
- self, real_db_path, extension_path, mock_quality_report
- ):
+ def test_generate_csv_with_mock_validation(self, real_db_path, extension_path, mock_quality_report):
"""Test CSV generation with mocked validation."""
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
@@ -309,9 +303,7 @@ def test_generate_csv_with_mock_validation(
assert "Rule Name" in report
assert "1.1" in report
- def test_generate_with_output_path(
- self, tmp_path, real_db_path, extension_path, mock_quality_report
- ):
+ def test_generate_with_output_path(self, tmp_path, real_db_path, extension_path, mock_quality_report):
"""Test writing report to file."""
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
@@ -363,9 +355,7 @@ def test_generate_real_markdown_report(self, real_db_path, extension_path):
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
- generator = QualityReportGenerator(
- db=real_db_path, extension_path=extension_path, sample_limit=5
- )
+ generator = QualityReportGenerator(db=real_db_path, extension_path=extension_path, sample_limit=5)
report = generator.generate(format=ReportFormat.MARKDOWN)
@@ -394,9 +384,7 @@ def test_generate_real_html_report(self, real_db_path, extension_path):
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
- generator = QualityReportGenerator(
- db=real_db_path, extension_path=extension_path, sample_limit=5
- )
+ generator = QualityReportGenerator(db=real_db_path, extension_path=extension_path, sample_limit=5)
report = generator.generate(format=ReportFormat.HTML)
@@ -419,9 +407,7 @@ def test_generate_real_csv_report(self, real_db_path, extension_path):
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
- generator = QualityReportGenerator(
- db=real_db_path, extension_path=extension_path, sample_limit=5
- )
+ generator = QualityReportGenerator(db=real_db_path, extension_path=extension_path, sample_limit=5)
report = generator.generate(format=ReportFormat.CSV)
@@ -441,9 +427,7 @@ def test_generate_all_formats(self, real_db_path, extension_path):
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
- generator = QualityReportGenerator(
- db=real_db_path, extension_path=extension_path, sample_limit=3
- )
+ generator = QualityReportGenerator(db=real_db_path, extension_path=extension_path, sample_limit=3)
# Generate all three formats
markdown_report = generator.generate(format=ReportFormat.MARKDOWN)
diff --git a/tests/unit/test_timeline_generator.py b/tests/unit/test_timeline_generator.py
index df43dc1..0b9091d 100644
--- a/tests/unit/test_timeline_generator.py
+++ b/tests/unit/test_timeline_generator.py
@@ -128,9 +128,7 @@ def test_format_place_for_timeline(self):
assert place == "Tulsa, Oklahoma"
# International place
- place = generator._format_place_for_timeline(
- "London, Greater London, England, United Kingdom"
- )
+ place = generator._format_place_for_timeline("London, Greater London, England, United Kingdom")
assert place == "London, England"
# Simple place
@@ -347,9 +345,7 @@ def test_generate_with_output_path(self, tmp_path, real_db_path, extension_path)
generator = TimelineGenerator(db=real_db_path, extension_path=extension_path)
output_file = tmp_path / "timeline.json"
- json_output = generator.generate(
- person_id=1, format=TimelineFormat.JSON, output_path=output_file
- )
+ json_output = generator.generate(person_id=1, format=TimelineFormat.JSON, output_path=output_file)
# Verify file was created
assert output_file.exists()
@@ -395,9 +391,7 @@ def test_generate_complete_timeline(self, real_db_path, extension_path):
generator = TimelineGenerator(db=real_db_path, extension_path=extension_path)
# Generate JSON
- json_output = generator.generate(
- person_id=1, format=TimelineFormat.JSON, group_by_phase=True
- )
+ json_output = generator.generate(person_id=1, format=TimelineFormat.JSON, group_by_phase=True)
# Parse and verify
timeline = json.loads(json_output)
@@ -476,9 +470,7 @@ def test_timeline_with_private_events_excluded(self, real_db_path, extension_pat
if not real_db_path.exists() or not extension_path.exists():
pytest.skip("Real database or ICU extension not available")
- generator = TimelineGenerator(
- db=real_db_path, extension_path=extension_path, include_private=False
- )
+ generator = TimelineGenerator(db=real_db_path, extension_path=extension_path, include_private=False)
json_output = generator.generate(person_id=1, format=TimelineFormat.JSON)
timeline = json.loads(json_output)