diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e038605 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +data/Iiams.rmtree filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index d265988..4e72e62 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -11,6 +11,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + with: + lfs: true - name: Install uv uses: astral-sh/setup-uv@v3 @@ -29,10 +31,10 @@ jobs: uv run black --check . - name: Run tests with coverage - run: uv run pytest --cov=rmagent --cov-report=term-missing --cov-fail-under=80 + run: uv run pytest --cov=rmagent --cov-report=term-missing --cov-fail-under=65 env: # Set test environment variables - RM_DATABASE_PATH: data/test.rmtree + RM_DATABASE_PATH: data/Iiams.rmtree DEFAULT_LLM_PROVIDER: anthropic LOG_LEVEL: WARNING diff --git a/data/Iiams.rmtree b/data/Iiams.rmtree new file mode 100644 index 0000000..00bb873 --- /dev/null +++ b/data/Iiams.rmtree @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7163c4982fa83bb0aae58e894986dc5e4b6f86634cca4cd3aeaf2d8e01e1571a +size 66928640 diff --git a/rmagent/agent/formatters.py b/rmagent/agent/formatters.py index d2b88cc..f18b765 100644 --- a/rmagent/agent/formatters.py +++ b/rmagent/agent/formatters.py @@ -8,7 +8,6 @@ from __future__ import annotations from rmagent.rmlib.parsers.date_parser import parse_rm_date -from rmagent.rmlib.queries import QueryService class GenealogyFormatters: @@ -74,7 +73,7 @@ def format_events(events, event_citations: dict[int, list[int]] | None = None) - # Add note if present (often contains full article transcriptions) if note: # Show "NOTE: " prefix only once, then indent subsequent lines - note_lines = note.split('\n') + note_lines = note.split("\n") for idx, note_line in enumerate(note_lines): if note_line.strip(): if idx == 0: @@ -233,9 +232,7 @@ def format_siblings(siblings) -> list[str]: return lines @staticmethod - def format_early_life( - person, parents, siblings, life_span: dict[str, int | None] - ) -> str: + def format_early_life(person, parents, siblings, life_span: dict[str, int | None]) -> str: """Format early life narrative with birth order, parental ages, migration notes.""" person_name = GenealogyFormatters.format_person_name(person) birth_year = life_span.get("birth_year") @@ -322,16 +319,10 @@ def format_family_losses(life_span, parents, spouses, siblings, children) -> str name = GenealogyFormatters.format_person_name(data) losses.append(f"- {name} ({relation}) died in {death_year_value}.") - return ( - "\n".join(losses) - if losses - else "No recorded family deaths occurred during the subject's lifetime." - ) + return "\n".join(losses) if losses else "No recorded family deaths occurred during the subject's lifetime." @staticmethod - def calculate_parent_age( - parents, birth_year_key: str, child_birth_year: int | None - ) -> int | None: + def calculate_parent_age(parents, birth_year_key: str, child_birth_year: int | None) -> int | None: """Calculate parent's age at child's birth.""" if not parents or child_birth_year is None: return None diff --git a/rmagent/agent/genealogy_agent.py b/rmagent/agent/genealogy_agent.py index 0bf0890..6664c45 100644 --- a/rmagent/agent/genealogy_agent.py +++ b/rmagent/agent/genealogy_agent.py @@ -63,9 +63,7 @@ class GenealogyAgent: # ---- Public API ----------------------------------------------------- - def generate_biography( - self, person_id: int, style: str = "standard", max_tokens: int | None = None - ) -> LLMResult: + def generate_biography(self, person_id: int, style: str = "standard", max_tokens: int | None = None) -> LLMResult: """Generate a narrative biography using the configured prompts/LLM.""" context = self._build_biography_context(person_id, style) @@ -84,9 +82,7 @@ def _run_validator(db: RMDatabase | None) -> QualityReport: return self._with_database(_run_validator) - def ask( - self, question: str, person_id: int | None = None, max_tokens: int | None = None - ) -> LLMResult: + def ask(self, question: str, person_id: int | None = None, max_tokens: int | None = None) -> LLMResult: """Answer ad-hoc questions with light context and persistent memory.""" context = self._build_qa_context(question, person_id) @@ -138,15 +134,11 @@ def _builder(db: RMDatabase | None) -> dict[str, str]: life_span, parents, spouses, siblings, children ) sibling_lines = GenealogyFormatters.format_siblings(siblings) - sibling_summary = ( - "\n".join(sibling_lines) if sibling_lines else "No sibling records available." - ) + sibling_summary = "\n".join(sibling_lines) if sibling_lines else "No sibling records available." # Extract person-level notes person_notes = person.get("Note") or "" - person_notes_formatted = ( - person_notes if person_notes else "No person-level notes available." - ) + person_notes_formatted = person_notes if person_notes else "No person-level notes available." # Generate style-specific length guidance length_guidance = self._get_length_guidance_for_style(style) @@ -185,9 +177,7 @@ def _builder(db: RMDatabase | None) -> dict[str, str]: snippets.append(GenealogyFormatters.format_family_overview(spouses, children, siblings)) snippets.append(GenealogyFormatters.format_early_life(person, parents, siblings, life_span)) - history_snippets = [ - f"Q: {turn.question}\nA: {turn.answer}" for turn in self._memory[-3:] - ] + history_snippets = [f"Q: {turn.question}\nA: {turn.answer}" for turn in self._memory[-3:]] snippets.extend(history_snippets) return { @@ -297,9 +287,7 @@ def _fetch_siblings(self, query: QueryService, parents: dict[str, str] | None, p ) return siblings - def _build_event_citations_map( - self, query: QueryService, events: list[dict] - ) -> dict[int, list[int]]: + def _build_event_citations_map(self, query: QueryService, events: list[dict]) -> dict[int, list[int]]: """ Build mapping of EventID -> list of CitationIDs for inline citation markers. @@ -333,9 +321,7 @@ def _build_event_citations_map( return event_citations_map - def _collect_all_citations_for_person( - self, query: QueryService, person_id: int - ) -> list[dict]: + def _collect_all_citations_for_person(self, query: QueryService, person_id: int) -> list[dict]: """ Collect all citations for a person's events using QueryService. Returns list of citation dicts with CitationID, SourceID, SourceName, CitationName, EventType. diff --git a/rmagent/agent/llm_provider.py b/rmagent/agent/llm_provider.py index 4f15b10..99ca3ed 100644 --- a/rmagent/agent/llm_provider.py +++ b/rmagent/agent/llm_provider.py @@ -86,9 +86,7 @@ def __init__( self.model = model self.default_max_tokens = default_max_tokens self.retry_config = retry_config or RetryConfig() - self.prompt_cost_per_1k, self.completion_cost_per_1k = ( - pricing_per_1k if pricing_per_1k else (0.0, 0.0) - ) + self.prompt_cost_per_1k, self.completion_cost_per_1k = pricing_per_1k if pricing_per_1k else (0.0, 0.0) def generate(self, prompt: str, **kwargs: Any) -> LLMResult: """Invoke provider with retry semantics.""" @@ -135,9 +133,7 @@ def _with_cost(self, result: LLMResult) -> LLMResult: def _invoke(self, prompt: str, **kwargs: Any) -> LLMResult: """Concrete providers implement this call.""" - def _log_debug( - self, prompt: str, result: LLMResult, elapsed: float, kwargs: dict[str, Any] - ) -> None: + def _log_debug(self, prompt: str, result: LLMResult, elapsed: float, kwargs: dict[str, Any]) -> None: debug_logger = logging.getLogger("rmagent.llm_debug") if not debug_logger.isEnabledFor(logging.DEBUG): return diff --git a/rmagent/agent/tools.py b/rmagent/agent/tools.py index 4097508..9dc8970 100644 --- a/rmagent/agent/tools.py +++ b/rmagent/agent/tools.py @@ -78,10 +78,7 @@ def __init__(self, query_service: QueryService): self.query_service = query_service def run(self, person_id: int, generations: int = 3): - return [ - dict(row) - for row in self.query_service.get_direct_ancestors(person_id, generations=generations) - ] + return [dict(row) for row in self.query_service.get_direct_ancestors(person_id, generations=generations)] @dataclass @@ -99,14 +96,8 @@ def run(self, person_a: int, person_b: int) -> dict[str, str | None]: if person_a == person_b: return {"relationship": "Same person"} - ancestors_a = { - row["PersonID"]: row - for row in self.query_service.get_direct_ancestors(person_a, generations=5) - } - ancestors_b = { - row["PersonID"]: row - for row in self.query_service.get_direct_ancestors(person_b, generations=5) - } + ancestors_a = {row["PersonID"]: row for row in self.query_service.get_direct_ancestors(person_a, generations=5)} + ancestors_b = {row["PersonID"]: row for row in self.query_service.get_direct_ancestors(person_b, generations=5)} shared = set(ancestors_a).intersection(ancestors_b) if not shared: @@ -137,8 +128,7 @@ def run(self): report = validator.run_all_checks() return { "totals_by_severity": { - k.value if hasattr(k, "value") else str(k): v - for k, v in report.totals_by_severity.items() + k.value if hasattr(k, "value") else str(k): v for k, v in report.totals_by_severity.items() }, "totals_by_category": report.totals_by_category, "issue_count": report.summary.get("issue_total", 0), diff --git a/rmagent/cli/commands/bio.py b/rmagent/cli/commands/bio.py index d8c7c75..96ebd68 100644 --- a/rmagent/cli/commands/bio.py +++ b/rmagent/cli/commands/bio.py @@ -99,7 +99,8 @@ def bio( }[citation_style.lower()] # Create generator and agent - config = ctx.load_config() + # Skip LLM credential validation if using template-based generation + config = ctx.load_config(require_llm_credentials=not no_ai) agent = ( None if no_ai diff --git a/rmagent/cli/commands/export.py b/rmagent/cli/commands/export.py index 174fada..807fa54 100644 --- a/rmagent/cli/commands/export.py +++ b/rmagent/cli/commands/export.py @@ -87,7 +87,7 @@ def hugo( }[bio_length.lower()] # Create exporter - config = ctx.load_config() + config = ctx.load_config(require_llm_credentials=False) exporter = HugoExporter( db=config.database.database_path, extension_path=config.database.sqlite_extension_path, @@ -100,9 +100,7 @@ def hugo( # Get all person IDs from rmagent.rmlib.database import RMDatabase - with RMDatabase( - config.database.database_path, extension_path=config.database.sqlite_extension_path - ) as db: + with RMDatabase(config.database.database_path, extension_path=config.database.sqlite_extension_path) as db: all_persons = db.query("SELECT PersonID FROM PersonTable") person_ids = [p["PersonID"] for p in all_persons] diff --git a/rmagent/cli/commands/person.py b/rmagent/cli/commands/person.py index ca1f76b..2973901 100644 --- a/rmagent/cli/commands/person.py +++ b/rmagent/cli/commands/person.py @@ -46,9 +46,7 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool raise click.Abort() # Display person header - name = ( - f"{_get_value(person_data, 'Given')} {_get_value(person_data, 'Surname')}".strip() - ) + name = f"{_get_value(person_data, 'Given')} {_get_value(person_data, 'Surname')}".strip() birth_year = _get_value(person_data, "BirthYear", "?") death_year = _get_value(person_data, "DeathYear", "?") console.print(f"\n[bold]📋 Person: {name}[/bold] ({birth_year}–{death_year})") @@ -68,9 +66,7 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool from rmagent.rmlib.parsers.date_parser import parse_rm_date date_str = _get_value(event, "Date") - formatted_date = ( - parse_rm_date(date_str).format_display() if date_str else "" - ) + formatted_date = parse_rm_date(date_str).format_display() if date_str else "" table.add_row( formatted_date, _get_value(event, "EventType"), @@ -89,15 +85,13 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool # Check for father if _get_value(parents_row, "FatherID"): father_name = ( - f"{_get_value(parents_row, 'FatherGiven')} " - f"{_get_value(parents_row, 'FatherSurname')}" + f"{_get_value(parents_row, 'FatherGiven')} " f"{_get_value(parents_row, 'FatherSurname')}" ).strip() console.print(f" • Father: {father_name}") # Check for mother if _get_value(parents_row, "MotherID"): mother_name = ( - f"{_get_value(parents_row, 'MotherGiven')} " - f"{_get_value(parents_row, 'MotherSurname')}" + f"{_get_value(parents_row, 'MotherGiven')} " f"{_get_value(parents_row, 'MotherSurname')}" ).strip() console.print(f" • Mother: {mother_name}") @@ -106,9 +100,7 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool if spouses: console.print("\n[bold]Spouses:[/bold]") for spouse in spouses: - spouse_name = ( - f"{_get_value(spouse, 'Given')} {_get_value(spouse, 'Surname')}".strip() - ) + spouse_name = f"{_get_value(spouse, 'Given')} {_get_value(spouse, 'Surname')}".strip() console.print(f" • {spouse_name}") # Get children @@ -116,9 +108,7 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool if children: console.print("\n[bold]Children:[/bold]") for child in children: - child_name = ( - f"{_get_value(child, 'Given')} {_get_value(child, 'Surname')}".strip() - ) + child_name = f"{_get_value(child, 'Given')} {_get_value(child, 'Surname')}".strip() console.print(f" • {child_name}") # Show ancestors if requested @@ -141,8 +131,7 @@ def person(ctx, person_id: int, events: bool, ancestors: bool, descendants: bool console.print("\n[bold]Descendants:[/bold] (4 generations)") for descendant in descendant_rows: descendant_name = ( - f"{_get_value(descendant, 'Given')} " - f"{_get_value(descendant, 'Surname')}" + f"{_get_value(descendant, 'Given')} " f"{_get_value(descendant, 'Surname')}" ).strip() gen = _get_value(descendant, "Generation", 1) indent = " " * gen diff --git a/rmagent/cli/commands/quality.py b/rmagent/cli/commands/quality.py index a53b50e..3bc6435 100644 --- a/rmagent/cli/commands/quality.py +++ b/rmagent/cli/commands/quality.py @@ -112,7 +112,7 @@ def quality( task = progress.add_task("Running data quality validation...", total=None) # Create generator - config = ctx.load_config() + config = ctx.load_config(require_llm_credentials=False) generator = QualityReportGenerator( db=config.database.database_path, extension_path=config.database.sqlite_extension_path, @@ -141,9 +141,7 @@ def quality( console.print() console.print(report_output) else: - console.print( - "[yellow]Warning:[/yellow] HTML and CSV formats require --output option" - ) + console.print("[yellow]Warning:[/yellow] HTML and CSV formats require --output option") except Exception as e: console.print(f"\n[red]Error:[/red] {e}") diff --git a/rmagent/cli/commands/search.py b/rmagent/cli/commands/search.py index 06d371f..421c5ee 100644 --- a/rmagent/cli/commands/search.py +++ b/rmagent/cli/commands/search.py @@ -1,6 +1,7 @@ """Search command - Search database by name or place.""" import re + import click from rich.console import Console from rich.table import Table @@ -22,9 +23,7 @@ def _get_value(row, key, default=""): def _get_surname_metaphone(db, surname: str) -> str | None: """Get Metaphone encoding for a surname from the database.""" # Query a sample name to get the Metaphone encoding - result = db.query_one( - "SELECT SurnameMP FROM NameTable WHERE Surname = ? COLLATE RMNOCASE LIMIT 1", (surname,) - ) + result = db.query_one("SELECT SurnameMP FROM NameTable WHERE Surname = ? COLLATE RMNOCASE LIMIT 1", (surname,)) return result["SurnameMP"] if result else None @@ -49,9 +48,9 @@ def _parse_name_variations(name: str, all_variants: list[str]) -> list[str]: return [name] # Extract brackets and base name (everything before first bracket) - bracket_pattern = r'\[([^\]]+)\]' + bracket_pattern = r"\[([^\]]+)\]" brackets = re.findall(bracket_pattern, name) - base_name = re.sub(bracket_pattern, '', name).strip() + base_name = re.sub(bracket_pattern, "", name).strip() if not brackets: return [name] @@ -200,9 +199,7 @@ def search( # Validate radius search options if kilometers is not None and miles is not None: - console.print( - "[red]Error:[/red] Cannot specify both --kilometers and --miles. Choose one." - ) + console.print("[red]Error:[/red] Cannot specify both --kilometers and --miles. Choose one.") raise click.Abort() radius_km = None @@ -221,9 +218,7 @@ def search( radius_unit = "mi" if radius_km is not None and not place: - console.print( - "[red]Error:[/red] Radius search requires --place to be specified" - ) + console.print("[red]Error:[/red] Radius search requires --place to be specified") raise click.Abort() with ctx.get_database() as db: @@ -232,7 +227,7 @@ def search( # Search by name if name: # Load config to get surname variants for [ALL] keyword - config = load_app_config(configure_logger=False) + config = load_app_config(configure_logger=False, require_llm_credentials=False) all_variants = config.search.surname_variants_all # Parse name variations (supports [variant] and [ALL] syntax) @@ -240,9 +235,7 @@ def search( # Show which variations are being searched if len(name_variations) > 1: - console.print( - f"[dim]Searching {len(name_variations)} name variations...[/dim]" - ) + console.print(f"[dim]Searching {len(name_variations)} name variations...[/dim]") # Collect results from all variations all_results = [] @@ -257,9 +250,7 @@ def search( # Single word - could be surname or given name # Try both try: - surname_results = queries.search_primary_names( - surname=name_parts[0], limit=limit - ) + surname_results = queries.search_primary_names(surname=name_parts[0], limit=limit) for r in surname_results: if r["PersonID"] not in seen_person_ids: all_results.append(r) @@ -267,9 +258,7 @@ def search( except ValueError: pass try: - given_results = queries.search_primary_names( - given=name_parts[0], limit=limit - ) + given_results = queries.search_primary_names(given=name_parts[0], limit=limit) for r in given_results: if r["PersonID"] not in seen_person_ids: all_results.append(r) @@ -313,15 +302,11 @@ def search( if len(variation.strip().split()) > 1: # Multi-word: Use word-based search (more precise) # This finds people where ALL words appear across name fields - variation_results = queries.search_names_by_words( - search_text=variation, limit=limit - ) + variation_results = queries.search_names_by_words(search_text=variation, limit=limit) else: # Single word: Use flexible search # This finds people where word appears in surname OR given name - variation_results = queries.search_names_flexible( - search_text=variation, limit=limit - ) + variation_results = queries.search_names_flexible(search_text=variation, limit=limit) # Add unique results for r in variation_results: @@ -347,9 +332,7 @@ def search( # Display name search results if results: - console.print( - f"\n[bold]🔍 Found {len(results)} person(s) matching '{name}':[/bold]" - ) + console.print(f"\n[bold]🔍 Found {len(results)} person(s) matching '{name}':[/bold]") console.print("─" * 60) table = Table(show_header=True, header_style="bold cyan") @@ -420,9 +403,7 @@ def search( ) if radius_results: - console.print( - f"\n[bold]🌍 Found {len(radius_results)} place(s) within radius:[/bold]" - ) + console.print(f"\n[bold]🌍 Found {len(radius_results)} place(s) within radius:[/bold]") table = Table(show_header=True, header_style="bold cyan") table.add_column("ID", style="dim", width=8) @@ -461,9 +442,7 @@ def search( else: # Standard place search (no radius) if place_results: - console.print( - f"\n[bold]📍 Found {len(place_results)} place(s) matching '{place}':[/bold]" - ) + console.print(f"\n[bold]📍 Found {len(place_results)} place(s) matching '{place}':[/bold]") console.print("─" * 60) table = Table(show_header=True, header_style="bold cyan") diff --git a/rmagent/cli/commands/timeline.py b/rmagent/cli/commands/timeline.py index 22d62f4..4ad35eb 100644 --- a/rmagent/cli/commands/timeline.py +++ b/rmagent/cli/commands/timeline.py @@ -70,7 +70,7 @@ def timeline( task = progress.add_task(f"Generating timeline for person {person_id}...", total=None) # Create generator - config = ctx.load_config() + config = ctx.load_config(require_llm_credentials=False) generator = TimelineGenerator( db=config.database.database_path, extension_path=config.database.sqlite_extension_path, diff --git a/rmagent/cli/main.py b/rmagent/cli/main.py index e72045e..553bfdd 100644 --- a/rmagent/cli/main.py +++ b/rmagent/cli/main.py @@ -36,10 +36,14 @@ def __init__( self.config = None self.db = None - def load_config(self): - """Load application configuration.""" + def load_config(self, require_llm_credentials: bool = True): + """Load application configuration. + + Args: + require_llm_credentials: When True, validate LLM provider credentials. + """ if not self.config: - self.config = load_app_config() + self.config = load_app_config(require_llm_credentials=require_llm_credentials) # Override with CLI options if provided if self.database_path: self.config.database.database_path = self.database_path @@ -50,7 +54,8 @@ def load_config(self): def get_database(self) -> RMDatabase: """Get database connection (creates if needed).""" if not self.db: - config = self.load_config() + # Database access doesn't require LLM credentials + config = self.load_config(require_llm_credentials=False) db_path = config.database.database_path if not db_path: raise click.UsageError( @@ -154,11 +159,11 @@ def completion(shell: str): # For fish rmagent completion fish """ - shell_upper = shell.upper() prog_name = "rmagent" if shell == "zsh": - click.echo(f"""# Add this to your ~/.zshrc: + click.echo( + f"""# Add this to your ~/.zshrc: eval "$(_RMAGENT_COMPLETE=zsh_source {prog_name})" # Or generate and save the completion script: @@ -166,23 +171,29 @@ def completion(shell: str): # Then add this to ~/.zshrc: fpath=(~/.zfunc $fpath) autoload -Uz compinit && compinit -""") +""" + ) elif shell == "bash": - click.echo(f"""# Add this to your ~/.bashrc: + click.echo( + f"""# Add this to your ~/.bashrc: eval "$(_RMAGENT_COMPLETE=bash_source {prog_name})" # Or generate and save the completion script: _RMAGENT_COMPLETE=bash_source {prog_name} > ~/.bash_completion.d/{prog_name} # Then add this to ~/.bashrc: source ~/.bash_completion.d/{prog_name} -""") +""" + ) elif shell == "fish": - click.echo(f"""# Add this to ~/.config/fish/completions/{prog_name}.fish: + click.echo( + f"""# Add this to ~/.config/fish/completions/{prog_name}.fish: _RMAGENT_COMPLETE=fish_source {prog_name} | source # Or generate and save the completion script: _RMAGENT_COMPLETE=fish_source {prog_name} > ~/.config/fish/completions/{prog_name}.fish -""") +""" + ) + cli.add_command(person.person) cli.add_command(bio.bio) diff --git a/rmagent/config/config.py b/rmagent/config/config.py index e7aa005..4036357 100644 --- a/rmagent/config/config.py +++ b/rmagent/config/config.py @@ -90,9 +90,7 @@ class LLMSettings(BaseModel): def check_provider(cls, provider: str) -> str: provider_lower = provider.lower() if provider_lower not in cls.allowed_providers: - raise ValueError( - f"Unknown provider '{provider}'. Allowed: {sorted(cls.allowed_providers)}" - ) + raise ValueError(f"Unknown provider '{provider}'. Allowed: {sorted(cls.allowed_providers)}") return provider_lower def ensure_credentials(self) -> None: @@ -173,9 +171,7 @@ class CitationSettings(BaseModel): def check_style(cls, style: str) -> str: style_lower = style.lower() if style_lower not in cls.allowed_styles: - raise ValueError( - f"Invalid citation style '{style}'. Allowed: {sorted(cls.allowed_styles)}" - ) + raise ValueError(f"Invalid citation style '{style}'. Allowed: {sorted(cls.allowed_styles)}") return style_lower @@ -306,6 +302,7 @@ def load_app_config( env_path: Path | None = None, auto_create_dirs: bool = True, configure_logger: bool = True, + require_llm_credentials: bool = True, ) -> AppConfig: """ Load application configuration. @@ -314,6 +311,7 @@ def load_app_config( env_path: Optional path to a .env file. Defaults to config/.env when not provided. auto_create_dirs: When True, create output/export directories. configure_logger: When True, configure global logging handlers. + require_llm_credentials: When True, validate LLM provider credentials. """ if env_path is None: env_path = DEFAULT_ENV_PATH @@ -336,9 +334,7 @@ def load_app_config( media_root = _env("RM_MEDIA_ROOT_DIRECTORY") database_settings = DatabaseSettings( database_path=Path(_env("RM_DATABASE_PATH", "data/Iiams.rmtree")), - sqlite_extension_path=Path( - _env("SQLITE_ICU_EXTENSION", "./sqlite-extension/icu.dylib") - ), + sqlite_extension_path=Path(_env("SQLITE_ICU_EXTENSION", "./sqlite-extension/icu.dylib")), media_root_directory=Path(media_root) if media_root else None, ) @@ -361,9 +357,7 @@ def load_app_config( ) search_settings = SearchSettings( - surname_variants_all=_env( - "SURNAME_VARIANTS_ALL", "Iams,Iames,Iiams,Iiames,Ijams,Ijames,Imes,Eimes" - ), + surname_variants_all=_env("SURNAME_VARIANTS_ALL", "Iams,Iames,Iiams,Iiames,Ijams,Ijames,Imes,Eimes"), ) logging_settings = LoggingSettings( @@ -391,10 +385,11 @@ def load_app_config( if configure_logger: configure_logging(config.logging) - try: - config.llm.ensure_credentials() - except ValueError as exc: - raise LLMError(str(exc)) from exc + if require_llm_credentials: + try: + config.llm.ensure_credentials() + except ValueError as exc: + raise LLMError(str(exc)) from exc return config diff --git a/rmagent/generators/biography.py b/rmagent/generators/biography.py index 50b1913..b6a91f0 100644 --- a/rmagent/generators/biography.py +++ b/rmagent/generators/biography.py @@ -8,11 +8,11 @@ from __future__ import annotations +import time from dataclasses import dataclass, field -from datetime import datetime, timezone +from datetime import UTC, datetime from enum import Enum from pathlib import Path -import time from rmagent.agent.genealogy_agent import GenealogyAgent from rmagent.rmlib.database import RMDatabase @@ -141,7 +141,7 @@ class Biography: sources: str # Metadata - generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc).astimezone()) + generated_at: datetime = field(default_factory=lambda: datetime.now(UTC).astimezone()) word_count: int = 0 privacy_applied: bool = False birth_year: int | None = None @@ -153,17 +153,19 @@ class Biography: def _calculate_word_count(self) -> int: """Calculate word count from all biography sections.""" - all_text = "\n".join([ - self.introduction, - self.early_life, - self.education, - self.career, - self.marriage_family, - self.later_life, - self.death_legacy, - self.footnotes, - self.sources, - ]) + all_text = "\n".join( + [ + self.introduction, + self.early_life, + self.education, + self.career, + self.marriage_family, + self.later_life, + self.death_legacy, + self.footnotes, + self.sources, + ] + ) return len(all_text.split()) @staticmethod @@ -198,26 +200,26 @@ def render_metadata(self) -> str: tz_str = self.generated_at.strftime("%z") tz_formatted = f"{tz_str[:3]}:{tz_str[3:]}" if tz_str else "" date_str = self.generated_at.strftime("%Y-%m-%dT%H:%M:%S") + tz_formatted - lines.append(f'Date: {date_str}') + lines.append(f"Date: {date_str}") # Person ID - lines.append(f'PersonID: {self.person_id}') + lines.append(f"PersonID: {self.person_id}") # LLM Metadata (if available) if self.llm_metadata: - lines.append(f'TokensIn: {self._format_tokens(self.llm_metadata.prompt_tokens)}') - lines.append(f'TokensOut: {self._format_tokens(self.llm_metadata.completion_tokens)}') - lines.append(f'TotalTokens: {self._format_tokens(self.llm_metadata.total_tokens)}') - lines.append(f'LLM: {self.llm_metadata.provider.capitalize()}') - lines.append(f'Model: {self.llm_metadata.model}') - lines.append(f'PromptTime: {self._format_duration(self.llm_metadata.prompt_time)}') - lines.append(f'LLMTime: {self._format_duration(self.llm_metadata.llm_time)}') + lines.append(f"TokensIn: {self._format_tokens(self.llm_metadata.prompt_tokens)}") + lines.append(f"TokensOut: {self._format_tokens(self.llm_metadata.completion_tokens)}") + lines.append(f"TotalTokens: {self._format_tokens(self.llm_metadata.total_tokens)}") + lines.append(f"LLM: {self.llm_metadata.provider.capitalize()}") + lines.append(f"Model: {self.llm_metadata.model}") + lines.append(f"PromptTime: {self._format_duration(self.llm_metadata.prompt_time)}") + lines.append(f"LLMTime: {self._format_duration(self.llm_metadata.llm_time)}") # Biography stats (calculate word count dynamically) word_count = self._calculate_word_count() - lines.append(f'Words: {word_count:,}') - lines.append(f'Citations: {self.citation_count}') - lines.append(f'Sources: {self.source_count}') + lines.append(f"Words: {word_count:,}") + lines.append(f"Citations: {self.citation_count}") + lines.append(f"Sources: {self.source_count}") lines.append("---\n") return "\n".join(lines) @@ -243,7 +245,7 @@ def render_markdown(self, include_metadata: bool = True) -> str: additional_images = [] if self.length != BiographyLength.SHORT and self.media_files: for media in self.media_files: - is_primary = media.get("IsPrimary", 0) == 1 if hasattr(media, 'get') else media["IsPrimary"] == 1 + is_primary = media.get("IsPrimary", 0) == 1 if hasattr(media, "get") else media["IsPrimary"] == 1 if is_primary and primary_image is None: primary_image = media elif not is_primary: @@ -256,9 +258,14 @@ def render_markdown(self, include_metadata: bool = True) -> str: # Add primary portrait image with text wrapping (if available) if primary_image: from pathlib import Path + # Format the media path - media_path = primary_image.get("MediaPath", "") if hasattr(primary_image, 'get') else primary_image["MediaPath"] - media_file = primary_image.get("MediaFile", "") if hasattr(primary_image, 'get') else primary_image["MediaFile"] + if hasattr(primary_image, "get"): + media_path = primary_image.get("MediaPath", "") + media_file = primary_image.get("MediaFile", "") + else: + media_path = primary_image["MediaPath"] + media_file = primary_image["MediaFile"] # Strip RootsMagic's ?\ or ?/ prefix if present if media_path.startswith("?\\"): @@ -329,9 +336,10 @@ def render_markdown(self, include_metadata: bool = True) -> str: sections.append("## Photos\n") for media in additional_images: from pathlib import Path + # Format the media path - media_path = media.get("MediaPath", "") if hasattr(media, 'get') else media["MediaPath"] - media_file = media.get("MediaFile", "") if hasattr(media, 'get') else media["MediaFile"] + media_path = media.get("MediaPath", "") if hasattr(media, "get") else media["MediaPath"] + media_file = media.get("MediaFile", "") if hasattr(media, "get") else media["MediaFile"] # Strip RootsMagic's ?\ or ?/ prefix if present if media_path.startswith("?\\"): @@ -545,9 +553,7 @@ def generate( if use_ai and self.agent: biography = self._generate_with_ai(context, length, citation_style, include_sources) else: - biography = self._generate_template_based( - context, length, citation_style, include_sources - ) + biography = self._generate_template_based(context, length, citation_style, include_sources) return biography @@ -580,12 +586,8 @@ def _extract(db: RMDatabase) -> PersonContext: is_living = age < 110 # Extract birth/death information - birth_date_str, birth_place = self._extract_vital_info( - db, person_id, fact_type_id=1 - ) # Birth - death_date_str, death_place = self._extract_vital_info( - db, person_id, fact_type_id=2 - ) # Death + birth_date_str, birth_place = self._extract_vital_info(db, person_id, fact_type_id=1) # Birth + death_date_str, death_place = self._extract_vital_info(db, person_id, fact_type_id=2) # Death # Get relationships parents = query.get_parents(person_id) @@ -677,9 +679,7 @@ def _extract(db: RMDatabase) -> PersonContext: else: raise ValueError("No database provided") - def _extract_vital_info( - self, db: RMDatabase, person_id: int, fact_type_id: int - ) -> tuple[str | None, str | None]: + def _extract_vital_info(self, db: RMDatabase, person_id: int, fact_type_id: int) -> tuple[str | None, str | None]: """Extract date and place for a vital event (birth/death).""" query = QueryService(db) vital_events = query.get_vital_events(person_id) @@ -709,9 +709,7 @@ def _extract_vital_info( return None, None - def _categorize_events( - self, db: RMDatabase, events: list[dict] - ) -> tuple[list[EventContext], ...]: + def _categorize_events(self, db: RMDatabase, events: list[dict]) -> tuple[list[EventContext], ...]: """Categorize events into vital, education, occupation, military, residence, and other.""" vital = [] education = [] @@ -910,8 +908,8 @@ def _generate_with_ai( # Extract LLM metadata from result llm_metadata = None - if hasattr(self.agent, 'llm_provider'): - provider_name = self.agent.llm_provider.__class__.__name__.replace('Provider', '').lower() + if hasattr(self.agent, "llm_provider"): + provider_name = self.agent.llm_provider.__class__.__name__.replace("Provider", "").lower() llm_metadata = LLMMetadata( provider=provider_name, model=result.model, @@ -919,7 +917,7 @@ def _generate_with_ai( completion_tokens=result.usage.completion_tokens, total_tokens=result.usage.total_tokens, prompt_time=total_time * 0.1, # Estimate ~10% for prompt building - llm_time=total_time * 0.9, # Estimate ~90% for LLM + llm_time=total_time * 0.9, # Estimate ~90% for LLM cost=result.cost, ) @@ -932,9 +930,7 @@ def _generate_with_ai( if citation_style == CitationStyle.FOOTNOTE: # Process {cite:ID} markers in full response (preserves section headers) - modified_text, footnotes, tracker = self._process_citations_in_text( - response_text, context.all_citations - ) + modified_text, footnotes, tracker = self._process_citations_in_text(response_text, context.all_citations) # Use modified text for section parsing response_text = modified_text @@ -1267,7 +1263,7 @@ def _strip_source_type_prefix(source_name: str) -> str: for prefix in prefixes: if source_name.startswith(prefix): - return source_name[len(prefix):] + return source_name[len(prefix) :] return source_name @@ -1436,7 +1432,6 @@ def _generate_bibliography_from_fields(self, citation: dict) -> str: First checks for pre-formatted Bibliography field, then constructs from individual fields. Returns source name with WARNING only if all approaches fail. """ - source_id = _get_row_value(citation, "SourceID", 0) source_name = _get_row_value(citation, "SourceName", "[Unknown Source]") fields_blob = _get_row_value(citation, "SourceFields") @@ -1535,9 +1530,7 @@ def _process_citations_in_text( return modified_text, footnotes, tracker - def _generate_footnotes_section( - self, footnotes: list[tuple[int, CitationInfo]], tracker: CitationTracker - ) -> str: + def _generate_footnotes_section(self, footnotes: list[tuple[int, CitationInfo]], tracker: CitationTracker) -> str: """ Generate footnotes section with numbered entries. First citation per source uses full footnote, subsequent use short. diff --git a/rmagent/generators/biography/citations.py b/rmagent/generators/biography/citations.py index 3d1eb28..65346be 100644 --- a/rmagent/generators/biography/citations.py +++ b/rmagent/generators/biography/citations.py @@ -49,7 +49,7 @@ def strip_source_type_prefix(source_name: str) -> str: for prefix in prefixes: if source_name.startswith(prefix): - return source_name[len(prefix):] + return source_name[len(prefix) :] return source_name @@ -162,7 +162,6 @@ def _generate_bibliography_from_fields(self, citation: dict) -> str: First checks for pre-formatted Bibliography field, then constructs from individual fields. Returns source name with WARNING only if all approaches fail. """ - source_id = get_row_value(citation, "SourceID", 0) source_name = get_row_value(citation, "SourceName", "[Unknown Source]") fields_blob = get_row_value(citation, "SourceFields") @@ -259,9 +258,7 @@ def process_citations_in_text( return modified_text, footnotes, tracker - def generate_footnotes_section( - self, footnotes: list[tuple[int, CitationInfo]], tracker: CitationTracker - ) -> str: + def generate_footnotes_section(self, footnotes: list[tuple[int, CitationInfo]], tracker: CitationTracker) -> str: """ Generate footnotes section with numbered entries and 3-character indent. First citation per source uses full footnote, subsequent use short. diff --git a/rmagent/generators/biography/generator.py b/rmagent/generators/biography/generator.py index 10905cf..deb4272 100644 --- a/rmagent/generators/biography/generator.py +++ b/rmagent/generators/biography/generator.py @@ -6,9 +6,9 @@ from __future__ import annotations +import time from datetime import datetime from pathlib import Path -import time from rmagent.agent.genealogy_agent import GenealogyAgent from rmagent.rmlib.database import RMDatabase @@ -18,6 +18,7 @@ from rmagent.rmlib.parsers.place_parser import format_place_medium, format_place_short from rmagent.rmlib.queries import QueryService +from .citations import CitationProcessor from .models import ( Biography, BiographyLength, @@ -27,7 +28,6 @@ PersonContext, get_row_value, ) -from .citations import CitationProcessor from .templates import BiographyTemplates @@ -141,9 +141,7 @@ def generate( if use_ai and self.agent: biography = self._generate_with_ai(context, length, citation_style, include_sources) else: - biography = self._generate_template_based( - context, length, citation_style, include_sources - ) + biography = self._generate_template_based(context, length, citation_style, include_sources) return biography @@ -176,12 +174,8 @@ def _extract(db: RMDatabase) -> PersonContext: is_living = age < 110 # Extract birth/death information - birth_date_str, birth_place = self._extract_vital_info( - db, person_id, fact_type_id=1 - ) # Birth - death_date_str, death_place = self._extract_vital_info( - db, person_id, fact_type_id=2 - ) # Death + birth_date_str, birth_place = self._extract_vital_info(db, person_id, fact_type_id=1) # Birth + death_date_str, death_place = self._extract_vital_info(db, person_id, fact_type_id=2) # Death # Get relationships parents = query.get_parents(person_id) @@ -273,9 +267,7 @@ def _extract(db: RMDatabase) -> PersonContext: else: raise ValueError("No database provided") - def _extract_vital_info( - self, db: RMDatabase, person_id: int, fact_type_id: int - ) -> tuple[str | None, str | None]: + def _extract_vital_info(self, db: RMDatabase, person_id: int, fact_type_id: int) -> tuple[str | None, str | None]: """Extract date and place for a vital event (birth/death).""" query = QueryService(db) vital_events = query.get_vital_events(person_id) @@ -305,9 +297,7 @@ def _extract_vital_info( return None, None - def _categorize_events( - self, db: RMDatabase, events: list[dict] - ) -> tuple[list[EventContext], ...]: + def _categorize_events(self, db: RMDatabase, events: list[dict]) -> tuple[list[EventContext], ...]: """Categorize events into vital, education, occupation, military, residence, and other.""" vital = [] education = [] @@ -471,8 +461,8 @@ def _generate_with_ai( # Extract LLM metadata from result llm_metadata = None - if hasattr(self.agent, 'llm_provider'): - provider_name = self.agent.llm_provider.__class__.__name__.replace('Provider', '').lower() + if hasattr(self.agent, "llm_provider"): + provider_name = self.agent.llm_provider.__class__.__name__.replace("Provider", "").lower() llm_metadata = LLMMetadata( provider=provider_name, model=result.model, @@ -480,7 +470,7 @@ def _generate_with_ai( completion_tokens=result.usage.completion_tokens, total_tokens=result.usage.total_tokens, prompt_time=total_time * 0.1, # Estimate ~10% for prompt building - llm_time=total_time * 0.9, # Estimate ~90% for LLM + llm_time=total_time * 0.9, # Estimate ~90% for LLM cost=result.cost, ) diff --git a/rmagent/generators/biography/models.py b/rmagent/generators/biography/models.py index f164435..bb8a277 100644 --- a/rmagent/generators/biography/models.py +++ b/rmagent/generators/biography/models.py @@ -7,8 +7,9 @@ from __future__ import annotations from dataclasses import dataclass, field -from datetime import datetime, timezone +from datetime import UTC, datetime from enum import Enum +from pathlib import Path class BiographyLength(str, Enum): @@ -129,7 +130,7 @@ class Biography: sources: str # Metadata - generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc).astimezone()) + generated_at: datetime = field(default_factory=lambda: datetime.now(UTC).astimezone()) word_count: int = 0 privacy_applied: bool = False birth_year: int | None = None @@ -138,7 +139,7 @@ class Biography: citation_count: int = 0 source_count: int = 0 media_files: list[dict] = field(default_factory=list) # Media files for images - media_root_directory: "Path | None" = None # Root directory for media files (replaces ? in MediaPath) + media_root_directory: Path | None = None # Root directory for media files (replaces ? in MediaPath) def calculate_word_count(self) -> int: """ @@ -146,21 +147,24 @@ def calculate_word_count(self) -> int: Excludes front matter, footnotes, and sources sections. """ - all_text = "\n".join([ - self.introduction, - self.early_life, - self.education, - self.career, - self.marriage_family, - self.later_life, - self.death_legacy, - ]) + all_text = "\n".join( + [ + self.introduction, + self.early_life, + self.education, + self.career, + self.marriage_family, + self.later_life, + self.death_legacy, + ] + ) return len(all_text.split()) def render_markdown(self, include_metadata: bool = True) -> str: """Render complete biography as Markdown with optional front matter.""" # Import here to avoid circular dependency from .rendering import BiographyRenderer + renderer = BiographyRenderer(media_root_directory=self.media_root_directory) return renderer.render_markdown(self, include_metadata) @@ -168,6 +172,7 @@ def render_metadata(self) -> str: """Render Hugo-style front matter metadata.""" # Import here to avoid circular dependency from .rendering import BiographyRenderer + renderer = BiographyRenderer(media_root_directory=self.media_root_directory) return renderer.render_metadata(self) diff --git a/rmagent/generators/biography/rendering.py b/rmagent/generators/biography/rendering.py index ba4d9c7..c094c0d 100644 --- a/rmagent/generators/biography/rendering.py +++ b/rmagent/generators/biography/rendering.py @@ -55,26 +55,26 @@ def render_metadata(self, bio: Biography) -> str: tz_str = bio.generated_at.strftime("%z") tz_formatted = f"{tz_str[:3]}:{tz_str[3:]}" if tz_str else "" date_str = bio.generated_at.strftime("%Y-%m-%dT%H:%M:%S") + tz_formatted - lines.append(f'Date: {date_str}') + lines.append(f"Date: {date_str}") # Person ID - lines.append(f'PersonID: {bio.person_id}') + lines.append(f"PersonID: {bio.person_id}") # LLM Metadata (if available) if bio.llm_metadata: - lines.append(f'TokensIn: {self.format_tokens(bio.llm_metadata.prompt_tokens)}') - lines.append(f'TokensOut: {self.format_tokens(bio.llm_metadata.completion_tokens)}') - lines.append(f'TotalTokens: {self.format_tokens(bio.llm_metadata.total_tokens)}') - lines.append(f'LLM: {bio.llm_metadata.provider.capitalize()}') - lines.append(f'Model: {bio.llm_metadata.model}') - lines.append(f'PromptTime: {self.format_duration(bio.llm_metadata.prompt_time)}') - lines.append(f'LLMTime: {self.format_duration(bio.llm_metadata.llm_time)}') + lines.append(f"TokensIn: {self.format_tokens(bio.llm_metadata.prompt_tokens)}") + lines.append(f"TokensOut: {self.format_tokens(bio.llm_metadata.completion_tokens)}") + lines.append(f"TotalTokens: {self.format_tokens(bio.llm_metadata.total_tokens)}") + lines.append(f"LLM: {bio.llm_metadata.provider.capitalize()}") + lines.append(f"Model: {bio.llm_metadata.model}") + lines.append(f"PromptTime: {self.format_duration(bio.llm_metadata.prompt_time)}") + lines.append(f"LLMTime: {self.format_duration(bio.llm_metadata.llm_time)}") # Biography stats (calculate word count dynamically) word_count = bio.calculate_word_count() - lines.append(f'Words: {word_count:,}') - lines.append(f'Citations: {bio.citation_count}') - lines.append(f'Sources: {bio.source_count}') + lines.append(f"Words: {word_count:,}") + lines.append(f"Citations: {bio.citation_count}") + lines.append(f"Sources: {bio.source_count}") lines.append("---\n") return "\n".join(lines) @@ -124,18 +124,28 @@ def render_markdown(self, bio: Biography, include_metadata: bool = True) -> str: db_caption = primary_image["Caption"] if "Caption" in primary_image.keys() else "" except (AttributeError, TypeError): db_caption = "" - caption = db_caption if db_caption else self._format_image_caption(bio.full_name, bio.birth_year, bio.death_year) - alt_text = self._format_image_caption(bio.full_name, bio.birth_year, bio.death_year) # Always use name/dates for alt text + if db_caption: + caption = db_caption + else: + caption = self._format_image_caption(bio.full_name, bio.birth_year, bio.death_year) + # Always use name/dates for alt text + alt_text = self._format_image_caption(bio.full_name, bio.birth_year, bio.death_year) sections.append('
') sections.append('
') - sections.append(f' {alt_text}') - sections.append(f'

{caption}

') - sections.append('
') + sections.append( + f' {alt_text}' + ) + sections.append( + f'

{caption}

' + ) + sections.append("
") sections.append('
') - sections.append(f' {bio.introduction}') - sections.append('
') - sections.append('\n') + sections.append(f" {bio.introduction}") + sections.append(" ") + sections.append("\n") else: sections.append(bio.introduction) diff --git a/rmagent/generators/biography/templates.py b/rmagent/generators/biography/templates.py index cbdf0e9..f90eec1 100644 --- a/rmagent/generators/biography/templates.py +++ b/rmagent/generators/biography/templates.py @@ -6,10 +6,11 @@ from __future__ import annotations -from .models import PersonContext, get_row_value from rmagent.rmlib.parsers.date_parser import is_unknown_date, parse_rm_date from rmagent.rmlib.parsers.name_parser import format_full_name +from .models import PersonContext, get_row_value + class BiographyTemplates: """Generates biography sections using templates (no AI).""" diff --git a/rmagent/generators/hugo_exporter.py b/rmagent/generators/hugo_exporter.py index c6dd1ae..d25a366 100644 --- a/rmagent/generators/hugo_exporter.py +++ b/rmagent/generators/hugo_exporter.py @@ -529,9 +529,7 @@ def _build_index(db: RMDatabase) -> str: lines.append(f"- [{person['name']}]({person['slug']}/){lifespan}") lines.append("") - lines.append( - f"*{len(people)} biographies • Generated {datetime.now().strftime('%Y-%m-%d')}*" - ) + lines.append(f"*{len(people)} biographies • Generated {datetime.now().strftime('%Y-%m-%d')}*") return "\n".join(lines) diff --git a/rmagent/generators/quality_report.py b/rmagent/generators/quality_report.py index da84822..9fa84eb 100644 --- a/rmagent/generators/quality_report.py +++ b/rmagent/generators/quality_report.py @@ -153,15 +153,11 @@ def _apply_filters( # Apply category filter if category_filter: - filtered_issues = [ - issue for issue in filtered_issues if issue.category == category_filter - ] + filtered_issues = [issue for issue in filtered_issues if issue.category == category_filter] # Apply severity filter if severity_filter: - filtered_issues = [ - issue for issue in filtered_issues if issue.severity == severity_filter - ] + filtered_issues = [issue for issue in filtered_issues if issue.severity == severity_filter] # Recalculate totals for filtered issues totals_by_severity = { @@ -320,10 +316,7 @@ def _format_html(self, report: QualityReport) -> str: " body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Arial, " "sans-serif; margin: 40px; }" ) - lines.append( - " h1 { color: #333; border-bottom: 2px solid #4CAF50; " - "padding-bottom: 10px; }" - ) + lines.append(" h1 { color: #333; border-bottom: 2px solid #4CAF50; " "padding-bottom: 10px; }") lines.append(" h2 { color: #555; margin-top: 30px; }") lines.append(" h3 { color: #666; }") lines.append( @@ -338,10 +331,7 @@ def _format_html(self, report: QualityReport) -> str: " .issue { background-color: #fff; border: 1px solid #ddd; padding: 15px; " "margin: 15px 0; border-radius: 4px; }" ) - lines.append( - " .issue-header { font-weight: bold; font-size: 1.1em; " - "margin-bottom: 10px; }" - ) + lines.append(" .issue-header { font-weight: bold; font-size: 1.1em; " "margin-bottom: 10px; }") lines.append(" .metadata { color: #666; font-size: 0.9em; }") lines.append(" .samples { margin-top: 10px; }") lines.append(" .sample { margin: 5px 0; padding-left: 20px; }") @@ -354,24 +344,16 @@ def _format_html(self, report: QualityReport) -> str: # Content lines.append("

Data Quality Report

") - lines.append( - f"

Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

" - ) + lines.append(f"

Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

") # Summary lines.append("
") lines.append("

Summary Statistics

") lines.append(" ") lines.append(" ") - lines.append( - f" " - ) - lines.append( - f" " - ) - lines.append( - f" " - ) + lines.append(f" ") + lines.append(f" ") + lines.append(f" ") lines.append( f" " ) @@ -409,9 +391,7 @@ def _format_html(self, report: QualityReport) -> str: severity_issues = [issue for issue in report.issues if issue.severity == severity] if severity_issues: css_class = severity.value - lines.append( - f"

{severity.value.capitalize()} Issues

" - ) + lines.append(f"

{severity.value.capitalize()} Issues

") for issue in severity_issues: lines.append("
") @@ -423,9 +403,7 @@ def _format_html(self, report: QualityReport) -> str: lines.append(f"

{issue.description}

") if issue.samples: - lines.append( - "
Sample Issues:
    " - ) + lines.append("
    Sample Issues:
      ") for sample in issue.samples[: self.sample_limit]: sample_text = self._format_sample_html(sample) lines.append(f"
    • {sample_text}
    • ") diff --git a/rmagent/generators/timeline.py b/rmagent/generators/timeline.py index 1b3e22e..744defe 100644 --- a/rmagent/generators/timeline.py +++ b/rmagent/generators/timeline.py @@ -234,9 +234,7 @@ def _extract(db: RMDatabase) -> dict: continue # Build timeline event - timeline_event = self._build_timeline_event( - db, event, person_id, birth_year, group_by_phase - ) + timeline_event = self._build_timeline_event(db, event, person_id, birth_year, group_by_phase) if timeline_event: timeline_events.append(timeline_event) @@ -286,9 +284,7 @@ def _build_timeline_event( place_formatted = self._format_place_for_timeline(place_str) # Build narrative text - narrative = self._build_event_narrative( - event_type_name, display_date, place_formatted, details - ) + narrative = self._build_event_narrative(event_type_name, display_date, place_formatted, details) # Get media media = self._get_event_media(db, event_id) @@ -330,9 +326,7 @@ def _build_timeline_event( return timeline_event - def _parse_date_to_timelinejs( - self, rm_date: str - ) -> tuple[dict | None, dict | None, str | None]: + def _parse_date_to_timelinejs(self, rm_date: str) -> tuple[dict | None, dict | None, str | None]: """Parse RM11 date to TimelineJS3 format.""" # Check if date string is null/unknown (empty or starts with ".") if not rm_date or rm_date.startswith("."): @@ -425,11 +419,7 @@ def _get_event_type_name(self, db: RMDatabase, event_type_id: int) -> str: """Get event type name from FactTypeTable.""" cursor = db.execute("SELECT Name FROM FactTypeTable WHERE FactTypeID = ?", (event_type_id,)) row = cursor.fetchone() - return ( - _get_row_value(row, "Name", f"Event {event_type_id}") - if row - else f"Event {event_type_id}" - ) + return _get_row_value(row, "Name", f"Event {event_type_id}") if row else f"Event {event_type_id}" def _get_event_media(self, db: RMDatabase, event_id: int) -> dict | None: """Get primary media for an event.""" diff --git a/rmagent/rmlib/database.py b/rmagent/rmlib/database.py index 7613c19..1c801b5 100644 --- a/rmagent/rmlib/database.py +++ b/rmagent/rmlib/database.py @@ -145,9 +145,7 @@ def _load_rmnocase_collation(self) -> None: # - caseLevel=off: Ignore case differences # - normalization=on: Normalize Unicode characters self._conn.execute( - "SELECT icu_load_collation(" - "'en_US@colStrength=primary;caseLevel=off;normalization=on'," - "'RMNOCASE')" + "SELECT icu_load_collation(" "'en_US@colStrength=primary;caseLevel=off;normalization=on'," "'RMNOCASE')" ) logger.debug("RMNOCASE collation registered successfully") finally: @@ -173,9 +171,7 @@ def connection(self) -> sqlite3.Connection: DatabaseError: If no active connection """ if self._conn is None: - raise DatabaseError( - "No active connection - use 'with RMDatabase(...)' or call connect()" - ) + raise DatabaseError("No active connection - use 'with RMDatabase(...)' or call connect()") return self._conn def execute(self, query: str, params: tuple | None = None) -> sqlite3.Cursor: diff --git a/rmagent/rmlib/models.py b/rmagent/rmlib/models.py index 04f1c1f..4f9c720 100644 --- a/rmagent/rmlib/models.py +++ b/rmagent/rmlib/models.py @@ -115,28 +115,18 @@ class Person(RMBaseModel): """ person_id: int = Field(..., alias="PersonID", description="Unique person identifier") - unique_id: str | None = Field( - None, alias="UniqueID", description="36-character hexadecimal unique ID" - ) + unique_id: str | None = Field(None, alias="UniqueID", description="36-character hexadecimal unique ID") sex: Sex = Field(..., alias="Sex", description="Person's sex/gender") parent_id: int = Field(0, alias="ParentID", description="FamilyID of parents (0 = no parents)") spouse_id: int = Field(0, alias="SpouseID", description="FamilyID of spouse (0 = no spouse)") - color: int = Field( - 0, alias="Color", ge=0, le=27, description="Color coding (0=None, 1-27=specific colors)" - ) - relate1: int = Field( - 0, ge=0, le=999, alias="Relate1", description="Generations to Most Recent Common Ancestor" - ) - relate2: int = Field( - 0, ge=0, alias="Relate2", description="Generations from reference person to MRCA" - ) + color: int = Field(0, alias="Color", ge=0, le=27, description="Color coding (0=None, 1-27=specific colors)") + relate1: int = Field(0, ge=0, le=999, alias="Relate1", description="Generations to Most Recent Common Ancestor") + relate2: int = Field(0, ge=0, alias="Relate2", description="Generations from reference person to MRCA") flags: int = Field(0, ge=0, le=10, alias="Flags", description="Relationship prefix descriptor") living: bool = Field(False, alias="Living", description="True if person is living") is_private: int = Field(0, alias="IsPrivate", description="Privacy flag (not implemented)") proof: int = Field(0, alias="Proof", description="Proof level (not implemented)") - bookmark: int = Field( - 0, alias="Bookmark", description="Bookmark flag (0=not bookmarked, 1=bookmarked)" - ) + bookmark: int = Field(0, alias="Bookmark", description="Bookmark flag (0=not bookmarked, 1=bookmarked)") note: str | None = Field(None, alias="Note", description="User-defined notes") @field_validator("sex", mode="before") @@ -168,43 +158,25 @@ class Name(RMBaseModel): surname: str | None = Field(None, alias="Surname", description="Surname/family name") given: str | None = Field(None, alias="Given", description="Given/first name") prefix: str | None = Field(None, alias="Prefix", description="Name prefix (Dr., Rev., etc.)") - suffix: str | None = Field( - None, alias="Suffix", description="Name suffix (Jr., Sr., III, etc.)" - ) + suffix: str | None = Field(None, alias="Suffix", description="Name suffix (Jr., Sr., III, etc.)") nickname: str | None = Field(None, alias="Nickname", description="Nickname") name_type: NameType = Field(NameType.NULL, alias="NameType", description="Type of name") - date: str | None = Field( - None, alias="Date", description="Date associated with this name (24-char encoded)" - ) + date: str | None = Field(None, alias="Date", description="Date associated with this name (24-char encoded)") sort_date: int | None = Field( None, alias="SortDate", description="Sortable date representation (9223372036854775807 = unknown)", ) - is_primary: bool = Field( - False, alias="IsPrimary", description="True if this is the primary name" - ) + is_primary: bool = Field(False, alias="IsPrimary", description="True if this is the primary name") is_private: bool = Field(False, alias="IsPrivate", description="True if name is private") - proof: ProofLevel = Field( - ProofLevel.BLANK, alias="Proof", description="Evidence quality rating" - ) + proof: ProofLevel = Field(ProofLevel.BLANK, alias="Proof", description="Evidence quality rating") sentence: str | None = Field(None, alias="Sentence", description="Custom sentence template") note: str | None = Field(None, alias="Note", description="User-defined notes") - birth_year: int | None = Field( - None, alias="BirthYear", description="Year extracted from birth event" - ) - death_year: int | None = Field( - None, alias="DeathYear", description="Year extracted from death event" - ) - surname_mp: str | None = Field( - None, alias="SurnameMP", description="Metaphone encoding of surname" - ) - given_mp: str | None = Field( - None, alias="GivenMP", description="Metaphone encoding of given name" - ) - nickname_mp: str | None = Field( - None, alias="NicknameMP", description="Metaphone encoding of nickname" - ) + birth_year: int | None = Field(None, alias="BirthYear", description="Year extracted from birth event") + death_year: int | None = Field(None, alias="DeathYear", description="Year extracted from death event") + surname_mp: str | None = Field(None, alias="SurnameMP", description="Metaphone encoding of surname") + given_mp: str | None = Field(None, alias="GivenMP", description="Metaphone encoding of given name") + nickname_mp: str | None = Field(None, alias="NicknameMP", description="Metaphone encoding of nickname") @field_validator("is_primary", "is_private", mode="before") @classmethod @@ -238,26 +210,18 @@ class Event(RMBaseModel): event_id: int = Field(..., alias="EventID", description="Unique event identifier") event_type: int = Field(..., alias="EventType", description="FactTypeID from FactTypeTable") - owner_type: OwnerType = Field( - ..., alias="OwnerType", description="Type of owner (person or family)" - ) + owner_type: OwnerType = Field(..., alias="OwnerType", description="Type of owner (person or family)") owner_id: int = Field(..., alias="OwnerID", description="PersonID or FamilyID") - family_id: int = Field( - 0, alias="FamilyID", description="FamilyID for parent-related events (0 = not applicable)" - ) + family_id: int = Field(0, alias="FamilyID", description="FamilyID for parent-related events (0 = not applicable)") place_id: int = Field(0, alias="PlaceID", description="PlaceID (0 = no place)") site_id: int = Field(0, alias="SiteID", description="PlaceID of place details (0 = no details)") date: str | None = Field(None, alias="Date", description="Date in 24-character encoded format") - sort_date: int | None = Field( - None, alias="SortDate", description="Sortable date representation" - ) + sort_date: int | None = Field(None, alias="SortDate", description="Sortable date representation") is_primary: bool = Field( False, alias="IsPrimary", description="True if this is primary event (suppresses conflicts)" ) is_private: bool = Field(False, alias="IsPrivate", description="True if event is private") - proof: ProofLevel = Field( - ProofLevel.BLANK, alias="Proof", description="Evidence quality rating" - ) + proof: ProofLevel = Field(ProofLevel.BLANK, alias="Proof", description="Evidence quality rating") status: int = Field(0, alias="Status", description="LDS status (0=default, 1-12=LDS statuses)") sentence: str | None = Field(None, alias="Sentence", description="Custom sentence template") details: str | None = Field(None, alias="Details", description="Event details/description") @@ -280,24 +244,16 @@ class Place(RMBaseModel): """ place_id: int = Field(..., alias="PlaceID", description="Unique place identifier") - place_type: PlaceType = Field( - PlaceType.PLACE, alias="PlaceType", description="Type of place entry" - ) - name: str | None = Field( - None, alias="Name", description="Place name (comma-delimited hierarchy)" - ) + place_type: PlaceType = Field(PlaceType.PLACE, alias="PlaceType", description="Type of place entry") + name: str | None = Field(None, alias="Name", description="Place name (comma-delimited hierarchy)") abbrev: str | None = Field(None, alias="Abbrev", description="Abbreviated place name") normalized: str | None = Field(None, alias="Normalized", description="Standardized place name") latitude: int = Field(0, alias="Latitude", description="Latitude (decimal degrees × 1e7)") longitude: int = Field(0, alias="Longitude", description="Longitude (decimal degrees × 1e7)") - lat_long_exact: bool = Field( - False, alias="LatLongExact", description="True if coordinates are exact" - ) + lat_long_exact: bool = Field(False, alias="LatLongExact", description="True if coordinates are exact") master_id: int = Field(0, alias="MasterID", description="PlaceID of master place (for details)") note: str | None = Field(None, alias="Note", description="User-defined notes") - reverse: str | None = Field( - None, alias="Reverse", description="Reverse order of place hierarchy (for indexing)" - ) + reverse: str | None = Field(None, alias="Reverse", description="Reverse order of place hierarchy (for indexing)") fs_id: int | None = Field(None, alias="fsID", description="FamilySearch place ID") an_id: int | None = Field(None, alias="anID", description="Ancestry.com place ID") @@ -338,9 +294,7 @@ class Source(RMBaseModel): comments: str | None = Field(None, alias="Comments", description="Source comments") is_private: bool = Field(False, alias="IsPrivate", description="True if source is private") template_id: int = Field(0, alias="TemplateID", description="SourceTemplateID (0=free-form)") - fields: bytes | None = Field( - None, alias="Fields", description="XML BLOB with field values (UTF-8 with BOM)" - ) + fields: bytes | None = Field(None, alias="Fields", description="XML BLOB with field values (UTF-8 with BOM)") @field_validator("is_private", mode="before") @classmethod @@ -364,18 +318,12 @@ class Citation(RMBaseModel): actual_text: str | None = Field(None, alias="ActualText", description="Research note") ref_number: str | None = Field(None, alias="RefNumber", description="Detail reference number") footnote: str | None = Field(None, alias="Footnote", description="Custom footnote override") - short_footnote: str | None = Field( - None, alias="ShortFootnote", description="Custom short footnote override" - ) - bibliography: str | None = Field( - None, alias="Bibliography", description="Custom bibliography override" - ) + short_footnote: str | None = Field(None, alias="ShortFootnote", description="Custom short footnote override") + bibliography: str | None = Field(None, alias="Bibliography", description="Custom bibliography override") fields: bytes | None = Field( None, alias="Fields", description="XML BLOB with citation field values (UTF-8 with BOM)" ) - citation_name: str | None = Field( - None, alias="CitationName", description="Auto-generated or user-defined name" - ) + citation_name: str | None = Field(None, alias="CitationName", description="Auto-generated or user-defined name") class Family(RMBaseModel): @@ -392,21 +340,11 @@ class Family(RMBaseModel): husb_order: int = Field(0, alias="HusbOrder", description="Spouse order (0=never rearranged)") wife_order: int = Field(0, alias="WifeOrder", description="Spouse order (0=never rearranged)") is_private: bool = Field(False, alias="IsPrivate", description="True if family is private") - proof: ProofLevel = Field( - ProofLevel.BLANK, alias="Proof", description="Evidence quality rating" - ) - father_label: ParentLabel = Field( - ParentLabel.FATHER, alias="FatherLabel", description="Label for father role" - ) - mother_label: MotherLabel = Field( - MotherLabel.MOTHER, alias="MotherLabel", description="Label for mother role" - ) - father_label_str: str | None = Field( - None, alias="FatherLabelStr", description="Custom label when FatherLabel=99" - ) - mother_label_str: str | None = Field( - None, alias="MotherLabelStr", description="Custom label when MotherLabel=99" - ) + proof: ProofLevel = Field(ProofLevel.BLANK, alias="Proof", description="Evidence quality rating") + father_label: ParentLabel = Field(ParentLabel.FATHER, alias="FatherLabel", description="Label for father role") + mother_label: MotherLabel = Field(MotherLabel.MOTHER, alias="MotherLabel", description="Label for mother role") + father_label_str: str | None = Field(None, alias="FatherLabelStr", description="Custom label when FatherLabel=99") + mother_label_str: str | None = Field(None, alias="MotherLabelStr", description="Custom label when MotherLabel=99") note: str | None = Field(None, alias="Note", description="User-defined notes") @field_validator("is_private", mode="before") @@ -430,21 +368,15 @@ class FactType(RMBaseModel): alias="FactTypeID", description="Unique fact type identifier (<1000=built-in, ≥1000=custom)", ) - owner_type: OwnerType = Field( - ..., alias="OwnerType", description="Type of owner (person or family)" - ) + owner_type: OwnerType = Field(..., alias="OwnerType", description="Type of owner (person or family)") name: str = Field(..., alias="Name", description="Fact type name") abbrev: str | None = Field(None, alias="Abbrev", description="Abbreviation") gedcom_tag: str | None = Field(None, alias="GedcomTag", description="GEDCOM tag") - use_value: bool = Field( - False, alias="UseValue", description="True if fact uses description field" - ) + use_value: bool = Field(False, alias="UseValue", description="True if fact uses description field") use_date: bool = Field(True, alias="UseDate", description="True if fact uses date field") use_place: bool = Field(True, alias="UsePlace", description="True if fact uses place field") sentence: str | None = Field(None, alias="Sentence", description="Sentence template") - flags: int = Field( - 0, alias="Flags", description="6-bit position-coded flags for Include settings" - ) + flags: int = Field(0, alias="Flags", description="6-bit position-coded flags for Include settings") @field_validator("use_value", "use_date", "use_place", mode="before") @classmethod diff --git a/rmagent/rmlib/parsers/blob_parser.py b/rmagent/rmlib/parsers/blob_parser.py index b6bdd3c..84ed276 100644 --- a/rmagent/rmlib/parsers/blob_parser.py +++ b/rmagent/rmlib/parsers/blob_parser.py @@ -170,9 +170,7 @@ def parse_template_field_defs(blob_data: bytes | None) -> list[TemplateField]: hint = hint_elem.text if hint_elem is not None else None long_hint = long_hint_elem.text if long_hint_elem is not None else None - citation_field = ( - citation_field_elem.text == "True" if citation_field_elem is not None else False - ) + citation_field = citation_field_elem.text == "True" if citation_field_elem is not None else False field_defs.append( TemplateField( @@ -242,12 +240,7 @@ def is_freeform_source(fields: dict[str, str]) -> bool: Returns: True if this appears to be a free-form source """ - return ( - len(fields) == 3 - and "Footnote" in fields - and "ShortFootnote" in fields - and "Bibliography" in fields - ) + return len(fields) == 3 and "Footnote" in fields and "ShortFootnote" in fields and "Bibliography" in fields def get_citation_level_fields(template_fields: list[TemplateField]) -> list[str]: diff --git a/rmagent/rmlib/parsers/date_parser.py b/rmagent/rmlib/parsers/date_parser.py index 3b85fb1..bdfd899 100644 --- a/rmagent/rmlib/parsers/date_parser.py +++ b/rmagent/rmlib/parsers/date_parser.py @@ -176,13 +176,7 @@ def to_datetime(self) -> datetime | None: - Date is BC - Date is a range """ - if ( - self.is_null - or self.date_type == DateType.TEXT - or self.is_partial - or self.is_bc - or self.is_range - ): + if self.is_null or self.date_type == DateType.TEXT or self.is_partial or self.is_bc or self.is_range: return None try: @@ -329,9 +323,7 @@ def parse_rm_date(date_str: str | None) -> RMDate: year, month, day, is_bc, is_double_date, qualifier = _parse_date_components(date_str[2:13]) # Parse second date (for ranges) - year2, month2, day2, is_bc2, is_double_date2, qualifier2 = _parse_date_components( - date_str[13:24] - ) + year2, month2, day2, is_bc2, is_double_date2, qualifier2 = _parse_date_components(date_str[13:24]) return RMDate( date_type=date_type, diff --git a/rmagent/rmlib/parsers/name_parser.py b/rmagent/rmlib/parsers/name_parser.py index c40ff0c..4e595af 100644 --- a/rmagent/rmlib/parsers/name_parser.py +++ b/rmagent/rmlib/parsers/name_parser.py @@ -284,9 +284,7 @@ def get_all_names(person_id: int, db_connection: sqlite3.Connection) -> list[Nam return names -def get_name_at_date( - person_id: int, event_sort_date: int | None, db_connection: sqlite3.Connection -) -> Name | None: +def get_name_at_date(person_id: int, event_sort_date: int | None, db_connection: sqlite3.Connection) -> Name | None: """ Get appropriate name for a specific date (context-aware). diff --git a/rmagent/rmlib/parsers/place_parser.py b/rmagent/rmlib/parsers/place_parser.py index 5f3df14..d2497db 100644 --- a/rmagent/rmlib/parsers/place_parser.py +++ b/rmagent/rmlib/parsers/place_parser.py @@ -225,9 +225,7 @@ def format_place_medium(place_name: str | None) -> str: return place_name -def convert_coordinates( - lat_int: int | None, lon_int: int | None -) -> tuple[float | None, float | None]: +def convert_coordinates(lat_int: int | None, lon_int: int | None) -> tuple[float | None, float | None]: """ Convert integer coordinates to decimal degrees. diff --git a/rmagent/rmlib/prototype.py b/rmagent/rmlib/prototype.py index 37db724..2fd060c 100644 --- a/rmagent/rmlib/prototype.py +++ b/rmagent/rmlib/prototype.py @@ -388,9 +388,7 @@ def format_family(person_id: int, query_service: QueryService) -> str: if children: lines.append(f"\nChildren ({len(children)}):") for child in children: - child_name = format_full_name( - given=get_row_value(child, "Given"), surname=get_row_value(child, "Surname") - ) + child_name = format_full_name(given=get_row_value(child, "Given"), surname=get_row_value(child, "Surname")) birth_year = get_row_value(child, "BirthYear", "") year_str = f" (b. {birth_year})" if birth_year else "" lines.append(f" - {child_name} (ID: {child['PersonID']}){year_str}") diff --git a/rmagent/rmlib/quality.py b/rmagent/rmlib/quality.py index 7af0057..c637c67 100644 --- a/rmagent/rmlib/quality.py +++ b/rmagent/rmlib/quality.py @@ -20,7 +20,7 @@ parse_source_fields, parse_template_field_defs, ) -from .parsers.date_parser import UNKNOWN_SORT_DATE, parse_rm_date +from .parsers.date_parser import UNKNOWN_SORT_DATE # Numeric constants YEAR_SECONDS = 31557600 @@ -688,11 +688,7 @@ def _rule_4_3(self, rule: QualityRule) -> list[QualityIssue]: continue required = [field.name for field in template_fields if not field.citation_field] - missing = [ - field_name - for field_name in required - if not actual_fields.get(field_name, "").strip() - ] + missing = [field_name for field_name in required if not actual_fields.get(field_name, "").strip()] if missing: issues.append( { @@ -753,9 +749,7 @@ def _rule_5_1(self, rule: QualityRule) -> list[QualityIssue]: AND LENGTH(CAST(ABS(CAST(SortDate AS INTEGER)) AS TEXT)) NOT IN (18, 19)) ) """ - rows = self.db.query( - sql, (UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE) - ) + rows = self.db.query(sql, (UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE, UNKNOWN_SORT_DATE)) if not rows: return [] diff --git a/rmagent/rmlib/queries.py b/rmagent/rmlib/queries.py index 991923d..70a546e 100644 --- a/rmagent/rmlib/queries.py +++ b/rmagent/rmlib/queries.py @@ -337,9 +337,7 @@ def get_unsourced_vital_events( return self.db.query(sql, tuple(params)) # Pattern 13 - def find_places_by_name( - self, pattern: str, limit: int = DEFAULT_RESULT_LIMIT, exact: bool = False - ): + def find_places_by_name(self, pattern: str, limit: int = DEFAULT_RESULT_LIMIT, exact: bool = False): """ Find places by name with flexible or exact matching. @@ -382,7 +380,7 @@ def find_places_by_name( else: # Flexible matching (original behavior) # Split pattern by comma-space to get hierarchy parts - parts = [p.strip() for p in pattern.split(',') if p.strip()] + parts = [p.strip() for p in pattern.split(",") if p.strip()] if len(parts) == 1: # Simple case: single search term @@ -453,9 +451,7 @@ def find_places_within_radius( center_lon = center["Longitude"] if center["Longitude"] is not None else 0 if not center_lat or not center_lon or center_lat == 0 or center_lon == 0: - raise ValueError( - f"Place '{center['Name']}' (ID {center_place_id}) has no GPS coordinates" - ) + raise ValueError(f"Place '{center['Name']}' (ID {center_place_id}) has no GPS coordinates") # Convert integer coordinates to degrees center_lat_deg = center_lat / 10_000_000.0 @@ -481,9 +477,7 @@ def find_places_within_radius( place_lat_deg = place["Latitude"] / 10_000_000.0 place_lon_deg = place["Longitude"] / 10_000_000.0 - distance_km = _haversine_distance( - center_lat_deg, center_lon_deg, place_lat_deg, place_lon_deg - ) + distance_km = _haversine_distance(center_lat_deg, center_lon_deg, place_lat_deg, place_lon_deg) if distance_km <= radius_km: results.append( @@ -562,7 +556,7 @@ def _haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> f import math # Earth radius in kilometers - R = 6371.0 + earth_radius_km = 6371.0 # Convert degrees to radians lat1_rad = math.radians(lat1) @@ -571,11 +565,8 @@ def _haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> f delta_lon = math.radians(lon2 - lon1) # Haversine formula - a = ( - math.sin(delta_lat / 2) ** 2 - + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon / 2) ** 2 - ) + a = math.sin(delta_lat / 2) ** 2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon / 2) ** 2 c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) - distance = R * c + distance = earth_radius_km * c return distance diff --git a/sqlite-extension/python_example.py b/sqlite-extension/python_example.py index de3bab0..c717b89 100755 --- a/sqlite-extension/python_example.py +++ b/sqlite-extension/python_example.py @@ -49,9 +49,7 @@ def connect_rmtree(db_path, extension_path="./sqlite-extension/icu.dylib"): # - caseLevel=off: Ignore case differences # - normalization=on: Normalize Unicode characters conn.execute( - "SELECT icu_load_collation(" - "'en_US@colStrength=primary;caseLevel=off;normalization=on'," - "'RMNOCASE')" + "SELECT icu_load_collation(" "'en_US@colStrength=primary;caseLevel=off;normalization=on'," "'RMNOCASE')" ) finally: # Disable extension loading (security best practice) diff --git a/tests/integration/test_llm_providers.py b/tests/integration/test_llm_providers.py index 0430307..42ccbc6 100644 --- a/tests/integration/test_llm_providers.py +++ b/tests/integration/test_llm_providers.py @@ -176,9 +176,7 @@ class TestProviderInterfaceCompliance: ), ( OllamaProvider, - lambda m: setattr( - m, "generate", lambda **kw: {"response": "Text", "eval_count": 10} - ), + lambda m: setattr(m, "generate", lambda **kw: {"response": "Text", "eval_count": 10}), ), ], ) diff --git a/tests/integration/test_real_providers.py b/tests/integration/test_real_providers.py index 4698482..4c183b3 100644 --- a/tests/integration/test_real_providers.py +++ b/tests/integration/test_real_providers.py @@ -24,6 +24,7 @@ if _env_path.exists(): load_dotenv(_env_path) + # Environment checks - detect placeholder vs real keys def _is_real_key(key_value: str | None) -> bool: """Check if API key is real (not placeholder like sk-xxxxx).""" @@ -68,9 +69,7 @@ def test_genealogy_specific_prompt(self): assert result.usage.total_tokens > 0 # Check for genealogy keywords text_lower = result.text.lower() - assert any( - word in text_lower for word in ["census", "vital", "records", "birth", "death", "marriage"] - ) + assert any(word in text_lower for word in ["census", "vital", "records", "birth", "death", "marriage"]) @pytest.mark.real_api diff --git a/tests/unit/test_biography_generator.py b/tests/unit/test_biography_generator.py index 39d4dc0..d1a376a 100644 --- a/tests/unit/test_biography_generator.py +++ b/tests/unit/test_biography_generator.py @@ -336,6 +336,7 @@ def test_apply_privacy_rules_for_living_person(self): def test_generate_introduction(self): """Test generating introduction section.""" from rmagent.generators.biography import BiographyTemplates + templates = BiographyTemplates() context = PersonContext( @@ -370,6 +371,7 @@ def test_generate_introduction(self): def test_generate_early_life(self): """Test generating early life section.""" from rmagent.generators.biography import BiographyTemplates + templates = BiographyTemplates() # Test with siblings @@ -399,6 +401,7 @@ def test_generate_early_life(self): def test_format_sources_footnote_style(self): """Test formatting sources in footnote style.""" from rmagent.generators.biography import CitationProcessor + citation_processor = CitationProcessor() context = PersonContext( @@ -438,6 +441,7 @@ def test_format_sources_footnote_style(self): def test_format_sources_parenthetical_style(self): """Test formatting sources in parenthetical style.""" from rmagent.generators.biography import CitationProcessor + citation_processor = CitationProcessor() context = PersonContext( @@ -470,6 +474,7 @@ def test_format_sources_parenthetical_style(self): def test_parse_ai_response(self): """Test parsing AI-generated biography.""" from rmagent.generators.biography import BiographyTemplates + templates = BiographyTemplates() ai_response = """ @@ -641,9 +646,7 @@ def test_categorize_events(self, real_db_path, extension_path): }, # Residence ] - vital, education, occupation, military, residence, other = generator._categorize_events( - db, events - ) + vital, education, occupation, military, residence, other = generator._categorize_events(db, events) assert len(vital) == 1 assert len(education) == 1 diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 74f5715..68e7f0c 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -87,9 +87,7 @@ def test_bio_with_invalid_length(self, runner, test_db_path): def test_bio_no_ai_template_based(self, runner, test_db_path, tmp_path): """Test bio command with --no-ai flag (template-based generation).""" output_file = tmp_path / "bio_test.md" - result = runner.invoke( - cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--output", str(output_file)] - ) + result = runner.invoke(cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--output", str(output_file)]) # Should succeed with template-based generation assert result.exit_code == 0 assert output_file.exists() @@ -119,26 +117,20 @@ def test_bio_length_variations(self, runner, test_db_path): def test_bio_citation_styles(self, runner, test_db_path): """Test bio with different citation styles.""" for style in ["footnote", "parenthetical", "narrative"]: - result = runner.invoke( - cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--citation-style", style] - ) + result = runner.invoke(cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--citation-style", style]) assert result.exit_code == 0 def test_bio_with_file_output(self, runner, test_db_path, tmp_path): """Test bio with file output.""" output_file = tmp_path / "biography.md" - result = runner.invoke( - cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--output", str(output_file)] - ) + result = runner.invoke(cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--output", str(output_file)]) assert result.exit_code == 0 assert "Biography written to" in result.output assert output_file.exists() def test_bio_no_sources(self, runner, test_db_path): """Test bio with --no-sources flag.""" - result = runner.invoke( - cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--no-sources"] - ) + result = runner.invoke(cli, ["--database", test_db_path, "bio", "1", "--no-ai", "--no-sources"]) assert result.exit_code == 0 # Biography should not include sources section when --no-sources is used # (We can't easily verify this without parsing output, but command should succeed) @@ -165,9 +157,7 @@ def test_quality_with_invalid_format(self, runner): def test_quality_basic(self, runner, test_db_path, tmp_path): """Test basic quality report generation.""" output_file = tmp_path / "quality.md" - result = runner.invoke( - cli, ["--database", test_db_path, "quality", "--output", str(output_file)] - ) + result = runner.invoke(cli, ["--database", test_db_path, "quality", "--output", str(output_file)]) assert result.exit_code == 0 assert output_file.exists() assert "📊 Data Quality Summary" in result.output @@ -397,9 +387,7 @@ def test_timeline_with_include_family(self, runner, test_db_path, tmp_path): def test_timeline_invalid_format(self, runner, test_db_path): """Test timeline with invalid format option.""" - result = runner.invoke( - cli, ["--database", test_db_path, "timeline", "1", "--format", "invalid"] - ) + result = runner.invoke(cli, ["--database", test_db_path, "timeline", "1", "--format", "invalid"]) assert result.exit_code != 0 @@ -573,9 +561,7 @@ def test_search_by_name(self, runner, test_db_path): def test_search_by_full_name(self, runner, test_db_path): """Test search by full name (given and surname).""" - result = runner.invoke( - cli, ["--database", test_db_path, "search", "--name", "Michael Iams"] - ) + result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "Michael Iams"]) assert result.exit_code == 0 def test_search_by_place(self, runner, test_db_path): @@ -587,40 +573,30 @@ def test_search_by_place(self, runner, test_db_path): def test_search_with_limit(self, runner, test_db_path): """Test search with custom limit.""" - result = runner.invoke( - cli, ["--database", test_db_path, "search", "--name", "Smith", "--limit", "10"] - ) + result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "Smith", "--limit", "10"]) assert result.exit_code == 0 def test_search_exact_mode(self, runner, test_db_path): """Test search with --exact flag (no phonetic matching).""" - result = runner.invoke( - cli, ["--database", test_db_path, "search", "--name", "Iams", "--exact"] - ) + result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "Iams", "--exact"]) assert result.exit_code == 0 def test_search_name_and_place(self, runner, test_db_path): """Test search with both name and place criteria.""" - result = runner.invoke( - cli, ["--database", test_db_path, "search", "--name", "Iams", "--place", "Maryland"] - ) + result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "Iams", "--place", "Maryland"]) # Should show results for both searches assert result.exit_code == 0 def test_search_with_surname_variation(self, runner, test_db_path): """Test search with surname variation syntax [variant].""" - result = runner.invoke( - cli, ["--database", test_db_path, "search", "--name", "John Iiams [Ijams]"] - ) + result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "John Iiams [Ijams]"]) assert result.exit_code == 0 # Should show that it's searching multiple variations assert "Searching 2 name variations" in result.output or "Found" in result.output def test_search_with_multiple_variations(self, runner, test_db_path): """Test search with multiple surname variations.""" - result = runner.invoke( - cli, ["--database", test_db_path, "search", "--name", "John Iams [Ijams] [Imes]"] - ) + result = runner.invoke(cli, ["--database", test_db_path, "search", "--name", "John Iams [Ijams] [Imes]"]) assert result.exit_code == 0 # Should search 3 variations (base + 2 variants) assert "Searching 3 name variations" in result.output or "Found" in result.output diff --git a/tests/unit/test_hugo_exporter.py b/tests/unit/test_hugo_exporter.py index df3c770..a69260a 100644 --- a/tests/unit/test_hugo_exporter.py +++ b/tests/unit/test_hugo_exporter.py @@ -112,9 +112,7 @@ def test_export_person_raises_error_without_database(self, tmp_path): with pytest.raises(ValueError, match="No database provided"): exporter.export_person(person_id=1, output_dir=tmp_path) - def test_export_person_raises_error_for_nonexistent_person( - self, tmp_path, real_db_path, extension_path - ): + def test_export_person_raises_error_for_nonexistent_person(self, tmp_path, real_db_path, extension_path): """Test that export_person raises ValueError for nonexistent person.""" if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") @@ -276,9 +274,7 @@ def test_export_batch_with_index(self, tmp_path, real_db_path, extension_path): assert "Family Biographies" in content assert "---" in content # Has front matter - def test_export_batch_handles_invalid_person_gracefully( - self, tmp_path, real_db_path, extension_path - ): + def test_export_batch_handles_invalid_person_gracefully(self, tmp_path, real_db_path, extension_path): """Test batch export continues when one person fails.""" if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") @@ -351,9 +347,7 @@ def test_complete_hugo_export_workflow(self, tmp_path, real_db_path, extension_p if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") - exporter = HugoExporter( - db=real_db_path, extension_path=extension_path, media_base_path="/media/" - ) + exporter = HugoExporter(db=real_db_path, extension_path=extension_path, media_base_path="/media/") # Create Hugo directory structure content_dir = tmp_path / "content" / "people" @@ -403,9 +397,7 @@ def test_media_references_in_export(self, tmp_path, real_db_path, extension_path if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") - exporter = HugoExporter( - db=real_db_path, extension_path=extension_path, media_base_path="/media/" - ) + exporter = HugoExporter(db=real_db_path, extension_path=extension_path, media_base_path="/media/") result = exporter.export_person( person_id=1, diff --git a/tests/unit/test_llm_provider.py b/tests/unit/test_llm_provider.py index 0722b95..bee920d 100644 --- a/tests/unit/test_llm_provider.py +++ b/tests/unit/test_llm_provider.py @@ -41,9 +41,7 @@ def _invoke(self, prompt: str, **kwargs): return LLMResult( text=text, model=self.model, - usage=TokenUsage( - prompt_tokens=len(prompt.split()), completion_tokens=len(text.split()) - ), + usage=TokenUsage(prompt_tokens=len(prompt.split()), completion_tokens=len(text.split())), ) @@ -64,9 +62,7 @@ def _invoke(self, prompt: str, **kwargs): self.invocations += 1 if self.invocations < 2: raise LLMError("temporary failure") - return LLMResult( - text="ok", model=self.model, usage=TokenUsage(prompt_tokens=1, completion_tokens=1) - ) + return LLMResult(text="ok", model=self.model, usage=TokenUsage(prompt_tokens=1, completion_tokens=1)) provider = FlakyProvider() result = provider.generate("prompt") diff --git a/tests/unit/test_name_parser.py b/tests/unit/test_name_parser.py index febee4f..0eecdfe 100644 --- a/tests/unit/test_name_parser.py +++ b/tests/unit/test_name_parser.py @@ -179,9 +179,7 @@ def test_full_name_minimal(self): def test_full_name_surname_only(self): """Test full name with surname only.""" - name = Name( - name_id=1, person_id=1, is_primary=True, name_type=NameType.BIRTH, surname="Smith" - ) + name = Name(name_id=1, person_id=1, is_primary=True, name_type=NameType.BIRTH, surname="Smith") assert name.full_name() == "Smith" @@ -457,9 +455,7 @@ def test_format_minimal(self): def test_format_no_nickname(self): """Test formatting without nickname.""" - full = format_full_name( - surname="Smith", given="John", nickname="Jack", include_nickname=False - ) + full = format_full_name(surname="Smith", given="John", nickname="Jack", include_nickname=False) assert full == "John Smith" diff --git a/tests/unit/test_place_parser.py b/tests/unit/test_place_parser.py index dfeed21..87db7b8 100644 --- a/tests/unit/test_place_parser.py +++ b/tests/unit/test_place_parser.py @@ -172,9 +172,7 @@ def test_get_level_2_state(self): def test_get_level_3_country(self): """Test getting level 3 (country).""" - assert ( - get_place_level("Baltimore, Baltimore, Maryland, United States", 3) == "United States" - ) + assert get_place_level("Baltimore, Baltimore, Maryland, United States", 3) == "United States" def test_get_level_out_of_range(self): """Test getting level that doesn't exist.""" @@ -192,10 +190,7 @@ class TestGetPlaceShort: def test_get_short_us_place_2_levels(self): """Test short form for US place (skips county).""" - assert ( - get_place_short("Baltimore, Baltimore, Maryland, United States", 2) - == "Baltimore, Maryland" - ) + assert get_place_short("Baltimore, Baltimore, Maryland, United States", 2) == "Baltimore, Maryland" def test_get_short_international_place_2_levels(self): """Test short form for international place.""" @@ -217,18 +212,12 @@ class TestFormatPlaceShort: def test_format_us_4_level(self): """Test formatting US 4-level place.""" - assert ( - format_place_short("Baltimore, Baltimore, Maryland, United States") - == "Baltimore, Maryland" - ) + assert format_place_short("Baltimore, Baltimore, Maryland, United States") == "Baltimore, Maryland" def test_format_us_3_level(self): """Test formatting US 3-level place.""" # 3-level place: City, State, Country - format returns City, Country (level 0 and 2) - assert ( - format_place_short("Abbeville, South Carolina, United States") - == "Abbeville, United States" - ) + assert format_place_short("Abbeville, South Carolina, United States") == "Abbeville, United States" def test_format_international_4_level(self): """Test formatting international 4-level place.""" @@ -249,10 +238,7 @@ class TestFormatPlaceMedium: def test_format_medium_4_level(self): """Test medium format for 4-level place.""" - assert ( - format_place_medium("Baltimore, Baltimore, Maryland, United States") - == "Baltimore, Baltimore, Maryland" - ) + assert format_place_medium("Baltimore, Baltimore, Maryland, United States") == "Baltimore, Baltimore, Maryland" def test_format_medium_3_level(self): """Test medium format for 3-level place.""" diff --git a/tests/unit/test_quality.py b/tests/unit/test_quality.py index d9a3b69..d3b4c53 100644 --- a/tests/unit/test_quality.py +++ b/tests/unit/test_quality.py @@ -7,11 +7,8 @@ from __future__ import annotations -from collections.abc import Iterable from pathlib import Path -import pytest - # Ensure repository root is available on sys.path when running with pytest -o addopts='' PROJECT_ROOT = Path(__file__).resolve().parents[2] import sys diff --git a/tests/unit/test_quality_report.py b/tests/unit/test_quality_report.py index 427ff7e..5f34aae 100644 --- a/tests/unit/test_quality_report.py +++ b/tests/unit/test_quality_report.py @@ -261,9 +261,7 @@ def test_generate_raises_error_without_database(self): with pytest.raises(ValueError, match="No database provided"): generator.generate(format=ReportFormat.MARKDOWN) - def test_generate_markdown_with_mock_validation( - self, real_db_path, extension_path, mock_quality_report - ): + def test_generate_markdown_with_mock_validation(self, real_db_path, extension_path, mock_quality_report): """Test generate with mocked validation.""" if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") @@ -278,9 +276,7 @@ def test_generate_markdown_with_mock_validation( assert "Total People:** 10,000" in report assert "Total Issues Found:** 185" in report - def test_generate_html_with_mock_validation( - self, real_db_path, extension_path, mock_quality_report - ): + def test_generate_html_with_mock_validation(self, real_db_path, extension_path, mock_quality_report): """Test HTML generation with mocked validation.""" if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") @@ -293,9 +289,7 @@ def test_generate_html_with_mock_validation( assert "" in report assert "

      Data Quality Report

      " in report - def test_generate_csv_with_mock_validation( - self, real_db_path, extension_path, mock_quality_report - ): + def test_generate_csv_with_mock_validation(self, real_db_path, extension_path, mock_quality_report): """Test CSV generation with mocked validation.""" if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") @@ -309,9 +303,7 @@ def test_generate_csv_with_mock_validation( assert "Rule Name" in report assert "1.1" in report - def test_generate_with_output_path( - self, tmp_path, real_db_path, extension_path, mock_quality_report - ): + def test_generate_with_output_path(self, tmp_path, real_db_path, extension_path, mock_quality_report): """Test writing report to file.""" if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") @@ -363,9 +355,7 @@ def test_generate_real_markdown_report(self, real_db_path, extension_path): if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") - generator = QualityReportGenerator( - db=real_db_path, extension_path=extension_path, sample_limit=5 - ) + generator = QualityReportGenerator(db=real_db_path, extension_path=extension_path, sample_limit=5) report = generator.generate(format=ReportFormat.MARKDOWN) @@ -394,9 +384,7 @@ def test_generate_real_html_report(self, real_db_path, extension_path): if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") - generator = QualityReportGenerator( - db=real_db_path, extension_path=extension_path, sample_limit=5 - ) + generator = QualityReportGenerator(db=real_db_path, extension_path=extension_path, sample_limit=5) report = generator.generate(format=ReportFormat.HTML) @@ -419,9 +407,7 @@ def test_generate_real_csv_report(self, real_db_path, extension_path): if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") - generator = QualityReportGenerator( - db=real_db_path, extension_path=extension_path, sample_limit=5 - ) + generator = QualityReportGenerator(db=real_db_path, extension_path=extension_path, sample_limit=5) report = generator.generate(format=ReportFormat.CSV) @@ -441,9 +427,7 @@ def test_generate_all_formats(self, real_db_path, extension_path): if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") - generator = QualityReportGenerator( - db=real_db_path, extension_path=extension_path, sample_limit=3 - ) + generator = QualityReportGenerator(db=real_db_path, extension_path=extension_path, sample_limit=3) # Generate all three formats markdown_report = generator.generate(format=ReportFormat.MARKDOWN) diff --git a/tests/unit/test_timeline_generator.py b/tests/unit/test_timeline_generator.py index df43dc1..0b9091d 100644 --- a/tests/unit/test_timeline_generator.py +++ b/tests/unit/test_timeline_generator.py @@ -128,9 +128,7 @@ def test_format_place_for_timeline(self): assert place == "Tulsa, Oklahoma" # International place - place = generator._format_place_for_timeline( - "London, Greater London, England, United Kingdom" - ) + place = generator._format_place_for_timeline("London, Greater London, England, United Kingdom") assert place == "London, England" # Simple place @@ -347,9 +345,7 @@ def test_generate_with_output_path(self, tmp_path, real_db_path, extension_path) generator = TimelineGenerator(db=real_db_path, extension_path=extension_path) output_file = tmp_path / "timeline.json" - json_output = generator.generate( - person_id=1, format=TimelineFormat.JSON, output_path=output_file - ) + json_output = generator.generate(person_id=1, format=TimelineFormat.JSON, output_path=output_file) # Verify file was created assert output_file.exists() @@ -395,9 +391,7 @@ def test_generate_complete_timeline(self, real_db_path, extension_path): generator = TimelineGenerator(db=real_db_path, extension_path=extension_path) # Generate JSON - json_output = generator.generate( - person_id=1, format=TimelineFormat.JSON, group_by_phase=True - ) + json_output = generator.generate(person_id=1, format=TimelineFormat.JSON, group_by_phase=True) # Parse and verify timeline = json.loads(json_output) @@ -476,9 +470,7 @@ def test_timeline_with_private_events_excluded(self, real_db_path, extension_pat if not real_db_path.exists() or not extension_path.exists(): pytest.skip("Real database or ICU extension not available") - generator = TimelineGenerator( - db=real_db_path, extension_path=extension_path, include_private=False - ) + generator = TimelineGenerator(db=real_db_path, extension_path=extension_path, include_private=False) json_output = generator.generate(person_id=1, format=TimelineFormat.JSON) timeline = json.loads(json_output)
MetricCount
Total People{report.summary.get('total_people', 0):,}
Total Events{report.summary.get('total_events', 0):,}
Total Sources{report.summary.get('total_sources', 0):,}
Total People{report.summary.get('total_people', 0):,}
Total Events{report.summary.get('total_events', 0):,}
Total Sources{report.summary.get('total_sources', 0):,}
Total Citations{report.summary.get('total_citations', 0):,}