From 67e31618b4432d9a0aa4bdd75f980cc9b9ae392c Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Fri, 16 Jan 2026 09:12:20 -0600 Subject: [PATCH 1/4] added date in html, and passed full result for charts --- src/faircareai/core/results.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/faircareai/core/results.py b/src/faircareai/core/results.py index 157a3b3..40d7001 100644 --- a/src/faircareai/core/results.py +++ b/src/faircareai/core/results.py @@ -10,6 +10,7 @@ import json from dataclasses import dataclass, field +from datetime import date from pathlib import Path from typing import TYPE_CHECKING, Any @@ -429,9 +430,9 @@ def to_pdf( return generate_governance_pdf_report(self, path, metric_config=metric_config) else: metric_config = MetricDisplayConfig.data_scientist(include_optional=include_optional) - # Convert AuditResults to AuditSummary for generator + # Convert AuditResults to AuditSummary for generator, but also pass full results for charts summary = self._to_audit_summary() - return generate_pdf_report(summary, path, metric_config=metric_config) + return generate_pdf_report(summary, path, metric_config=metric_config, results=self) def to_pptx( self, @@ -570,7 +571,7 @@ def _to_audit_summary(self) -> "AuditSummary": return AuditSummary( model_name=self.config.model_name, - audit_date=self.config.report_date or "", + audit_date=self.config.report_date or date.today().isoformat(), n_samples=self.descriptive_stats.get("cohort_overview", {}).get("n_total", 0), n_groups=n_groups, threshold=self.threshold, From 6a8c89cb9259208fc7ffff460e2a7532b5d759ca Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Fri, 16 Jan 2026 09:13:28 -0600 Subject: [PATCH 2/4] added computation of predictive parity and calibration --- src/faircareai/metrics/fairness.py | 41 +++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/src/faircareai/metrics/fairness.py b/src/faircareai/metrics/fairness.py index ecc7c90..f6b8957 100644 --- a/src/faircareai/metrics/fairness.py +++ b/src/faircareai/metrics/fairness.py @@ -160,6 +160,7 @@ def compute_fairness_metrics( results["fpr_diff"] = {} results["equalized_odds_diff"] = {} results["ppv_ratio"] = {} + results["ppv_diff"] = {} results["calibration_diff"] = {} ref_selection = ref_metrics.get("selection_rate", 0) @@ -197,12 +198,13 @@ def compute_fairness_metrics( max(abs(tpr - ref_tpr), abs(fpr - ref_fpr)) ) - # Predictive parity (PPV ratio) + # Predictive parity (PPV ratio and difference) ppv = group_data.get("ppv", 0) if ref_ppv > 0: results["ppv_ratio"][str(group)] = float(ppv / ref_ppv) else: results["ppv_ratio"][str(group)] = None + results["ppv_diff"][str(group)] = float(ppv - ref_ppv) # Calibration difference cal = group_data.get("mean_calibration_error", 0) @@ -252,15 +254,40 @@ def _compute_fairness_summary(metrics: dict) -> dict[str, Any]: "within_threshold": worst_eo <= EQUALIZED_ODDS_THRESHOLD, } - # Predictive parity - filter None values (occur when reference PPV is 0) + # Predictive parity - use PPV difference for consistency with other metrics + ppv_diffs = list(metrics.get("ppv_diff", {}).values()) ppv_ratios_raw = list(metrics.get("ppv_ratio", {}).values()) ppv_ratios = [r for r in ppv_ratios_raw if r is not None] - if ppv_ratios: - min_ppv = min(ppv_ratios) - worst_ppv = min_ppv if min_ppv < 1 else max(ppv_ratios) + if ppv_diffs or ppv_ratios: + # Compute worst_diff from ppv_diff if available + worst_ppv_diff = max(ppv_diffs, key=abs) if ppv_diffs else None + # Compute worst_ratio for 
backward compatibility + worst_ratio = None + if ppv_ratios: + min_ppv = min(ppv_ratios) + worst_ratio = min_ppv if min_ppv < 1 else max(ppv_ratios) + # Determine within_threshold based on worst_diff if available, else ratio + if worst_ppv_diff is not None: + within_threshold = abs(worst_ppv_diff) <= EQUALIZED_ODDS_THRESHOLD + elif worst_ratio is not None: + within_threshold = DEMOGRAPHIC_PARITY_LOWER <= worst_ratio <= DEMOGRAPHIC_PARITY_UPPER + else: + within_threshold = True summary["predictive_parity"] = { - "worst_ratio": float(worst_ppv), - "within_threshold": DEMOGRAPHIC_PARITY_LOWER <= worst_ppv <= DEMOGRAPHIC_PARITY_UPPER, + "worst_diff": float(worst_ppv_diff) if worst_ppv_diff is not None else 0.0, + "worst_ratio": float(worst_ratio) if worst_ratio is not None else None, + "within_threshold": within_threshold, + } + + # Calibration + cal_diffs = list(metrics.get("calibration_diff", {}).values()) + if cal_diffs: + worst_cal = max(cal_diffs, key=abs) + # Calibration threshold: difference in mean calibration error should be small + # Using 0.05 (5 percentage points) as threshold for clinical significance + summary["calibration"] = { + "worst_diff": float(worst_cal), + "within_threshold": abs(worst_cal) <= 0.05, } return summary From 906594c4f3f22889f50d30ae826947121e8dbe82 Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Fri, 16 Jan 2026 09:14:37 -0600 Subject: [PATCH 3/4] updated font size and made section 5 tailor to selected metric --- src/faircareai/reports/generator.py | 393 ++++++++++++++++++++-------- 1 file changed, 291 insertions(+), 102 deletions(-) diff --git a/src/faircareai/reports/generator.py b/src/faircareai/reports/generator.py index 7f68f6f..b10a9d4 100644 --- a/src/faircareai/reports/generator.py +++ b/src/faircareai/reports/generator.py @@ -18,7 +18,9 @@ Methodology: Van Calster et al. (2025), CHAI RAIC Checkpoint 1. """ +import asyncio import html +from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from datetime import date from pathlib import Path @@ -42,6 +44,62 @@ logger = get_logger(__name__) +def _is_in_async_context() -> bool: + """Check if we're running inside an asyncio event loop (e.g., Jupyter notebook).""" + try: + asyncio.get_running_loop() + return True + except RuntimeError: + return False + + +def _run_playwright_pdf_generation( + html_content: str, + output_path: Path, + page_format: str = "Letter", + margins: dict | None = None, +) -> None: + """Run Playwright PDF generation, handling async context (Jupyter) safely. + + Args: + html_content: HTML string to render to PDF. + output_path: Path for output PDF file. + page_format: Page format (e.g., "Letter", "A4"). + margins: Page margins dict with top, right, bottom, left keys. 
+ """ + from playwright.sync_api import sync_playwright + + if margins is None: + margins = {"top": "0.5in", "right": "0.5in", "bottom": "0.5in", "left": "0.5in"} + + def _generate_pdf() -> None: + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page() + + # Load HTML content with timeout protection (60s for complex reports) + page.set_content(html_content, wait_until="networkidle", timeout=60000) + + # Generate PDF with print styling + page.pdf( + path=str(output_path.resolve()), + format=page_format, + margin=margins, + print_background=True, + ) + + browser.close() + + if _is_in_async_context(): + # Running in Jupyter or other async context - use thread pool + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(_generate_pdf) + future.result() # Wait for completion and raise any exceptions + else: + # Normal sync context - run directly + _generate_pdf() + + def _validate_output_path(output_path: Path, base_dir: Path | None = None) -> Path: """Validate output path is within allowed directory. @@ -97,6 +155,7 @@ def generate_pdf_report( output_path: str | Path, include_charts: bool = True, metric_config: "MetricDisplayConfig | None" = None, + results: "AuditResults | None" = None, ) -> Path: """ Generate a formal PDF audit report. @@ -114,6 +173,8 @@ def generate_pdf_report( include_charts: If True, embed charts metric_config: MetricDisplayConfig controlling which metrics to display. If None, defaults to RECOMMENDED metrics only. + results: Full AuditResults object for chart generation. If None, charts + will be limited or unavailable. Returns: Path to generated PDF file @@ -123,7 +184,7 @@ def generate_pdf_report( Run: pip install playwright && playwright install chromium """ try: - from playwright.sync_api import sync_playwright + from playwright.sync_api import sync_playwright # noqa: F401 except ImportError as err: raise ImportError( "Playwright is required for PDF generation. 
Install with: " @@ -134,25 +195,10 @@ def generate_pdf_report( output_path.parent.mkdir(parents=True, exist_ok=True) # Generate HTML content - html_content = _generate_report_html(summary, include_charts) - - # Use Playwright to render HTML to PDF - with sync_playwright() as p: - browser = p.chromium.launch() - page = browser.new_page() - - # Load HTML content with timeout protection (60s for complex reports) - page.set_content(html_content, wait_until="networkidle", timeout=60000) - - # Generate PDF with print styling - page.pdf( - path=str(output_path.resolve()), - format="Letter", - margin={"top": "0.5in", "right": "0.5in", "bottom": "0.5in", "left": "0.5in"}, - print_background=True, - ) + html_content = _generate_report_html(summary, include_charts, results=results) - browser.close() + # Use Playwright to render HTML to PDF (handles Jupyter/async context) + _run_playwright_pdf_generation(html_content, output_path) return output_path @@ -298,7 +344,7 @@ def _generate_full_report_html(results: "AuditResults") -> str: /* Scientific Publication Style - Large, Clear, Readable */ body {{ font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; - font-size: {TYPOGRAPHY["body_size"]}px; /* 18px - publication readable */ + font-size: 16px; color: var(--text-color); background-color: var(--bg-color); line-height: 1.6; @@ -318,10 +364,10 @@ def _generate_full_report_html(results: "AuditResults") -> str: margin-top: 0; }} - /* Publication-style large headers */ - h1 {{ font-size: {TYPOGRAPHY["heading_size"]}px; margin-bottom: 12px; }} /* 40px */ - h2 {{ font-size: {TYPOGRAPHY["subheading_size"]}px; margin-top: 40px; border-bottom: 2px solid var(--primary-color); padding-bottom: 10px; }} /* 32px */ - h3 {{ font-size: {TYPOGRAPHY["h3_size"]}px; margin-top: 28px; color: var(--secondary-color); }} /* 28px */ + /* Publication-style large headers - fixed sizes for HTML readability */ + h1 {{ font-size: 32px; margin-bottom: 12px; }} + h2 {{ font-size: 24px; margin-top: 40px; border-bottom: 2px solid var(--primary-color); padding-bottom: 10px; }} + h3 {{ font-size: 20px; margin-top: 28px; color: var(--secondary-color); }} .header {{ background: white; @@ -332,14 +378,14 @@ def _generate_full_report_html(results: "AuditResults") -> str: }} /* Publication readable metadata */ - .metadata {{ color: #666; font-size: {TYPOGRAPHY["label_size"]}px; }} /* 18px */ + .metadata {{ color: #666; font-size: 14px; }} .status-badge {{ display: inline-block; padding: 14px 28px; border-radius: 6px; font-weight: 700; - font-size: {TYPOGRAPHY["h3_size"]}px; /* 28px - prominent */ + font-size: 18px; color: white; background-color: {status_color}; margin: 16px 0; @@ -371,12 +417,12 @@ def _generate_full_report_html(results: "AuditResults") -> str: /* Large scorecard numbers */ .scorecard-value {{ - font-size: {TYPOGRAPHY["heading_size"]}px; /* 40px - prominent */ + font-size: 36px; font-weight: 700; }} .scorecard-label {{ - font-size: {TYPOGRAPHY["label_size"]}px; /* 18px - readable */ + font-size: 14px; color: #666; text-transform: uppercase; letter-spacing: 0.5px; @@ -391,7 +437,7 @@ def _generate_full_report_html(results: "AuditResults") -> str: width: 100%; border-collapse: collapse; margin: 20px 0; - font-size: {TYPOGRAPHY["label_size"]}px; /* 18px - readable */ + font-size: 15px; }} th, td {{ @@ -403,7 +449,7 @@ def _generate_full_report_html(results: "AuditResults") -> str: th {{ background: var(--bg-color); font-weight: 600; - font-size: {TYPOGRAPHY["label_size"]}px; /* 18px */ + font-size: 15px; color: 
var(--secondary-color); }} @@ -460,13 +506,13 @@ def _generate_full_report_html(results: "AuditResults") -> str: /* Large metric values */ .metric-value {{ - font-size: {TYPOGRAPHY["subheading_size"]}px; /* 32px - prominent */ + font-size: 28px; font-weight: 700; color: var(--primary-color); }} .metric-label {{ - font-size: {TYPOGRAPHY["label_size"]}px; /* 18px - readable */ + font-size: 14px; color: #666; }} @@ -476,7 +522,7 @@ def _generate_full_report_html(results: "AuditResults") -> str: text-align: center; border-radius: 6px; color: #666; - font-size: {TYPOGRAPHY["body_size"]}px; + font-size: 16px; }} /* Responsive chart grid - single column on tablets/mobile */ @@ -913,7 +959,13 @@ def _generate_subgroup_section(results: "AuditResults") -> str: if not isinstance(attr_data, dict): continue - for group_name, group_data in attr_data.items(): + # Extract groups from nested structure + groups_data = attr_data.get("groups", attr_data) + + for group_name, group_data in groups_data.items(): + # Skip metadata keys + if group_name in ("attribute", "threshold", "reference", "disparities"): + continue if not isinstance(group_data, dict) or "error" in group_data: continue @@ -1025,11 +1077,62 @@ def _generate_subgroup_section(results: "AuditResults") -> str: def _generate_fairness_section(results: "AuditResults") -> str: - """Generate Section 5: Fairness Assessment.""" + """Generate Section 5: Fairness Assessment with metric-specific content.""" + from faircareai.core.config import FairnessMetric + config = results.config metric = config.primary_fairness_metric justification = config.fairness_justification or "Not provided" + # Metric-specific descriptions and what to look for + metric_info = { + FairnessMetric.DEMOGRAPHIC_PARITY: { + "name": "Demographic Parity", + "description": "Equal selection rates across groups regardless of true outcomes.", + "what_to_look_for": "Selection rate differences should be small. Large differences mean some groups are selected more/less often.", + "key_metric": "selection_rate_diff", + "threshold_note": "Differences < 0.10 (10%) are typically acceptable.", + }, + FairnessMetric.EQUALIZED_ODDS: { + "name": "Equalized Odds", + "description": "Equal true positive rates AND false positive rates across groups.", + "what_to_look_for": "Both TPR and FPR differences should be small. This ensures equal benefit AND equal burden across groups.", + "key_metric": "equalized_odds", + "threshold_note": "Max(TPR diff, FPR diff) < 0.10 is typically acceptable.", + }, + FairnessMetric.EQUAL_OPPORTUNITY: { + "name": "Equal Opportunity", + "description": "Equal true positive rates across groups (focuses on benefit, not burden).", + "what_to_look_for": "TPR differences should be small. This ensures all groups with the condition are equally likely to be identified.", + "key_metric": "equal_opportunity", + "threshold_note": "TPR differences < 0.10 are typically acceptable.", + }, + FairnessMetric.PREDICTIVE_PARITY: { + "name": "Predictive Parity", + "description": "Equal positive predictive value (PPV) across groups.", + "what_to_look_for": "PPV differences should be small. A positive prediction should mean the same thing for all groups.", + "key_metric": "ppv_diff", + "threshold_note": "PPV differences < 0.10 are typically acceptable.", + }, + FairnessMetric.CALIBRATION: { + "name": "Calibration", + "description": "Predicted probabilities match actual outcomes equally across groups.", + "what_to_look_for": "Calibration error differences should be small. 
A 30% prediction should mean 30% risk for all groups.", + "key_metric": "calibration_diff", + "threshold_note": "Calibration differences < 0.05 are typically acceptable.", + }, + } + + # Get info for selected metric + selected_info = metric_info.get(metric, { + "name": "Not Specified", + "description": "No primary fairness metric selected.", + "what_to_look_for": "Review all metrics below.", + "key_metric": None, + "threshold_note": "Differences < 0.10 are typically acceptable.", + }) + + # Build table rows with all metrics, highlighting the primary one fairness_rows = "" for attr_name, attr_data in results.fairness_metrics.items(): if not isinstance(attr_data, dict): @@ -1037,69 +1140,104 @@ def _generate_fairness_section(results: "AuditResults") -> str: summary = attr_data.get("summary", {}) - # Equal opportunity + # Demographic Parity (selection rate) + dp = summary.get("demographic_parity", {}) + dp_diff = dp.get("worst_diff", 0) if dp else 0 + dp_pass = dp.get("within_threshold", True) if dp else True + + # Equal Opportunity (TPR) eo = summary.get("equal_opportunity", {}) eo_diff = eo.get("worst_diff", 0) if eo else 0 eo_pass = eo.get("within_threshold", True) if eo else True - eo_status = "PASS" if eo_pass else "FLAG" - eo_class = "pass" if eo_pass else "fail" - # Equalized odds + # Equalized Odds (TPR + FPR) eq = summary.get("equalized_odds", {}) eq_diff = eq.get("worst_diff", 0) if eq else 0 eq_pass = eq.get("within_threshold", True) if eq else True - eq_status = "PASS" if eq_pass else "FLAG" - eq_class = "pass" if eq_pass else "fail" + + # Predictive Parity (PPV) + pp = summary.get("predictive_parity", {}) + pp_diff = pp.get("worst_diff", 0) if pp else 0 + pp_pass = pp.get("within_threshold", True) if pp else True + + # Calibration + cal = summary.get("calibration", {}) + cal_diff = cal.get("worst_diff", 0) if cal else 0 + cal_pass = cal.get("within_threshold", True) if cal else True + + # Helper to format cell with highlighting for primary metric + def format_cell(value: float, passed: bool, is_primary: bool) -> str: + status = "PASS" if passed else "FLAG" + status_class = "pass" if passed else "fail" + highlight = ' style="background: #e8f4f8; font-weight: bold;"' if is_primary else "" + return f'{abs(value):.3f}{status}' + + # Determine which metric is primary for this row + is_dp_primary = metric == FairnessMetric.DEMOGRAPHIC_PARITY + is_eo_primary = metric == FairnessMetric.EQUAL_OPPORTUNITY + is_eq_primary = metric == FairnessMetric.EQUALIZED_ODDS + is_pp_primary = metric == FairnessMetric.PREDICTIVE_PARITY + is_cal_primary = metric == FairnessMetric.CALIBRATION fairness_rows += f''' {attr_name} - {abs(eo_diff):.3f} - {eo_status} - {eq_diff:.3f} - {eq_status} + {format_cell(dp_diff, dp_pass, is_dp_primary)} + {format_cell(eo_diff, eo_pass, is_eo_primary)} + {format_cell(eq_diff, eq_pass, is_eq_primary)} + {format_cell(pp_diff, pp_pass, is_pp_primary)} + {format_cell(cal_diff, cal_pass, is_cal_primary)} ''' + # Primary metric badge color + metric_color = "#0072B2" if metric else "#666" + return f"""

Section 5: Fairness Assessment

-

Selected Fairness Metric

-

Primary Metric: {metric.value if metric else "Not specified"}

-

Justification: {justification}

+
+

Primary Fairness Metric: {selected_info["name"]}

+

Definition: {selected_info["description"]}

+

What to look for: {selected_info["what_to_look_for"]}

+

Threshold: {selected_info["threshold_note"]}

+

Justification: {justification}

+
-

Fairness Metrics by Attribute

-

- What to look for: Differences less than 0.10 (10 percentage points) are typically acceptable. - Larger differences may indicate the model treats groups differently. +

All Fairness Metrics by Attribute

+

+ Your selected metric is highlighted in blue. Other metrics shown for completeness.

- +
+
- - - - + + + + + {fairness_rows}
AttributeTPR Difference
(Equal Opportunity)
StatusEqualized Odds Diff
(TPR + FPR)
StatusDemographic Parity
Selection Rate Diff
Equal Opportunity
TPR Diff
Equalized Odds
Max(TPR, FPR) Diff
Predictive Parity
PPV Diff
Calibration
Cal Error Diff
+
-

Understanding Fairness Metrics:

-
    -
  • TPR Difference (Equal Opportunity): Do all groups have similar rates of correctly identified cases? - Large differences mean the model "misses" more cases in certain groups.
  • -
  • Equalized Odds: Combines both true positive rate and false positive rate differences. - Measures overall fairness in both detecting cases and avoiding false alarms.
  • -
  • Impossibility Theorem: When base rates (prevalence) differ between groups, - no model can satisfy all fairness criteria simultaneously. Trade-offs are necessary.
  • -
  • Threshold: Differences <0.10 are generally acceptable in healthcare AI.
  • +

    Why Your Metric Choice Matters:

    +

    The impossibility theorem proves that when base rates differ between groups, + no model can satisfy all fairness criteria simultaneously. Your choice reflects your values:

    +
      +
    • Demographic Parity: Prioritizes equal selection rates (good for resource allocation)
    • +
    • Equal Opportunity: Prioritizes equal detection of true cases (good for screening)
    • +
    • Equalized Odds: Balances detection AND false alarms (good for interventions)
    • +
    • Predictive Parity: Prioritizes equal meaning of positive predictions
    • +
    • Calibration: Prioritizes accurate risk communication across groups
@@ -1173,8 +1311,15 @@ def _generate_governance_section(results: "AuditResults") -> str: def _generate_report_html( summary: AuditSummary, include_charts: bool = True, + results: "AuditResults | None" = None, ) -> str: - """Generate the HTML content for the report.""" + """Generate the HTML content for the report. + + Args: + summary: AuditSummary with basic audit info. + include_charts: If True, generate charts. + results: Full AuditResults for chart generation. If None, charts will be limited. + """ from faircareai.visualization.tables import create_plain_language_summary @@ -1199,20 +1344,43 @@ def _generate_report_html( summary.worst_disparity_value, ) - # Generate charts as SVG if requested + # Generate charts if requested charts_html = "" - if include_charts and summary.metrics_df is not None: - try: - from faircareai.visualization.altair_plots import create_forest_plot_static - - chart = create_forest_plot_static(summary.metrics_df, metric="tpr") - charts_html = f'
{chart.to_html()}
' - except (ValueError, TypeError, KeyError) as e: - logger.warning("Forest plot generation failed: %s", e) - charts_html = '

Charts could not be generated.

' - except ImportError as e: - logger.error("Chart library not available: %s", e) - charts_html = '

Chart library missing. Install with: pip install \'faircareai[viz]\'

' + if include_charts: + if results is not None: + # Use full AuditResults for comprehensive charts + try: + overall_html = _render_governance_overall_figures(results) + subgroup_html = _render_governance_subgroup_figures(results) + charts_html = f""" +
+

Overall Performance

+ {overall_html} +

Subgroup Performance

+ {subgroup_html} +
+ """ + except (ValueError, TypeError, KeyError) as e: + logger.warning("Chart generation failed: %s", e) + charts_html = f'

Charts could not be generated.

' + except ImportError as e: + logger.error("Chart library not available: %s", e) + charts_html = '

Chart library missing. Install with: pip install \'faircareai[viz]\'

' + elif summary.metrics_df is not None and len(summary.metrics_df) > 0: + # Fall back to forest plot from metrics_df + try: + from faircareai.visualization.altair_plots import create_forest_plot_static + + chart = create_forest_plot_static(summary.metrics_df, metric="tpr") + charts_html = f'
{chart.to_html()}
' + except (ValueError, TypeError, KeyError) as e: + logger.warning("Forest plot generation failed: %s", e) + charts_html = '

Charts could not be generated.

' + except ImportError as e: + logger.error("Chart library not available: %s", e) + charts_html = '

Chart library missing. Install with: pip install \'faircareai[viz]\'

' + else: + charts_html = '

No chart data available.

' html = f""" @@ -1237,7 +1405,7 @@ def _generate_report_html( body {{ font-family: {TYPOGRAPHY["data_font"]}; - font-size: {TYPOGRAPHY["body_size"]}px; + font-size: 16px; color: var(--text-color); background-color: var(--bg-color); line-height: 1.6; @@ -1253,12 +1421,12 @@ def _generate_report_html( }} h1 {{ - font-size: 28px; + font-size: 32px; margin-bottom: 8px; }} h2 {{ - font-size: 22px; + font-size: 24px; margin-top: 40px; border-bottom: 2px solid var(--text-color); padding-bottom: 8px; @@ -1342,7 +1510,19 @@ def _generate_report_html( padding: 20px; }} }} + + .charts-section {{ + margin: 30px 0; + }} + + .charts-section h3 {{ + font-size: 20px; + margin-top: 30px; + margin-bottom: 15px; + color: #2c5282; + }} +
@@ -1677,7 +1857,7 @@ def generate_governance_pdf_report( Run: pip install playwright && playwright install chromium """ try: - from playwright.sync_api import sync_playwright + from playwright.sync_api import sync_playwright # noqa: F401 except ImportError as err: raise ImportError( "Playwright is required for PDF generation. Install with: " @@ -1696,23 +1876,8 @@ def generate_governance_pdf_report( '', ) - # Use Playwright to render HTML to PDF - with sync_playwright() as p: - browser = p.chromium.launch() - page = browser.new_page() - - # Load HTML content with timeout protection (60s for complex reports) - page.set_content(html_content, wait_until="networkidle", timeout=60000) - - # Generate PDF with print styling - page.pdf( - path=str(output_path.resolve()), - format="Letter", - margin={"top": "0.5in", "right": "0.5in", "bottom": "0.5in", "left": "0.5in"}, - print_background=True, - ) - - browser.close() + # Use Playwright to render HTML to PDF (handles Jupyter/async context) + _run_playwright_pdf_generation(html_content, output_path) return output_path @@ -1805,6 +1970,22 @@ def _generate_governance_html(results: "AuditResults") -> str: worst_metric = gov.get("worst_disparity_metric", "metric") worst_group = gov.get("worst_disparity_group", "group") + # Primary fairness metric information + from faircareai.core.config import FairnessMetric + + primary_metric = results.config.primary_fairness_metric + metric_descriptions = { + FairnessMetric.DEMOGRAPHIC_PARITY: ("Demographic Parity", "Equal selection rates across groups"), + FairnessMetric.EQUALIZED_ODDS: ("Equalized Odds", "Equal TPR and FPR across groups"), + FairnessMetric.EQUAL_OPPORTUNITY: ("Equal Opportunity", "Equal detection rates (TPR) across groups"), + FairnessMetric.PREDICTIVE_PARITY: ("Predictive Parity", "Equal positive predictive values across groups"), + FairnessMetric.CALIBRATION: ("Calibration", "Equal calibration accuracy across groups"), + } + metric_name, metric_desc = metric_descriptions.get( + primary_metric, ("Not Specified", "No primary fairness metric was selected") + ) + metric_justification = results.config.fairness_justification or "Not provided" + html = f""" @@ -2138,8 +2319,16 @@ def _generate_governance_html(results: "AuditResults") -> str:

2. Where Do Disparities Exist?

+ +
+

Selected Fairness Metric: {metric_name}

+

Definition: {metric_desc}

+

Justification: {metric_justification}

+
+

- Performance varies across demographic groups. Below shows how the model performs for each population. + Performance varies across demographic groups. Charts corresponding to your selected metric are + highlighted in blue.

From 6c07857c940a88567a470ab5f159cd49ca92fbc1 Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Fri, 16 Jan 2026 09:15:16 -0600 Subject: [PATCH 4/4] updated font size and made section 5 tailor to selected metric --- .../visualization/governance_dashboard.py | 87 ++++++++++++++++--- .../visualization/performance_charts.py | 20 +++-- 2 files changed, 87 insertions(+), 20 deletions(-) diff --git a/src/faircareai/visualization/governance_dashboard.py b/src/faircareai/visualization/governance_dashboard.py index e74ee9d..5317fea 100644 --- a/src/faircareai/visualization/governance_dashboard.py +++ b/src/faircareai/visualization/governance_dashboard.py @@ -244,7 +244,8 @@ def create_executive_summary(results: "AuditResults") -> go.Figure: title=dict( text=f"Governance Review: {results.config.model_name}
" f"Version {results.config.model_version}", - x=0.5, + x=0, + xanchor="left", font=dict(size=18), ), height=800, @@ -424,7 +425,8 @@ def create_go_nogo_scorecard(results: "AuditResults") -> go.Figure: f"{overall}
" f"{n_pass} Pass | {n_warn} Near | {n_fail} Outside" ), - x=0.5, + x=0, + xanchor="left", font=dict(size=16), ), height=500, @@ -521,7 +523,12 @@ def _build_checklist(results: "AuditResults") -> list[dict]: # Check for small subgroups subgroup_ok = True for _attr_name, attr_data in results.subgroup_performance.items(): - for _group_name, group_data in attr_data.items(): + # Extract groups from nested structure + groups_data = attr_data.get("groups", attr_data) if isinstance(attr_data, dict) else {} + for group_name, group_data in groups_data.items(): + # Skip metadata keys + if group_name in ("attribute", "threshold", "reference", "disparities"): + continue if isinstance(group_data, dict): n = group_data.get("n", 0) if n < 30: @@ -589,7 +596,13 @@ def create_fairness_dashboard(results: "AuditResults") -> go.Figure: if not isinstance(attr_data, dict): continue - for group_name, group_data in attr_data.items(): + # Extract groups from nested structure + groups_data = attr_data.get("groups", attr_data) + + for group_name, group_data in groups_data.items(): + # Skip metadata keys + if group_name in ("attribute", "threshold", "reference", "disparities"): + continue if not isinstance(group_data, dict) or "error" in group_data: continue @@ -768,7 +781,8 @@ def create_fairness_dashboard(results: "AuditResults") -> go.Figure: fig.update_layout( title=dict( text=f"Fairness Dashboard: {results.config.model_name}", - x=0.5, + x=0, + xanchor="left", font=dict(size=16), ), height=1000, # Taller for more spacing @@ -821,13 +835,19 @@ def plot_subgroup_comparison( if not isinstance(attr_data, dict): continue + # Extract groups from nested structure + groups_data = attr_data.get("groups", attr_data) + groups = [] values = [] errors_low = [] errors_high = [] colors = [] - for group_name, group_data in attr_data.items(): + for group_name, group_data in groups_data.items(): + # Skip metadata keys + if group_name in ("attribute", "threshold", "reference", "disparities"): + continue if not isinstance(group_data, dict) or "error" in group_data: continue @@ -886,7 +906,8 @@ def plot_subgroup_comparison( fig.update_layout( title=dict( text=f"Subgroup {metric_labels.get(metric, metric)} Comparison", - x=0.5, + x=0, + xanchor="left", ), xaxis_title="Subgroup", yaxis_title=metric_labels.get(metric, metric), @@ -1157,7 +1178,10 @@ def create_governance_overall_figures(results: "AuditResults") -> dict[str, Any] return figures -def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dict[str, go.Figure]]: +def create_governance_subgroup_figures( + results: "AuditResults", + primary_metric: "FairnessMetric | None" = None, +) -> dict[str, dict[str, go.Figure]]: """Create subgroup performance figures for governance report. For each sensitive attribute, generates 4 figures (Van Calster 4): @@ -1167,13 +1191,31 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic 4. Selection Rate by Subgroup - Demographic parity check Each figure includes plain language explanations per the governance spec. + Charts corresponding to the primary_metric are visually highlighted. Args: results: AuditResults from FairCareAudit.run(). + primary_metric: The primary fairness metric to highlight. If None, + uses results.config.primary_fairness_metric. Returns: Dict mapping attribute name to dict of figure title -> Plotly Figure. 
""" + from faircareai.core.config import FairnessMetric + + # Get primary metric from results if not provided + if primary_metric is None: + primary_metric = getattr(results.config, "primary_fairness_metric", None) + + # Map fairness metrics to chart keys for highlighting + metric_to_chart = { + FairnessMetric.DEMOGRAPHIC_PARITY: "Selection Rate by Subgroup", + FairnessMetric.EQUAL_OPPORTUNITY: "Sensitivity by Subgroup", + FairnessMetric.EQUALIZED_ODDS: "Sensitivity by Subgroup", # TPR is part of EO + FairnessMetric.PREDICTIVE_PARITY: None, # PPV not shown in standard charts + FairnessMetric.CALIBRATION: None, # Calibration not shown in standard charts + } + primary_chart_key = metric_to_chart.get(primary_metric) if primary_metric else None # Plain language explanations for Van Calster 4 visualizations SUBGROUP_EXPLANATIONS = { "auroc": ( @@ -1247,6 +1289,11 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic for ref in is_reference ] + # Determine which charts correspond to the primary metric + is_tpr_primary = primary_metric in (FairnessMetric.EQUAL_OPPORTUNITY, FairnessMetric.EQUALIZED_ODDS) + is_fpr_primary = primary_metric == FairnessMetric.EQUALIZED_ODDS + is_selection_primary = primary_metric == FairnessMetric.DEMOGRAPHIC_PARITY + # 1. AUROC by Subgroup fig_auroc = _create_subgroup_bar_chart( groups, @@ -1259,10 +1306,11 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic explanation=SUBGROUP_EXPLANATIONS["auroc"], y_axis_title="AUROC (Model Accuracy Score)", x_axis_title="Demographic Group", + is_primary_metric=False, # AUROC not directly a fairness metric ) figures["AUROC by Subgroup"] = fig_auroc - # 2. TPR (Sensitivity) by Subgroup + # 2. TPR (Sensitivity) by Subgroup - Equal Opportunity / Equalized Odds fig_tpr = _create_subgroup_bar_chart( groups, [v * 100 for v in tpr_vals], @@ -1274,10 +1322,11 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic explanation=SUBGROUP_EXPLANATIONS["sensitivity"], y_axis_title="True Positive Rate (%)", x_axis_title="Demographic Group", + is_primary_metric=is_tpr_primary, ) figures["Sensitivity by Subgroup"] = fig_tpr - # 3. FPR by Subgroup + # 3. FPR by Subgroup - Equalized Odds fig_fpr = _create_subgroup_bar_chart( groups, [v * 100 for v in fpr_vals], @@ -1289,10 +1338,11 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic explanation=SUBGROUP_EXPLANATIONS["fpr"], y_axis_title="False Positive Rate (%)", x_axis_title="Demographic Group", + is_primary_metric=is_fpr_primary, ) figures["FPR by Subgroup"] = fig_fpr - # 4. Selection Rate by Subgroup + # 4. Selection Rate by Subgroup - Demographic Parity fig_sel = _create_subgroup_bar_chart( groups, [v * 100 for v in selection_vals], @@ -1304,6 +1354,7 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic explanation=SUBGROUP_EXPLANATIONS["selection"], y_axis_title="Selection Rate (% flagged)", x_axis_title="Demographic Group", + is_primary_metric=is_selection_primary, ) figures["Selection Rate by Subgroup"] = fig_sel @@ -1324,6 +1375,7 @@ def _create_subgroup_bar_chart( explanation: str = "", y_axis_title: str = "Value", x_axis_title: str = "Group", + is_primary_metric: bool = False, ) -> go.Figure: """Create a simplified bar chart for subgroup comparison. @@ -1339,6 +1391,8 @@ def _create_subgroup_bar_chart( explanation: Plain language explanation for non-technical audiences. y_axis_title: Descriptive label for Y-axis. 
x_axis_title: Descriptive label for X-axis. + is_primary_metric: If True, adds visual highlighting to indicate + this chart corresponds to the selected primary fairness metric. Returns: Plotly Figure. @@ -1376,8 +1430,16 @@ def _create_subgroup_bar_chart( # No in-chart annotations - they overlap with labels # Explanation text will be added via HTML wrapper in generator.py + # Add visual highlighting for primary metric + if is_primary_metric: + title_text = f"{title}
★ YOUR SELECTED FAIRNESS METRIC" + plot_bgcolor = "rgba(0, 114, 178, 0.05)" # Light blue background + else: + title_text = f"{title}" + plot_bgcolor = "white" + fig.update_layout( - title=dict(text=f"{title}", font=dict(size=16)), + title=dict(text=title_text, font=dict(size=16), x=0, xanchor="left"), xaxis=dict( title=x_axis_title, tickfont={"size": 11}, @@ -1395,6 +1457,7 @@ def _create_subgroup_bar_chart( height=380, # Good height for chart margin=dict(l=80, r=40, t=100, b=160), # Top: long titles, bottom: rotated labels showlegend=False, + plot_bgcolor=plot_bgcolor, ) return fig diff --git a/src/faircareai/visualization/performance_charts.py b/src/faircareai/visualization/performance_charts.py index 83754d3..0d3fdf3 100644 --- a/src/faircareai/visualization/performance_charts.py +++ b/src/faircareai/visualization/performance_charts.py @@ -223,7 +223,8 @@ def plot_discrimination_curves( fig.update_layout( title=dict( text=f"Model Discrimination: AUROC = {auroc:.3f} {auroc_ci}", - x=0.5, + x=0, + xanchor="left", ), height=450, showlegend=True, @@ -346,7 +347,7 @@ def plot_calibration_curve( # Apply theme fig = apply_faircareai_theme(fig) fig.update_layout( - title=dict(text=title_text, x=0.5), + title=dict(text=title_text, x=0, xanchor="left"), xaxis_title=x_axis_title, yaxis_title=y_axis_title, height=500, @@ -398,7 +399,7 @@ def plot_threshold_analysis( ) fig = apply_faircareai_theme(fig) fig.update_layout( - title=dict(text="Threshold Selection Impact", x=0.5), + title=dict(text="Threshold Selection Impact", x=0, xanchor="left"), height=300, ) return fig @@ -504,7 +505,7 @@ def plot_threshold_analysis( # Apply theme fig = apply_faircareai_theme(fig) fig.update_layout( - title=dict(text="Threshold Selection Impact", x=0.5), + title=dict(text="Threshold Selection Impact", x=0, xanchor="left"), height=600, showlegend=True, legend=LEGEND_POSITIONS["top_horizontal"], @@ -620,7 +621,8 @@ def plot_decision_curve( fig.update_layout( title=dict( text=title_text, - x=0.5, + x=0, + xanchor="left", ), xaxis_title=x_axis_title, yaxis_title=y_axis_title, @@ -715,7 +717,7 @@ def plot_confusion_matrix(results: AuditResults) -> go.Figure: # Apply theme fig = apply_faircareai_theme(fig) fig.update_layout( - title=dict(text=f"Confusion Matrix at Threshold = {threshold:.2f}", x=0.5), + title=dict(text=f"Confusion Matrix at Threshold = {threshold:.2f}", x=0, xanchor="left"), xaxis_title="Predicted", yaxis_title="Actual", height=400, @@ -892,7 +894,8 @@ def plot_performance_summary( fig.update_layout( title=dict( text=f"Model Performance Summary - {results.config.model_name}", - x=0.5, + x=0, + xanchor="left", ), height=600, showlegend=False, @@ -909,7 +912,8 @@ def plot_performance_summary( fig.update_layout( title=dict( text=f"Model Discrimination (AUROC) - {results.config.model_name}", - x=0.5, + x=0, + xanchor="left", ), height=350, showlegend=False,
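
Usage sketch for the behavior added in this series. `FairnessMetric`, `AuditResults.to_pdf()`, and `FairCareAudit.run()` are taken from the patches and docstrings above; the helper name `export_audit_pdf`, the assumption that the output path is `to_pdf()`'s first positional argument, and the audit setup itself are illustrative assumptions, not part of this series.

    from pathlib import Path

    def export_audit_pdf(results, out: str = "fairness_audit.pdf") -> Path:
        """Sketch: export the tailored data-scientist PDF from an existing AuditResults.

        `results` is the AuditResults returned by FairCareAudit.run(); constructing
        the audit is out of scope here.
        """
        # Patch 3 tailors Section 5 to results.config.primary_fairness_metric
        # (e.g. FairnessMetric.PREDICTIVE_PARITY), and Patch 2 adds the ppv_diff
        # and calibration_diff values that section reads, so the metric chosen at
        # audit-configuration time determines which column/chart is highlighted.
        #
        # Patch 1 threads the full AuditResults into generate_pdf_report(), so this
        # PDF embeds the governance overall/subgroup charts instead of only a
        # forest plot. Patch 3's _run_playwright_pdf_generation() detects a running
        # asyncio event loop and renders in a worker thread, so the call below also
        # works inside a Jupyter notebook.
        return results.to_pdf(out)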