From 67e31618b4432d9a0aa4bdd75f980cc9b9ae392c Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Fri, 16 Jan 2026 09:12:20 -0600 Subject: [PATCH 1/4] added date in html, and passed full result for charts --- src/faircareai/core/results.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/faircareai/core/results.py b/src/faircareai/core/results.py index 157a3b3..40d7001 100644 --- a/src/faircareai/core/results.py +++ b/src/faircareai/core/results.py @@ -10,6 +10,7 @@ import json from dataclasses import dataclass, field +from datetime import date from pathlib import Path from typing import TYPE_CHECKING, Any @@ -429,9 +430,9 @@ def to_pdf( return generate_governance_pdf_report(self, path, metric_config=metric_config) else: metric_config = MetricDisplayConfig.data_scientist(include_optional=include_optional) - # Convert AuditResults to AuditSummary for generator + # Convert AuditResults to AuditSummary for generator, but also pass full results for charts summary = self._to_audit_summary() - return generate_pdf_report(summary, path, metric_config=metric_config) + return generate_pdf_report(summary, path, metric_config=metric_config, results=self) def to_pptx( self, @@ -570,7 +571,7 @@ def _to_audit_summary(self) -> "AuditSummary": return AuditSummary( model_name=self.config.model_name, - audit_date=self.config.report_date or "", + audit_date=self.config.report_date or date.today().isoformat(), n_samples=self.descriptive_stats.get("cohort_overview", {}).get("n_total", 0), n_groups=n_groups, threshold=self.threshold, From 6a8c89cb9259208fc7ffff460e2a7532b5d759ca Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Fri, 16 Jan 2026 09:13:28 -0600 Subject: [PATCH 2/4] added computation of predictive parity and calibration --- src/faircareai/metrics/fairness.py | 41 +++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/src/faircareai/metrics/fairness.py b/src/faircareai/metrics/fairness.py index ecc7c90..f6b8957 100644 --- a/src/faircareai/metrics/fairness.py +++ b/src/faircareai/metrics/fairness.py @@ -160,6 +160,7 @@ def compute_fairness_metrics( results["fpr_diff"] = {} results["equalized_odds_diff"] = {} results["ppv_ratio"] = {} + results["ppv_diff"] = {} results["calibration_diff"] = {} ref_selection = ref_metrics.get("selection_rate", 0) @@ -197,12 +198,13 @@ def compute_fairness_metrics( max(abs(tpr - ref_tpr), abs(fpr - ref_fpr)) ) - # Predictive parity (PPV ratio) + # Predictive parity (PPV ratio and difference) ppv = group_data.get("ppv", 0) if ref_ppv > 0: results["ppv_ratio"][str(group)] = float(ppv / ref_ppv) else: results["ppv_ratio"][str(group)] = None + results["ppv_diff"][str(group)] = float(ppv - ref_ppv) # Calibration difference cal = group_data.get("mean_calibration_error", 0) @@ -252,15 +254,40 @@ def _compute_fairness_summary(metrics: dict) -> dict[str, Any]: "within_threshold": worst_eo <= EQUALIZED_ODDS_THRESHOLD, } - # Predictive parity - filter None values (occur when reference PPV is 0) + # Predictive parity - use PPV difference for consistency with other metrics + ppv_diffs = list(metrics.get("ppv_diff", {}).values()) ppv_ratios_raw = list(metrics.get("ppv_ratio", {}).values()) ppv_ratios = [r for r in ppv_ratios_raw if r is not None] - if ppv_ratios: - min_ppv = min(ppv_ratios) - worst_ppv = min_ppv if min_ppv < 1 else max(ppv_ratios) + if ppv_diffs or ppv_ratios: + # Compute worst_diff from ppv_diff if available + worst_ppv_diff = max(ppv_diffs, key=abs) if ppv_diffs else None + # Compute worst_ratio for 
backward compatibility + worst_ratio = None + if ppv_ratios: + min_ppv = min(ppv_ratios) + worst_ratio = min_ppv if min_ppv < 1 else max(ppv_ratios) + # Determine within_threshold based on worst_diff if available, else ratio + if worst_ppv_diff is not None: + within_threshold = abs(worst_ppv_diff) <= EQUALIZED_ODDS_THRESHOLD + elif worst_ratio is not None: + within_threshold = DEMOGRAPHIC_PARITY_LOWER <= worst_ratio <= DEMOGRAPHIC_PARITY_UPPER + else: + within_threshold = True summary["predictive_parity"] = { - "worst_ratio": float(worst_ppv), - "within_threshold": DEMOGRAPHIC_PARITY_LOWER <= worst_ppv <= DEMOGRAPHIC_PARITY_UPPER, + "worst_diff": float(worst_ppv_diff) if worst_ppv_diff is not None else 0.0, + "worst_ratio": float(worst_ratio) if worst_ratio is not None else None, + "within_threshold": within_threshold, + } + + # Calibration + cal_diffs = list(metrics.get("calibration_diff", {}).values()) + if cal_diffs: + worst_cal = max(cal_diffs, key=abs) + # Calibration threshold: difference in mean calibration error should be small + # Using 0.05 (5 percentage points) as threshold for clinical significance + summary["calibration"] = { + "worst_diff": float(worst_cal), + "within_threshold": abs(worst_cal) <= 0.05, } return summary From 906594c4f3f22889f50d30ae826947121e8dbe82 Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Fri, 16 Jan 2026 09:14:37 -0600 Subject: [PATCH 3/4] updated font size and made section 5 tailor to selected metric --- src/faircareai/reports/generator.py | 393 ++++++++++++++++++++-------- 1 file changed, 291 insertions(+), 102 deletions(-) diff --git a/src/faircareai/reports/generator.py b/src/faircareai/reports/generator.py index 7f68f6f..b10a9d4 100644 --- a/src/faircareai/reports/generator.py +++ b/src/faircareai/reports/generator.py @@ -18,7 +18,9 @@ Methodology: Van Calster et al. (2025), CHAI RAIC Checkpoint 1. """ +import asyncio import html +from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from datetime import date from pathlib import Path @@ -42,6 +44,62 @@ logger = get_logger(__name__) +def _is_in_async_context() -> bool: + """Check if we're running inside an asyncio event loop (e.g., Jupyter notebook).""" + try: + asyncio.get_running_loop() + return True + except RuntimeError: + return False + + +def _run_playwright_pdf_generation( + html_content: str, + output_path: Path, + page_format: str = "Letter", + margins: dict | None = None, +) -> None: + """Run Playwright PDF generation, handling async context (Jupyter) safely. + + Args: + html_content: HTML string to render to PDF. + output_path: Path for output PDF file. + page_format: Page format (e.g., "Letter", "A4"). + margins: Page margins dict with top, right, bottom, left keys. 
+ """ + from playwright.sync_api import sync_playwright + + if margins is None: + margins = {"top": "0.5in", "right": "0.5in", "bottom": "0.5in", "left": "0.5in"} + + def _generate_pdf() -> None: + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page() + + # Load HTML content with timeout protection (60s for complex reports) + page.set_content(html_content, wait_until="networkidle", timeout=60000) + + # Generate PDF with print styling + page.pdf( + path=str(output_path.resolve()), + format=page_format, + margin=margins, + print_background=True, + ) + + browser.close() + + if _is_in_async_context(): + # Running in Jupyter or other async context - use thread pool + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(_generate_pdf) + future.result() # Wait for completion and raise any exceptions + else: + # Normal sync context - run directly + _generate_pdf() + + def _validate_output_path(output_path: Path, base_dir: Path | None = None) -> Path: """Validate output path is within allowed directory. @@ -97,6 +155,7 @@ def generate_pdf_report( output_path: str | Path, include_charts: bool = True, metric_config: "MetricDisplayConfig | None" = None, + results: "AuditResults | None" = None, ) -> Path: """ Generate a formal PDF audit report. @@ -114,6 +173,8 @@ def generate_pdf_report( include_charts: If True, embed charts metric_config: MetricDisplayConfig controlling which metrics to display. If None, defaults to RECOMMENDED metrics only. + results: Full AuditResults object for chart generation. If None, charts + will be limited or unavailable. Returns: Path to generated PDF file @@ -123,7 +184,7 @@ def generate_pdf_report( Run: pip install playwright && playwright install chromium """ try: - from playwright.sync_api import sync_playwright + from playwright.sync_api import sync_playwright # noqa: F401 except ImportError as err: raise ImportError( "Playwright is required for PDF generation. 
Install with: " @@ -134,25 +195,10 @@ def generate_pdf_report( output_path.parent.mkdir(parents=True, exist_ok=True) # Generate HTML content - html_content = _generate_report_html(summary, include_charts) - - # Use Playwright to render HTML to PDF - with sync_playwright() as p: - browser = p.chromium.launch() - page = browser.new_page() - - # Load HTML content with timeout protection (60s for complex reports) - page.set_content(html_content, wait_until="networkidle", timeout=60000) - - # Generate PDF with print styling - page.pdf( - path=str(output_path.resolve()), - format="Letter", - margin={"top": "0.5in", "right": "0.5in", "bottom": "0.5in", "left": "0.5in"}, - print_background=True, - ) + html_content = _generate_report_html(summary, include_charts, results=results) - browser.close() + # Use Playwright to render HTML to PDF (handles Jupyter/async context) + _run_playwright_pdf_generation(html_content, output_path) return output_path @@ -298,7 +344,7 @@ def _generate_full_report_html(results: "AuditResults") -> str: /* Scientific Publication Style - Large, Clear, Readable */ body {{ font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; - font-size: {TYPOGRAPHY["body_size"]}px; /* 18px - publication readable */ + font-size: 16px; color: var(--text-color); background-color: var(--bg-color); line-height: 1.6; @@ -318,10 +364,10 @@ def _generate_full_report_html(results: "AuditResults") -> str: margin-top: 0; }} - /* Publication-style large headers */ - h1 {{ font-size: {TYPOGRAPHY["heading_size"]}px; margin-bottom: 12px; }} /* 40px */ - h2 {{ font-size: {TYPOGRAPHY["subheading_size"]}px; margin-top: 40px; border-bottom: 2px solid var(--primary-color); padding-bottom: 10px; }} /* 32px */ - h3 {{ font-size: {TYPOGRAPHY["h3_size"]}px; margin-top: 28px; color: var(--secondary-color); }} /* 28px */ + /* Publication-style large headers - fixed sizes for HTML readability */ + h1 {{ font-size: 32px; margin-bottom: 12px; }} + h2 {{ font-size: 24px; margin-top: 40px; border-bottom: 2px solid var(--primary-color); padding-bottom: 10px; }} + h3 {{ font-size: 20px; margin-top: 28px; color: var(--secondary-color); }} .header {{ background: white; @@ -332,14 +378,14 @@ def _generate_full_report_html(results: "AuditResults") -> str: }} /* Publication readable metadata */ - .metadata {{ color: #666; font-size: {TYPOGRAPHY["label_size"]}px; }} /* 18px */ + .metadata {{ color: #666; font-size: 14px; }} .status-badge {{ display: inline-block; padding: 14px 28px; border-radius: 6px; font-weight: 700; - font-size: {TYPOGRAPHY["h3_size"]}px; /* 28px - prominent */ + font-size: 18px; color: white; background-color: {status_color}; margin: 16px 0; @@ -371,12 +417,12 @@ def _generate_full_report_html(results: "AuditResults") -> str: /* Large scorecard numbers */ .scorecard-value {{ - font-size: {TYPOGRAPHY["heading_size"]}px; /* 40px - prominent */ + font-size: 36px; font-weight: 700; }} .scorecard-label {{ - font-size: {TYPOGRAPHY["label_size"]}px; /* 18px - readable */ + font-size: 14px; color: #666; text-transform: uppercase; letter-spacing: 0.5px; @@ -391,7 +437,7 @@ def _generate_full_report_html(results: "AuditResults") -> str: width: 100%; border-collapse: collapse; margin: 20px 0; - font-size: {TYPOGRAPHY["label_size"]}px; /* 18px - readable */ + font-size: 15px; }} th, td {{ @@ -403,7 +449,7 @@ def _generate_full_report_html(results: "AuditResults") -> str: th {{ background: var(--bg-color); font-weight: 600; - font-size: {TYPOGRAPHY["label_size"]}px; /* 18px */ + font-size: 15px; color: 
var(--secondary-color); }} @@ -460,13 +506,13 @@ def _generate_full_report_html(results: "AuditResults") -> str: /* Large metric values */ .metric-value {{ - font-size: {TYPOGRAPHY["subheading_size"]}px; /* 32px - prominent */ + font-size: 28px; font-weight: 700; color: var(--primary-color); }} .metric-label {{ - font-size: {TYPOGRAPHY["label_size"]}px; /* 18px - readable */ + font-size: 14px; color: #666; }} @@ -476,7 +522,7 @@ def _generate_full_report_html(results: "AuditResults") -> str: text-align: center; border-radius: 6px; color: #666; - font-size: {TYPOGRAPHY["body_size"]}px; + font-size: 16px; }} /* Responsive chart grid - single column on tablets/mobile */ @@ -913,7 +959,13 @@ def _generate_subgroup_section(results: "AuditResults") -> str: if not isinstance(attr_data, dict): continue - for group_name, group_data in attr_data.items(): + # Extract groups from nested structure + groups_data = attr_data.get("groups", attr_data) + + for group_name, group_data in groups_data.items(): + # Skip metadata keys + if group_name in ("attribute", "threshold", "reference", "disparities"): + continue if not isinstance(group_data, dict) or "error" in group_data: continue @@ -1025,11 +1077,62 @@ def _generate_subgroup_section(results: "AuditResults") -> str: def _generate_fairness_section(results: "AuditResults") -> str: - """Generate Section 5: Fairness Assessment.""" + """Generate Section 5: Fairness Assessment with metric-specific content.""" + from faircareai.core.config import FairnessMetric + config = results.config metric = config.primary_fairness_metric justification = config.fairness_justification or "Not provided" + # Metric-specific descriptions and what to look for + metric_info = { + FairnessMetric.DEMOGRAPHIC_PARITY: { + "name": "Demographic Parity", + "description": "Equal selection rates across groups regardless of true outcomes.", + "what_to_look_for": "Selection rate differences should be small. Large differences mean some groups are selected more/less often.", + "key_metric": "selection_rate_diff", + "threshold_note": "Differences < 0.10 (10%) are typically acceptable.", + }, + FairnessMetric.EQUALIZED_ODDS: { + "name": "Equalized Odds", + "description": "Equal true positive rates AND false positive rates across groups.", + "what_to_look_for": "Both TPR and FPR differences should be small. This ensures equal benefit AND equal burden across groups.", + "key_metric": "equalized_odds", + "threshold_note": "Max(TPR diff, FPR diff) < 0.10 is typically acceptable.", + }, + FairnessMetric.EQUAL_OPPORTUNITY: { + "name": "Equal Opportunity", + "description": "Equal true positive rates across groups (focuses on benefit, not burden).", + "what_to_look_for": "TPR differences should be small. This ensures all groups with the condition are equally likely to be identified.", + "key_metric": "equal_opportunity", + "threshold_note": "TPR differences < 0.10 are typically acceptable.", + }, + FairnessMetric.PREDICTIVE_PARITY: { + "name": "Predictive Parity", + "description": "Equal positive predictive value (PPV) across groups.", + "what_to_look_for": "PPV differences should be small. A positive prediction should mean the same thing for all groups.", + "key_metric": "ppv_diff", + "threshold_note": "PPV differences < 0.10 are typically acceptable.", + }, + FairnessMetric.CALIBRATION: { + "name": "Calibration", + "description": "Predicted probabilities match actual outcomes equally across groups.", + "what_to_look_for": "Calibration error differences should be small. 
A 30% prediction should mean 30% risk for all groups.", + "key_metric": "calibration_diff", + "threshold_note": "Calibration differences < 0.05 are typically acceptable.", + }, + } + + # Get info for selected metric + selected_info = metric_info.get(metric, { + "name": "Not Specified", + "description": "No primary fairness metric selected.", + "what_to_look_for": "Review all metrics below.", + "key_metric": None, + "threshold_note": "Differences < 0.10 are typically acceptable.", + }) + + # Build table rows with all metrics, highlighting the primary one fairness_rows = "" for attr_name, attr_data in results.fairness_metrics.items(): if not isinstance(attr_data, dict): @@ -1037,69 +1140,104 @@ def _generate_fairness_section(results: "AuditResults") -> str: summary = attr_data.get("summary", {}) - # Equal opportunity + # Demographic Parity (selection rate) + dp = summary.get("demographic_parity", {}) + dp_diff = dp.get("worst_diff", 0) if dp else 0 + dp_pass = dp.get("within_threshold", True) if dp else True + + # Equal Opportunity (TPR) eo = summary.get("equal_opportunity", {}) eo_diff = eo.get("worst_diff", 0) if eo else 0 eo_pass = eo.get("within_threshold", True) if eo else True - eo_status = "PASS" if eo_pass else "FLAG" - eo_class = "pass" if eo_pass else "fail" - # Equalized odds + # Equalized Odds (TPR + FPR) eq = summary.get("equalized_odds", {}) eq_diff = eq.get("worst_diff", 0) if eq else 0 eq_pass = eq.get("within_threshold", True) if eq else True - eq_status = "PASS" if eq_pass else "FLAG" - eq_class = "pass" if eq_pass else "fail" + + # Predictive Parity (PPV) + pp = summary.get("predictive_parity", {}) + pp_diff = pp.get("worst_diff", 0) if pp else 0 + pp_pass = pp.get("within_threshold", True) if pp else True + + # Calibration + cal = summary.get("calibration", {}) + cal_diff = cal.get("worst_diff", 0) if cal else 0 + cal_pass = cal.get("within_threshold", True) if cal else True + + # Helper to format cell with highlighting for primary metric + def format_cell(value: float, passed: bool, is_primary: bool) -> str: + status = "PASS" if passed else "FLAG" + status_class = "pass" if passed else "fail" + highlight = ' style="background: #e8f4f8; font-weight: bold;"' if is_primary else "" + return f'{abs(value):.3f}{status}' + + # Determine which metric is primary for this row + is_dp_primary = metric == FairnessMetric.DEMOGRAPHIC_PARITY + is_eo_primary = metric == FairnessMetric.EQUAL_OPPORTUNITY + is_eq_primary = metric == FairnessMetric.EQUALIZED_ODDS + is_pp_primary = metric == FairnessMetric.PREDICTIVE_PARITY + is_cal_primary = metric == FairnessMetric.CALIBRATION fairness_rows += f''' {attr_name} - {abs(eo_diff):.3f} - {eo_status} - {eq_diff:.3f} - {eq_status} + {format_cell(dp_diff, dp_pass, is_dp_primary)} + {format_cell(eo_diff, eo_pass, is_eo_primary)} + {format_cell(eq_diff, eq_pass, is_eq_primary)} + {format_cell(pp_diff, pp_pass, is_pp_primary)} + {format_cell(cal_diff, cal_pass, is_cal_primary)} ''' + # Primary metric badge color + metric_color = "#0072B2" if metric else "#666" + return f"""

Section 5: Fairness Assessment

-

Selected Fairness Metric

-

Primary Metric: {metric.value if metric else "Not specified"}

-

Justification: {justification}

+
+

Primary Fairness Metric: {selected_info["name"]}

+

Definition: {selected_info["description"]}

+

What to look for: {selected_info["what_to_look_for"]}

+

Threshold: {selected_info["threshold_note"]}

+

Justification: {justification}

+
-

Fairness Metrics by Attribute

-

- What to look for: Differences less than 0.10 (10 percentage points) are typically acceptable. - Larger differences may indicate the model treats groups differently. +

All Fairness Metrics by Attribute

+

+ Your selected metric is highlighted in blue. Other metrics shown for completeness.

- +
+
- - - - + + + + + {fairness_rows}
AttributeTPR Difference
(Equal Opportunity)
StatusEqualized Odds Diff
(TPR + FPR)
StatusDemographic Parity
Selection Rate Diff
Equal Opportunity
TPR Diff
Equalized Odds
Max(TPR, FPR) Diff
Predictive Parity
PPV Diff
Calibration
Cal Error Diff
+
-

Understanding Fairness Metrics:

-
    -
  • TPR Difference (Equal Opportunity): Do all groups have similar rates of correctly identified cases? - Large differences mean the model "misses" more cases in certain groups.
  • -
  • Equalized Odds: Combines both true positive rate and false positive rate differences. - Measures overall fairness in both detecting cases and avoiding false alarms.
  • -
  • Impossibility Theorem: When base rates (prevalence) differ between groups, - no model can satisfy all fairness criteria simultaneously. Trade-offs are necessary.
  • -
  • Threshold: Differences <0.10 are generally acceptable in healthcare AI.
  • +

    Why Your Metric Choice Matters:

    +

    The impossibility theorem proves that when base rates differ between groups, + no model can satisfy all fairness criteria simultaneously. Your choice reflects your values:

    +
      +
    • Demographic Parity: Prioritizes equal selection rates (good for resource allocation)
    • +
    • Equal Opportunity: Prioritizes equal detection of true cases (good for screening)
    • +
    • Equalized Odds: Balances detection AND false alarms (good for interventions)
    • +
    • Predictive Parity: Prioritizes equal meaning of positive predictions
    • +
    • Calibration: Prioritizes accurate risk communication across groups
@@ -1173,8 +1311,15 @@ def _generate_governance_section(results: "AuditResults") -> str: def _generate_report_html( summary: AuditSummary, include_charts: bool = True, + results: "AuditResults | None" = None, ) -> str: - """Generate the HTML content for the report.""" + """Generate the HTML content for the report. + + Args: + summary: AuditSummary with basic audit info. + include_charts: If True, generate charts. + results: Full AuditResults for chart generation. If None, charts will be limited. + """ from faircareai.visualization.tables import create_plain_language_summary @@ -1199,20 +1344,43 @@ def _generate_report_html( summary.worst_disparity_value, ) - # Generate charts as SVG if requested + # Generate charts if requested charts_html = "" - if include_charts and summary.metrics_df is not None: - try: - from faircareai.visualization.altair_plots import create_forest_plot_static - - chart = create_forest_plot_static(summary.metrics_df, metric="tpr") - charts_html = f'
{chart.to_html()}
' - except (ValueError, TypeError, KeyError) as e: - logger.warning("Forest plot generation failed: %s", e) - charts_html = '

Charts could not be generated.

' - except ImportError as e: - logger.error("Chart library not available: %s", e) - charts_html = '

Chart library missing. Install with: pip install \'faircareai[viz]\'

' + if include_charts: + if results is not None: + # Use full AuditResults for comprehensive charts + try: + overall_html = _render_governance_overall_figures(results) + subgroup_html = _render_governance_subgroup_figures(results) + charts_html = f""" +
+

Overall Performance

+ {overall_html} +

Subgroup Performance

+ {subgroup_html} +
+ """ + except (ValueError, TypeError, KeyError) as e: + logger.warning("Chart generation failed: %s", e) + charts_html = f'

Charts could not be generated.

' + except ImportError as e: + logger.error("Chart library not available: %s", e) + charts_html = '

Chart library missing. Install with: pip install \'faircareai[viz]\'

' + elif summary.metrics_df is not None and len(summary.metrics_df) > 0: + # Fall back to forest plot from metrics_df + try: + from faircareai.visualization.altair_plots import create_forest_plot_static + + chart = create_forest_plot_static(summary.metrics_df, metric="tpr") + charts_html = f'
{chart.to_html()}
' + except (ValueError, TypeError, KeyError) as e: + logger.warning("Forest plot generation failed: %s", e) + charts_html = '

Charts could not be generated.

' + except ImportError as e: + logger.error("Chart library not available: %s", e) + charts_html = '

Chart library missing. Install with: pip install \'faircareai[viz]\'

' + else: + charts_html = '

No chart data available.

' html = f""" @@ -1237,7 +1405,7 @@ def _generate_report_html( body {{ font-family: {TYPOGRAPHY["data_font"]}; - font-size: {TYPOGRAPHY["body_size"]}px; + font-size: 16px; color: var(--text-color); background-color: var(--bg-color); line-height: 1.6; @@ -1253,12 +1421,12 @@ def _generate_report_html( }} h1 {{ - font-size: 28px; + font-size: 32px; margin-bottom: 8px; }} h2 {{ - font-size: 22px; + font-size: 24px; margin-top: 40px; border-bottom: 2px solid var(--text-color); padding-bottom: 8px; @@ -1342,7 +1510,19 @@ def _generate_report_html( padding: 20px; }} }} + + .charts-section {{ + margin: 30px 0; + }} + + .charts-section h3 {{ + font-size: 20px; + margin-top: 30px; + margin-bottom: 15px; + color: #2c5282; + }} +
@@ -1677,7 +1857,7 @@ def generate_governance_pdf_report( Run: pip install playwright && playwright install chromium """ try: - from playwright.sync_api import sync_playwright + from playwright.sync_api import sync_playwright # noqa: F401 except ImportError as err: raise ImportError( "Playwright is required for PDF generation. Install with: " @@ -1696,23 +1876,8 @@ def generate_governance_pdf_report( '', ) - # Use Playwright to render HTML to PDF - with sync_playwright() as p: - browser = p.chromium.launch() - page = browser.new_page() - - # Load HTML content with timeout protection (60s for complex reports) - page.set_content(html_content, wait_until="networkidle", timeout=60000) - - # Generate PDF with print styling - page.pdf( - path=str(output_path.resolve()), - format="Letter", - margin={"top": "0.5in", "right": "0.5in", "bottom": "0.5in", "left": "0.5in"}, - print_background=True, - ) - - browser.close() + # Use Playwright to render HTML to PDF (handles Jupyter/async context) + _run_playwright_pdf_generation(html_content, output_path) return output_path @@ -1805,6 +1970,22 @@ def _generate_governance_html(results: "AuditResults") -> str: worst_metric = gov.get("worst_disparity_metric", "metric") worst_group = gov.get("worst_disparity_group", "group") + # Primary fairness metric information + from faircareai.core.config import FairnessMetric + + primary_metric = results.config.primary_fairness_metric + metric_descriptions = { + FairnessMetric.DEMOGRAPHIC_PARITY: ("Demographic Parity", "Equal selection rates across groups"), + FairnessMetric.EQUALIZED_ODDS: ("Equalized Odds", "Equal TPR and FPR across groups"), + FairnessMetric.EQUAL_OPPORTUNITY: ("Equal Opportunity", "Equal detection rates (TPR) across groups"), + FairnessMetric.PREDICTIVE_PARITY: ("Predictive Parity", "Equal positive predictive values across groups"), + FairnessMetric.CALIBRATION: ("Calibration", "Equal calibration accuracy across groups"), + } + metric_name, metric_desc = metric_descriptions.get( + primary_metric, ("Not Specified", "No primary fairness metric was selected") + ) + metric_justification = results.config.fairness_justification or "Not provided" + html = f""" @@ -2138,8 +2319,16 @@ def _generate_governance_html(results: "AuditResults") -> str:

2. Where Do Disparities Exist?

+ +
+

Selected Fairness Metric: {metric_name}

+

Definition: {metric_desc}

+

Justification: {metric_justification}

+
+

- Performance varies across demographic groups. Below shows how the model performs for each population. + Performance varies across demographic groups. Charts corresponding to your selected metric are + highlighted in blue.

From 6c07857c940a88567a470ab5f159cd49ca92fbc1 Mon Sep 17 00:00:00 2001 From: Jonathan Tsai Date: Fri, 16 Jan 2026 09:15:16 -0600 Subject: [PATCH 4/4] updated font size and made section 5 tailor to selected metric --- .../visualization/governance_dashboard.py | 87 ++++++++++++++++--- .../visualization/performance_charts.py | 20 +++-- 2 files changed, 87 insertions(+), 20 deletions(-) diff --git a/src/faircareai/visualization/governance_dashboard.py b/src/faircareai/visualization/governance_dashboard.py index e74ee9d..5317fea 100644 --- a/src/faircareai/visualization/governance_dashboard.py +++ b/src/faircareai/visualization/governance_dashboard.py @@ -244,7 +244,8 @@ def create_executive_summary(results: "AuditResults") -> go.Figure: title=dict( text=f"Governance Review: {results.config.model_name}
" f"Version {results.config.model_version}", - x=0.5, + x=0, + xanchor="left", font=dict(size=18), ), height=800, @@ -424,7 +425,8 @@ def create_go_nogo_scorecard(results: "AuditResults") -> go.Figure: f"{overall}
" f"{n_pass} Pass | {n_warn} Near | {n_fail} Outside" ), - x=0.5, + x=0, + xanchor="left", font=dict(size=16), ), height=500, @@ -521,7 +523,12 @@ def _build_checklist(results: "AuditResults") -> list[dict]: # Check for small subgroups subgroup_ok = True for _attr_name, attr_data in results.subgroup_performance.items(): - for _group_name, group_data in attr_data.items(): + # Extract groups from nested structure + groups_data = attr_data.get("groups", attr_data) if isinstance(attr_data, dict) else {} + for group_name, group_data in groups_data.items(): + # Skip metadata keys + if group_name in ("attribute", "threshold", "reference", "disparities"): + continue if isinstance(group_data, dict): n = group_data.get("n", 0) if n < 30: @@ -589,7 +596,13 @@ def create_fairness_dashboard(results: "AuditResults") -> go.Figure: if not isinstance(attr_data, dict): continue - for group_name, group_data in attr_data.items(): + # Extract groups from nested structure + groups_data = attr_data.get("groups", attr_data) + + for group_name, group_data in groups_data.items(): + # Skip metadata keys + if group_name in ("attribute", "threshold", "reference", "disparities"): + continue if not isinstance(group_data, dict) or "error" in group_data: continue @@ -768,7 +781,8 @@ def create_fairness_dashboard(results: "AuditResults") -> go.Figure: fig.update_layout( title=dict( text=f"Fairness Dashboard: {results.config.model_name}", - x=0.5, + x=0, + xanchor="left", font=dict(size=16), ), height=1000, # Taller for more spacing @@ -821,13 +835,19 @@ def plot_subgroup_comparison( if not isinstance(attr_data, dict): continue + # Extract groups from nested structure + groups_data = attr_data.get("groups", attr_data) + groups = [] values = [] errors_low = [] errors_high = [] colors = [] - for group_name, group_data in attr_data.items(): + for group_name, group_data in groups_data.items(): + # Skip metadata keys + if group_name in ("attribute", "threshold", "reference", "disparities"): + continue if not isinstance(group_data, dict) or "error" in group_data: continue @@ -886,7 +906,8 @@ def plot_subgroup_comparison( fig.update_layout( title=dict( text=f"Subgroup {metric_labels.get(metric, metric)} Comparison", - x=0.5, + x=0, + xanchor="left", ), xaxis_title="Subgroup", yaxis_title=metric_labels.get(metric, metric), @@ -1157,7 +1178,10 @@ def create_governance_overall_figures(results: "AuditResults") -> dict[str, Any] return figures -def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dict[str, go.Figure]]: +def create_governance_subgroup_figures( + results: "AuditResults", + primary_metric: "FairnessMetric | None" = None, +) -> dict[str, dict[str, go.Figure]]: """Create subgroup performance figures for governance report. For each sensitive attribute, generates 4 figures (Van Calster 4): @@ -1167,13 +1191,31 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic 4. Selection Rate by Subgroup - Demographic parity check Each figure includes plain language explanations per the governance spec. + Charts corresponding to the primary_metric are visually highlighted. Args: results: AuditResults from FairCareAudit.run(). + primary_metric: The primary fairness metric to highlight. If None, + uses results.config.primary_fairness_metric. Returns: Dict mapping attribute name to dict of figure title -> Plotly Figure. 
""" + from faircareai.core.config import FairnessMetric + + # Get primary metric from results if not provided + if primary_metric is None: + primary_metric = getattr(results.config, "primary_fairness_metric", None) + + # Map fairness metrics to chart keys for highlighting + metric_to_chart = { + FairnessMetric.DEMOGRAPHIC_PARITY: "Selection Rate by Subgroup", + FairnessMetric.EQUAL_OPPORTUNITY: "Sensitivity by Subgroup", + FairnessMetric.EQUALIZED_ODDS: "Sensitivity by Subgroup", # TPR is part of EO + FairnessMetric.PREDICTIVE_PARITY: None, # PPV not shown in standard charts + FairnessMetric.CALIBRATION: None, # Calibration not shown in standard charts + } + primary_chart_key = metric_to_chart.get(primary_metric) if primary_metric else None # Plain language explanations for Van Calster 4 visualizations SUBGROUP_EXPLANATIONS = { "auroc": ( @@ -1247,6 +1289,11 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic for ref in is_reference ] + # Determine which charts correspond to the primary metric + is_tpr_primary = primary_metric in (FairnessMetric.EQUAL_OPPORTUNITY, FairnessMetric.EQUALIZED_ODDS) + is_fpr_primary = primary_metric == FairnessMetric.EQUALIZED_ODDS + is_selection_primary = primary_metric == FairnessMetric.DEMOGRAPHIC_PARITY + # 1. AUROC by Subgroup fig_auroc = _create_subgroup_bar_chart( groups, @@ -1259,10 +1306,11 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic explanation=SUBGROUP_EXPLANATIONS["auroc"], y_axis_title="AUROC (Model Accuracy Score)", x_axis_title="Demographic Group", + is_primary_metric=False, # AUROC not directly a fairness metric ) figures["AUROC by Subgroup"] = fig_auroc - # 2. TPR (Sensitivity) by Subgroup + # 2. TPR (Sensitivity) by Subgroup - Equal Opportunity / Equalized Odds fig_tpr = _create_subgroup_bar_chart( groups, [v * 100 for v in tpr_vals], @@ -1274,10 +1322,11 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic explanation=SUBGROUP_EXPLANATIONS["sensitivity"], y_axis_title="True Positive Rate (%)", x_axis_title="Demographic Group", + is_primary_metric=is_tpr_primary, ) figures["Sensitivity by Subgroup"] = fig_tpr - # 3. FPR by Subgroup + # 3. FPR by Subgroup - Equalized Odds fig_fpr = _create_subgroup_bar_chart( groups, [v * 100 for v in fpr_vals], @@ -1289,10 +1338,11 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic explanation=SUBGROUP_EXPLANATIONS["fpr"], y_axis_title="False Positive Rate (%)", x_axis_title="Demographic Group", + is_primary_metric=is_fpr_primary, ) figures["FPR by Subgroup"] = fig_fpr - # 4. Selection Rate by Subgroup + # 4. Selection Rate by Subgroup - Demographic Parity fig_sel = _create_subgroup_bar_chart( groups, [v * 100 for v in selection_vals], @@ -1304,6 +1354,7 @@ def create_governance_subgroup_figures(results: "AuditResults") -> dict[str, dic explanation=SUBGROUP_EXPLANATIONS["selection"], y_axis_title="Selection Rate (% flagged)", x_axis_title="Demographic Group", + is_primary_metric=is_selection_primary, ) figures["Selection Rate by Subgroup"] = fig_sel @@ -1324,6 +1375,7 @@ def _create_subgroup_bar_chart( explanation: str = "", y_axis_title: str = "Value", x_axis_title: str = "Group", + is_primary_metric: bool = False, ) -> go.Figure: """Create a simplified bar chart for subgroup comparison. @@ -1339,6 +1391,8 @@ def _create_subgroup_bar_chart( explanation: Plain language explanation for non-technical audiences. y_axis_title: Descriptive label for Y-axis. 
x_axis_title: Descriptive label for X-axis. + is_primary_metric: If True, adds visual highlighting to indicate + this chart corresponds to the selected primary fairness metric. Returns: Plotly Figure. @@ -1376,8 +1430,16 @@ def _create_subgroup_bar_chart( # No in-chart annotations - they overlap with labels # Explanation text will be added via HTML wrapper in generator.py + # Add visual highlighting for primary metric + if is_primary_metric: + title_text = f"{title}
★ YOUR SELECTED FAIRNESS METRIC" + plot_bgcolor = "rgba(0, 114, 178, 0.05)" # Light blue background + else: + title_text = f"{title}" + plot_bgcolor = "white" + fig.update_layout( - title=dict(text=f"{title}", font=dict(size=16)), + title=dict(text=title_text, font=dict(size=16), x=0, xanchor="left"), xaxis=dict( title=x_axis_title, tickfont={"size": 11}, @@ -1395,6 +1457,7 @@ def _create_subgroup_bar_chart( height=380, # Good height for chart margin=dict(l=80, r=40, t=100, b=160), # Top: long titles, bottom: rotated labels showlegend=False, + plot_bgcolor=plot_bgcolor, ) return fig diff --git a/src/faircareai/visualization/performance_charts.py b/src/faircareai/visualization/performance_charts.py index 83754d3..0d3fdf3 100644 --- a/src/faircareai/visualization/performance_charts.py +++ b/src/faircareai/visualization/performance_charts.py @@ -223,7 +223,8 @@ def plot_discrimination_curves( fig.update_layout( title=dict( text=f"Model Discrimination: AUROC = {auroc:.3f} {auroc_ci}", - x=0.5, + x=0, + xanchor="left", ), height=450, showlegend=True, @@ -346,7 +347,7 @@ def plot_calibration_curve( # Apply theme fig = apply_faircareai_theme(fig) fig.update_layout( - title=dict(text=title_text, x=0.5), + title=dict(text=title_text, x=0, xanchor="left"), xaxis_title=x_axis_title, yaxis_title=y_axis_title, height=500, @@ -398,7 +399,7 @@ def plot_threshold_analysis( ) fig = apply_faircareai_theme(fig) fig.update_layout( - title=dict(text="Threshold Selection Impact", x=0.5), + title=dict(text="Threshold Selection Impact", x=0, xanchor="left"), height=300, ) return fig @@ -504,7 +505,7 @@ def plot_threshold_analysis( # Apply theme fig = apply_faircareai_theme(fig) fig.update_layout( - title=dict(text="Threshold Selection Impact", x=0.5), + title=dict(text="Threshold Selection Impact", x=0, xanchor="left"), height=600, showlegend=True, legend=LEGEND_POSITIONS["top_horizontal"], @@ -620,7 +621,8 @@ def plot_decision_curve( fig.update_layout( title=dict( text=title_text, - x=0.5, + x=0, + xanchor="left", ), xaxis_title=x_axis_title, yaxis_title=y_axis_title, @@ -715,7 +717,7 @@ def plot_confusion_matrix(results: AuditResults) -> go.Figure: # Apply theme fig = apply_faircareai_theme(fig) fig.update_layout( - title=dict(text=f"Confusion Matrix at Threshold = {threshold:.2f}", x=0.5), + title=dict(text=f"Confusion Matrix at Threshold = {threshold:.2f}", x=0, xanchor="left"), xaxis_title="Predicted", yaxis_title="Actual", height=400, @@ -892,7 +894,8 @@ def plot_performance_summary( fig.update_layout( title=dict( text=f"Model Performance Summary - {results.config.model_name}", - x=0.5, + x=0, + xanchor="left", ), height=600, showlegend=False, @@ -909,7 +912,8 @@ def plot_performance_summary( fig.update_layout( title=dict( text=f"Model Discrimination (AUROC) - {results.config.model_name}", - x=0.5, + x=0, + xanchor="left", ), height=350, showlegend=False,
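
Usage sketch for the behavior added in this series. `FairnessMetric`, `AuditResults.to_pdf()`, and `FairCareAudit.run()` are taken from the patches and docstrings above; the helper name `export_audit_pdf`, the assumption that the output path is `to_pdf()`'s first positional argument, and the audit setup itself are illustrative assumptions, not part of this series.

    from pathlib import Path

    def export_audit_pdf(results, out: str = "fairness_audit.pdf") -> Path:
        """Sketch: export the tailored data-scientist PDF from an existing AuditResults.

        `results` is the AuditResults returned by FairCareAudit.run(); constructing
        the audit is out of scope here.
        """
        # Patch 3 tailors Section 5 to results.config.primary_fairness_metric
        # (e.g. FairnessMetric.PREDICTIVE_PARITY), and Patch 2 adds the ppv_diff
        # and calibration_diff values that section reads, so the metric chosen at
        # audit-configuration time determines which column/chart is highlighted.
        #
        # Patch 1 threads the full AuditResults into generate_pdf_report(), so this
        # PDF embeds the governance overall/subgroup charts instead of only a
        # forest plot. Patch 3's _run_playwright_pdf_generation() detects a running
        # asyncio event loop and renders in a worker thread, so the call below also
        # works inside a Jupyter notebook.
        return results.to_pdf(out)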