diff --git a/analysis/sf_c_44_maximin_plot.pdf b/analysis/sf_c_44_maximin_plot.pdf new file mode 100644 index 0000000..8d905c8 Binary files /dev/null and b/analysis/sf_c_44_maximin_plot.pdf differ diff --git a/analysis/sf_c_44_plot.pdf b/analysis/sf_c_44_plot.pdf new file mode 100644 index 0000000..86bce9e Binary files /dev/null and b/analysis/sf_c_44_plot.pdf differ diff --git a/analysis/sf_c_44_stats.txt b/analysis/sf_c_44_stats.txt new file mode 100644 index 0000000..af6dbbe --- /dev/null +++ b/analysis/sf_c_44_stats.txt @@ -0,0 +1,6 @@ +Validation: PASS +Sum of Probs: 44.0000 (Target k=44) +Average Time: 1.62s +Gini Fairness: 52.7260% +Min Prob: 0.085937 +Max Prob: 1.000000 diff --git a/analysis/sf_e_110_maximin_plot.pdf b/analysis/sf_e_110_maximin_plot.pdf new file mode 100644 index 0000000..d922848 Binary files /dev/null and b/analysis/sf_e_110_maximin_plot.pdf differ diff --git a/analysis/sf_e_110_plot.pdf b/analysis/sf_e_110_plot.pdf new file mode 100644 index 0000000..34a5a54 Binary files /dev/null and b/analysis/sf_e_110_plot.pdf differ diff --git a/analysis/sf_e_110_stats.txt b/analysis/sf_e_110_stats.txt new file mode 100644 index 0000000..05a20c7 --- /dev/null +++ b/analysis/sf_e_110_stats.txt @@ -0,0 +1,6 @@ +Validation: PASS +Sum of Probs: 110.0000 (Target k=110) +Average Time: 169.60s +Gini Fairness: 51.6610% +Min Prob: 0.026144 +Max Prob: 1.000000 diff --git a/analyze.py b/analyze.py new file mode 100644 index 0000000..1b35083 --- /dev/null +++ b/analyze.py @@ -0,0 +1,168 @@ +# coding: utf-8 +import csv +import argparse +import logging # <-- NEW: Imported the logging module! +from argparse import ArgumentParser +from dataclasses import dataclass +from math import ceil, isclose +from pathlib import Path +import time +from typing import Dict, List, Any, NewType, Union, Tuple +import sys +import os + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from scipy.stats.mstats import gmean + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src'))) + +try: + from sortition_algorithms.committee_generation.leximin import find_distribution_leximin +except ImportError as e: + print(f"CRITICAL ERROR: Could not locate leximin.py. Details: {e}") + exit(1) + +AgentId = NewType("AgentId", Any) +FeatureCategory = NewType("FeatureCategory", str) +Feature = NewType("Feature", str) + +@dataclass +class FeatureInfo: + min: int + max: int + selected: int = 0 + remaining: int = 0 + +ProbAllocation = NewType("ProbAllocation", Dict[AgentId, float]) + +@dataclass +class Instance: + k: int + categories: Dict[FeatureCategory, Dict[Feature, FeatureInfo]] + agents: Dict[AgentId, Dict[FeatureCategory, Feature]] + +def read_instance(feature_file: Union[str, Path], pool_file: Union[str, Path], k: int) -> Instance: + feature_info = {} + with open(feature_file, "r", encoding="utf-8") as file: + reader = csv.DictReader(file) + for line in reader: + category = FeatureCategory(line["category"]) + feature = Feature(line["name"]) + if category not in feature_info: + feature_info[category] = {} + feature_info[category][feature] = FeatureInfo(min=int(line["min"]), max=int(line["max"])) + + categories = list(feature_info) + agents = {} + with open(pool_file, "r", encoding="utf-8") as file: + reader = csv.DictReader(file) + for i, line in enumerate(reader): + agent_id = AgentId(str(i)) + agents[agent_id] = {category: Feature(line[category]) for category in categories} + for category in categories: + feature_info[category][Feature(line[category])].remaining += 1 + return Instance(k=k, categories=feature_info, agents=agents) + +def leximin_probabilities(instance: Instance, backend: str = "mip") -> ProbAllocation: + portfolio, output_probs, _ = find_distribution_leximin( + instance.categories, + instance.agents, + instance.k, + [], + solver_backend=backend + ) + selection_probs = {agent_id: 0. for agent_id in instance.agents} + for panel, probability in zip(portfolio, output_probs): + for agent_id in panel: + selection_probs[agent_id] += probability + return ProbAllocation(selection_probs) + +@dataclass +class ProbAllocationStats: + gini: float + geometric_mean: float + min: float + max: float + +def compute_prob_allocation_stats(alloc: ProbAllocation) -> ProbAllocationStats: + n = len(alloc) + k = round(sum(alloc.values())) + sorted_probs = sorted(alloc.values()) + gini = sum((2 * i - n + 1) * prob for i, prob in enumerate(sorted_probs)) / (n * k) + geometric_mean = gmean([max(p, 1e-10) for p in sorted_probs]) + mini = min(sorted_probs) + maxi = max(sorted_probs) + return ProbAllocationStats(gini=gini, geometric_mean=geometric_mean, min=mini, max=maxi) + +def analyze_instance(instance_name: str, instance: Instance, backend: str, num_runs: int = 3): + Path("analysis").mkdir(exist_ok=True) + print(f"Running analysis with {backend} backend ({num_runs} runs)...") + + timings = [] + alloc = None + for i in range(num_runs): + start_time = time.time() + alloc = leximin_probabilities(instance, backend) + timings.append(time.time() - start_time) + print(f" Run {i + 1}/{num_runs} complete.") + + total_prob = sum(alloc.values()) + is_valid = isclose(total_prob, instance.k, rel_tol=1e-5) + + stats = compute_prob_allocation_stats(alloc) + + log_path = Path("analysis") / f"{instance_name}_{instance.k}_stats.txt" + with open(log_path, "w") as f: + f.write(f"Validation: {'PASS' if is_valid else 'FAIL'}\n") + f.write(f"Sum of Probs: {total_prob:.4f} (Target k={instance.k})\n") + f.write(f"Average Time: {sum(timings)/num_runs:.2f}s\n") + f.write(f"Gini Fairness: {stats.gini:.4%}\n") + f.write(f"Min Prob: {stats.min:.6f}\n") + f.write(f"Max Prob: {stats.max:.6f}\n") + + n = len(alloc) + probs = list(alloc.values()) + + plt.figure(figsize=(10, 3)) + + rng = np.random.default_rng(42) + jitter = 0.22 + y_vals = rng.uniform(-jitter, jitter, size=n) + + plt.scatter(probs, y_vals, s=18, alpha=0.55, color="tab:blue", label="Leximin") + + plt.axvline(x=instance.k/n, color='black', linestyle='--', linewidth=1, label=f'Equalized (k/n = {instance.k/n:.4f})') + plt.axvline(x=stats.min, color='red', linestyle=':', alpha=0.5, label=f'Min: {stats.min:.4f}') + plt.axvline(x=stats.max, color='orange', linestyle=':', alpha=0.5, label=f'Max: {stats.max:.4f}') + + plt.yticks([0], ["Leximin"]) + plt.grid(axis="x", alpha=0.25) + plt.ylim(-0.6, 0.6) + plt.title(f"Marginal Selection Probabilities: {instance_name} (n={n}, k={instance.k})") + plt.xlabel("Selection Probability") + plt.legend(loc="upper right") + + output_path = Path("analysis") / f"{instance_name}_{instance.k}_plot.pdf" + plt.savefig(output_path, bbox_inches="tight") + plt.close() + + print(f"VALIDATION: {'✅ PASS' if is_valid else '❌ FAIL'} (Sum={total_prob:.2f})") + print(f"Gini Score: {stats.gini:.4%}") + print(f"Min/Max Probs: {stats.min:.6f} / {stats.max:.6f}") + +if __name__ == '__main__': + # --- NEW: Turn on the "chatty" debug logs! --- + # To make it less spammy later, change logging.DEBUG to logging.INFO + logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') + + parser = ArgumentParser() + parser.add_argument('instance_name') + parser.add_argument('panel_size', type=int) + parser.add_argument('--backend', default='mip') + args = parser.parse_args() + + data_path = Path("data") / f"{args.instance_name}_{args.panel_size}" + instance = read_instance(data_path / "categories.csv", data_path / "respondents.csv", args.panel_size) + analyze_instance(args.instance_name, instance, args.backend) \ No newline at end of file diff --git a/analyze_maximin.py b/analyze_maximin.py new file mode 100644 index 0000000..3025c5b --- /dev/null +++ b/analyze_maximin.py @@ -0,0 +1,157 @@ +# coding: utf-8 +import csv +import argparse +from argparse import ArgumentParser +from dataclasses import dataclass +from math import ceil, isclose +from pathlib import Path +import time +from typing import Dict, List, Any, NewType, Union, Tuple +import sys +import os + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from scipy.stats.mstats import gmean + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src'))) + +# IMPORT SWAPPED TO MAXIMIN +try: + from sortition_algorithms.committee_generation.maximin import find_distribution_maximin +except ImportError as e: + print(f"CRITICAL ERROR: Could not locate maximin.py. Details: {e}") + exit(1) + +AgentId = NewType("AgentId", Any) +FeatureCategory = NewType("FeatureCategory", str) +Feature = NewType("Feature", str) + +@dataclass +class FeatureInfo: + min: int + max: int + selected: int = 0 + remaining: int = 0 + +ProbAllocation = NewType("ProbAllocation", Dict[AgentId, float]) + +@dataclass +class Instance: + k: int + categories: Dict[FeatureCategory, Dict[Feature, FeatureInfo]] + agents: Dict[AgentId, Dict[FeatureCategory, Feature]] + +def read_instance(feature_file: Union[str, Path], pool_file: Union[str, Path], k: int) -> Instance: + feature_info = {} + with open(feature_file, "r", encoding="utf-8") as file: + reader = csv.DictReader(file) + for line in reader: + category = FeatureCategory(line["category"]) + feature = Feature(line["name"]) + if category not in feature_info: + feature_info[category] = {} + feature_info[category][feature] = FeatureInfo(min=int(line["min"]), max=int(line["max"])) + + categories = list(feature_info) + agents = {} + with open(pool_file, "r", encoding="utf-8") as file: + reader = csv.DictReader(file) + for i, line in enumerate(reader): + agent_id = AgentId(str(i)) + agents[agent_id] = {category: Feature(line[category]) for category in categories} + for category in categories: + feature_info[category][Feature(line[category])].remaining += 1 + return Instance(k=k, categories=feature_info, agents=agents) + +def maximin_probabilities(instance: Instance, backend: str = "mip") -> ProbAllocation: + portfolio, output_probs, _ = find_distribution_maximin( + instance.categories, + instance.agents, + instance.k, + [], + solver_backend=backend + ) + selection_probs = {agent_id: 0. for agent_id in instance.agents} + for panel, probability in zip(portfolio, output_probs): + for agent_id in panel: + selection_probs[agent_id] += probability + return ProbAllocation(selection_probs) + +@dataclass +class ProbAllocationStats: + gini: float + geometric_mean: float + min: float + max: float + +def compute_prob_allocation_stats(alloc: ProbAllocation) -> ProbAllocationStats: + n = len(alloc) + k = round(sum(alloc.values())) + sorted_probs = sorted(alloc.values()) + gini = sum((2 * i - n + 1) * prob for i, prob in enumerate(sorted_probs)) / (n * k) + geometric_mean = gmean([max(p, 1e-10) for p in sorted_probs]) + mini = min(sorted_probs) + maxi = max(sorted_probs) + return ProbAllocationStats(gini=gini, geometric_mean=geometric_mean, min=mini, max=maxi) + +def analyze_instance(instance_name: str, instance: Instance, backend: str, num_runs: int = 1): + Path("analysis").mkdir(exist_ok=True) + print(f"Running MAXIMIN analysis with {backend} backend ({num_runs} runs)...") + + timings = [] + alloc = None + for i in range(num_runs): + start_time = time.time() + alloc = maximin_probabilities(instance, backend) + timings.append(time.time() - start_time) + print(f" Run {i + 1}/{num_runs} complete.") + + total_prob = sum(alloc.values()) + is_valid = isclose(total_prob, instance.k, rel_tol=1e-5) + + stats = compute_prob_allocation_stats(alloc) + + # Plotting (Scatter Strip Plot Aesthetic) + n = len(alloc) + probs = list(alloc.values()) + + plt.figure(figsize=(10, 3)) + + rng = np.random.default_rng(42) + jitter = 0.22 + y_vals = rng.uniform(-jitter, jitter, size=n) + + # Changed dot color to Red for Maximin + plt.scatter(probs, y_vals, s=18, alpha=0.55, color="tab:red", label="Maximin") + + plt.axvline(x=instance.k/n, color='black', linestyle='--', linewidth=1, label=f'Equalized (k/n = {instance.k/n:.4f})') + plt.axvline(x=stats.min, color='red', linestyle=':', alpha=0.5, label=f'Min: {stats.min:.4f}') + plt.axvline(x=stats.max, color='orange', linestyle=':', alpha=0.5, label=f'Max: {stats.max:.4f}') + + plt.yticks([0], ["Maximin"]) + plt.grid(axis="x", alpha=0.25) + plt.ylim(-0.6, 0.6) + plt.title(f"Maximin Selection Probabilities: {instance_name} (n={n}, k={instance.k})") + plt.xlabel("Selection Probability") + plt.legend(loc="upper right") + + output_path = Path("analysis") / f"{instance_name}_{instance.k}_maximin_plot.pdf" + plt.savefig(output_path, bbox_inches="tight") + plt.close() + + print(f"VALIDATION: {'✅ PASS' if is_valid else '❌ FAIL'} (Sum={total_prob:.2f})") + print(f"Gini Score: {stats.gini:.4%}") + print(f"Min/Max Probs: {stats.min:.6f} / {stats.max:.6f}") + +if __name__ == '__main__': + parser = ArgumentParser() + parser.add_argument('instance_name') + parser.add_argument('panel_size', type=int) + parser.add_argument('--backend', default='mip') + args = parser.parse_args() + + data_path = Path("data") / f"{args.instance_name}_{args.panel_size}" + instance = read_instance(data_path / "categories.csv", data_path / "respondents.csv", args.panel_size) + analyze_instance(args.instance_name, instance, args.backend) \ No newline at end of file diff --git a/src/sortition_algorithms/committee_generation/leximin.py b/src/sortition_algorithms/committee_generation/leximin.py index 1a17c89..628a3a8 100644 --- a/src/sortition_algorithms/committee_generation/leximin.py +++ b/src/sortition_algorithms/committee_generation/leximin.py @@ -249,8 +249,8 @@ def _run_leximin_main_loop( fixed_probabilities: dict[str, float] = {} reduction_counter = 0 - while len(fixed_probabilities) < people.count: - logger.debug(f"Fixed {len(fixed_probabilities)}/{people.count} probabilities.") + while len(fixed_probabilities) < len(people): + logger.debug(f"Fixed {len(fixed_probabilities)}/{len(people)} probabilities.") dual_model, dual_agent_vars, dual_cap_var = _dual_leximin_stage( people, @@ -321,7 +321,7 @@ def find_distribution_leximin( # Find initial committees that cover every possible agent committees, covered_agents, initial_report = generate_initial_committees( - new_committee_solver, agent_vars, 3 * people.count + new_committee_solver, agent_vars, 3 * len(people) ) report.add_report(initial_report) diff --git a/src/sortition_algorithms/committee_generation/maximin.py b/src/sortition_algorithms/committee_generation/maximin.py index d5265ce..09ca690 100644 --- a/src/sortition_algorithms/committee_generation/maximin.py +++ b/src/sortition_algorithms/committee_generation/maximin.py @@ -297,7 +297,7 @@ def find_distribution_maximin( # Find initial committees that cover every possible agent committees, covered_agents, init_report = generate_initial_committees( - new_committee_solver, agent_vars, people.count + new_committee_solver, agent_vars, len(people) ) report.add_report(init_report) diff --git a/src/sortition_algorithms/committee_generation/newleximin.py b/src/sortition_algorithms/committee_generation/newleximin.py new file mode 100644 index 0000000..55f8467 --- /dev/null +++ b/src/sortition_algorithms/committee_generation/newleximin.py @@ -0,0 +1,268 @@ +# ABOUTME: Leximin algorithm for committee generation. +# ABOUTME: Maximizes minimum probability, breaking ties by second-lowest, third-lowest, etc. + +import logging +from typing import Any + +import numpy + +try: + import gurobipy as grb + + GUROBI_AVAILABLE = True +except ImportError: + GUROBI_AVAILABLE = False + grb = None + +from sortition_algorithms.committee_generation.common import ( + EPS, + generate_initial_committees, + ilp_results_to_committee, + setup_committee_generation, +) +from sortition_algorithms.committee_generation.solver import Solver, SolverSense, solver_sum +from sortition_algorithms.features import FeatureCollection +from sortition_algorithms.people import People +from sortition_algorithms.utils import RunReport, logger + + +def _dual_leximin_stage( + people: People, + committees: set[frozenset[str]], + fixed_probabilities: dict[str, float], +) -> tuple: + if not GUROBI_AVAILABLE: + msg = "Leximin algorithm requires Gurobi solver which is not available" + raise RuntimeError(msg, "gurobi_not_available", {}) + + assert len(committees) != 0 + + model = grb.Model() + agent_vars = {agent_id: model.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) for agent_id in people} + cap_var = model.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) + model.addConstr( + grb.quicksum(agent_vars[agent_id] for agent_id in people if agent_id not in fixed_probabilities) == 1 + ) + for committee in committees: + model.addConstr(grb.quicksum(agent_vars[agent_id] for agent_id in committee) <= cap_var) + model.setObjective( + cap_var + - grb.quicksum(fixed_probabilities[agent_id] * agent_vars[agent_id] for agent_id in fixed_probabilities), + grb.GRB.MINIMIZE, + ) + + model.setParam("Method", 2) # optimize via barrier only + model.setParam("Crossover", 0) # deactivate cross-over + + return model, agent_vars, cap_var + + +def _add_report_update( + report: RunReport, dual_obj: float, value: float, upper: float, committees: set[frozenset[str]] +) -> None: + at_most = f"{dual_obj - upper + value:.2%}" + dual_obj_str = f"{dual_obj:.2%}" + gap_str = f"{value - upper:.2%}" + report.add_message_and_log( + "leximin_is_at_most", + log_level=logging.DEBUG, + at_most=at_most, + dual_obj_str=dual_obj_str, + num_committees=len(committees), + gap_str=gap_str, + ) + + +def _run_leximin_column_generation_loop( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + dual_model: Any, + dual_agent_vars: dict[str, Any], + dual_cap_var: Any, + committees: set[frozenset[str]], + fixed_probabilities: dict[str, float], + people: People, + reduction_counter: int, + report: RunReport, +) -> tuple[bool, int]: + while True: + dual_model.optimize() + if dual_model.status != grb.GRB.OPTIMAL: + for agent_id in fixed_probabilities: + fixed_probabilities[agent_id] = max(0.0, fixed_probabilities[agent_id] - 0.0001) + dual_model, dual_agent_vars, dual_cap_var = _dual_leximin_stage( + people, + committees, + fixed_probabilities, + ) + logger.debug(f"Status: {dual_model.status} REDUCE PROBS for {reduction_counter}th time.") + reduction_counter += 1 + continue + + agent_weights = {agent_id: agent_var.x for agent_id, agent_var in dual_agent_vars.items()} + new_committee_solver.set_objective( + solver_sum(agent_weights[agent_id] * agent_vars[agent_id] for agent_id in people), SolverSense.MAXIMIZE + ) + new_committee_solver.optimize() + new_set = ilp_results_to_committee(new_committee_solver, agent_vars) + value = new_committee_solver.get_objective_value() + + upper = dual_cap_var.x + dual_obj = dual_model.objVal + + _add_report_update(report, dual_obj, value, upper, committees) + if value <= upper + EPS: + for agent_id, agent_weight in agent_weights.items(): + if agent_weight > EPS and agent_id not in fixed_probabilities: + fixed_probabilities[agent_id] = max(0, dual_obj) + return True, reduction_counter + + assert new_set not in committees + committees.add(new_set) + dual_model.addConstr(grb.quicksum(dual_agent_vars[agent_id] for agent_id in new_set) <= dual_cap_var) + + +def _solve_leximin_primal_for_final_probabilities( + committees: set[frozenset[str]], fixed_probabilities: dict[str, float] +) -> list[float]: + primal = grb.Model() + committee_vars = [primal.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) for _ in committees] + eps = primal.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) + primal.addConstr(grb.quicksum(committee_vars) == 1) + for agent_id, prob in fixed_probabilities.items(): + agent_probability = grb.quicksum( + comm_var for committee, comm_var in zip(committees, committee_vars, strict=False) if agent_id in committee + ) + primal.addConstr(agent_probability >= prob - eps) + primal.setObjective(eps, grb.GRB.MINIMIZE) + primal.optimize() + + probabilities = numpy.array([comm_var.x for comm_var in committee_vars]).clip(0, 1) + return list(probabilities / sum(probabilities)) + + +def _run_leximin_main_loop( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + committees: set[frozenset[str]], + people: People, + report: RunReport, +) -> dict[str, float]: + fixed_probabilities: dict[str, float] = {} + reduction_counter = 0 + + while len(fixed_probabilities) < people.count: + logger.debug(f"Fixed {len(fixed_probabilities)}/{people.count} probabilities.") + + dual_model, dual_agent_vars, dual_cap_var = _dual_leximin_stage( + people, + committees, + fixed_probabilities, + ) + + should_break, reduction_counter = _run_leximin_column_generation_loop( + new_committee_solver, + agent_vars, + dual_model, + dual_agent_vars, + dual_cap_var, + committees, + fixed_probabilities, + people, + reduction_counter, + report, + ) + if should_break: + break + + return fixed_probabilities + + +def find_distribution_leximin( + features: FeatureCollection, + people: People, + number_people_wanted: int, + check_same_address_columns: list[str], + solver_backend: str = "highspy", +) -> tuple[list[frozenset[str]], list[float], RunReport]: + if not GUROBI_AVAILABLE: + msg = "Leximin algorithm requires Gurobi solver which is not available" + raise RuntimeError(msg, "gurobi_not_available", {}) + + report = RunReport() + report.add_message_and_log("using_leximin_algorithm", logging.INFO) + grb.setParam("OutputFlag", 0) + + new_committee_solver, agent_vars = setup_committee_generation( + features, people, number_people_wanted, check_same_address_columns, solver_backend + ) + + # cap initial committees at 100 to speedup + committees, covered_agents, initial_report = generate_initial_committees( + new_committee_solver, agent_vars, 100 + ) + report.add_report(initial_report) + + fixed_probabilities = _run_leximin_main_loop(new_committee_solver, agent_vars, committees, people, report) + + probabilities_normalised = _solve_leximin_primal_for_final_probabilities(committees, fixed_probabilities) + + # ========================================== + # WHITEBOARD VALIDATION CHECKS + # ========================================== + total_prob = sum(probabilities_normalised) + min_prob = min(fixed_probabilities.values()) if fixed_probabilities else 0.0 + + print("\n" + "="*50) + print(f"WHITEBOARD CHECKS (Dataset size: {people.count})") + print("="*50) + + # Check 1: Do probabilities add to 1? + print(f"1. Do probabilities add to 1? -> {total_prob:.6f} ", end="") + if abs(total_prob - 1.0) < 1e-5: + print("(✅ PASS)") + else: + print("(❌ FAIL)") + + # Check 2: The exact objective (lowest selection probability) + print(f"2. Objective (Lowest Selection Prob) -> {min_prob:.6f}") + print("="*50) + + # ========================================== + # STRIP PLOTS & CSV GENERATION + # ========================================== + try: + import csv + import matplotlib.pyplot as plt + + dataset_size = people.count + print(f"\nGenerating CSV and Strip Plot for dataset size {dataset_size}...") + + # save to CSV + csv_filename = f"individual_probs_{dataset_size}.csv" + with open(csv_filename, mode='w', newline='') as file: + writer = csv.writer(file) + writer.writerow(["Agent_ID", "Selection_Probability"]) + for agent_id, prob in fixed_probabilities.items(): + writer.writerow([agent_id, f"{prob:.6f}"]) + + # generate Strip Plot + prob_values = list(fixed_probabilities.values()) + plt.figure(figsize=(10, 3)) + plt.scatter(prob_values, [0] * len(prob_values), alpha=0.3, color='blue', edgecolors='none') + plt.title(f'Strip Plot of Selection Probabilities (N={dataset_size})', fontsize=14) + plt.xlabel('Probability of being selected', fontsize=12) + plt.yticks([]) + plt.gca().spines['left'].set_visible(False) + plt.gca().spines['right'].set_visible(False) + plt.gca().spines['top'].set_visible(False) + plt.tight_layout() + + img_filename = f"strip_plot_{dataset_size}.png" + plt.savefig(img_filename, dpi=300) + plt.close() # Close figure to save memory + print(f"✅ Saved {csv_filename} and {img_filename}!\n") + except Exception as e: + print(f"⚠️ Could not generate plot/csv: {e}\n") + + return list(committees), probabilities_normalised, report \ No newline at end of file diff --git a/src/sortition_algorithms/committee_generation/og_leximin.py b/src/sortition_algorithms/committee_generation/og_leximin.py new file mode 100644 index 0000000..628a3a8 --- /dev/null +++ b/src/sortition_algorithms/committee_generation/og_leximin.py @@ -0,0 +1,334 @@ +# ABOUTME: Leximin algorithm for committee generation. +# ABOUTME: Maximizes minimum probability, breaking ties by second-lowest, third-lowest, etc. + +import logging +from typing import Any + +import numpy + +try: + import gurobipy as grb + + GUROBI_AVAILABLE = True +except ImportError: + GUROBI_AVAILABLE = False + grb = None + +from sortition_algorithms.committee_generation.common import ( + EPS, + generate_initial_committees, + ilp_results_to_committee, + setup_committee_generation, +) +from sortition_algorithms.committee_generation.solver import Solver, SolverSense, solver_sum +from sortition_algorithms.features import FeatureCollection +from sortition_algorithms.people import People +from sortition_algorithms.utils import RunReport, logger + + +def _dual_leximin_stage( + people: People, + committees: set[frozenset[str]], + fixed_probabilities: dict[str, float], +) -> tuple: + """Implements the dual LP described in `find_distribution_leximin`, but where P only ranges over the panels + in `committees` rather than over all feasible panels: + minimize ŷ - Σ_{i in fixed_probabilities} fixed_probabilities[i] * yᵢ + s.t. Σ_{i ∈ P} yᵢ ≤ ŷ ∀ P + Σ_{i not in fixed_probabilities} yᵢ = 1 + ŷ, yᵢ ≥ 0 ∀ i + + Args: + people: People object with all agents + committees: set of feasible committees + fixed_probabilities: dict mapping agent_id to fixed probability + + Returns: + tuple of (grb.Model, dict[str, grb.Var], grb.Var) - (model, agent_vars, cap_var) + + Raises: + RuntimeError: If Gurobi is not available + """ + if not GUROBI_AVAILABLE: + msg = "Leximin algorithm requires Gurobi solver which is not available" + raise RuntimeError(msg, "gurobi_not_available", {}) + + assert len(committees) != 0 + + model = grb.Model() + agent_vars = {agent_id: model.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) for agent_id in people} # yᵢ + cap_var = model.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) # ŷ + model.addConstr( + grb.quicksum(agent_vars[agent_id] for agent_id in people if agent_id not in fixed_probabilities) == 1 + ) + for committee in committees: + model.addConstr(grb.quicksum(agent_vars[agent_id] for agent_id in committee) <= cap_var) + model.setObjective( + cap_var + - grb.quicksum(fixed_probabilities[agent_id] * agent_vars[agent_id] for agent_id in fixed_probabilities), + grb.GRB.MINIMIZE, + ) + + # Change Gurobi configuration to encourage strictly complementary ("inner") solutions. These solutions will + # typically allow to fix more probabilities per outer loop of the leximin algorithm. + model.setParam("Method", 2) # optimize via barrier only + model.setParam("Crossover", 0) # deactivate cross-over + + return model, agent_vars, cap_var + + +def _add_report_update( + report: RunReport, dual_obj: float, value: float, upper: float, committees: set[frozenset[str]] +) -> None: + at_most = f"{dual_obj - upper + value:.2%}" + dual_obj_str = f"{dual_obj:.2%}" + gap_str = f"{value - upper:.2%}" + report.add_message_and_log( + "leximin_is_at_most", + log_level=logging.DEBUG, + at_most=at_most, + dual_obj_str=dual_obj_str, + num_committees=len(committees), + gap_str=gap_str, + ) + + +def _run_leximin_column_generation_loop( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + dual_model: Any, # grb.Model + dual_agent_vars: dict[str, Any], # dict[str, grb.Var] + dual_cap_var: Any, # grb.Var + committees: set[frozenset[str]], + fixed_probabilities: dict[str, float], + people: People, + reduction_counter: int, + report: RunReport, +) -> tuple[bool, int]: + """Run the column generation inner loop for leximin optimization. + + The primal LP being solved by column generation, with a variable x_P for each feasible panel P: + + maximize z + s.t. Σ_{P : i ∈ P} x_P ≥ z ∀ i not in fixed_probabilities + Σ_{P : i ∈ P} x_P ≥ fixed_probabilities[i] ∀ i in fixed_probabilities + Σ_P x_P ≤ 1 (This should be thought of as equality, and wlog. + optimal solutions have equality, but simplifies dual) + x_P ≥ 0 ∀ P + + We instead solve its dual linear program: + minimize ŷ - Σ_{i in fixed_probabilities} fixed_probabilities[i] * yᵢ + s.t. Σ_{i ∈ P} yᵢ ≤ ŷ ∀ P + Σ_{i not in fixed_probabilities} yᵢ = 1 + ŷ, yᵢ ≥ 0 ∀ i + + Args: + new_committee_solver: Solver for finding committees + agent_vars: agent variables in the committee solver + dual_model: Gurobi model for dual LP + dual_agent_vars: agent variables in dual model + dual_cap_var: capacity variable in dual model + committees: set of committees (modified in-place) + fixed_probabilities: probabilities that have been fixed (modified in-place) + people: People object with all agents + reduction_counter: counter for probability reductions + output_lines: list of output messages (modified in-place) + + Returns: + tuple of (should_break_outer_loop, updated_reduction_counter) + """ + while True: + dual_model.optimize() + if dual_model.status != grb.GRB.OPTIMAL: + # In theory, the LP is feasible in the first iterations, and we only add constraints (by fixing + # probabilities) that preserve feasibility. Due to floating-point issues, however, it may happen that + # Gurobi still cannot satisfy all the fixed probabilities in the primal (meaning that the dual will be + # unbounded). In this case, we slightly relax the LP by slightly reducing all fixed probabilities. + for agent_id in fixed_probabilities: + # Relax all fixed probabilities by a small constant + fixed_probabilities[agent_id] = max(0.0, fixed_probabilities[agent_id] - 0.0001) + dual_model, dual_agent_vars, dual_cap_var = _dual_leximin_stage( + people, + committees, + fixed_probabilities, + ) + logger.debug(f"Status: {dual_model.status} REDUCE PROBS for {reduction_counter}th time.") + reduction_counter += 1 + continue + + # Find the panel P for which Σ_{i ∈ P} yᵢ is largest, i.e., for which Σ_{i ∈ P} yᵢ ≤ ŷ is tightest + agent_weights = {agent_id: agent_var.x for agent_id, agent_var in dual_agent_vars.items()} + new_committee_solver.set_objective( + solver_sum(agent_weights[agent_id] * agent_vars[agent_id] for agent_id in people), SolverSense.MAXIMIZE + ) + new_committee_solver.optimize() + new_set = ilp_results_to_committee(new_committee_solver, agent_vars) # panel P + value = new_committee_solver.get_objective_value() # Σ_{i ∈ P} yᵢ + + upper = dual_cap_var.x # ŷ + dual_obj = dual_model.objVal # ŷ - Σ_{i in fixed_probabilities} fixed_probabilities[i] * yᵢ + + _add_report_update(report, dual_obj, value, upper, committees) + if value <= upper + EPS: + # Within numeric tolerance, the panels in `committees` are enough to constrain the dual, i.e., they are + # enough to support an optimal primal solution. + for agent_id, agent_weight in agent_weights.items(): + if agent_weight > EPS and agent_id not in fixed_probabilities: + # `agent_weight` is the dual variable yᵢ of the constraint "Σ_{P : i ∈ P} x_P ≥ z" for + # i = `agent_id` in the primal LP. If yᵢ is positive, this means that the constraint must be + # binding in all optimal solutions [1], and we can fix `agent_id`'s probability to the + # optimal value of the primal/dual LP. + # [1] Theorem 3.3 in: Renato Pelessoni. Some remarks on the use of the strict complementarity in + # checking coherence and extending coherent probabilities. 1998. + fixed_probabilities[agent_id] = max(0, dual_obj) + return True, reduction_counter # Break outer loop + + # Given that Σ_{i ∈ P} yᵢ > ŷ, the current solution to `dual_model` is not yet a solution to the dual. + # Thus, add the constraint for panel P and recurse. + assert new_set not in committees + committees.add(new_set) + dual_model.addConstr(grb.quicksum(dual_agent_vars[agent_id] for agent_id in new_set) <= dual_cap_var) + + +def _solve_leximin_primal_for_final_probabilities( + committees: set[frozenset[str]], fixed_probabilities: dict[str, float] +) -> list[float]: + """Solve the final primal problem to get committee probabilities from fixed agent probabilities. + + The previous algorithm computed the leximin selection probabilities of each agent and a set of panels such that + the selection probabilities can be obtained by randomizing over these panels. Here, such a randomization is found. + + Args: + committees: set of committees + fixed_probabilities: fixed probabilities for each agent + + Returns: + list of normalized probabilities for each committee + """ + primal = grb.Model() + # Variables for the output probabilities of the different panels + committee_vars = [primal.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) for _ in committees] + # To avoid numerical problems, we formally minimize the largest downward deviation from the fixed probabilities. + eps = primal.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) + primal.addConstr(grb.quicksum(committee_vars) == 1) # Probabilities add up to 1 + for agent_id, prob in fixed_probabilities.items(): + agent_probability = grb.quicksum( + comm_var for committee, comm_var in zip(committees, committee_vars, strict=False) if agent_id in committee + ) + primal.addConstr(agent_probability >= prob - eps) + primal.setObjective(eps, grb.GRB.MINIMIZE) + primal.optimize() + + # Bound variables between 0 and 1 and renormalize, because numpy.random.choice is sensitive to small deviations here + probabilities = numpy.array([comm_var.x for comm_var in committee_vars]).clip(0, 1) + return list(probabilities / sum(probabilities)) + + +def _run_leximin_main_loop( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + committees: set[frozenset[str]], + people: People, + report: RunReport, +) -> dict[str, float]: + """Run the main leximin optimization loop that fixes probabilities iteratively. + + The outer loop maximizes the minimum of all unfixed probabilities while satisfying the fixed probabilities. + In each iteration, at least one more probability is fixed, but often more than one. + + Args: + new_committee_solver: Solver for finding committees + agent_vars: agent variables in the committee solver + committees: set of committees (modified in-place) + people: People object with all agents + output_lines: list of output messages (modified in-place) + + Returns: + dict mapping agent_id to fixed probability + """ + fixed_probabilities: dict[str, float] = {} + reduction_counter = 0 + + while len(fixed_probabilities) < len(people): + logger.debug(f"Fixed {len(fixed_probabilities)}/{len(people)} probabilities.") + + dual_model, dual_agent_vars, dual_cap_var = _dual_leximin_stage( + people, + committees, + fixed_probabilities, + ) + + # Run column generation inner loop + should_break, reduction_counter = _run_leximin_column_generation_loop( + new_committee_solver, + agent_vars, + dual_model, + dual_agent_vars, + dual_cap_var, + committees, + fixed_probabilities, + people, + reduction_counter, + report, + ) + if should_break: + break + + return fixed_probabilities + + +def find_distribution_leximin( + features: FeatureCollection, + people: People, + number_people_wanted: int, + check_same_address_columns: list[str], + solver_backend: str = "highspy", +) -> tuple[list[frozenset[str]], list[float], RunReport]: + """Find a distribution over feasible committees that maximizes the minimum probability of an agent being selected + (just like maximin), but breaks ties to maximize the second-lowest probability, breaks further ties to maximize the + third-lowest probability and so forth. + + Args: + features: FeatureCollection with min/max quotas + people: People object with pool members + number_people_wanted: desired size of the panel + check_same_address_columns: Address columns for household identification, or empty + if no address checking to be done. + solver_backend: solver backend to use ("highspy" or "mip") for committee discovery. + Note: The dual LP still uses Gurobi. + + Returns: + tuple of (committees, probabilities, output_lines) + - committees: list of feasible committees (frozenset of agent IDs) + - probabilities: list of probabilities for each committee + - output_lines: list of debug strings + + Raises: + RuntimeError: If Gurobi is not available + """ + if not GUROBI_AVAILABLE: + msg = "Leximin algorithm requires Gurobi solver which is not available" + raise RuntimeError(msg, "gurobi_not_available", {}) + + report = RunReport() + report.add_message_and_log("using_leximin_algorithm", logging.INFO) + grb.setParam("OutputFlag", 0) + + # Set up an ILP that can be used for discovering new feasible committees + new_committee_solver, agent_vars = setup_committee_generation( + features, people, number_people_wanted, check_same_address_columns, solver_backend + ) + + # Find initial committees that cover every possible agent + committees, covered_agents, initial_report = generate_initial_committees( + new_committee_solver, agent_vars, 3 * len(people) + ) + report.add_report(initial_report) + + # Run the main leximin optimization loop to fix agent probabilities + fixed_probabilities = _run_leximin_main_loop(new_committee_solver, agent_vars, committees, people, report) + + # Convert fixed agent probabilities to committee probabilities + probabilities_normalised = _solve_leximin_primal_for_final_probabilities(committees, fixed_probabilities) + + return list(committees), probabilities_normalised, report diff --git a/src/sortition_algorithms/committee_generation/op1_leximin.py b/src/sortition_algorithms/committee_generation/op1_leximin.py new file mode 100644 index 0000000..3e1933a --- /dev/null +++ b/src/sortition_algorithms/committee_generation/op1_leximin.py @@ -0,0 +1,250 @@ +# ABOUTME: Leximin algorithm for committee generation. +# ABOUTME: Maximizes minimum probability, breaking ties by second-lowest, third-lowest, etc. +# Option 1: The "Rebuilding the World" Bottleneck + +import logging +from typing import Any + +import numpy + +try: + import gurobipy as grb + + GUROBI_AVAILABLE = True +except ImportError: + GUROBI_AVAILABLE = False + grb = None + +from sortition_algorithms.committee_generation.common import ( + EPS, + generate_initial_committees, + ilp_results_to_committee, + setup_committee_generation, +) +from sortition_algorithms.committee_generation.solver import Solver, SolverSense, solver_sum +from sortition_algorithms.features import FeatureCollection +from sortition_algorithms.people import People +from sortition_algorithms.utils import RunReport, logger + + +def _setup_dual_model( + people: People, + committees: set[frozenset[str]], +) -> tuple: + """Sets up the initial Gurobi model for the dual LP exactly once.""" + if not GUROBI_AVAILABLE: + msg = "Leximin algorithm requires Gurobi solver which is not available" + raise RuntimeError(msg, "gurobi_not_available", {}) + + assert len(committees) != 0 + + model = grb.Model() + + # Variables: yᵢ for each agent, and ŷ (cap_var) + agent_vars = {agent_id: model.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) for agent_id in people} + cap_var = model.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) + + # Initial constraint: Σ yᵢ = 1 + # (Since no probabilities are fixed yet, all agents have coefficient 1) + sum_constr = model.addConstr( + grb.quicksum(agent_vars[agent_id] for agent_id in people) == 1, + name="sum_unfixed" + ) + + # Initial committee constraints + for committee in committees: + model.addConstr(grb.quicksum(agent_vars[agent_id] for agent_id in committee) <= cap_var) + + # Initial objective: minimize ŷ (cap_var) + model.setObjective(cap_var, grb.GRB.MINIMIZE) + + # Change Gurobi configuration to encourage strictly complementary ("inner") solutions. + model.setParam("Method", 2) # optimize via barrier only + model.setParam("Crossover", 0) # deactivate cross-over + + return model, agent_vars, cap_var, sum_constr + + +def _update_dual_model( + model: Any, + agent_vars: dict[str, Any], + cap_var: Any, + sum_constr: Any, + fixed_probabilities: dict[str, float], + people: People +): + """Updates the persistent dual LP in-place without rebuilding it from scratch.""" + # 1. Update the sum constraint coefficients. + for agent_id in people: + if agent_id in fixed_probabilities: + model.chgCoeff(sum_constr, agent_vars[agent_id], 0.0) + else: + model.chgCoeff(sum_constr, agent_vars[agent_id], 1.0) + + # 2. Update the objective function + model.setObjective( + cap_var - grb.quicksum( + fixed_probabilities[agent_id] * agent_vars[agent_id] + for agent_id in fixed_probabilities + ), + grb.GRB.MINIMIZE + ) + + +def _add_report_update( + report: RunReport, dual_obj: float, value: float, upper: float, committees: set[frozenset[str]] +) -> None: + at_most = f"{dual_obj - upper + value:.2%}" + dual_obj_str = f"{dual_obj:.2%}" + gap_str = f"{value - upper:.2%}" + report.add_message_and_log( + "leximin_is_at_most", + log_level=logging.DEBUG, + at_most=at_most, + dual_obj_str=dual_obj_str, + num_committees=len(committees), + gap_str=gap_str, + ) + + +def _run_leximin_column_generation_loop( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + dual_model: Any, + dual_agent_vars: dict[str, Any], + dual_cap_var: Any, + sum_constr: Any, + committees: set[frozenset[str]], + fixed_probabilities: dict[str, float], + people: People, + reduction_counter: int, + report: RunReport, +) -> tuple[bool, int]: + """Run the column generation inner loop for leximin optimization.""" + while True: + dual_model.optimize() + if dual_model.status != grb.GRB.OPTIMAL: + for agent_id in fixed_probabilities: + # Relax all fixed probabilities by a small constant + fixed_probabilities[agent_id] = max(0.0, fixed_probabilities[agent_id] - 0.0001) + + # Update persistent model instead of rebuilding + _update_dual_model(dual_model, dual_agent_vars, dual_cap_var, sum_constr, fixed_probabilities, people) + + logger.debug(f"Status: {dual_model.status} REDUCE PROBS for {reduction_counter}th time.") + reduction_counter += 1 + continue + + agent_weights = {agent_id: agent_var.x for agent_id, agent_var in dual_agent_vars.items()} + new_committee_solver.set_objective( + solver_sum(agent_weights[agent_id] * agent_vars[agent_id] for agent_id in people), SolverSense.MAXIMIZE + ) + new_committee_solver.optimize() + new_set = ilp_results_to_committee(new_committee_solver, agent_vars) + value = new_committee_solver.get_objective_value() + + upper = dual_cap_var.x + dual_obj = dual_model.objVal + + _add_report_update(report, dual_obj, value, upper, committees) + if value <= upper + EPS: + for agent_id, agent_weight in agent_weights.items(): + if agent_weight > EPS and agent_id not in fixed_probabilities: + fixed_probabilities[agent_id] = max(0, dual_obj) + return True, reduction_counter + + assert new_set not in committees + committees.add(new_set) + # Add the new constraint directly to the persistent model + dual_model.addConstr(grb.quicksum(dual_agent_vars[agent_id] for agent_id in new_set) <= dual_cap_var) + + +def _solve_leximin_primal_for_final_probabilities( + committees: set[frozenset[str]], fixed_probabilities: dict[str, float] +) -> list[float]: + """Solve the final primal problem to get committee probabilities from fixed agent probabilities.""" + primal = grb.Model() + committee_vars = [primal.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) for _ in committees] + eps = primal.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) + primal.addConstr(grb.quicksum(committee_vars) == 1) + for agent_id, prob in fixed_probabilities.items(): + agent_probability = grb.quicksum( + comm_var for committee, comm_var in zip(committees, committee_vars, strict=False) if agent_id in committee + ) + primal.addConstr(agent_probability >= prob - eps) + primal.setObjective(eps, grb.GRB.MINIMIZE) + primal.optimize() + + probabilities = numpy.array([comm_var.x for comm_var in committee_vars]).clip(0, 1) + return list(probabilities / sum(probabilities)) + + +def _run_leximin_main_loop( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + committees: set[frozenset[str]], + people: People, + report: RunReport, +) -> dict[str, float]: + """Run the main leximin optimization loop that fixes probabilities iteratively.""" + fixed_probabilities: dict[str, float] = {} + reduction_counter = 0 + + # Build the model exactly ONCE + dual_model, dual_agent_vars, dual_cap_var, sum_constr = _setup_dual_model(people, committees) + + while len(fixed_probabilities) < people.count: + logger.debug(f"Fixed {len(fixed_probabilities)}/{people.count} probabilities.") + + # Update the math before running the inner loop + _update_dual_model(dual_model, dual_agent_vars, dual_cap_var, sum_constr, fixed_probabilities, people) + + should_break, reduction_counter = _run_leximin_column_generation_loop( + new_committee_solver, + agent_vars, + dual_model, + dual_agent_vars, + dual_cap_var, + sum_constr, + committees, + fixed_probabilities, + people, + reduction_counter, + report, + ) + if should_break: + break + + return fixed_probabilities + + +def find_distribution_leximin( + features: FeatureCollection, + people: People, + number_people_wanted: int, + check_same_address_columns: list[str], + solver_backend: str = "highspy", +) -> tuple[list[frozenset[str]], list[float], RunReport]: + """Find a distribution over feasible committees that maximizes the minimum probability of an agent being selected.""" + if not GUROBI_AVAILABLE: + msg = "Leximin algorithm requires Gurobi solver which is not available" + raise RuntimeError(msg, "gurobi_not_available", {}) + + report = RunReport() + report.add_message_and_log("using_leximin_algorithm", logging.INFO) + grb.setParam("OutputFlag", 0) + + new_committee_solver, agent_vars = setup_committee_generation( + features, people, number_people_wanted, check_same_address_columns, solver_backend + ) + + committees, covered_agents, initial_report = generate_initial_committees( + new_committee_solver, agent_vars, 3 * people.count + ) + report.add_report(initial_report) + + fixed_probabilities = _run_leximin_main_loop(new_committee_solver, agent_vars, committees, people, report) + + probabilities_normalised = _solve_leximin_primal_for_final_probabilities(committees, fixed_probabilities) + + return list(committees), probabilities_normalised, report \ No newline at end of file diff --git a/src/sortition_algorithms/committee_generation/op2_leximin.py b/src/sortition_algorithms/committee_generation/op2_leximin.py new file mode 100644 index 0000000..5fa2acd --- /dev/null +++ b/src/sortition_algorithms/committee_generation/op2_leximin.py @@ -0,0 +1,273 @@ +# ABOUTME: Leximin algorithm for committee generation. +# ABOUTME: Maximizes minimum probability, breaking ties by second-lowest, third-lowest, etc. +# Option 2: Fixing the "Batch" Heuristic + +import logging +from typing import Any + +import numpy + +try: + import gurobipy as grb + + GUROBI_AVAILABLE = True +except ImportError: + GUROBI_AVAILABLE = False + grb = None + +from sortition_algorithms.committee_generation.common import ( + EPS, + generate_initial_committees, + ilp_results_to_committee, + setup_committee_generation, +) +from sortition_algorithms.committee_generation.solver import Solver, SolverSense, solver_sum +from sortition_algorithms.features import FeatureCollection +from sortition_algorithms.people import People +from sortition_algorithms.utils import RunReport, logger + + +def _dual_leximin_stage( + people: People, + committees: set[frozenset[str]], + fixed_probabilities: dict[str, float], +) -> tuple: + if not GUROBI_AVAILABLE: + msg = "Leximin algorithm requires Gurobi solver which is not available" + raise RuntimeError(msg, "gurobi_not_available", {}) + + assert len(committees) != 0 + + model = grb.Model() + agent_vars = {agent_id: model.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) for agent_id in people} + cap_var = model.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) + model.addConstr( + grb.quicksum(agent_vars[agent_id] for agent_id in people if agent_id not in fixed_probabilities) == 1 + ) + for committee in committees: + model.addConstr(grb.quicksum(agent_vars[agent_id] for agent_id in committee) <= cap_var) + model.setObjective( + cap_var + - grb.quicksum(fixed_probabilities[agent_id] * agent_vars[agent_id] for agent_id in fixed_probabilities), + grb.GRB.MINIMIZE, + ) + + model.setParam("Method", 2) + model.setParam("Crossover", 0) + + return model, agent_vars, cap_var + + +def _add_report_update( + report: RunReport, dual_obj: float, value: float, upper: float, committees: set[frozenset[str]] +) -> None: + at_most = f"{dual_obj - upper + value:.2%}" + dual_obj_str = f"{dual_obj:.2%}" + gap_str = f"{value - upper:.2%}" + report.add_message_and_log( + "leximin_is_at_most", + log_level=logging.DEBUG, + at_most=at_most, + dual_obj_str=dual_obj_str, + num_committees=len(committees), + gap_str=gap_str, + ) + + +def _run_leximin_heuristic_for_additional_committees( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + dual_model: Any, + dual_agent_vars: dict[str, Any], + dual_cap_var: Any, + committees: set[frozenset[str]], + people: People, + agent_weights: dict[str, float], + upper: float, + value: float, +) -> int: + """Run heuristic to find additional committees to reduce outer loop calls.""" + counter = 0 + new_set = None + + for _ in range(10): # Try to harvest up to 10 extra committees + if new_set is not None: + for agent_id in new_set: + # Penalize agents in the last found committee so it finds different ones + agent_weights[agent_id] *= (upper / value) if value > EPS else 0.5 + + sum_weights = sum(agent_weights.values()) + if sum_weights < EPS: + break + for agent_id in agent_weights: + agent_weights[agent_id] /= sum_weights + upper /= sum_weights + + new_committee_solver.set_objective( + solver_sum(agent_weights[agent_id] * agent_vars[agent_id] for agent_id in people), + SolverSense.MAXIMIZE, + ) + new_committee_solver.optimize() + new_set = ilp_results_to_committee(new_committee_solver, agent_vars) + value = sum(agent_weights[agent_id] for agent_id in new_set) + + if value <= upper + EPS or new_set in committees: + break + + committees.add(new_set) + dual_model.addConstr(grb.quicksum(dual_agent_vars[agent_id] for agent_id in new_set) <= dual_cap_var) + counter += 1 + + return counter + + +def _run_leximin_column_generation_loop( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + dual_model: Any, + dual_agent_vars: dict[str, Any], + dual_cap_var: Any, + committees: set[frozenset[str]], + fixed_probabilities: dict[str, float], + people: People, + reduction_counter: int, + report: RunReport, +) -> tuple[bool, int]: + while True: + dual_model.optimize() + if dual_model.status != grb.GRB.OPTIMAL: + for agent_id in fixed_probabilities: + fixed_probabilities[agent_id] = max(0.0, fixed_probabilities[agent_id] - 0.0001) + dual_model, dual_agent_vars, dual_cap_var = _dual_leximin_stage( + people, + committees, + fixed_probabilities, + ) + logger.debug(f"Status: {dual_model.status} REDUCE PROBS for {reduction_counter}th time.") + reduction_counter += 1 + continue + + agent_weights = {agent_id: agent_var.x for agent_id, agent_var in dual_agent_vars.items()} + new_committee_solver.set_objective( + solver_sum(agent_weights[agent_id] * agent_vars[agent_id] for agent_id in people), SolverSense.MAXIMIZE + ) + new_committee_solver.optimize() + new_set = ilp_results_to_committee(new_committee_solver, agent_vars) + value = new_committee_solver.get_objective_value() + + upper = dual_cap_var.x + dual_obj = dual_model.objVal + + _add_report_update(report, dual_obj, value, upper, committees) + if value <= upper + EPS: + for agent_id, agent_weight in agent_weights.items(): + if agent_weight > EPS and agent_id not in fixed_probabilities: + fixed_probabilities[agent_id] = max(0, dual_obj) + return True, reduction_counter + + assert new_set not in committees + committees.add(new_set) + dual_model.addConstr(grb.quicksum(dual_agent_vars[agent_id] for agent_id in new_set) <= dual_cap_var) + + # --- NEW HEURISTIC TRICK INJECTED HERE --- + counter = _run_leximin_heuristic_for_additional_committees( + new_committee_solver, + agent_vars, + dual_model, + dual_agent_vars, + dual_cap_var, + committees, + people, + agent_weights, + upper, + value + ) + if counter > 0: + logger.debug(f"Heuristic added {counter} extra committees.") + + +def _solve_leximin_primal_for_final_probabilities( + committees: set[frozenset[str]], fixed_probabilities: dict[str, float] +) -> list[float]: + primal = grb.Model() + committee_vars = [primal.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) for _ in committees] + eps = primal.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) + primal.addConstr(grb.quicksum(committee_vars) == 1) + for agent_id, prob in fixed_probabilities.items(): + agent_probability = grb.quicksum( + comm_var for committee, comm_var in zip(committees, committee_vars, strict=False) if agent_id in committee + ) + primal.addConstr(agent_probability >= prob - eps) + primal.setObjective(eps, grb.GRB.MINIMIZE) + primal.optimize() + + probabilities = numpy.array([comm_var.x for comm_var in committee_vars]).clip(0, 1) + return list(probabilities / sum(probabilities)) + + +def _run_leximin_main_loop( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + committees: set[frozenset[str]], + people: People, + report: RunReport, +) -> dict[str, float]: + fixed_probabilities: dict[str, float] = {} + reduction_counter = 0 + + while len(fixed_probabilities) < people.count: + logger.debug(f"Fixed {len(fixed_probabilities)}/{people.count} probabilities.") + + dual_model, dual_agent_vars, dual_cap_var = _dual_leximin_stage( + people, + committees, + fixed_probabilities, + ) + + should_break, reduction_counter = _run_leximin_column_generation_loop( + new_committee_solver, + agent_vars, + dual_model, + dual_agent_vars, + dual_cap_var, + committees, + fixed_probabilities, + people, + reduction_counter, + report, + ) + if should_break: + break + + return fixed_probabilities + + +def find_distribution_leximin( + features: FeatureCollection, + people: People, + number_people_wanted: int, + check_same_address_columns: list[str], + solver_backend: str = "highspy", +) -> tuple[list[frozenset[str]], list[float], RunReport]: + if not GUROBI_AVAILABLE: + msg = "Leximin algorithm requires Gurobi solver which is not available" + raise RuntimeError(msg, "gurobi_not_available", {}) + + report = RunReport() + report.add_message_and_log("using_leximin_algorithm", logging.INFO) + grb.setParam("OutputFlag", 0) + + new_committee_solver, agent_vars = setup_committee_generation( + features, people, number_people_wanted, check_same_address_columns, solver_backend + ) + + committees, covered_agents, initial_report = generate_initial_committees( + new_committee_solver, agent_vars, 3 * people.count + ) + report.add_report(initial_report) + + fixed_probabilities = _run_leximin_main_loop(new_committee_solver, agent_vars, committees, people, report) + + probabilities_normalised = _solve_leximin_primal_for_final_probabilities(committees, fixed_probabilities) + + return list(committees), probabilities_normalised, report \ No newline at end of file diff --git a/src/sortition_algorithms/committee_generation/op4_leximin.py b/src/sortition_algorithms/committee_generation/op4_leximin.py new file mode 100644 index 0000000..6c119e0 --- /dev/null +++ b/src/sortition_algorithms/committee_generation/op4_leximin.py @@ -0,0 +1,213 @@ +# ABOUTME: Leximin algorithm for committee generation. +# ABOUTME: Maximizes minimum probability, breaking ties by second-lowest, third-lowest, etc. +# Option 4: Heuristic Pricing (The "Good Enough" Approach) + +import logging +from typing import Any + +import numpy + +try: + import gurobipy as grb + + GUROBI_AVAILABLE = True +except ImportError: + GUROBI_AVAILABLE = False + grb = None + +from sortition_algorithms.committee_generation.common import ( + EPS, + generate_initial_committees, + ilp_results_to_committee, + setup_committee_generation, +) +from sortition_algorithms.committee_generation.solver import Solver, SolverSense, solver_sum +from sortition_algorithms.features import FeatureCollection +from sortition_algorithms.people import People +from sortition_algorithms.utils import RunReport, logger + + +def _dual_leximin_stage( + people: People, + committees: set[frozenset[str]], + fixed_probabilities: dict[str, float], +) -> tuple: + if not GUROBI_AVAILABLE: + msg = "Leximin algorithm requires Gurobi solver which is not available" + raise RuntimeError(msg, "gurobi_not_available", {}) + + assert len(committees) != 0 + + model = grb.Model() + agent_vars = {agent_id: model.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) for agent_id in people} + cap_var = model.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) + model.addConstr( + grb.quicksum(agent_vars[agent_id] for agent_id in people if agent_id not in fixed_probabilities) == 1 + ) + for committee in committees: + model.addConstr(grb.quicksum(agent_vars[agent_id] for agent_id in committee) <= cap_var) + model.setObjective( + cap_var + - grb.quicksum(fixed_probabilities[agent_id] * agent_vars[agent_id] for agent_id in fixed_probabilities), + grb.GRB.MINIMIZE, + ) + + # ---> OPTION 4 SPEEDUP: Let Gurobi use its default Simplex method <--- + # We are commenting out Paul's forced Barrier method. + # model.setParam("Method", 2) # optimize via barrier only + # model.setParam("Crossover", 0) # deactivate cross-over + + return model, agent_vars, cap_var + + +def _add_report_update( + report: RunReport, dual_obj: float, value: float, upper: float, committees: set[frozenset[str]] +) -> None: + at_most = f"{dual_obj - upper + value:.2%}" + dual_obj_str = f"{dual_obj:.2%}" + gap_str = f"{value - upper:.2%}" + report.add_message_and_log( + "leximin_is_at_most", + log_level=logging.DEBUG, + at_most=at_most, + dual_obj_str=dual_obj_str, + num_committees=len(committees), + gap_str=gap_str, + ) + + +def _run_leximin_column_generation_loop( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + dual_model: Any, + dual_agent_vars: dict[str, Any], + dual_cap_var: Any, + committees: set[frozenset[str]], + fixed_probabilities: dict[str, float], + people: People, + reduction_counter: int, + report: RunReport, +) -> tuple[bool, int]: + while True: + dual_model.optimize() + if dual_model.status != grb.GRB.OPTIMAL: + for agent_id in fixed_probabilities: + fixed_probabilities[agent_id] = max(0.0, fixed_probabilities[agent_id] - 0.0001) + dual_model, dual_agent_vars, dual_cap_var = _dual_leximin_stage( + people, + committees, + fixed_probabilities, + ) + logger.debug(f"Status: {dual_model.status} REDUCE PROBS for {reduction_counter}th time.") + reduction_counter += 1 + continue + + agent_weights = {agent_id: agent_var.x for agent_id, agent_var in dual_agent_vars.items()} + new_committee_solver.set_objective( + solver_sum(agent_weights[agent_id] * agent_vars[agent_id] for agent_id in people), SolverSense.MAXIMIZE + ) + new_committee_solver.optimize() + new_set = ilp_results_to_committee(new_committee_solver, agent_vars) + value = new_committee_solver.get_objective_value() + + upper = dual_cap_var.x + dual_obj = dual_model.objVal + + _add_report_update(report, dual_obj, value, upper, committees) + if value <= upper + EPS: + for agent_id, agent_weight in agent_weights.items(): + if agent_weight > EPS and agent_id not in fixed_probabilities: + fixed_probabilities[agent_id] = max(0, dual_obj) + return True, reduction_counter + + assert new_set not in committees + committees.add(new_set) + dual_model.addConstr(grb.quicksum(dual_agent_vars[agent_id] for agent_id in new_set) <= dual_cap_var) + + +def _solve_leximin_primal_for_final_probabilities( + committees: set[frozenset[str]], fixed_probabilities: dict[str, float] +) -> list[float]: + primal = grb.Model() + committee_vars = [primal.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) for _ in committees] + eps = primal.addVar(vtype=grb.GRB.CONTINUOUS, lb=0.0) + primal.addConstr(grb.quicksum(committee_vars) == 1) + for agent_id, prob in fixed_probabilities.items(): + agent_probability = grb.quicksum( + comm_var for committee, comm_var in zip(committees, committee_vars, strict=False) if agent_id in committee + ) + primal.addConstr(agent_probability >= prob - eps) + primal.setObjective(eps, grb.GRB.MINIMIZE) + primal.optimize() + + probabilities = numpy.array([comm_var.x for comm_var in committee_vars]).clip(0, 1) + return list(probabilities / sum(probabilities)) + + +def _run_leximin_main_loop( + new_committee_solver: Solver, + agent_vars: dict[str, Any], + committees: set[frozenset[str]], + people: People, + report: RunReport, +) -> dict[str, float]: + fixed_probabilities: dict[str, float] = {} + reduction_counter = 0 + + while len(fixed_probabilities) < people.count: + logger.debug(f"Fixed {len(fixed_probabilities)}/{people.count} probabilities.") + + dual_model, dual_agent_vars, dual_cap_var = _dual_leximin_stage( + people, + committees, + fixed_probabilities, + ) + + should_break, reduction_counter = _run_leximin_column_generation_loop( + new_committee_solver, + agent_vars, + dual_model, + dual_agent_vars, + dual_cap_var, + committees, + fixed_probabilities, + people, + reduction_counter, + report, + ) + if should_break: + break + + return fixed_probabilities + + +def find_distribution_leximin( + features: FeatureCollection, + people: People, + number_people_wanted: int, + check_same_address_columns: list[str], + solver_backend: str = "highspy", +) -> tuple[list[frozenset[str]], list[float], RunReport]: + if not GUROBI_AVAILABLE: + msg = "Leximin algorithm requires Gurobi solver which is not available" + raise RuntimeError(msg, "gurobi_not_available", {}) + + report = RunReport() + report.add_message_and_log("using_leximin_algorithm", logging.INFO) + grb.setParam("OutputFlag", 0) + + new_committee_solver, agent_vars = setup_committee_generation( + features, people, number_people_wanted, check_same_address_columns, solver_backend + ) + + # We keep our Option 3 win here (capped at 100) + committees, covered_agents, initial_report = generate_initial_committees( + new_committee_solver, agent_vars, 100 + ) + report.add_report(initial_report) + + fixed_probabilities = _run_leximin_main_loop(new_committee_solver, agent_vars, committees, people, report) + + probabilities_normalised = _solve_leximin_primal_for_final_probabilities(committees, fixed_probabilities) + + return list(committees), probabilities_normalised, report \ No newline at end of file