Use Markov chain Monte Carlo to analyze districting plans and gerrymanders.

Analyze districting plans using partisan metrics, compactness measures, and demographic analysis. These metrics provide quantitative tools for comparing redistricting plans and identifying statistical outliers.

Measure partisan bias and electoral competitiveness using established political science metrics.
def mean_median(election_results: Dict[str, Dict[str, int]]) -> float:
    """
    Calculate the mean-median difference for partisan bias analysis.

    Compares the median district vote share to the statewide vote share.
    Positive values indicate bias toward the party with >50% of the
    statewide vote.

    Parameters:
    - election_results (Dict[str, Dict[str, int]]): District -> party -> votes

    Returns:
        float: Mean-median difference as a decimal in [-1, 1]
    """
def mean_thirdian(election_results: Dict[str, Dict[str, int]]) -> float:
    """
    Calculate the mean-thirdian difference (more robust than mean-median).

    Analogous to :func:`mean_median` but uses the one-third-ranked district
    rather than the median — presumably to reduce sensitivity to the single
    median district; verify against the implementation.

    Parameters:
    - election_results (Dict[str, Dict[str, int]]): District -> party -> votes

    Returns:
        float: Mean-thirdian difference as a decimal
    """
def partisan_bias(election_results: Dict[str, Dict[str, int]]) -> float:
    """
    Calculate partisan bias using the standard seats-votes relationship.

    Parameters:
    - election_results (Dict[str, Dict[str, int]]): District -> party -> votes

    Returns:
        float: Partisan bias measure
    """
def partisan_gini(election_results: Dict[str, Dict[str, int]]) -> float:
    """
    Calculate the partisan Gini coefficient measuring vote distribution inequality.

    Parameters:
    - election_results (Dict[str, Dict[str, int]]): District -> party -> votes

    Returns:
        float: Gini coefficient (0 = perfectly equal, 1 = maximally unequal)
    """
def efficiency_gap(election_results: Dict[str, Dict[str, int]]) -> float:
    """
    Calculate the efficiency gap measuring wasted votes.

    Standard metric for measuring partisan gerrymandering. Values >7% or
    <-7% are often considered evidence of significant partisan bias.

    Parameters:
    - election_results (Dict[str, Dict[str, int]]): District -> party -> votes

    Returns:
        float: Efficiency gap as a decimal in [-1, 1]
    """
def wasted_votes(election_results: Dict[str, Dict[str, int]]) -> Dict[str, Dict[str, int]]:
    """
    Calculate wasted votes by party and district.

    Wasted votes are votes that do not contribute to winning a seat:
    - all votes cast for a losing candidate, and
    - surplus votes for the winning candidate (beyond 50% + 1).

    Parameters:
    - election_results (Dict[str, Dict[str, int]]): District -> party -> votes

    Returns:
        Dict[str, Dict[str, int]]: District -> party -> wasted votes
    """
def seats_won(election_results: Dict[str, Dict[str, int]]) -> Dict[str, int]:
    """
    Count the seats won by each party.

    Parameters:
    - election_results (Dict[str, Dict[str, int]]): District -> party -> votes

    Returns:
        Dict[str, int]: Party -> number of seats won
    """

Usage example:
from gerrychain.metrics import mean_median, efficiency_gap, seats_won
from gerrychain.updaters import Election

# Set up election tracking; the Election updater records district-level
# results for the named columns.
partition = GeographicPartition(
    graph,
    assignment="district",
    updaters={"SEN18": Election("SEN18", ["SEN18D", "SEN18R"])}
)

# Calculate metrics on the tracked election
mm = mean_median(partition["SEN18"])
eg = efficiency_gap(partition["SEN18"])
seats = seats_won(partition["SEN18"])

print(f"Mean-Median: {mm:.3f}")
print(f"Efficiency Gap: {eg:.3f}")
print(f"Dem seats: {seats['SEN18D']}, Rep seats: {seats['SEN18R']}")

Measure district shape compactness using geometric and graph-theoretic measures.
def polsby_popper(partition: Partition) -> Dict[DistrictId, float]:
    """
    Calculate the Polsby-Popper compactness score for each district.

    Measures the ratio of district area to the area of a circle with the
    same perimeter (4 * pi * area / perimeter ** 2). Higher values (closer
    to 1.0) indicate more compact districts.

    Parameters:
    - partition (Partition): Partition with geometric data

    Returns:
        Dict[DistrictId, float]: Polsby-Popper scores by district
    """
def schwartzberg(partition: Partition) -> Dict[DistrictId, float]:
    """
    Calculate the Schwartzberg compactness ratio for each district.

    Measures the ratio of district perimeter to the perimeter of a circle
    with the same area. Lower values (closer to 1.0) indicate more compact
    districts.

    Parameters:
    - partition (Partition): Partition with geometric data

    Returns:
        Dict[DistrictId, float]: Schwartzberg ratios by district
    """
def convex_hull_ratio(partition: Partition) -> Dict[DistrictId, float]:
    """
    Calculate the ratio of district area to its convex hull area.

    Parameters:
    - partition (Partition): Partition with geometric data

    Returns:
        Dict[DistrictId, float]: Convex hull ratios by district
    """
def reock(partition: Partition) -> Dict[DistrictId, float]:
    """
    Calculate the Reock compactness score (district area divided by the
    area of its minimum bounding circle).

    Parameters:
    - partition (Partition): Partition with geometric data

    Returns:
        Dict[DistrictId, float]: Reock scores by district
    """
def boundary_node_ratio(partition: Partition) -> Dict[DistrictId, float]:
    """
    Calculate the ratio of boundary nodes to total nodes per district.

    A graph-theoretic compactness measure that does not require geometric
    data, so it works on any partition.

    Parameters:
    - partition (Partition): Any partition

    Returns:
        Dict[DistrictId, float]: Boundary node ratios by district
    """

Usage example:
from gerrychain.metrics import polsby_popper, schwartzberg, boundary_node_ratio

# Calculate compactness for the current partition
pp_scores = polsby_popper(partition)
schw_scores = schwartzberg(partition)
boundary_ratios = boundary_node_ratio(partition)

# Report compactness district by district
for district in partition.parts:
    print(f"District {district}:")
    print(f" Polsby-Popper: {pp_scores[district]:.3f}")
    print(f" Schwartzberg: {schw_scores[district]:.3f}")
    print(f" Boundary ratio: {boundary_ratios[district]:.3f}")

Example of comprehensive metric analysis across a Markov chain:
from gerrychain import MarkovChain, GeographicPartition
from gerrychain.metrics import (
    mean_median, efficiency_gap, partisan_bias,
    polsby_popper, schwartzberg, seats_won
)
from gerrychain.updaters import Election
import numpy as np

# NOTE(review): this example assumes `graph`, `recom`, `constraints`, and
# `always_accept` are defined earlier in the session — confirm against the
# gerrychain proposals/constraints documentation.

# Set up partition with election data for two contests
partition = GeographicPartition(
    graph,
    assignment="district",
    updaters={
        "SEN18": Election("SEN18", ["SEN18D", "SEN18R"]),
        "GOV18": Election("GOV18", ["GOV18D", "GOV18R"])
    }
)

# Set up the Markov chain
chain = MarkovChain(
    proposal=recom,
    constraints=constraints,
    accept=always_accept,
    initial_state=partition,
    total_steps=10000
)

# Collect metrics at every step of the chain
metrics_data = {
    "mean_median_sen": [],
    "efficiency_gap_sen": [],
    "mean_median_gov": [],
    "efficiency_gap_gov": [],
    "dem_seats_sen": [],
    "rep_seats_sen": [],
    "avg_polsby_popper": [],
    "avg_schwartzberg": []
}

for state in chain.with_progress_bar():
    # Partisan metrics
    metrics_data["mean_median_sen"].append(mean_median(state["SEN18"]))
    metrics_data["efficiency_gap_sen"].append(efficiency_gap(state["SEN18"]))
    metrics_data["mean_median_gov"].append(mean_median(state["GOV18"]))
    metrics_data["efficiency_gap_gov"].append(efficiency_gap(state["GOV18"]))

    # Seat counts
    seats_sen = seats_won(state["SEN18"])
    metrics_data["dem_seats_sen"].append(seats_sen["SEN18D"])
    metrics_data["rep_seats_sen"].append(seats_sen["SEN18R"])

    # Compactness (average across districts)
    pp_scores = polsby_popper(state)
    schw_scores = schwartzberg(state)
    metrics_data["avg_polsby_popper"].append(np.mean(list(pp_scores.values())))
    metrics_data["avg_schwartzberg"].append(np.mean(list(schw_scores.values())))

# Analyze the ensemble distributions
print("Metric Analysis Results:")
print("=" * 50)
for metric, values in metrics_data.items():
    mean_val = np.mean(values)
    std_val = np.std(values)
    print(f"{metric:20}: {mean_val:7.3f} ± {std_val:.3f}")

# Compare the initial plan to the ensemble: the percentile shows where
# the initial plan falls within the distribution of sampled plans.
initial_mm = mean_median(partition["SEN18"])
ensemble_mm = metrics_data["mean_median_sen"]
percentile = 100 * np.mean(np.array(ensemble_mm) <= initial_mm)
print(f"\nInitial plan mean-median: {initial_mm:.3f}")
print(f"Percentile in ensemble: {percentile:.1f}%")

# Outlier analysis: flag plans whose mean-median z-score exceeds the threshold
outlier_threshold = 2.0  # 2 standard deviations
outliers = []
for i, mm in enumerate(ensemble_mm):
    z_score = abs(mm - np.mean(ensemble_mm)) / np.std(ensemble_mm)
    if z_score > outlier_threshold:
        outliers.append((i, mm, z_score))
print(f"\nFound {len(outliers)} outlier plans (>2σ)")

Examples of creating custom evaluation metrics:
def competitiveness_index(election_results):
    """Return the fraction of districts decided by a margin under 10 points.

    A district counts as competitive when it reports a positive vote total,
    at least two parties, and a top-two vote margin below 0.10 of the total.
    Returns 0 when no districts are given.
    """
    n_districts = len(election_results)
    if n_districts == 0:
        return 0
    competitive = 0
    for tallies in election_results.values():
        ballots = sum(tallies.values())
        # Skip uncontested or empty districts — they can never be competitive.
        if ballots <= 0 or len(tallies) < 2:
            continue
        top_two = sorted(tallies.values(), reverse=True)[:2]
        if (top_two[0] - top_two[1]) / ballots < 0.10:
            competitive += 1
    return competitive / n_districts
def minority_opportunity_districts(partition, minority_column="BVAP", threshold=0.4):
    """Count districts whose minority voting-age share meets *threshold*.

    Reads total VAP from the partition's "vap" data and minority VAP from
    *minority_column* (default "BVAP"); a district qualifies when
    minority / total >= threshold. Districts with zero total VAP never
    qualify.
    """
    qualifying = 0
    for district in partition.parts:
        total = partition["vap"][district]
        minority = partition[minority_column][district]
        if total > 0 and minority / total >= threshold:
            qualifying += 1
    return qualifying
# Use the custom metrics in a chain analysis
competitiveness_scores = []
minority_districts = []
for state in chain:
    comp_score = competitiveness_index(state["SEN18"])
    minority_count = minority_opportunity_districts(state)
    competitiveness_scores.append(comp_score)
    minority_districts.append(minority_count)

ElectionResults = Dict[str, Dict[str, int]]  # District -> Party -> Votes
DistrictId = int  # districts are identified by integer labels
CompactnessScores = Dict[DistrictId, float]  # district id -> compactness score

Install with Tessl CLI
npx tessl i tessl/pypi-gerrychain