Use Markov chain Monte Carlo to analyze districting plans and gerrymanders
—
Compute and track partition attributes including election results, demographic data, and structural properties. Updater functions automatically calculate district-level summaries whenever partitions change.
Track and analyze election results across districts with automatic vote tallying and percentage calculations.
class Election:
    def __init__(
        self,
        name: str,
        columns: Union[Dict[str, str], List[str]],
        alias: str = None
    ) -> None:
        """
        Create an election updater that tracks vote data by district.

        Parameters:
        - name (str): Name identifier for this election.
        - columns (Union[Dict[str, str], List[str]]): Either a dict mapping
          party names to column names, or a list of column names that serve
          as both the party names and the columns.
        - alias (str, optional): Alternative name for accessing results.

        Returns:
        None

        NOTE(review): upstream gerrychain appears to name the second
        parameter ``parties_to_columns`` -- confirm before passing it by
        keyword.
        """


# The Election class returns an ElectionResults object when used as an updater:
class ElectionResults:
    def percents(self, party: str) -> Tuple[float, ...]:
        """
        Get the vote percentages for a party across all districts.

        Parameters:
        - party (str): Party name, as given to the Election updater.

        Returns:
        Tuple[float, ...]: Vote percentages, one entry per district.

        NOTE(review): whether the values are fractions (0-1) or percentages
        (0-100) is not shown here -- confirm before formatting them.
        """

    def counts(self, party: str) -> Tuple[int, ...]:
        """
        Get the raw vote counts for a party across all districts.

        Parameters:
        - party (str): Party name, as given to the Election updater.

        Returns:
        Tuple[int, ...]: Vote counts, one entry per district.
        """

    @property
    def totals_for_party(self) -> Dict[str, Dict[int, float]]:
        """
        Get vote totals organized by party, then by district.

        Returns:
        Dict[str, Dict[int, float]]: Party -> District -> votes
        """

    @property
    def totals(self) -> Dict[int, int]:
        """
        Get the total votes cast in each district.

        Returns:
        Dict[int, int]: District -> total votes
        """


# Usage example:
from gerrychain import GeographicPartition
from gerrychain.updaters import Election

# Set up election tracking
election = Election("SEN18", ["SEN18D", "SEN18R"])  # List format
# Or: election = Election("SEN18", {"Democratic": "SEN18D", "Republican": "SEN18R"})  # Dict format

# Use in partition.
# `graph` is a gerrychain Graph loaded elsewhere, e.g. Graph.from_file("precincts.shp").
partition = GeographicPartition(
    graph,
    assignment="district",
    updaters={"SEN18": election},
)

# Access results. With the list format the column names double as party names,
# so results are looked up by "SEN18D" / "SEN18R".
election_results = partition["SEN18"]  # Returns ElectionResults object
dem_votes = election_results.counts("SEN18D")  # Tuple of counts by district
dem_percents = election_results.percents("SEN18D")  # Tuple of percentages by district
total_votes = election_results.totals  # Dict[district_id, total_votes]

# Aggregate arbitrary numeric data by district using flexible tally functions.
class Tally:
    def __init__(
        self,
        columns: Union[str, List[str]],
        alias: str = None
    ) -> None:
        """
        Create a tally updater that sums data by district.

        Parameters:
        - columns (Union[str, List[str]]): Column name(s) to sum.
        - alias (str, optional): Alternative name for accessing results.

        Returns:
        None

        NOTE(review): upstream gerrychain appears to declare this as
        ``Tally(fields, alias=None, dtype=int)`` -- confirm the parameter
        names before passing them by keyword.
        """
class DataTally:
    def __init__(
        self,
        columns: Union[str, List[str]],
        alias: str = None
    ) -> None:
        """
        Generic data tally with additional processing options.

        Parameters:
        - columns (Union[str, List[str]]): Column name(s) to aggregate.
        - alias (str, optional): Alternative name for accessing results.

        Returns:
        None

        NOTE(review): the "additional processing options" are not visible in
        this signature, and upstream gerrychain appears to declare this as
        ``DataTally(data, alias)`` -- confirm against the installed version.
        """


# Usage example:
from gerrychain import GeographicPartition
from gerrychain.updaters import Tally

# Set up demographic tallies.
# `graph` is a gerrychain Graph loaded elsewhere, e.g. Graph.from_file("precincts.shp").
partition = GeographicPartition(
    graph,
    assignment="district",
    updaters={
        "population": Tally("TOTPOP"),
        "vap": Tally("VAP"),  # Voting age population
        "minority_pop": Tally(["BVAP", "HVAP", "ASIANVAP"]),
        "households": Tally("households"),
    },
)

# Access tallied data: each tally is indexed by district id.
for district_id in partition.parts:
    district_pop = partition["population"][district_id]
    minority_pop = partition["minority_pop"][district_id]

# Track graph-theoretic and geometric properties of partitions.
def cut_edges(partition: Partition) -> Set[Tuple[NodeId, NodeId]]:
    """
    Find the edges that cross district boundaries.

    Parameters:
    - partition (Partition): Partition to analyze.

    Returns:
    Set[Tuple[NodeId, NodeId]]: Edges, as node-id pairs, that cross
    between districts.
    """
def cut_edges_by_part(partition: Partition) -> Dict[DistrictId, Set[Tuple[NodeId, NodeId]]]:
    """
    Find the cut edges, grouped by district.

    Parameters:
    - partition (Partition): Partition to analyze.

    Returns:
    Dict[DistrictId, Set[Tuple[NodeId, NodeId]]]: District -> set of cut
    edges (node-id pairs) for that district.
    """
def county_splits(
    partition: Partition,
    county_column: str = "county"
) -> Dict[str, int]:
    """
    Count the number of districts each county is split across.

    Parameters:
    - partition (Partition): Partition to analyze.
    - county_column (str): Column name holding the county identifiers.

    Returns:
    Dict[str, int]: County -> number of districts it is split across.

    NOTE(review): upstream gerrychain appears to expose this as a factory,
    ``county_splits(partition_name, county_field_name)``, that returns the
    updater function -- confirm against the installed version before
    passing ``county_splits`` itself as an updater.
    """
def boundary_nodes(partition: Partition) -> Set[NodeId]:
    """
    Find all nodes that lie on district boundaries.

    Parameters:
    - partition (Partition): Partition to analyze.

    Returns:
    Set[NodeId]: Ids of the nodes on district boundaries.
    """
def exterior_boundaries(partition: Partition) -> Set[Tuple[NodeId, NodeId]]:
    """
    Find the edges on the exterior boundary of the partition.

    Parameters:
    - partition (Partition): Partition to analyze.

    Returns:
    Set[Tuple[NodeId, NodeId]]: Exterior boundary edges.

    NOTE(review): upstream gerrychain appears to return per-district
    exterior boundary lengths rather than a set of edges -- confirm the
    return type against the installed version.
    """
def interior_boundaries(partition: Partition) -> Set[Tuple[NodeId, NodeId]]:
    """
    Find the edges on interior boundaries between districts.

    Parameters:
    - partition (Partition): Partition to analyze.

    Returns:
    Set[Tuple[NodeId, NodeId]]: Interior boundary edges.

    NOTE(review): upstream gerrychain appears to return per-district
    interior boundary lengths rather than a set of edges -- confirm the
    return type against the installed version.
    """
def flows_from_changes(
    changes: Dict[NodeId, DistrictId],
    pop_col: str = "population"
) -> Dict[Tuple[DistrictId, DistrictId], float]:
    """
    Calculate population flows from partition changes.

    Parameters:
    - changes (Dict[NodeId, DistrictId]): Node assignment changes
      (node -> district).
    - pop_col (str): Population column name.

    Returns:
    Dict[Tuple[DistrictId, DistrictId], float]: Flow between district pairs.

    NOTE(review): upstream gerrychain appears to declare this as
    ``flows_from_changes(old_partition, new_partition)`` -- confirm the
    signature against the installed version.
    """
class CountySplit:
    def __init__(self, county_column: str = "county") -> None:
        """
        Track county splits across districts.

        Parameters:
        - county_column (str): Column name holding the county data.

        Returns:
        None

        NOTE(review): upstream gerrychain appears to define ``CountySplit``
        as an enumeration of split states rather than a class constructed
        from a column name -- confirm against the installed version.
        """


# Usage example:
from gerrychain import GeographicPartition
from gerrychain.updaters import cut_edges, county_splits

# Track structural properties.
# `graph` is a gerrychain Graph loaded elsewhere, e.g. Graph.from_file("precincts.shp").
# NOTE(review): upstream gerrychain appears to expose county_splits as a factory
# that must be called to obtain the updater -- confirm before passing it directly.
partition = GeographicPartition(
    graph,
    assignment="district",
    updaters={
        "cut_edges": cut_edges,
        "county_splits": county_splits,
    },
)

# Access properties
num_cut_edges = len(partition["cut_edges"])
# Keep only the counties that span more than one district.
split_counties = {
    county: count
    for county, count in partition["county_splits"].items()
    if count > 1
}

# Example showing comprehensive data tracking in a real analysis workflow:
import numpy as np

from gerrychain import GeographicPartition, Graph
from gerrychain.updaters import (
    Election,
    Tally,
    county_splits,
    cut_edges,
    cut_edges_by_part,
)

# Load data
graph = Graph.from_file("precincts.shp")

# Set up comprehensive updaters
partition = GeographicPartition(
    graph,
    assignment="district",
    updaters={
        # Demographics
        "population": Tally("TOTPOP"),
        "vap": Tally("VAP"),
        "white_pop": Tally("WVAP"),
        "black_pop": Tally("BVAP"),
        "hispanic_pop": Tally("HVAP"),
        # Elections
        "SEN18": Election("SEN18", ["SEN18D", "SEN18R"]),
        "GOV18": Election("GOV18", ["GOV18D", "GOV18R"]),
        "PRES16": Election("PRES16", ["PRES16D", "PRES16R"]),
        # Structure
        "cut_edges": cut_edges,
        "cut_edges_by_part": cut_edges_by_part,
        "county_splits": county_splits,
        # Economic data
        # NOTE: Tally sums the column, so this is a sum of node-level medians,
        # not a district-level median.
        "median_income": Tally("median_income"),
        "poverty_rate": Tally("poverty_count"),  # Will need custom calculation for rates
    },
)

# Use in analysis
for district in partition.parts:
    population = partition["population"][district]
    senate = partition["SEN18"]  # ElectionResults object
    print(f"District {district}:")
    print(f" Population: {population:,}")
    print(f" % Black: {100 * partition['black_pop'][district] / population:.1f}%")
    # ElectionResults is queried through its documented accessors:
    # totals_for_party is Party -> District -> votes, totals is District -> votes.
    dem_pct = 100 * senate.totals_for_party["SEN18D"][district] / senate.totals[district]
    print(f" Senate Dem %: {dem_pct:.1f}%")
    # Cut edges are pairs of node ids, so testing `district in edge` would compare
    # a district id against node ids; use the per-district grouping instead.
    print(f" Cut edges: {len(partition['cut_edges_by_part'][district])}")
    print()

# Track changes over Markov chain.
# `chain` is an iterable of partitions (e.g. a gerrychain MarkovChain) built elsewhere.
populations = []
cut_edge_counts = []
for state in chain:
    populations.append(list(state["population"].values()))
    cut_edge_counts.append(len(state["cut_edges"]))

# Analyze distributions
print(f"Population std dev: {np.std(populations[-1]):.0f}")
print(f"Avg cut edges: {np.mean(cut_edge_counts):.1f}")

# Examples of creating custom updater functions for specialized analysis:
def minority_vap_percent(partition):
    """Compute the minority share of voting-age population for each district.

    Reads the "vap", "black_pop", "hispanic_pop" and "asian_pop" updaters of
    ``partition`` and returns a dict mapping each district in
    ``partition.parts`` to minority VAP divided by total VAP. Despite the
    name, the value is a fraction, not scaled by 100. Districts whose total
    VAP is not positive map to 0.
    """
    minority_columns = ("black_pop", "hispanic_pop", "asian_pop")

    def share(district):
        denominator = partition["vap"][district]
        black, hispanic, asian = (
            partition[column][district] for column in minority_columns
        )
        numerator = black + hispanic + asian
        if denominator > 0:
            return numerator / denominator
        return 0

    return {district: share(district) for district in partition.parts}
def compactness_scores(partition):
    """Evaluate several compactness measures on ``partition``.

    Returns a dict keyed by measure name ("polsby_popper", "schwartzberg")
    whose values are whatever the corresponding gerrychain.metrics function
    returns for the partition.
    """
    # NOTE(review): confirm that gerrychain.metrics exports `schwartzberg`
    # in the installed version; the import fails otherwise.
    from gerrychain.metrics import polsby_popper, schwartzberg

    measures = (
        ("polsby_popper", polsby_popper),
        ("schwartzberg", schwartzberg),
    )
    return {label: measure(partition) for label, measure in measures}
# Use custom updaters: plain functions that take a partition are registered
# next to the built-in Tally updaters. minority_vap_percent reads the "vap",
# "black_pop", "hispanic_pop" and "asian_pop" entries, so all four are tallied here.
partition = GeographicPartition(
    graph,
    assignment="district",
    updaters={
        "population": Tally("TOTPOP"),
        "vap": Tally("VAP"),
        "black_pop": Tally("BVAP"),
        "hispanic_pop": Tally("HVAP"),
        "asian_pop": Tally("ASIANVAP"),
        "minority_vap_pct": minority_vap_percent,
        "compactness": compactness_scores
    }
)

# An updater is any callable that takes a partition and returns a value.
UpdaterFunction = Callable[[Partition], Any]
# Type aliases used in the signatures on this page.
DistrictId = int
NodeId = Union[int, str]
VoteData = Dict[str, int]  # Party -> vote count
PercentageData = Dict[str, float]  # Party -> percentage

# Install with Tessl CLI
npx tessl i tessl/pypi-gerrychain