Spatial econometric regression models for analyzing geographically-related data interactions.
Overall
score
87%
Two-stage least squares (TSLS) estimation for handling endogenous variables in regression models, with spatial diagnostic capabilities and regime-based analysis options.
Core two-stage least squares estimation without diagnostics, providing instrumental variable estimation for models with endogeneity.
class BaseTSLS:
    def __init__(self, y, x, yend, q=None, h=None, robust=None, gwk=None, sig2n_k=False):
        """
        Two-stage least squares estimation (no diagnostics).

        Parameters:
        - y (array): nx1 dependent variable
        - x (array): nxk exogenous independent variables (excluding constant)
        - yend (array): nxp endogenous variables
        - q (array, optional): nxq external instruments (cannot use with h)
        - h (array, optional): nxl all instruments (cannot use with q)
        - robust (str, optional): 'white' or 'hac' for robust standard errors
        - gwk (pysal W object, optional): Kernel weights for HAC estimation
        - sig2n_k (bool): If True, use n-k for sigma^2 estimation

        Attributes:
        - betas (array): kx1 estimated coefficients (for x and yend combined)
        - u (array): nx1 residuals
        - predy (array): nx1 predicted values
        - z (array): nxk combined exogenous and endogenous variables
        - h (array): nxl all instruments
        - vm (array): kxk variance-covariance matrix
        - sig2 (float): Sigma squared
        - n (int): Number of observations
        - k (int): Number of parameters (the number of columns of z)
        - kstar (int): Number of endogenous variables
        """
Complete TSLS implementation with spatial diagnostics, endogeneity tests, and comprehensive output formatting.
class TSLS:
    def __init__(self, y, x, yend, q, h=None, robust=None, gwk=None, sig2n_k=False,
                 nonspat_diag=True, spat_diag=False, w=None, slx_lags=0,
                 slx_vars='All', regimes=None, vm=False, constant_regi='one',
                 cols2regi='all', regime_err_sep=False, regime_lag_sep=False,
                 cores=False, name_y=None, name_x=None, name_yend=None,
                 name_q=None, name_h=None, name_w=None, name_ds=None, latex=False,
                 name_regimes=None):
        """
        Two-stage least squares with diagnostics.

        Parameters:
        - y (array): nx1 dependent variable
        - x (array): nxk exogenous independent variables (constant added automatically)
        - yend (array): nxp endogenous variables
        - q (array): nxq external instruments
        - h (array, optional): nxl all instruments (alternative to q)
        - robust (str, optional): 'white' or 'hac' for robust standard errors
        - gwk (pysal W object, optional): Kernel weights for HAC
        - sig2n_k (bool): Use n-k for sigma^2 estimation
        - nonspat_diag (bool): Compute non-spatial diagnostics
        - spat_diag (bool): Compute Anselin-Kelejian test (requires w)
        - w (pysal W object, optional): Spatial weights for spatial diagnostics
        - slx_lags (int): Number of spatial lags of X to include
        - slx_vars (str/list): Variables to be spatially lagged
        - regimes (list/Series, optional): Regime identifier
        - vm (bool): Include variance-covariance matrix
        - constant_regi (str): Regime treatment of constant
        - cols2regi (str/list): Variables that vary by regime
        - regime_err_sep (bool): Separate error variance by regime
        - regime_lag_sep (bool): Separate spatial lag by regime
        - cores (bool): Use multiprocessing
        - name_y, name_x, name_yend, name_q, name_h, name_w, name_ds (str): Variable names
        - latex (bool): LaTeX formatting
        - name_regimes (str, optional): Name of the regime variable, used to label output

        Attributes:
        - All BaseTSLS attributes plus:
        - pr2 (float): Pseudo R-squared
        - z_stat (list): z-statistics with p-values for each coefficient
        - ak_test (tuple): Anselin-Kelejian test for spatial dependence as a
          (statistic, p-value) pair (if spat_diag=True)
        - dwh (dict): Durbin-Wu-Hausman endogeneity test
        - summary (str): Comprehensive formatted results
        - output (DataFrame): Formatted results table
        """
import numpy as np
import spreg

# Simulate a small data set in which one regressor is endogenous
n = 100

# Two independent shocks: one drives the outcome equation, the other only
# enters the endogenous regressor
structural_err = np.random.randn(n, 1)
first_stage_err = np.random.randn(n, 1)

# Exogenous regressors and a single external instrument
x = np.random.randn(n, 2)
instrument = np.random.randn(n, 1)

# The endogenous regressor shares the structural shock with y, which is what
# makes it correlated with the error term
yend = 2 * instrument + 0.5 * structural_err + first_stage_err

# Outcome equation
x1, x2 = x[:, 0:1], x[:, 1:2]
y = 1 + 2 * x1 + 3 * x2 + 1.5 * yend + structural_err

# Fit by two-stage least squares, labelling every variable for the summary
variable_names = dict(
    name_y='y',
    name_x=['x1', 'x2'],
    name_yend=['yend'],
    name_q=['instrument'],
)
tsls_model = spreg.TSLS(y, x, yend, instrument, **variable_names)

print(tsls_model.summary)
print("Pseudo R-squared:", tsls_model.pr2)
print("Durbin-Wu-Hausman test:", tsls_model.dwh)
import numpy as np
import spreg

# Multiple endogenous variables and instruments
n = 100
x = np.random.randn(n, 2)
z1 = np.random.randn(n, 1)  # instrument for first endogenous var
z2 = np.random.randn(n, 1)  # instrument for second endogenous var
z3 = np.random.randn(n, 1)  # additional instrument (overidentification)

# Two endogenous variables
yend1 = 1.5 * z1 + 0.3 * z3 + np.random.randn(n, 1)
yend2 = 2.0 * z2 + 0.4 * z3 + np.random.randn(n, 1)
yend = np.hstack([yend1, yend2])

# All external instruments
q = np.hstack([z1, z2, z3])

# Dependent variable
y = 1 + x[:, 0:1] + 2 * x[:, 1:2] + 0.5 * yend1 + 1.2 * yend2 + np.random.randn(n, 1)

# TSLS with multiple endogenous variables
multi_tsls = spreg.TSLS(y, x, yend, q,
                        name_y='y', name_x=['x1', 'x2'],
                        name_yend=['yend1', 'yend2'],
                        name_q=['z1', 'z2', 'z3'])
print(multi_tsls.summary)

# h holds ALL instruments (the exogenous regressors as well as the external
# ones), so its width must be compared with the total number of coefficients
# k. Comparing it with kstar (endogenous variables only) would report "over"
# for virtually every model.
overidentified = multi_tsls.h.shape[1] > multi_tsls.k
print(f"Model is {'over' if overidentified else 'just'}identified")
import numpy as np
import spreg
from libpysal import weights

# Spatial TSLS
n = 49  # 7x7 grid
x = np.random.randn(n, 1)
z = np.random.randn(n, 1)  # instrument
w = weights.lat2W(7, 7)  # spatial weights

# Endogenous variable
yend = 1.5 * z + np.random.randn(n, 1)

# Dependent variable (pure noise here: no spatial structure is simulated, so
# the test below is not expected to reject)
y = np.random.randn(n, 1)

# TSLS with Anselin-Kelejian test
spatial_tsls = spreg.TSLS(y, x, yend, z, w=w, spat_diag=True,
                          name_y='y', name_x=['x1'],
                          name_yend=['yend'], name_q=['instrument'])
print(spatial_tsls.summary)
print("Anselin-Kelejian test:", spatial_tsls.ak_test)

# ak_test is a (statistic, p-value) pair, so unpack it rather than indexing
# it with a string key
ak_stat, ak_pvalue = spatial_tsls.ak_test
if ak_pvalue < 0.05:
    print("Spatial dependence detected in TSLS residuals")
import numpy as np
import spreg
from libpysal import weights

# TSLS with spatially lagged exogenous regressors
n = 100
x = np.random.randn(n, 2)
instrument = np.random.randn(n, 1)

# 5-nearest-neighbour weights built from random point coordinates
coords = np.random.randn(n, 2)
w = weights.KNN.from_array(coords, k=5)

# Endogenous regressor driven by the instrument
yend = 2 * instrument + np.random.randn(n, 1)

# Outcome: intercept + row-wise sum of the x columns + endogenous effect + noise
x_total = x.sum(axis=1, keepdims=True)
y = 1 + x_total + 0.8 * yend + np.random.randn(n, 1)

# slx_lags=1 requests one spatial lag of the exogenous variables
slx_tsls = spreg.TSLS(
    y, x, yend, instrument,
    w=w, slx_lags=1, slx_vars='All',
    name_y='y', name_x=['x1', 'x2'],
    name_yend=['yend'], name_q=['instrument'],
)
print(slx_tsls.summary)
print("Includes spatial lags of X variables")
import numpy as np
import spreg

# TSLS with heteroskedasticity-robust (White) standard errors
n = 100
x = np.random.randn(n, 2)
instruments = np.random.randn(n, 2)  # two external instruments
yend = np.random.randn(n, 1)
y = np.random.randn(n, 1)

# Labels used in the printed summary
labels = {
    'name_y': 'y',
    'name_x': ['x1', 'x2'],
    'name_yend': ['yend'],
    'name_q': ['z1', 'z2'],
}

# robust='white' selects White-robust standard errors
robust_tsls = spreg.TSLS(y, x, yend, instruments, robust='white', **labels)
print(robust_tsls.summary)
print("Uses White-robust standard errors")
import numpy as np
import spreg

# TSLS with regimes
n = 150
x = np.random.randn(n, 2)
instruments = np.random.randn(n, 2)
yend = np.random.randn(n, 1)
y = np.random.randn(n, 1)

# Assign every observation to one of three regions at random
region_labels = ['North', 'South', 'East']
regimes = np.random.choice(region_labels, n)

# Let the constant and all coefficients vary by regime
regime_options = dict(regimes=regimes, constant_regi='many', cols2regi='all')
output_names = dict(
    name_y='y',
    name_x=['x1', 'x2'],
    name_yend=['yend'],
    name_q=['z1', 'z2'],
    name_regimes='region',
)
regime_tsls = spreg.TSLS(y, x, yend, instruments, **regime_options, **output_names)
print(regime_tsls.summary)
print("Coefficients vary by regime")
print("Chow test:", regime_tsls.chow)
dwh: Durbin-Wu-Hausman test for endogeneity
ak_test: Anselin-Kelejian test for spatial dependence in TSLS residuals
pr2: Pseudo R-squared for TSLS models
Install with Tessl CLI
npx tessl i tessl/pypi-spreg
docs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10