CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-astropy

Astronomy and astrophysics core library providing comprehensive tools for astronomical computations and data handling

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/statistics.md

Statistics

Statistical tools specifically designed for astronomical data analysis, including sigma clipping, robust statistics, and confidence intervals.

Capabilities

Sigma Clipping and Outlier Rejection

Iterative sigma clipping for robust statistical analysis in the presence of outliers.

def sigma_clip(data, sigma=3, sigma_lower=None, sigma_upper=None, maxiters=5, 
               cenfunc='median', stdfunc='std', axis=None, masked=True, 
               return_bounds=False, copy=True, invalid='mask'):
    """
    Iteratively sigma-clip data, rejecting values that deviate from the
    center estimate by more than the given number of spread estimates.

    Parameters
    ----------
    data : array-like
        Input data array.
    sigma : float, optional
        Number of standard deviations used for both clipping limits
        (default 3).
    sigma_lower, sigma_upper : float or None, optional
        Asymmetric lower/upper thresholds; each defaults to ``sigma``
        when None.
    maxiters : int, optional
        Maximum number of clipping iterations (default 5).
    cenfunc : {'median', 'mean'} or callable, optional
        Statistic used as the center in each iteration.
    stdfunc : {'std', 'mad_std'} or callable, optional
        Statistic used as the spread in each iteration.
    axis : int or None, optional
        Axis along which to clip.
    masked : bool, optional
        If True, return a masked array.
    return_bounds : bool, optional
        If True, also return the clipping bounds.
    copy : bool, optional
        If True, operate on a copy of the input data.
    invalid : str, optional
        How to handle invalid values (default 'mask').

    Returns
    -------
    MaskedArray or tuple
        The clipped data, optionally followed by the clipping bounds
        when ``return_bounds`` is True.
    """

class SigmaClip:
    """
    Callable, reusable configuration for sigma clipping operations.

    Parameters
    ----------
    sigma : float, optional
        Clipping threshold in units of the spread estimate (default 3).
    sigma_lower, sigma_upper : float or None, optional
        Asymmetric lower/upper thresholds; each defaults to ``sigma``
        when None.
    maxiters : int, optional
        Maximum number of clipping iterations (default 5).
    cenfunc : {'median', 'mean'} or callable, optional
        Statistic used as the center.
    stdfunc : {'std', 'mad_std'} or callable, optional
        Statistic used as the spread.
    """
    def __init__(self, sigma=3, sigma_lower=None, sigma_upper=None, maxiters=5,
                 cenfunc='median', stdfunc='std'): ...
    
    def __call__(self, data, axis=None, masked=True, return_bounds=False, copy=True):
        """Apply the configured sigma clipping to *data* and return the result."""

Robust Statistics

Robust statistical estimators that are resistant to outliers and non-Gaussian distributions.

def mad_std(data, axis=None, func=None, ignore_nan=False):
    """
    Estimate the standard deviation from the median absolute deviation
    (MAD), scaled to be consistent with a Gaussian standard deviation.

    Parameters
    ----------
    data : array-like
        Input data.
    axis : int or None, optional
        Axis along which to compute the statistic.
    func : callable or None, optional
        Function applied to the data before the MAD calculation.
    ignore_nan : bool, optional
        If True, ignore NaN values.

    Returns
    -------
    ndarray or scalar
        MAD-based estimate of the standard deviation.
    """

def biweight_location(data, c=6.0, M=None, axis=None, modify_sample_size=False):
    """
    Biweight location: a robust estimate of central tendency.

    Parameters
    ----------
    data : array-like
        Input data.
    c : float, optional
        Tuning constant (default 6.0).
    M : float or None, optional
        Initial estimate of the location; defaults to the median.
    axis : int or None, optional
        Axis along which to compute the statistic.
    modify_sample_size : bool, optional
        If True, modify the sample size for small samples.

    Returns
    -------
    ndarray or scalar
        Biweight location estimate.
    """

def biweight_scale(data, c=9.0, M=None, axis=None, modify_sample_size=False):
    """
    Biweight scale: a robust estimate of the spread of the data.

    Parameters
    ----------
    data : array-like
        Input data.
    c : float, optional
        Tuning constant (default 9.0).
    M : float or None, optional
        Location estimate; defaults to the median.
    axis : int or None, optional
        Axis along which to compute the statistic.
    modify_sample_size : bool, optional
        If True, modify the sample size for small samples.

    Returns
    -------
    ndarray or scalar
        Biweight scale estimate.
    """

def median_absolute_deviation(data, axis=None, func=None, ignore_nan=False):
    """
    Median absolute deviation (MAD) of the data.

    Parameters
    ----------
    data : array-like
        Input data.
    axis : int or None, optional
        Axis along which to compute the statistic.
    func : callable or None, optional
        Function applied to the data before the MAD calculation.
    ignore_nan : bool, optional
        If True, ignore NaN values.

    Returns
    -------
    ndarray or scalar
        Median absolute deviation.
    """

Bootstrap and Jackknife Resampling

Statistical resampling methods for uncertainty estimation and hypothesis testing.

def bootstrap(data, bootnum=100, samples=None, bootfunc=None):
    """
    Bootstrap resampling of the input data.

    Parameters
    ----------
    data : array-like
        Input data array.
    bootnum : int, optional
        Number of bootstrap realizations (default 100).
    samples : int or None, optional
        Sample size for each bootstrap realization; defaults to
        ``len(data)``.
    bootfunc : callable or None, optional
        Statistic applied to each bootstrap sample.

    Returns
    -------
    ndarray
        Bootstrap results.
    """

def jackknife_resampling(data):
    """
    Leave-one-out (jackknife) resampling of the input data.

    Parameters
    ----------
    data : array-like
        Input data array.

    Returns
    -------
    ndarray
        Jackknife-resampled data.
    """

def jackknife_stats(data, statistic, confidence_level=0.95):
    """
    Jackknife estimate, standard error, and confidence interval.

    Parameters
    ----------
    data : array-like
        Input data.
    statistic : callable
        Function computing the statistic of interest.
    confidence_level : float, optional
        Confidence level for the interval (default 0.95).

    Returns
    -------
    tuple
        ``(statistic, standard_error, confidence_interval)``.
    """

class JackknifeStat:
    """
    Reusable jackknife statistics calculator.

    Parameters
    ----------
    statistic : callable
        Function computing the statistic of interest.
    confidence_level : float, optional
        Confidence level for the interval (default 0.95).
    """
    def __init__(self, statistic, confidence_level=0.95): ...
    
    def __call__(self, data):
        """Compute jackknife statistics for *data*."""

Circular Statistics

Statistical functions for circular data (angles, phases, directions).

def circmean(data, high=2*np.pi, low=0, axis=None, nan_policy='propagate'):
    """
    Circular mean of angular data.

    Parameters
    ----------
    data : array-like
        Angular data.
    high : float, optional
        Upper bound of the angular range (default ``2*np.pi``).
    low : float, optional
        Lower bound of the angular range (default 0).
    axis : int or None, optional
        Axis along which to compute the statistic.
    nan_policy : str, optional
        How to handle NaN values (default 'propagate').

    Returns
    -------
    ndarray or scalar
        Circular mean of the input angles.
    """

def circstd(data, high=2*np.pi, low=0, axis=None, nan_policy='propagate'):
    """
    Circular standard deviation of angular data.

    Parameters
    ----------
    data : array-like
        Angular data.
    high : float, optional
        Upper bound of the angular range (default ``2*np.pi``).
    low : float, optional
        Lower bound of the angular range (default 0).
    axis : int or None, optional
        Axis along which to compute the statistic.
    nan_policy : str, optional
        How to handle NaN values (default 'propagate').

    Returns
    -------
    ndarray or scalar
        Circular standard deviation.
    """

def circvar(data, high=2*np.pi, low=0, axis=None, nan_policy='propagate'):
    """
    Circular variance of angular data.

    Parameters
    ----------
    data : array-like
        Angular data.
    high : float, optional
        Upper bound of the angular range (default ``2*np.pi``).
    low : float, optional
        Lower bound of the angular range (default 0).
    axis : int or None, optional
        Axis along which to compute the statistic.
    nan_policy : str, optional
        How to handle NaN values (default 'propagate').

    Returns
    -------
    ndarray or scalar
        Circular variance.
    """

Confidence Intervals

Statistical confidence interval calculations for various distributions commonly used in astronomy.

def poisson_conf_interval(n, interval='root-n', confidence_level=0.6826894921370859, 
                         background=0, conflevel=None):
    """
    Confidence interval for Poisson-distributed counts.

    Parameters
    ----------
    n : int or array-like
        Observed counts.
    interval : str, optional
        Method: 'root-n', 'root-n-0', 'pearson', 'sherpagehrels', or
        'kraft-burrows-nousek'.
    confidence_level : float, optional
        Confidence level; the default (~0.6827) corresponds to 1-sigma.
    background : float, optional
        Background counts.
    conflevel : float or None, optional
        Deprecated; use ``confidence_level`` instead.

    Returns
    -------
    tuple
        ``(lower_bound, upper_bound)`` of the interval.
    """

def binom_conf_interval(k, n, confidence_level=0.6826894921370859, interval='wilson'):
    """
    Confidence interval for a binomial success fraction.

    Parameters
    ----------
    k : int or array-like
        Number of successes.
    n : int or array-like
        Number of trials.
    confidence_level : float, optional
        Confidence level; the default (~0.6827) corresponds to 1-sigma.
    interval : str, optional
        Method: 'wilson', 'jeffreys', 'flat', or 'wald'.

    Returns
    -------
    tuple
        ``(lower_bound, upper_bound)`` of the interval.
    """

def median_conf_interval(data, confidence_level=0.6826894921370859, axis=None):
    """
    Confidence interval for the median of the data.

    Parameters
    ----------
    data : array-like
        Input data.
    confidence_level : float, optional
        Confidence level; the default (~0.6827) corresponds to 1-sigma.
    axis : int or None, optional
        Axis along which to compute the interval.

    Returns
    -------
    tuple
        ``(lower_bound, upper_bound)`` of the interval.
    """

Histogram and Density Estimation

Advanced histogram functions with automatic binning and density estimation.

def histogram(a, bins='blocks', range=None, **kwargs):
    """
    Histogram with support for automatic bin-selection algorithms.

    Parameters
    ----------
    a : array-like
        Input data.
    bins : str, int, or array-like, optional
        Binning method ('blocks', 'knuth', 'scott', 'freedman'), a fixed
        number of bins, or explicit bin edges.
    range : tuple or None, optional
        Lower and upper range of the histogram.
    **kwargs
        Additional histogram arguments.

    Returns
    -------
    tuple
        ``(counts, bin_edges)``.
    """

def knuth_bin_width(data, return_bins=False, quiet=True):
    """
    Optimal histogram bin width from Knuth's rule.

    Parameters
    ----------
    data : array-like
        Input data.
    return_bins : bool, optional
        If True, return bin edges instead of the width.
    quiet : bool, optional
        If True, suppress output.

    Returns
    -------
    float or ndarray
        Bin width, or bin edges when ``return_bins`` is True.
    """

def scott_bin_width(data, return_bins=False):
    """
    Optimal histogram bin width from Scott's rule.

    Parameters
    ----------
    data : array-like
        Input data.
    return_bins : bool, optional
        If True, return bin edges instead of the width.

    Returns
    -------
    float or ndarray
        Bin width, or bin edges when ``return_bins`` is True.
    """

def freedman_bin_width(data, return_bins=False):
    """
    Optimal histogram bin width from the Freedman-Diaconis rule.

    Parameters
    ----------
    data : array-like
        Input data.
    return_bins : bool, optional
        If True, return bin edges instead of the width.

    Returns
    -------
    float or ndarray
        Bin width, or bin edges when ``return_bins`` is True.
    """

def bayesian_blocks(t, x=None, sigma=None, fitness='events', **kwargs):
    """
    Optimal adaptive binning via the Bayesian blocks algorithm.

    Parameters
    ----------
    t : array-like
        Data values, or time stamps when ``x`` is given.
    x : array-like or None, optional
        Data values measured at times ``t``.
    sigma : array-like or None, optional
        Errors on the data values.
    fitness : str, optional
        Fitness function: 'events', 'regular_events', or 'measures'.
    **kwargs
        Additional fitness-function arguments.

    Returns
    -------
    ndarray
        Optimal bin edges.
    """

Usage Examples

Sigma Clipping for Outlier Rejection

from astropy.stats import sigma_clip, SigmaClip
import numpy as np

# Simulate a Gaussian sample and inject periodic outliers.
np.random.seed(42)
data = np.random.normal(100, 15, 100)
data[::10] = 200  # every 10th point becomes an outlier

# One-shot sigma clipping returns a masked array.
clipped = sigma_clip(data, sigma=3, maxiters=5)
n_total = len(data)
n_kept = len(clipped.compressed())
print(f"Original data: {n_total} points")
print(f"After clipping: {n_kept} points")
print(f"Mean before: {np.mean(data):.2f}")
print(f"Mean after: {np.mean(clipped.compressed()):.2f}")

# A SigmaClip instance captures the settings for repeated use.
clipper = SigmaClip(sigma=2.5, maxiters=10)
clipped_again = clipper(data)

Robust Statistics

from astropy.stats import mad_std, biweight_location, biweight_scale

# Compare classical and robust estimators on the same outlier-laden data.
center_bw = biweight_location(data)
scale_bw = biweight_scale(data)
scale_mad = mad_std(data)

print(f"Standard mean: {np.mean(data):.2f}")
print(f"Biweight location: {center_bw:.2f}")
print(f"Standard std: {np.std(data):.2f}")
print(f"Biweight scale: {scale_bw:.2f}")
print(f"MAD std: {scale_mad:.2f}")

Bootstrap Uncertainty Estimation

from astropy.stats import bootstrap

def ratio_stat(sample):
    """Signal-to-noise-like statistic: mean divided by standard deviation."""
    return np.mean(sample) / np.std(sample)

# Value of the statistic on the original sample.
original_stat = ratio_stat(data)

# Resample to estimate the sampling distribution of the statistic.
boot_vals = bootstrap(data, bootnum=1000, bootfunc=ratio_stat)
bootstrap_std = np.std(boot_vals)
ci_68 = np.percentile(boot_vals, [16, 84])

print(f"Original statistic: {original_stat:.3f}")
print(f"Bootstrap uncertainty: ±{bootstrap_std:.3f}")
print(f"68% confidence interval: [{ci_68[0]:.3f}, {ci_68[1]:.3f}]")

Circular Statistics

from astropy.stats import circmean, circstd
import astropy.units as u

# Angles in radians; values near 6.2 sit just below 2π, i.e. close to 0 on the circle.
angles = np.array([0.1, 0.3, 6.2, 6.1, 0.2, 0.4])

# Circular statistics respect the wrap-around; linear ones do not.
mean_circ = circmean(angles)
std_circ = circstd(angles)

print(f"Linear mean: {np.mean(angles):.3f} rad")
print(f"Circular mean: {mean_circ:.3f} rad")  
print(f"Linear std: {np.std(angles):.3f} rad")
print(f"Circular std: {std_circ:.3f} rad")

# Radians-to-degrees conversion for display.
print(f"Circular mean: {mean_circ * 180/np.pi:.1f} degrees")

Confidence Intervals

from astropy.stats import poisson_conf_interval, binom_conf_interval

# Poisson intervals arise naturally in photon-counting experiments.
observed_counts = 25
lo, hi = poisson_conf_interval(observed_counts, interval='root-n')
print(f"Observed counts: {observed_counts}")
print(f"Root-n interval: [{lo:.1f}, {hi:.1f}]")

# A more sophisticated interval for the same counts.
lo_kbn, hi_kbn = poisson_conf_interval(observed_counts, interval='kraft-burrows-nousek')
print(f"Kraft-Burrows-Nousek interval: [{lo_kbn:.1f}, {hi_kbn:.1f}]")

# Binomial interval for a success fraction.
successes = 15
trials = 20
lo_b, hi_b = binom_conf_interval(successes, trials, interval='wilson')
print(f"Success rate: {successes / trials:.2f}")
print(f"Wilson interval: [{lo_b:.3f}, {hi_b:.3f}]")

Optimal Histogram Binning

from astropy.stats import histogram, knuth_bin_width, bayesian_blocks

# Synthetic stellar magnitudes.
magnitudes = np.random.normal(18, 2, 1000)

# Derive bin edges from two data-driven rules.
# NOTE(review): some astropy versions return a (width, bins) tuple from
# knuth_bin_width(..., return_bins=True) — confirm before indexing.
bins_knuth = knuth_bin_width(magnitudes, return_bins=True)
bins_blocks = bayesian_blocks(magnitudes)

# Build histograms with each set of edges.
counts_auto, bins_auto = histogram(magnitudes, bins='blocks')
counts_knuth = np.histogram(magnitudes, bins=bins_knuth)[0]
counts_blocks = np.histogram(magnitudes, bins=bins_blocks)[0]

print(f"Automatic bins: {len(bins_auto)-1}")
print(f"Knuth bins: {len(bins_knuth)-1}")
print(f"Bayesian blocks: {len(bins_blocks)-1}")

Combining Multiple Statistical Methods

# Comprehensive statistical analysis pipeline
def analyze_data(data, name="Dataset"):
    """Print a comprehensive statistical summary of *data*.

    Combines classical statistics, robust (biweight) estimators,
    sigma-clipping outlier detection, and a bootstrap uncertainty
    estimate for the mean.

    Parameters
    ----------
    data : array-like
        One-dimensional numeric sample.
    name : str, optional
        Label used in the printed report header.
    """
    # BUG FIX: the original snippet used escaped quotes (\"\"\") for the
    # docstring and a doubled backslash (\\n) in the header f-string —
    # an escaping artifact that made the code invalid/incorrect Python.
    print(f"\n=== Analysis of {name} ===")
    
    # Basic statistics
    print(f"Sample size: {len(data)}")
    print(f"Mean: {np.mean(data):.3f}")
    print(f"Median: {np.median(data):.3f}")
    print(f"Std dev: {np.std(data):.3f}")
    
    # Robust statistics (resistant to outliers)
    robust_center = biweight_location(data)
    robust_scale = biweight_scale(data)
    print(f"Biweight location: {robust_center:.3f}")
    print(f"Biweight scale: {robust_scale:.3f}")
    
    # Fraction of points rejected by 3-sigma clipping
    clipped = sigma_clip(data, sigma=3)
    outlier_fraction = 1 - len(clipped.compressed()) / len(data)
    print(f"Outlier fraction (3σ): {outlier_fraction:.1%}")
    
    # Bootstrap uncertainty on the mean
    bootstrap_means = bootstrap(data, bootnum=1000, bootfunc=np.mean)
    bootstrap_uncertainty = np.std(bootstrap_means)
    print(f"Bootstrap uncertainty on mean: ±{bootstrap_uncertainty:.3f}")

# Apply to a clean sample and one contaminated with gross outliers
clean_data = np.random.normal(10, 2, 100)
contaminated_data = np.concatenate([clean_data, [50, -20, 30]])

analyze_data(clean_data, "Clean data")
analyze_data(contaminated_data, "Contaminated data")

Install with Tessl CLI

npx tessl i tessl/pypi-astropy

docs

configuration.md

constants.md

convolution.md

coordinates.md

cosmology.md

fits-io.md

index.md

modeling.md

nddata.md

samp.md

statistics.md

tables.md

time.md

timeseries.md

uncertainty.md

units-quantities.md

utils.md

visualization.md

wcs.md

tile.json