Tessl Tile for pypi/astropy@6.1.0

or run

npx @tessl/cli init

statistics.mddocs/

0
# Statistics
1

2
Statistical tools specifically designed for astronomical data analysis, including sigma clipping, robust statistics, and confidence intervals.
3

4
## Capabilities
5

6
### Sigma Clipping and Outlier Rejection
7

8
Iterative sigma clipping for robust statistical analysis in the presence of outliers.
9

10
```python { .api }
11
def sigma_clip(data, sigma=3, sigma_lower=None, sigma_upper=None, maxiters=5, 
12
               cenfunc='median', stdfunc='std', axis=None, masked=True, 
13
               return_bounds=False, copy=True, invalid='mask'):
14
    """
15
    Perform sigma clipping on data.
16
    
17
    Parameters:
18
    - data: input data array
19
    - sigma: number of standard deviations for clipping
20
    - sigma_lower: lower sigma threshold (default: sigma)
21
    - sigma_upper: upper sigma threshold (default: sigma)
22
    - maxiters: maximum number of clipping iterations
23
    - cenfunc: function for computing center ('median', 'mean', or callable)
24
    - stdfunc: function for computing spread ('std', 'mad_std', or callable)
25
    - axis: axis along which to clip
26
    - masked: return masked array
27
    - return_bounds: also return clipping bounds
28
    - copy: copy input data
29
    - invalid: how to handle invalid values
30
    
31
    Returns:
32
    MaskedArray or tuple: clipped data, optionally with bounds
33
    """
34

35
class SigmaClip:
36
    """
37
    Sigma clipping class for reusable clipping operations.
38
    
39
    Parameters:
40
    - sigma: sigma threshold
41
    - sigma_lower: lower sigma threshold  
42
    - sigma_upper: upper sigma threshold
43
    - maxiters: maximum iterations
44
    - cenfunc: center function
45
    - stdfunc: spread function
46
    """
47
    def __init__(self, sigma=3, sigma_lower=None, sigma_upper=None, maxiters=5,
48
                 cenfunc='median', stdfunc='std'): ...
49
    
50
    def __call__(self, data, axis=None, masked=True, return_bounds=False, copy=True):
51
        """Apply sigma clipping to data."""
52
```
53

54
### Robust Statistics
55

56
Robust statistical estimators that are resistant to outliers and non-Gaussian distributions.
57

58
```python { .api }
59
def mad_std(data, axis=None, func=None, ignore_nan=False):
60
    """
61
    Calculate median absolute deviation scaled to standard deviation.
62
    
63
    Parameters:
64
    - data: input data
65
    - axis: axis along which to compute
66
    - func: function to apply before MAD calculation
67
    - ignore_nan: ignore NaN values
68
    
69
    Returns:
70
    ndarray or scalar: MAD-based standard deviation estimate
71
    """
72

73
def biweight_location(data, c=6.0, M=None, axis=None, modify_sample_size=False):
74
    """
75
    Compute biweight location (robust estimate of central tendency).
76
    
77
    Parameters:
78
    - data: input data
79
    - c: tuning constant
80
    - M: initial estimate of location (default: median)
81
    - axis: axis along which to compute
82
    - modify_sample_size: modify sample size for small samples
83
    
84
    Returns:
85
    ndarray or scalar: biweight location estimate
86
    """
87

88
def biweight_scale(data, c=9.0, M=None, axis=None, modify_sample_size=False):
89
    """
90
    Compute biweight scale (robust estimate of scale).
91
    
92
    Parameters:
93
    - data: input data
94
    - c: tuning constant
95
    - M: location estimate (default: median)
96
    - axis: axis along which to compute
97
    - modify_sample_size: modify sample size for small samples
98
    
99
    Returns:
100
    ndarray or scalar: biweight scale estimate
101
    """
102

103
def median_absolute_deviation(data, axis=None, func=None, ignore_nan=False):
104
    """
105
    Calculate median absolute deviation.
106
    
107
    Parameters:
108
    - data: input data
109
    - axis: axis along which to compute
110
    - func: function to apply before MAD calculation
111
    - ignore_nan: ignore NaN values
112
    
113
    Returns:
114
    ndarray or scalar: median absolute deviation
115
    """
116
```
117

118
### Bootstrap and Jackknife Resampling
119

120
Statistical resampling methods for uncertainty estimation and hypothesis testing.
121

122
```python { .api }
123
def bootstrap(data, bootnum=100, samples=None, bootfunc=None):
124
    """
125
    Perform bootstrap resampling.
126
    
127
    Parameters:
128
    - data: input data array
129
    - bootnum: number of bootstrap samples
130
    - samples: sample size for each bootstrap (default: len(data))
131
    - bootfunc: function to apply to each bootstrap sample
132
    
133
    Returns:
134
    ndarray: bootstrap results
135
    """
136

137
def jackknife_resampling(data):
138
    """
139
    Perform jackknife resampling (leave-one-out).
140
    
141
    Parameters:
142
    - data: input data array
143
    
144
    Returns:
145
    ndarray: jackknife resampled data
146
    """
147

148
def jackknife_stats(data, statistic, confidence_level=0.95):
149
    """
150
    Compute jackknife statistics and confidence intervals.
151
    
152
    Parameters:
153
    - data: input data
154
    - statistic: function to compute statistic
155
    - confidence_level: confidence level for intervals
156
    
157
    Returns:
158
    tuple: (statistic, standard error, confidence interval)
159
    """
160

161
class JackknifeStat:
162
    """
163
    Jackknife statistics calculator.
164
    
165
    Parameters:
166
    - statistic: function to compute statistic
167
    - confidence_level: confidence level
168
    """
169
    def __init__(self, statistic, confidence_level=0.95): ...
170
    
171
    def __call__(self, data):
172
        """Compute jackknife statistics."""
173
```
174

175
### Circular Statistics
176

177
Statistical functions for circular data (angles, phases, directions).
178

179
```python { .api }
180
def circmean(data, high=2*np.pi, low=0, axis=None, nan_policy='propagate'):
181
    """
182
    Compute circular mean of angular data.
183
    
184
    Parameters:
185
    - data: angular data
186
    - high: upper bound of angular range
187
    - low: lower bound of angular range
188
    - axis: axis along which to compute
189
    - nan_policy: how to handle NaN values
190
    
191
    Returns:
192
    ndarray or scalar: circular mean
193
    """
194

195
def circstd(data, high=2*np.pi, low=0, axis=None, nan_policy='propagate'):
196
    """
197
    Compute circular standard deviation.
198
    
199
    Parameters:
200
    - data: angular data
201
    - high: upper bound of angular range
202
    - low: lower bound of angular range
203
    - axis: axis along which to compute
204
    - nan_policy: how to handle NaN values
205
    
206
    Returns:
207
    ndarray or scalar: circular standard deviation
208
    """
209

210
def circvar(data, high=2*np.pi, low=0, axis=None, nan_policy='propagate'):
211
    """
212
    Compute circular variance.
213
    
214
    Parameters:
215
    - data: angular data
216
    - high: upper bound of angular range
217
    - low: lower bound of angular range
218
    - axis: axis along which to compute
219
    - nan_policy: how to handle NaN values
220
    
221
    Returns:
222
    ndarray or scalar: circular variance
223
    """
224
```
225

226
### Confidence Intervals
227

228
Statistical confidence interval calculations for various distributions commonly used in astronomy.
229

230
```python { .api }
231
def poisson_conf_interval(n, interval='root-n', confidence_level=0.6826894921370859, 
232
                         background=0, conflevel=None):
233
    """
234
    Confidence interval for Poisson-distributed data.
235
    
236
    Parameters:
237
    - n: observed counts
238
    - interval: method ('root-n', 'root-n-0', 'pearson', 'sherpagehrels', 'kraft-burrows-nousek')
239
    - confidence_level: confidence level
240
    - background: background counts
241
    - conflevel: deprecated parameter
242
    
243
    Returns:
244
    tuple: (lower_bound, upper_bound)
245
    """
246

247
def binom_conf_interval(k, n, confidence_level=0.6826894921370859, interval='wilson'):
248
    """
249
    Confidence interval for binomial distribution.
250
    
251
    Parameters:
252
    - k: number of successes
253
    - n: number of trials
254
    - confidence_level: confidence level
255
    - interval: method ('wilson', 'jeffreys', 'flat', 'wald')
256
    
257
    Returns:
258
    tuple: (lower_bound, upper_bound)
259
    """
260

261
def median_conf_interval(data, confidence_level=0.6826894921370859, axis=None):
262
    """
263
    Confidence interval for median.
264
    
265
    Parameters:
266
    - data: input data
267
    - confidence_level: confidence level
268
    - axis: axis along which to compute
269
    
270
    Returns:
271
    tuple: (lower_bound, upper_bound)
272
    """
273
```
274

275
### Histogram and Density Estimation
276

277
Advanced histogram functions with automatic binning and density estimation.
278

279
```python { .api }
280
def histogram(a, bins='blocks', range=None, **kwargs):
281
    """
282
    Enhanced histogram with automatic binning algorithms.
283
    
284
    Parameters:
285
    - a: input data
286
    - bins: binning method ('blocks', 'knuth', 'scott', 'freedman', int, or array)
287
    - range: range of histogram
288
    - **kwargs: additional arguments
289
    
290
    Returns:
291
    tuple: (counts, bin_edges)
292
    """
293

294
def knuth_bin_width(data, return_bins=False, quiet=True):
295
    """
296
    Optimal histogram bin width using Knuth's rule.
297
    
298
    Parameters:
299
    - data: input data
300
    - return_bins: return bin edges instead of width
301
    - quiet: suppress output
302
    
303
    Returns:
304
    float or ndarray: bin width or bin edges
305
    """
306

307
def scott_bin_width(data, return_bins=False):
308
    """
309
    Optimal histogram bin width using Scott's rule.
310
    
311
    Parameters:
312
    - data: input data
313
    - return_bins: return bin edges instead of width
314
    
315
    Returns:
316
    float or ndarray: bin width or bin edges
317
    """
318

319
def freedman_bin_width(data, return_bins=False):
320
    """
321
    Optimal histogram bin width using Freedman-Diaconis rule.
322
    
323
    Parameters:
324
    - data: input data
325
    - return_bins: return bin edges instead of width
326
    
327
    Returns:
328
    float or ndarray: bin width or bin edges
329
    """
330

331
def bayesian_blocks(t, x=None, sigma=None, fitness='events', **kwargs):
332
    """
333
    Bayesian blocks algorithm for optimal binning.
334
    
335
    Parameters:
336
    - t: data values or time stamps
337
    - x: data values (if t represents time)
338
    - sigma: error on data values
339
    - fitness: fitness function ('events', 'regular_events', 'measures')
340
    
341
    Returns:
342
    ndarray: optimal bin edges
343
    """
344
```
345

346
## Usage Examples
347

348
### Sigma Clipping for Outlier Rejection
349

350
```python
351
from astropy.stats import sigma_clip, SigmaClip
352
import numpy as np
353

354
# Generate data with outliers
355
np.random.seed(42)
356
data = np.random.normal(100, 15, 100)
357
data[::10] = 200  # Add outliers
358

359
# Basic sigma clipping
360
clipped_data = sigma_clip(data, sigma=3, maxiters=5)
361
print(f"Original data: {len(data)} points")
362
print(f"After clipping: {len(clipped_data.compressed())} points")
363
print(f"Mean before: {np.mean(data):.2f}")
364
print(f"Mean after: {np.mean(clipped_data.compressed()):.2f}")
365

366
# Reusable sigma clipper
367
clipper = SigmaClip(sigma=2.5, maxiters=10)
368
clipped_data2 = clipper(data)
369
```
370

371
### Robust Statistics
372

373
```python
374
from astropy.stats import mad_std, biweight_location, biweight_scale
375

376
# Robust statistics on data with outliers
377
robust_center = biweight_location(data)
378
robust_scale = biweight_scale(data)
379
mad_scale = mad_std(data)
380

381
print(f"Standard mean: {np.mean(data):.2f}")
382
print(f"Biweight location: {robust_center:.2f}")
383
print(f"Standard std: {np.std(data):.2f}")
384
print(f"Biweight scale: {robust_scale:.2f}")
385
print(f"MAD std: {mad_scale:.2f}")
386
```
387

388
### Bootstrap Uncertainty Estimation
389

390
```python
391
from astropy.stats import bootstrap
392

393
# Define statistic of interest
394
def ratio_stat(data):
395
    return np.mean(data) / np.std(data)
396

397
# Original statistic
398
original_stat = ratio_stat(data)
399

400
# Bootstrap to estimate uncertainty
401
bootstrap_results = bootstrap(data, bootnum=1000, bootfunc=ratio_stat)
402
bootstrap_std = np.std(bootstrap_results)
403
confidence_interval = np.percentile(bootstrap_results, [16, 84])
404

405
print(f"Original statistic: {original_stat:.3f}")
406
print(f"Bootstrap uncertainty: ±{bootstrap_std:.3f}")
407
print(f"68% confidence interval: [{confidence_interval[0]:.3f}, {confidence_interval[1]:.3f}]")
408
```
409

410
### Circular Statistics
411

412
```python
413
from astropy.stats import circmean, circstd
414
import astropy.units as u
415

416
# Angular data in radians
417
angles = np.array([0.1, 0.3, 6.2, 6.1, 0.2, 0.4])  # Mix of small and ~2π angles
418

419
# Circular statistics
420
circular_mean = circmean(angles)
421
circular_std = circstd(angles)
422

423
print(f"Linear mean: {np.mean(angles):.3f} rad")
424
print(f"Circular mean: {circular_mean:.3f} rad")  
425
print(f"Linear std: {np.std(angles):.3f} rad")
426
print(f"Circular std: {circular_std:.3f} rad")
427

428
# Convert to degrees
429
print(f"Circular mean: {circular_mean * 180/np.pi:.1f} degrees")
430
```
431

432
### Confidence Intervals
433

434
```python
435
from astropy.stats import poisson_conf_interval, binom_conf_interval
436

437
# Poisson confidence intervals (common in photon counting)
438
observed_counts = 25
439
lower, upper = poisson_conf_interval(observed_counts, interval='root-n')
440
print(f"Observed counts: {observed_counts}")
441
print(f"Root-n interval: [{lower:.1f}, {upper:.1f}]")
442

443
# More sophisticated Poisson interval
444
lower2, upper2 = poisson_conf_interval(observed_counts, interval='kraft-burrows-nousek')
445
print(f"Kraft-Burrows-Nousek interval: [{lower2:.1f}, {upper2:.1f}]")
446

447
# Binomial confidence intervals
448
successes = 15
449
trials = 20
450
lower_b, upper_b = binom_conf_interval(successes, trials, interval='wilson')
451
success_rate = successes / trials
452
print(f"Success rate: {success_rate:.2f}")
453
print(f"Wilson interval: [{lower_b:.3f}, {upper_b:.3f}]")
454
```
455

456
### Optimal Histogram Binning
457

458
```python
459
from astropy.stats import histogram, knuth_bin_width, bayesian_blocks
460

461
# Generate astronomical data (e.g., stellar magnitudes)
462
magnitudes = np.random.normal(18, 2, 1000)
463

464
# Compare different binning methods
465
bins_knuth = knuth_bin_width(magnitudes, return_bins=True)
466
bins_blocks = bayesian_blocks(magnitudes)
467

468
# Create histograms
469
counts_auto, bins_auto = histogram(magnitudes, bins='blocks')
470
counts_knuth = np.histogram(magnitudes, bins=bins_knuth)[0]
471
counts_blocks = np.histogram(magnitudes, bins=bins_blocks)[0]
472

473
print(f"Automatic bins: {len(bins_auto)-1}")
474
print(f"Knuth bins: {len(bins_knuth)-1}")
475
print(f"Bayesian blocks: {len(bins_blocks)-1}")
476
```
477

478
### Combining Multiple Statistical Methods
479

480
```python
481
# Comprehensive statistical analysis pipeline
482
def analyze_data(data, name="Dataset"):
483
    \"\"\"Comprehensive statistical analysis.\"\"\"
484
    print(f"\\n=== Analysis of {name} ===")
485
    
486
    # Basic statistics
487
    print(f"Sample size: {len(data)}")
488
    print(f"Mean: {np.mean(data):.3f}")
489
    print(f"Median: {np.median(data):.3f}")
490
    print(f"Std dev: {np.std(data):.3f}")
491
    
492
    # Robust statistics
493
    robust_center = biweight_location(data)
494
    robust_scale = biweight_scale(data)
495
    print(f"Biweight location: {robust_center:.3f}")
496
    print(f"Biweight scale: {robust_scale:.3f}")
497
    
498
    # Sigma clipping
499
    clipped = sigma_clip(data, sigma=3)
500
    outlier_fraction = 1 - len(clipped.compressed()) / len(data)
501
    print(f"Outlier fraction (3σ): {outlier_fraction:.1%}")
502
    
503
    # Bootstrap uncertainty on mean
504
    bootstrap_means = bootstrap(data, bootnum=1000, bootfunc=np.mean)
505
    bootstrap_uncertainty = np.std(bootstrap_means)
506
    print(f"Bootstrap uncertainty on mean: ±{bootstrap_uncertainty:.3f}")
507

508
# Apply to different datasets
509
clean_data = np.random.normal(10, 2, 100)
510
contaminated_data = np.concatenate([clean_data, [50, -20, 30]])
511

512
analyze_data(clean_data, "Clean data")
513
analyze_data(contaminated_data, "Contaminated data")
514
```

Version

Tile

Files

statistics.mddocs/

Version

Tile

Files

statistics.md.css-3qkkll{font-size:var(--chakra-font-sizes-sm);font-weight:var(--chakra-font-weights-normal);color:var(--chakra-colors-gray-300);}docs/

statistics.mddocs/