Tessl Tile for pypi/pandas@2.3.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

api-types.md configuration.md core-data-structures.md data-io.md data-manipulation.md data-types.md errors.md index.md plotting.md statistics-math.md time-series.md

docs/statistics-math.md

0
# Statistical and Mathematical Operations
1

2
Built-in statistical functions, mathematical operations, and data analysis utilities including descriptive statistics, correlation analysis, and numerical computations.
3

4
## Core Imports
5

6
```python
7
import pandas as pd
8
from pandas import cut, qcut, factorize, value_counts
9
```
10

11
## Capabilities
12

13
### Descriptive Statistics
14

15
Core statistical functions available on DataFrame and Series objects.
16

17
```python { .api }
18
# These are methods available on DataFrame and Series:
19

20
# Central tendency
21
def mean(axis=None, skipna=True, level=None, numeric_only=None):
22
    """Return the mean of the values over the requested axis."""
23

24
def median(axis=None, skipna=True, level=None, numeric_only=None):
25
    """Return the median of the values over the requested axis."""
26

27
def mode(axis=0, numeric_only=False, dropna=True):
28
    """Return the mode(s) of each element along the selected axis."""
29

30
# Measures of spread
31
def std(axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
32
    """Return sample standard deviation over requested axis."""
33

34
def var(axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
35
    """Return unbiased variance over requested axis."""
36

37
def sem(axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
38
    """Return unbiased standard error of the mean over requested axis."""
39

40
def mad(axis=None, skipna=True, level=None):
41
    """Return the mean absolute deviation of the values over the requested axis. Note: removed in pandas 2.0; compute (x - x.mean()).abs().mean() instead."""
42

43
# Distribution shape
44
def skew(axis=None, skipna=True, level=None, numeric_only=None):
45
    """Return unbiased skew over requested axis."""
46

47
def kurt(axis=None, skipna=True, level=None, numeric_only=None):
48
    """Return unbiased kurtosis over requested axis."""
49

50
def kurtosis(axis=None, skipna=True, level=None, numeric_only=None):
51
    """Return unbiased kurtosis over requested axis (alias for kurt)."""
52

53
# Extremes
54
def min(axis=None, skipna=True, level=None, numeric_only=None):
55
    """Return the minimum of the values over the requested axis."""
56

57
def max(axis=None, skipna=True, level=None, numeric_only=None):
58
    """Return the maximum of the values over the requested axis."""
59

60
def idxmin(axis=0, skipna=True):
61
    """Return index of first occurrence of minimum over requested axis."""
62

63
def idxmax(axis=0, skipna=True):
64
    """Return index of first occurrence of maximum over requested axis."""
65

66
# Aggregation
67
def sum(axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
68
    """Return the sum of the values over the requested axis."""
69

70
def prod(axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
71
    """Return the product of the values over the requested axis."""
72

73
def product(axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
74
    """Return the product of the values over the requested axis (alias for prod)."""
75

76
def count(axis=0, level=None, numeric_only=False):
77
    """Count non-NA cells for each column or row."""
78

79
def nunique(axis=0, dropna=True):
80
    """Count number of distinct elements in specified axis."""
81

82
# Quantiles and percentiles
83
def quantile(q=0.5, axis=0, numeric_only=True, interpolation='linear', method='single'):
84
    """Return values at the given quantile over requested axis."""
85

86
def describe(percentiles=None, include=None, exclude=None):
87
    """Generate descriptive statistics."""
88

89
# Cumulative operations
90
def cumsum(axis=None, skipna=True):
91
    """Return cumulative sum over a DataFrame or Series axis."""
92

93
def cumprod(axis=None, skipna=True):
94
    """Return cumulative product over a DataFrame or Series axis."""
95

96
def cummax(axis=None, skipna=True):
97
    """Return cumulative maximum over a DataFrame or Series axis."""
98

99
def cummin(axis=None, skipna=True):
100
    """Return cumulative minimum over a DataFrame or Series axis."""
101
```
102

103
### Correlation and Covariance
104

105
Functions to compute relationships between variables.
106

107
```python { .api }
108
# These are methods available on DataFrame and Series:
109

110
def corr(method='pearson', min_periods=1, numeric_only=True):
111
    """
112
    Compute pairwise correlation of columns.
113
    
114
    Parameters:
115
    - method: str, correlation method ('pearson', 'kendall', 'spearman')
116
    - min_periods: int, minimum number of observations for valid result
117
    - numeric_only: bool, include only numeric columns
118
    
119
    Returns:
120
    DataFrame, correlation matrix
121
    """
122

123
def cov(min_periods=None, ddof=1, numeric_only=True):
124
    """
125
    Compute pairwise covariance of columns.
126
    
127
    Parameters:
128
    - min_periods: int, minimum number of observations for valid result
129
    - ddof: int, delta degrees of freedom
130
    - numeric_only: bool, include only numeric columns
131
    
132
    Returns:
133
    DataFrame, covariance matrix
134
    """
135

136
def corrwith(other, axis=0, drop=False, method='pearson', numeric_only=True):
137
    """
138
    Compute pairwise correlation.
139
    
140
    Parameters:
141
    - other: DataFrame, Series, or array-like
142
    - axis: int, axis to use (0 or 1)
143
    - drop: bool, drop missing indices from result
144
    - method: str, correlation method ('pearson', 'kendall', 'spearman')
145
    - numeric_only: bool, include only numeric columns
146
    
147
    Returns:
148
    Series, correlations
149
    """
150
```
151

152
### Mathematical Operations
153

154
Element-wise mathematical functions and operations.
155

156
```python { .api }
157
# These are methods available on DataFrame and Series:
158

159
def abs():
160
    """Return a Series/DataFrame with absolute numeric value of each element."""
161

162
def round(decimals=0):
163
    """Round each value to the given number of decimals."""
164

165
def clip(lower=None, upper=None, axis=None, inplace=False):
166
    """Trim values at input threshold(s)."""
167

168
def rank(axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False):
169
    """
170
    Compute numerical data ranks along axis.
171
    
172
    Parameters:
173
    - axis: int, axis to rank along
174
    - method: str, how to rank ('average', 'min', 'max', 'first', 'dense')
175
    - numeric_only: bool, include only numeric columns
176
    - na_option: str, how to rank NaN values ('keep', 'top', 'bottom')
177
    - ascending: bool, rank in ascending order
178
    - pct: bool, return percentile rank
179
    
180
    Returns:
181
    same type as caller, data ranks
182
    """
183

184
# Exponential and logarithmic functions (exp, log, log10, log2 and sqrt are applied as NumPy ufuncs, e.g. np.exp(df); pow is a DataFrame/Series method)
185
def exp():
186
    """Calculate exponential of elements."""
187

188
def log():
189
    """Calculate natural logarithm of elements."""
190

191
def log10():
192
    """Calculate base-10 logarithm of elements."""
193

194
def log2():
195
    """Calculate base-2 logarithm of elements."""
196

197
def sqrt():
198
    """Calculate square root of elements."""
199

200
def pow(other):
201
    """Calculate exponential power of elements."""
202

203
# Trigonometric functions (applied as NumPy ufuncs, e.g. np.sin(df), rather than called as methods)
204
def sin():
205
    """Calculate sine of elements."""
206

207
def cos():
208
    """Calculate cosine of elements."""
209

210
def tan():
211
    """Calculate tangent of elements."""
212

213
def arcsin():
214
    """Calculate inverse sine of elements."""
215

216
def arccos():
217
    """Calculate inverse cosine of elements."""
218

219
def arctan():
220
    """Calculate inverse tangent of elements."""
221
```
222

223
### Comparison Operations
224

225
Functions for element-wise comparison, range checks, and membership tests.
226

227
```python { .api }
228
# These are methods available on DataFrame and Series:
229

230
def eq(other, axis='columns', level=None):
231
    """Get equal to of dataframe and other, element-wise (binary operator ==)."""
232

233
def ne(other, axis='columns', level=None):
234
    """Get not equal to of dataframe and other, element-wise (binary operator !=)."""
235

236
def lt(other, axis='columns', level=None):
237
    """Get less than of dataframe and other, element-wise (binary operator <)."""
238

239
def le(other, axis='columns', level=None):
240
    """Get less than or equal to of dataframe and other, element-wise (binary operator <=)."""
241

242
def gt(other, axis='columns', level=None):
243
    """Get greater than of dataframe and other, element-wise (binary operator >)."""
244

245
def ge(other, axis='columns', level=None):
246
    """Get greater than or equal to of dataframe and other, element-wise (binary operator >=)."""
247

248
def between(left, right, inclusive='both'):
249
    """
250
    Return boolean Series equivalent to left <= series <= right.
251
    
252
    Parameters:
253
    - left: scalar or list-like, left boundary
254
    - right: scalar or list-like, right boundary
255
    - inclusive: str, include boundaries ('both', 'neither', 'left', 'right')
256
    
257
    Returns:
258
    Series, boolean values
259
    """
260

261
def isin(values):
262
    """
263
    Whether each element in the Series/DataFrame is contained in values.
264
    
265
    Parameters:
266
    - values: set or list-like, sequence of values to test
267
    
268
    Returns:
269
    Series/DataFrame of bools, boolean values
270
    """
271
```
272

273
### Top-Level Statistical Functions
274

275
Standalone statistical functions that operate on array-like data.
276

277
```python { .api }
278
def cut(x, bins, right=True, labels=None, retbins=False, precision=3, include_lowest=False, duplicates='raise', ordered=True):
279
    """
280
    Bin values into discrete intervals.
281
    
282
    Parameters:
283
    - x: array-like, input array to be binned
284
    - bins: int, sequence of scalars, or IntervalIndex
285
    - right: bool, whether bins include rightmost edge
286
    - labels: array or bool, labels for returned bins
287
    - retbins: bool, return bins
288
    - precision: int, precision for bin labels
289
    - include_lowest: bool, whether first interval is left-inclusive
290
    - duplicates: str, behavior for non-unique bin edges ('raise' or 'drop')
291
    - ordered: bool, whether returned Categorical is ordered
292
    
293
    Returns:
294
    Categorical, Series, or array
295
    """
296

297
def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
298
    """
299
    Quantile-based discretization function.
300
    
301
    Parameters:
302
    - x: array-like, input array to be binned
303
    - q: int or list-like of float, quantiles to compute
304
    - labels: array or bool, labels for returned bins
305
    - retbins: bool, return (bins, labels)
306
    - precision: int, precision for bin labels
307
    - duplicates: str, behavior for non-unique bin edges ('raise' or 'drop')
308
    
309
    Returns:
310
    Categorical, Series, or array
311
    """
312

313
def factorize(values, sort=False, na_sentinel=-1, use_na_sentinel=True, size_hint=None):
314
    """
315
    Encode the object as an enumerated type or categorical variable.
316
    
317
    Parameters:
318
    - values: sequence, 1-d array-like
319
    - sort: bool, sort uniques
320
    - na_sentinel: int, value for missing values (removed in pandas 2.0; use use_na_sentinel instead)
321
    - use_na_sentinel: bool, use na_sentinel for missing values
322
    - size_hint: int, hint for hashtable size
323
    
324
    Returns:
325
    tuple of (codes, uniques)
326
    """
327

328
def unique(values):
329
    """
330
    Return unique values based on a hash table.
331
    
332
    Parameters:
333
    - values: 1d array-like
334
    
335
    Returns:
336
    ndarray or ExtensionArray
337
    """
338

339
def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True):
340
    """
341
    Compute a histogram of the 1D array values.
342
    
343
    Parameters:
344
    - values: 1d array-like
345
    - sort: bool, sort by values
346
    - ascending: bool, sort in ascending order
347
    - normalize: bool, return relative frequencies
348
    - bins: int, group into half-open bins
349
    - dropna: bool, exclude NaN values
350
    
351
    Returns:
352
    Series
353
    """
354
```
355

356
### Numeric Conversion
357

358
Functions for converting data to numeric types.
359

360
```python { .api }
361
def to_numeric(arg, errors='raise', downcast=None):
362
    """
363
    Convert argument to a numeric type.
364
    
365
    Parameters:
366
    - arg: scalar, list, tuple, 1-d array, or Series
367
    - errors: str, error handling ('raise', 'coerce', 'ignore')
368
    - downcast: str, downcast resulting data ('integer', 'signed', 'unsigned', 'float')
369
    
370
    Returns:
371
    numeric, converted values
372
    """
373
```
374

375
### Groupby Statistical Operations
376

377
Statistical methods available on GroupBy objects.
378

379
```python { .api }
380
# Available on DataFrameGroupBy and SeriesGroupBy objects:
381

382
class GroupBy:
383
    """GroupBy object with statistical methods."""
384
    
385
    def mean(self, numeric_only=True, engine=None, engine_kwargs=None):
386
        """Compute mean of groups."""
387
    
388
    def median(self, numeric_only=True):
389
        """Compute median of groups."""
390
    
391
    def sum(self, numeric_only=True, min_count=0, engine=None, engine_kwargs=None):
392
        """Compute sum of groups."""
393
    
394
    def min(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None):
395
        """Compute min of groups."""
396
    
397
    def max(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None):
398
        """Compute max of groups."""
399
    
400
    def std(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=True):
401
        """Compute standard deviation of groups."""
402
    
403
    def var(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=True):
404
        """Compute variance of groups."""
405
    
406
    def count(self):
407
        """Compute count of group."""
408
    
409
    def size(self):
410
        """Compute group sizes."""
411
    
412
    def nunique(self, dropna=True):
413
        """Count number of unique values in each group."""
414
    
415
    def quantile(self, q=0.5, interpolation='linear', numeric_only=True):
416
        """Return values at given quantile for each group."""
417
    
418
    def describe(self, percentiles=None, include=None, exclude=None):
419
        """Generate descriptive statistics for each group."""
420
    
421
    def sem(self, ddof=1, numeric_only=True):
422
        """Compute standard error of the mean for each group."""
423
    
424
    def rank(self, method='average', ascending=True, na_option='keep', pct=False, axis=0):
425
        """Provide the rank of values within each group."""
426
    
427
    def cumcount(self, ascending=True):
428
        """Number each item in each group from 0 to the length of that group - 1."""
429
    
430
    def cumsum(self, axis=0, **kwargs):
431
        """Cumulative sum for each group."""
432
    
433
    def cumprod(self, axis=0, **kwargs):
434
        """Cumulative product for each group."""
435
    
436
    def cummax(self, axis=0, numeric_only=False, **kwargs):
437
        """Cumulative max for each group."""
438
    
439
    def cummin(self, axis=0, numeric_only=False, **kwargs):
440
        """Cumulative min for each group."""
441
    
442
    def skew(self, axis=0, skipna=True, numeric_only=True, **kwargs):
443
        """Return unbiased skew within groups."""
444
    
445
    def kurt(self, axis=0, skipna=True, numeric_only=True, **kwargs):
446
        """Return unbiased kurtosis within groups."""
447
    
448
    def mad(self, **kwargs):
449
        """Return mean absolute deviation within groups. Note: removed in pandas 2.0."""
450
    
451
    def prod(self, numeric_only=True, min_count=0):
452
        """Compute product of group values."""
453
    
454
    def ohlc(self):
455
        """Compute open, high, low and close values of a group."""
456
    
457
    def first(self, numeric_only=False, min_count=-1):
458
        """Return first value within each group."""
459
    
460
    def last(self, numeric_only=False, min_count=-1):
461
        """Return last value within each group."""
462
    
463
    def nth(self, n, dropna=None):
464
        """Take nth value, or subset if n is a list."""
465
    
466
    def idxmax(self, axis=0, skipna=True):
467
        """Return index of maximum value within each group."""
468
    
469
    def idxmin(self, axis=0, skipna=True):
470
        """Return index of minimum value within each group."""
471
```
472

473
### Advanced Statistical Functions
474

475
More specialized statistical operations and utilities.
476

477
```python { .api }
478
# These are methods available on DataFrame and Series:
479

480
def pct_change(periods=1, fill_method='pad', limit=None, freq=None):
481
    """
482
    Percentage change between current and prior element.
483
    
484
    Parameters:
485
    - periods: int, periods to shift for forming percent change
486
    - fill_method: str, how to handle NaNs before computing percent changes
487
    - limit: int, number of consecutive NaNs to fill before stopping
488
    - freq: DateOffset, Timedelta or str, increment to use for time rule
489
    
490
    Returns:
491
    Series/DataFrame, percentage changes
492
    """
493

494
def diff(periods=1, axis=0):
495
    """
496
    First discrete difference of element.
497
    
498
    Parameters:
499
    - periods: int, periods to shift for calculating difference
500
    - axis: int, axis to shift along
501
    
502
    Returns:
503
    Series/DataFrame, differences
504
    """
505

506
def shift(periods=1, freq=None, axis=0, fill_value=None):
507
    """
508
    Shift index by desired number of periods.
509
    
510
    Parameters:
511
    - periods: int, number of periods to shift
512
    - freq: DateOffset, Timedelta, or str, offset to use from time series API
513
    - axis: int, axis to shift
514
    - fill_value: object, scalar value to use for missing values
515
    
516
    Returns:
517
    Series/DataFrame, shifted data
518
    """
519

520
def expanding(min_periods=1, center=None, axis=0, method='single'):
521
    """
522
    Provide expanding window calculations.
523
    
524
    Parameters:
525
    - min_periods: int, minimum number of observations in window
526
    - center: bool, whether result should be centered
527
    - axis: int, axis along which to slide window
528
    - method: str, execution method ('single' or 'table')
529
    
530
    Returns:
531
    Expanding object
532
    """
533

534
def rolling(window, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None, method='single'):
535
    """
536
    Provide rolling window calculations.
537
    
538
    Parameters:
539
    - window: int, size of moving window
540
    - min_periods: int, minimum number of observations in window  
541
    - center: bool, whether result should be centered
542
    - win_type: str, window type
543
    - on: str, datetime-like column for DatetimeIndex
544
    - axis: int, axis along which to slide window
545
    - closed: str, make interval closed on 'right', 'left', 'both' or 'neither'
546
    - method: str, execution method ('single' or 'table')
547
    
548
    Returns:
549
    Rolling object
550
    """
551

552
def ewm(com=None, span=None, halflife=None, alpha=None, min_periods=0, adjust=True, ignore_na=False, axis=0, times=None, method='single'):
553
    """
554
    Provide exponentially weighted (EW) calculations.
555
    
556
    Parameters:
557
    - com: float, center of mass
558
    - span: float, span
559
    - halflife: float, decay in terms of half-life
560
    - alpha: float, smoothing factor
561
    - min_periods: int, minimum number of observations
562
    - adjust: bool, divide by decaying adjustment factor
563
    - ignore_na: bool, ignore missing values
564
    - axis: int, axis along which to calculate
565
    - times: array-like, times corresponding to observations
566
    - method: str, execution method ('single' or 'table')
567
    
568
    Returns:
569
    ExponentialMovingWindow object
570
    """
571
```
572

573
## Types
574

575
```python { .api }
576
# Statistical method options
577
StatMethod = Literal['average', 'min', 'max', 'first', 'dense']
578
CorrelationMethod = Literal['pearson', 'kendall', 'spearman']
579
InterpolationMethod = Literal['linear', 'lower', 'higher', 'midpoint', 'nearest']
580
QuantileInterpolation = Literal['linear', 'lower', 'higher', 'midpoint', 'nearest']
581

582
# Ranking options
583
RankMethod = Literal['average', 'min', 'max', 'first', 'dense']
584
RankNaOption = Literal['keep', 'top', 'bottom']
585

586
# Numeric conversion options
587
NumericErrors = Literal['raise', 'coerce', 'ignore']
588
DowncastOptions = Literal['integer', 'signed', 'unsigned', 'float']
589

590
# Binning options
591
BinningDuplicates = Literal['raise', 'drop']
592
IntervalInclusive = Literal['both', 'neither', 'left', 'right']
593

594
# Window calculation options
595
WindowMethod = Literal['single', 'table']
596
WindowType = Literal[
597
    'boxcar', 'triang', 'blackman', 'hamming', 'bartlett', 'parzen',
598
    'bohman', 'blackmanharris', 'nuttall', 'barthann', 'kaiser',
599
    'gaussian', 'general_gaussian', 'slepian', 'exponential'
600
]
601

602
# Percentile inclusion options
603
PercentileInclusive = Literal['both', 'neither', 'left', 'right']
604

605
# Axis specification
606
AxisOption = Union[int, str, None]
607
```

Version

Tile

Files

docs/statistics-math.md

docs/statistics-math.md