CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pandas

Powerful data structures for data analysis, time series, and statistics

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/statistics-math.md

Statistical and Mathematical Operations

Built-in statistical functions, mathematical operations, and data analysis utilities including descriptive statistics, correlation analysis, and numerical computations.

Core Imports

import pandas as pd
from pandas import cut, qcut, factorize, value_counts

Capabilities

Descriptive Statistics

Core statistical functions available on DataFrame and Series objects.

# These are methods available on DataFrame and Series:

# Central tendency
def mean(axis=None, skipna=True, level=None, numeric_only=None):
    """Return the mean of the values over the requested axis."""

def median(axis=None, skipna=True, level=None, numeric_only=None):
    """Return the median of the values over the requested axis."""

def mode(axis=0, numeric_only=False, dropna=True):
    """Return the mode(s) of each element along the selected axis."""

# Measures of spread
def std(axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
    """Return sample standard deviation over requested axis."""

def var(axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
    """Return unbiased variance over requested axis."""

def sem(axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
    """Return unbiased standard error of the mean over requested axis."""

def mad(axis=None, skipna=True, level=None):
    """Return the mean absolute deviation of the values over the requested axis.

    Note: deprecated since pandas 1.5 and removed in pandas 2.0; compute
    (df - df.mean()).abs().mean() instead.
    """

# Distribution shape
def skew(axis=None, skipna=True, level=None, numeric_only=None):
    """Return unbiased skew over requested axis."""

def kurt(axis=None, skipna=True, level=None, numeric_only=None):
    """Return unbiased kurtosis over requested axis."""

def kurtosis(axis=None, skipna=True, level=None, numeric_only=None):
    """Return unbiased kurtosis over requested axis (alias for kurt)."""

# Extremes
def min(axis=None, skipna=True, level=None, numeric_only=None):
    """Return the minimum of the values over the requested axis."""

def max(axis=None, skipna=True, level=None, numeric_only=None):
    """Return the maximum of the values over the requested axis."""

def idxmin(axis=0, skipna=True):
    """Return index of first occurrence of minimum over requested axis."""

def idxmax(axis=0, skipna=True):
    """Return index of first occurrence of maximum over requested axis."""

# Aggregation
def sum(axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
    """Return the sum of the values over the requested axis."""

def prod(axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
    """Return the product of the values over the requested axis."""

def product(axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
    """Return the product of the values over the requested axis (alias for prod)."""

def count(axis=0, level=None, numeric_only=False):
    """Count non-NA cells for each column or row."""

def nunique(axis=0, dropna=True):
    """Count number of distinct elements in specified axis."""

# Quantiles and percentiles
def quantile(q=0.5, axis=0, numeric_only=True, interpolation='linear', method='single'):
    """Return values at the given quantile over requested axis."""

def describe(percentiles=None, include=None, exclude=None):
    """Generate descriptive statistics."""

# Cumulative operations
def cumsum(axis=None, skipna=True):
    """Return cumulative sum over a DataFrame or Series axis."""

def cumprod(axis=None, skipna=True):
    """Return cumulative product over a DataFrame or Series axis."""

def cummax(axis=None, skipna=True):
    """Return cumulative maximum over a DataFrame or Series axis."""

def cummin(axis=None, skipna=True):
    """Return cumulative minimum over a DataFrame or Series axis."""

Correlation and Covariance

Functions to compute relationships between variables.

# These are methods available on DataFrame and Series:

def corr(method='pearson', min_periods=1, numeric_only=True):
    """
    Compute pairwise correlation of columns.
    
    Parameters:
    - method: str, correlation method ('pearson', 'kendall', 'spearman')
    - min_periods: int, minimum number of observations for valid result
    - numeric_only: bool, include only numeric columns
    
    Returns:
    DataFrame, correlation matrix
    """

def cov(min_periods=None, ddof=1, numeric_only=True):
    """
    Compute pairwise covariance of columns.
    
    Parameters:
    - min_periods: int, minimum number of observations for valid result
    - ddof: int, delta degrees of freedom
    - numeric_only: bool, include only numeric columns
    
    Returns:
    DataFrame, covariance matrix
    """

def corrwith(other, axis=0, drop=False, method='pearson', numeric_only=True):
    """
    Compute pairwise correlation.
    
    Parameters:
    - other: DataFrame, Series, or array-like
    - axis: int, axis to use (0 or 1)
    - drop: bool, drop missing indices from result
    - method: str, correlation method ('pearson', 'kendall', 'spearman')
    - numeric_only: bool, include only numeric columns
    
    Returns:
    Series, correlations
    """

Mathematical Operations

Element-wise mathematical functions and operations.

# These are methods available on DataFrame and Series:

def abs():
    """Return a Series/DataFrame with absolute numeric value of each element."""

def round(decimals=0):
    """Round each value to the given number of decimals."""

def clip(lower=None, upper=None, axis=None, inplace=False):
    """Trim values at input threshold(s)."""

def rank(axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False):
    """
    Compute numerical data ranks along axis.
    
    Parameters:
    - axis: int, axis to rank along
    - method: str, how to rank ('average', 'min', 'max', 'first', 'dense')
    - numeric_only: bool, include only numeric columns
    - na_option: str, how to rank NaN values ('keep', 'top', 'bottom')
    - ascending: bool, rank in ascending order
    - pct: bool, return percentile rank
    
    Returns:
    same type as caller, data ranks
    """

# Exponential and logarithmic functions (available via NumPy integration)
def exp():
    """Calculate exponential of elements."""

def log():
    """Calculate natural logarithm of elements."""

def log10():
    """Calculate base-10 logarithm of elements."""

def log2():
    """Calculate base-2 logarithm of elements."""

def sqrt():
    """Calculate square root of elements."""

def pow(other):
    """Get exponential power of the caller and other, element-wise (binary operator pow)."""

# Trigonometric functions (available via NumPy integration)
def sin():
    """Calculate sine of elements."""

def cos():
    """Calculate cosine of elements."""

def tan():
    """Calculate tangent of elements."""

def arcsin():
    """Calculate inverse sine of elements."""

def arccos():
    """Calculate inverse cosine of elements."""

def arctan():
    """Calculate inverse tangent of elements."""

Comparison Operations

Functions for comparing and ranking data.

# These are methods available on DataFrame and Series:

def eq(other, axis='columns', level=None):
    """Get equal to of dataframe and other, element-wise (binary operator ==)."""

def ne(other, axis='columns', level=None):
    """Get not equal to of dataframe and other, element-wise (binary operator !=)."""

def lt(other, axis='columns', level=None):
    """Get less than of dataframe and other, element-wise (binary operator <)."""

def le(other, axis='columns', level=None):
    """Get less than or equal to of dataframe and other, element-wise (binary operator <=)."""

def gt(other, axis='columns', level=None):
    """Get greater than of dataframe and other, element-wise (binary operator >)."""

def ge(other, axis='columns', level=None):
    """Get greater than or equal to of dataframe and other, element-wise (binary operator >=)."""

def between(left, right, inclusive='both'):
    """
    Return boolean Series equivalent to left <= series <= right.
    
    Parameters:
    - left: scalar or list-like, left boundary
    - right: scalar or list-like, right boundary
    - inclusive: str, include boundaries ('both', 'neither', 'left', 'right')
    
    Returns:
    Series, boolean values
    """

def isin(values):
    """
    Whether each element in the Series/DataFrame is contained in values.
    
    Parameters:
    - values: set or list-like, sequence of values to test
    
    Returns:
    Series/DataFrame of bools, boolean values
    """

Top-Level Statistical Functions

Standalone statistical functions that operate on array-like data.

def cut(x, bins, right=True, labels=None, retbins=False, precision=3, include_lowest=False, duplicates='raise', ordered=True):
    """
    Bin values into discrete intervals.
    
    Parameters:
    - x: array-like, input array to be binned
    - bins: int, sequence of scalars, or IntervalIndex
    - right: bool, whether bins include rightmost edge
    - labels: array or bool, labels for returned bins
    - retbins: bool, return bins
    - precision: int, precision for bin labels
    - include_lowest: bool, whether first interval is left-inclusive
    - duplicates: str, behavior for non-unique bin edges ('raise' or 'drop')
    - ordered: bool, whether returned Categorical is ordered
    
    Returns:
    Categorical, Series, or array
    """

def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
    """
    Quantile-based discretization function.
    
    Parameters:
    - x: array-like, input array to be binned
    - q: int or list-like of float, quantiles to compute
    - labels: array or bool, labels for returned bins
    - retbins: bool, return (bins, labels)
    - precision: int, precision for bin labels
    - duplicates: str, behavior for non-unique bin edges ('raise' or 'drop')
    
    Returns:
    Categorical, Series, or array
    """

def factorize(values, sort=False, na_sentinel=-1, use_na_sentinel=True, size_hint=None):
    """
    Encode the object as an enumerated type or categorical variable.
    
    Parameters:
    - values: sequence, 1-d array-like
    - sort: bool, sort uniques
    - na_sentinel: int, code used to mark missing values (deprecated since pandas 1.5 in favor of use_na_sentinel)
    - use_na_sentinel: bool, use na_sentinel for missing values
    - size_hint: int, hint for hashtable size
    
    Returns:
    tuple of (codes, uniques)
    """

def unique(values):
    """
    Return unique values based on a hash table.
    
    Parameters:
    - values: 1d array-like
    
    Returns:
    ndarray or ExtensionArray
    """

def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True):
    """
    Compute a histogram of the 1D array values.
    
    Parameters:
    - values: 1d array-like
    - sort: bool, sort by values
    - ascending: bool, sort in ascending order
    - normalize: bool, return relative frequencies
    - bins: int, group into half-open bins
    - dropna: bool, exclude NaN values
    
    Returns:
    Series
    """

Numeric Conversion

Functions for converting data to numeric types.

def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.
    
    Parameters:
    - arg: scalar, list, tuple, 1-d array, or Series
    - errors: str, error handling ('raise', 'coerce', 'ignore')
    - downcast: str, downcast resulting data ('integer', 'signed', 'unsigned', 'float')
    
    Returns:
    numeric, converted values
    """

Groupby Statistical Operations

Statistical methods available on GroupBy objects.

# Available on DataFrameGroupBy and SeriesGroupBy objects:

class GroupBy:
    """GroupBy object with statistical methods."""
    
    def mean(self, numeric_only=True, engine=None, engine_kwargs=None):
        """Compute mean of groups."""
    
    def median(self, numeric_only=True):
        """Compute median of groups."""
    
    def sum(self, numeric_only=True, min_count=0, engine=None, engine_kwargs=None):
        """Compute sum of groups."""
    
    def min(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None):
        """Compute min of groups."""
    
    def max(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None):
        """Compute max of groups."""
    
    def std(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=True):
        """Compute standard deviation of groups."""
    
    def var(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=True):
        """Compute variance of groups."""
    
    def count(self):
        """Compute count of group."""
    
    def size(self):
        """Compute group sizes."""
    
    def nunique(self, dropna=True):
        """Count number of unique values in each group."""
    
    def quantile(self, q=0.5, interpolation='linear', numeric_only=True):
        """Return values at given quantile for each group."""
    
    def describe(self, percentiles=None, include=None, exclude=None):
        """Generate descriptive statistics for each group."""
    
    def sem(self, ddof=1, numeric_only=True):
        """Compute standard error of the mean for each group."""
    
    def rank(self, method='average', ascending=True, na_option='keep', pct=False, axis=0):
        """Provide the rank of values within each group."""
    
    def cumcount(self, ascending=True):
        """Number each item in each group from 0 to the length of that group - 1."""
    
    def cumsum(self, axis=0, **kwargs):
        """Cumulative sum for each group."""
    
    def cumprod(self, axis=0, **kwargs):
        """Cumulative product for each group."""
    
    def cummax(self, axis=0, numeric_only=False, **kwargs):
        """Cumulative max for each group."""
    
    def cummin(self, axis=0, numeric_only=False, **kwargs):
        """Cumulative min for each group."""
    
    def skew(self, axis=0, skipna=True, numeric_only=True, **kwargs):
        """Return unbiased skew within groups."""
    
    def kurt(self, axis=0, skipna=True, numeric_only=True, **kwargs):
        """Return unbiased kurtosis within groups."""
    
    def mad(self, **kwargs):
        """Return mean absolute deviation within groups."""
    
    def prod(self, numeric_only=True, min_count=0):
        """Compute product of group values."""
    
    def ohlc(self):
        """Compute open, high, low and close values of a group."""
    
    def first(self, numeric_only=False, min_count=-1):
        """Return first value within each group."""
    
    def last(self, numeric_only=False, min_count=-1):
        """Return last value within each group."""
    
    def nth(self, n, dropna=None):
        """Take nth value, or subset if n is a list."""
    
    def idxmax(self, axis=0, skipna=True):
        """Return index of maximum value within each group."""
    
    def idxmin(self, axis=0, skipna=True):
        """Return index of minimum value within each group."""

Advanced Statistical Functions

More specialized statistical operations and utilities.

# These functions work with DataFrame/Series or can be called independently:

def pct_change(periods=1, fill_method='pad', limit=None, freq=None):
    """
    Percentage change between current and prior element.
    
    Parameters:
    - periods: int, periods to shift for forming percent change
    - fill_method: str, how to handle NaNs before computing percent changes
    - limit: int, number of consecutive NaNs to fill before stopping
    - freq: DateOffset, Timedelta or str, increment to use for time rule
    
    Returns:
    Series/DataFrame, percentage changes
    """

def diff(periods=1, axis=0):
    """
    First discrete difference of element.
    
    Parameters:
    - periods: int, periods to shift for calculating difference
    - axis: int, axis to shift along
    
    Returns:
    Series/DataFrame, differences
    """

def shift(periods=1, freq=None, axis=0, fill_value=None):
    """
    Shift index by desired number of periods.
    
    Parameters:
    - periods: int, number of periods to shift
    - freq: DateOffset, Timedelta, or str, offset to use from time series API
    - axis: int, axis to shift
    - fill_value: object, scalar value to use for missing values
    
    Returns:
    Series/DataFrame, shifted data
    """

def expanding(min_periods=1, center=None, axis=0, method='single'):
    """
    Provide expanding window calculations.
    
    Parameters:
    - min_periods: int, minimum number of observations in window
    - center: bool, whether result should be centered
    - axis: int, axis along which to slide window
    - method: str, execution method ('single' thread or 'table')
    
    Returns:
    Expanding object
    """

def rolling(window, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None, method='single'):
    """
    Provide rolling window calculations.
    
    Parameters:
    - window: int, size of moving window
    - min_periods: int, minimum number of observations in window  
    - center: bool, whether result should be centered
    - win_type: str, window type
    - on: str, datetime-like column for DatetimeIndex
    - axis: int, axis along which to slide window
    - closed: str, make interval closed on 'right', 'left', 'both' or 'neither'
    - method: str, execution method ('single' or 'table')
    
    Returns:
    Rolling object
    """

def ewm(com=None, span=None, halflife=None, alpha=None, min_periods=0, adjust=True, ignore_na=False, axis=0, times=None, method='single'):
    """
    Provide exponentially weighted (EW) calculations.
    
    Parameters:
    - com: float, center of mass
    - span: float, span
    - halflife: float, decay in terms of half-life
    - alpha: float, smoothing factor
    - min_periods: int, minimum number of observations
    - adjust: bool, divide by decaying adjustment factor
    - ignore_na: bool, ignore missing values
    - axis: int, axis along which to calculate
    - times: array-like, times corresponding to observations
    - method: str, execution method ('single' or 'table')
    
    Returns:
    ExponentialMovingWindow object
    """

Types

# Statistical method options
StatMethod = Literal['average', 'min', 'max', 'first', 'dense']
CorrelationMethod = Literal['pearson', 'kendall', 'spearman']
InterpolationMethod = Literal['linear', 'lower', 'higher', 'midpoint', 'nearest']
QuantileInterpolation = Literal['linear', 'lower', 'higher', 'midpoint', 'nearest']

# Ranking options
RankMethod = Literal['average', 'min', 'max', 'first', 'dense']
RankNaOption = Literal['keep', 'top', 'bottom']

# Numeric conversion options
NumericErrors = Literal['raise', 'coerce', 'ignore']
DowncastOptions = Literal['integer', 'signed', 'unsigned', 'float']

# Binning options
BinningDuplicates = Literal['raise', 'drop']
IntervalInclusive = Literal['both', 'neither', 'left', 'right']

# Window calculation options
WindowMethod = Literal['single', 'table']
WindowType = Literal[
    'boxcar', 'triang', 'blackman', 'hamming', 'bartlett', 'parzen',
    'bohman', 'blackmanharris', 'nuttall', 'barthann', 'kaiser',
    'gaussian', 'general_gaussian', 'slepian', 'exponential'
]

# Percentile inclusion options
PercentileInclusive = Literal['both', 'neither', 'left', 'right']

# Axis specification
AxisOption = Union[int, str, None]

Install with Tessl CLI

npx tessl i tessl/pypi-pandas

docs

api-types.md

configuration.md

core-data-structures.md

data-io.md

data-manipulation.md

data-types.md

errors.md

index.md

plotting.md

statistics-math.md

time-series.md

tile.json