CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-cupy-cuda110

NumPy & SciPy for GPU - CUDA 11.0 compatible package providing GPU-accelerated computing with Python through a NumPy/SciPy-compatible array library

Pending
Overview
Eval results
Files

docs/statistics.md

Statistics

Statistical functions and reduction operations for data analysis and aggregation. Provides comprehensive functionality for descriptive statistics, data summarization, and numerical analysis on GPU arrays.

Capabilities

Reduction Operations

Basic aggregation functions that reduce arrays along specified axes.

def sum(a, axis=None, dtype=None, out=None, keepdims=False):
    """
    Sum of array elements over given axis.

    Parameters:
    - a: array_like, input array
    - axis: int/tuple, axis along which sum is performed; None reduces
      over all elements to a scalar-shaped result
    - dtype: data type of output (and of the accumulator)
    - out: ndarray, optional output array
    - keepdims: bool, keep dimensions of input (reduced axes are
      retained with size 1 so the result broadcasts against `a`)

    Returns:
    cupy.ndarray: Sum of array elements
    """

def prod(a, axis=None, dtype=None, out=None, keepdims=False):
    """
    Return product of array elements over given axis.

    Parameters mirror sum(): `axis=None` reduces over all elements;
    `keepdims` retains reduced axes with size 1.
    """

def cumsum(a, axis=None, dtype=None, out=None):
    """
    Return cumulative sum of elements along given axis.

    When axis is None, operates over the flattened array.
    The result has the same number of elements as the input.
    """

def cumprod(a, axis=None, dtype=None, out=None):
    """
    Return cumulative product of elements along given axis.

    When axis is None, operates over the flattened array.
    """

def diff(a, n=1, axis=-1, prepend=None, append=None):
    """
    Calculate n-th discrete difference along given axis.

    - n: number of times differencing is applied; each application
      shortens the axis by one.
    - prepend/append: values concatenated along `axis` before
      differencing (e.g. to keep the output length equal to the input).
    """

def ediff1d(ary, to_end=None, to_begin=None):
    """
    Differences between consecutive elements of array (flattened).

    - to_begin/to_end: values prepended/appended to the result.
    """

def gradient(f, *varargs, axis=None, edge_order=1):
    """
    Return gradient of N-dimensional array.

    - varargs: spacing between samples (scalars or coordinate arrays,
      one per axis). Default spacing is 1.
    - edge_order: accuracy order (1 or 2) used at the boundaries.
    """

def trapz(y, x=None, dx=1.0, axis=-1):
    """
    Integrate using composite trapezoidal rule.

    - x: sample points corresponding to `y`; when omitted, uniform
      spacing `dx` is assumed.
    """

Order Statistics

Functions for computing order-based statistics and extrema.

def amax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """
    Return maximum of array or maximum along axis.
    
    Parameters:
    - a: array_like, input array
    - axis: int/tuple, axis along which maximum is computed
    - out: ndarray, optional output array
    - keepdims: bool, keep dimensions of input
    - initial: scalar, minimum value of output
    - where: array_like, elements to include in maximum
    
    Returns:
    cupy.ndarray: Maximum values
    """

def amin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """
    Return minimum of array or minimum along axis.

    Parameters mirror amax(); `initial` is the maximum value of the
    output, `where` selects the elements to include.
    """

def nanmax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """
    Return maximum along axis, ignoring NaNs.

    NaN-aware counterpart of amax().
    """

def nanmin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """
    Return minimum along axis, ignoring NaNs.

    NaN-aware counterpart of amin().
    """

def ptp(a, axis=None, out=None, keepdims=False):
    """
    Range of values (maximum - minimum) along axis.

    "ptp" stands for peak-to-peak.
    """

def percentile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False):
    """
    Compute qth percentile along specified axis.
    
    Parameters:
    - a: array_like, input array
    - q: float/array_like, percentile(s) to compute (0-100)
    - axis: int/tuple, axis along which percentiles are computed
    - overwrite_input: bool, allow the input to be modified as scratch space
    - interpolation: str, interpolation method used when the percentile
      falls between two data points (NumPy-compatible options:
      'linear', 'lower', 'higher', 'midpoint', 'nearest')
    - keepdims: bool, keep dimensions of input
    
    Returns:
    cupy.ndarray: Percentile values
    """

def quantile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False):
    """
    Compute qth quantile along specified axis.

    Same as percentile() except that, following the NumPy convention,
    `q` is expressed in [0, 1] instead of [0, 100].
    """

Central Tendency

Functions for measuring central tendency and spread of data.

def mean(a, axis=None, dtype=None, out=None, keepdims=False, where=True):
    """
    Compute arithmetic mean along specified axis.
    
    Parameters:
    - a: array_like, input array
    - axis: int/tuple, axis along which mean is computed
    - dtype: data type for computation
    - out: ndarray, optional output array
    - keepdims: bool, keep dimensions of input
    - where: array_like, elements to include in mean
    
    Returns:
    cupy.ndarray: Arithmetic mean
    """

def average(a, axis=None, weights=None, returned=False):
    """
    Compute weighted average along specified axis.

    - weights: array of per-element weights; when omitted the result
      equals the unweighted mean.
    - returned: when True, also return the sum of the weights.
    """

def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
    """
    Compute median along specified axis.

    - overwrite_input: allow the input to be modified as scratch space
      (saves memory at the cost of destroying `a`).
    """

def nanmean(a, axis=None, dtype=None, out=None, keepdims=False, where=True):
    """
    Compute arithmetic mean along axis, ignoring NaNs.

    NaN-aware counterpart of mean().
    """

def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
    """
    Compute median along axis, ignoring NaNs.

    NaN-aware counterpart of median().
    """

Variability

Functions for measuring spread and variability of data distributions.

def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
    """
    Compute variance along specified axis.
    
    Parameters:
    - a: array_like, input array
    - axis: int/tuple, axis along which variance is computed
    - dtype: data type for computation
    - out: ndarray, optional output array
    - ddof: int, delta degrees of freedom; the divisor is N - ddof,
      so ddof=0 gives the population variance and ddof=1 the sample
      (unbiased) variance
    - keepdims: bool, keep dimensions of input
    - where: array_like, elements to include
    
    Returns:
    cupy.ndarray: Variance values
    """

def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
    """
    Compute standard deviation along specified axis.

    Square root of var(); `ddof` has the same meaning (divisor N - ddof).
    """

def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
    """
    Compute variance along axis, ignoring NaNs.

    NaN-aware counterpart of var().
    """

def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
    """
    Compute standard deviation along axis, ignoring NaNs.

    NaN-aware counterpart of std().
    """

Correlation Analysis

Functions for computing correlations and covariances between variables.

def corrcoef(x, y=None, rowvar=True, bias=None, ddof=None, dtype=None):
    """
    Return Pearson product-moment correlation coefficients.
    
    Parameters:
    - x: array_like, input array
    - y: array_like, optional additional input
    - rowvar: bool, whether rows represent variables
    - bias: kept for NumPy signature compatibility (deprecated there;
      has no effect on the result — verify against the CuPy version)
    - ddof: int, delta degrees of freedom (same compatibility note)
    - dtype: data type for computation
    
    Returns:
    cupy.ndarray: Correlation coefficient matrix
    """

def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, dtype=None):
    """
    Estimate covariance matrix.

    - rowvar: when True each row of `m` is a variable, columns are
      observations; when False the roles are swapped.
    - bias/ddof: normalization control (ddof overrides bias when given).
    - fweights/aweights: integer frequency weights / observation weights.
    """

def correlate(a, v, mode='valid'):
    """
    Cross-correlation of two 1-dimensional sequences.

    - mode: output-size convention ('valid', 'same', or 'full'),
      as in numpy.correlate.
    """

Histogram Functions

Functions for binning data and creating histograms.

def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute histogram of dataset.
    
    Parameters:
    - a: array_like, input data
    - bins: int/sequence, bin specification (count, or explicit edges)
    - range: tuple, range for bins
    - normed: deprecated NumPy-compatibility alias — use `density`
    - weights: array_like, weights for each value
    - density: bool, normalize to form probability density
    
    Returns:
    hist, bin_edges: ndarrays, histogram values and bin edges
    """

def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute 2D histogram of two datasets.

    Returns the 2-D counts plus the bin edges along each axis
    (hist, xedges, yedges). `normed` is the deprecated alias of `density`.
    """

def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute multidimensional histogram of dataset.

    - sample: (N, D) array of N points in D dimensions.
    Returns the D-dimensional counts and a list of edge arrays.
    """

def bincount(x, weights=None, minlength=0):
    """
    Count number of occurrences of each value in array.

    - x: array of non-negative integers.
    - minlength: minimum length of the output count array.
    """

def digitize(x, bins, right=False):
    """
    Return indices of bins to which each value belongs.

    - bins: monotonic array of bin edges.
    - right: whether an interval includes its right edge.
    """

Counting Operations

Functions for counting elements that meet specific criteria.

def count_nonzero(a, axis=None, keepdims=False):
    """
    Count number of nonzero elements along axis.
    
    Parameters:
    - a: array_like, input array (booleans count True as nonzero)
    - axis: int/tuple, axis along which to count; None counts over
      the whole array
    - keepdims: bool, keep dimensions of input
    
    Returns:
    cupy.ndarray: Number of nonzero elements
    """

Usage Examples

Basic Statistics

import cupy as cp

# Sample matrix: 1000 observations of 100 features, standard normal
samples = cp.random.normal(0, 1, (1000, 100))

# Measures of central tendency
overall_mean = cp.mean(samples)
overall_median = cp.median(samples)
column_means = cp.mean(samples, axis=0)

# Measures of spread
overall_std = cp.std(samples)
overall_var = cp.var(samples)
row_stds = cp.std(samples, axis=1)

# Extrema and quartile boundaries
smallest = cp.amin(samples)
largest = cp.amax(samples)
quartiles = cp.percentile(samples, [25, 50, 75])

Advanced Statistical Analysis

# Correlation between two linearly related variables
xs = cp.random.normal(0, 1, 1000)
ys = 2 * xs + cp.random.normal(0, 0.5, 1000)  # strong positive correlation plus noise

corr_matrix = cp.corrcoef(xs, ys)
cov_matrix = cp.cov(xs, ys)

# Correlation structure of three jointly normal variables
cov_spec = [[1, 0.5, 0.3],
            [0.5, 1, 0.7],
            [0.3, 0.7, 1]]
joint_samples = cp.random.multivariate_normal([0, 0, 0], cov_spec, size=10000)
# corrcoef expects variables as rows, so transpose the (N, 3) sample matrix
joint_corr = cp.corrcoef(joint_samples.T)

Histogram and Distribution Analysis

# Density histogram of a right-skewed (gamma) sample
gamma_draws = cp.random.gamma(2, 2, 10000)
counts, edges = cp.histogram(gamma_draws, bins=50, density=True)

# Joint 2-D histogram of two independent standard normals
u = cp.random.normal(0, 1, 5000)
v = cp.random.normal(0, 1, 5000)
joint_counts, u_edges, v_edges = cp.histogram2d(u, v, bins=30)

# Histogram over all three dimensions at once
points = cp.random.random((1000, 3))
nd_counts, nd_edges = cp.histogramdd(points, bins=10)

Reduction Operations

# Reductions over a 2-D array
grid = cp.random.random((100, 50))

# Totals: whole array, per row, per column
grand_total = cp.sum(grid)
per_row_totals = cp.sum(grid, axis=1)
per_col_totals = cp.sum(grid, axis=0)

grand_product = cp.prod(grid)
running_totals = cp.cumsum(grid, axis=0)

# Discrete differences and numerical gradient of a sampled sine wave
signal = cp.sin(cp.linspace(0, 4 * cp.pi, 1000))
deltas = cp.diff(signal)
slopes = cp.gradient(signal)

Handling Missing Data

# Inject NaN into roughly 10% of the entries of a random matrix
noisy = cp.random.random((100, 100))
noisy[cp.random.random((100, 100)) < 0.1] = cp.nan

# NaN-aware reductions skip the missing entries instead of propagating NaN
mean_ignoring_nan = cp.nanmean(noisy)
std_ignoring_nan = cp.nanstd(noisy)
column_maxima = cp.nanmax(noisy, axis=0)
row_minima = cp.nanmin(noisy, axis=1)

# Number of valid (non-NaN) entries in each column
valid_per_column = cp.count_nonzero(~cp.isnan(noisy), axis=0)

Weighted Statistics

# Weighted mean of a small vector
observations = cp.array([1, 2, 3, 4, 5])
obs_weights = cp.array([0.1, 0.2, 0.4, 0.2, 0.1])
weighted_mean = cp.average(observations, weights=obs_weights)

# Histogram in which every sample carries its own weight
draws = cp.random.exponential(2, 1000)
draw_weights = cp.random.random(1000)
weighted_counts, weighted_edges = cp.histogram(draws, bins=30, weights=draw_weights, density=True)

Statistical Tests and Analysis

# Quartile-based summary of a heavy-tailed (log-normal) sample
sample = cp.random.lognormal(0, 1, 10000)

# Quartile boundaries: the 25th, 50th, and 75th percentiles
q1, q2, q3 = cp.percentile(sample, [25, 50, 75])
iqr = q3 - q1  # interquartile range

# Tukey's fences: points beyond 1.5 * IQR from the quartiles are flagged
low_fence = q1 - 1.5 * iqr
high_fence = q3 + 1.5 * iqr
outliers = sample[(sample < low_fence) | (sample > high_fence)]

# Compact summary: count, location, spread, extrema, and outlier count
summary = {
    'count': len(sample),
    'mean': cp.mean(sample),
    'std': cp.std(sample),
    'min': cp.min(sample),
    'q1': q1,
    'median': q2,
    'q3': q3,
    'max': cp.max(sample),
    'outliers': len(outliers)
}

Install with Tessl CLI

npx tessl i tessl/pypi-cupy-cuda110

docs

array-operations.md

cuda-interface.md

custom-kernels.md

index.md

linear-algebra.md

mathematical-functions.md

random-generation.md

scipy-extensions.md

statistics.md

tile.json