Tessl Tile for pypi/cupy@13.6.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

array-creation.md cuda-management.md fft.md index.md kernels.md linear-algebra.md math-functions.md random.md scipy-extensions.md sparse.md statistics.md

statistics.mddocs/

0
# Statistics and Data Analysis
1

2
Statistical functions for data analysis, including descriptive statistics, correlations, and histograms. All functions operate on GPU arrays and mirror the NumPy interface; the reduction functions (mean, std, percentile, etc.) additionally support axis-wise operation.
3

4
## Capabilities
5

6
### Descriptive Statistics
7

8
Basic statistical measures for data analysis.
9

10
```python { .api }
11
def mean(a, axis=None, dtype=None, out=None, keepdims=False):
12
    """
13
    Compute arithmetic mean along specified axes.
14
    
15
    Parameters:
16
    - a: array-like, input data
17
    - axis: None or int or tuple of ints, axes to compute mean over
18
    - dtype: data type, type of output
19
    - out: cupy.ndarray, output array
20
    - keepdims: bool, keep reduced dimensions as size 1
21
    
22
    Returns:
23
    cupy.ndarray: Mean values
24
    """
25

26
def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
27
    """Compute standard deviation along specified axes."""
28

29
def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
30
    """Compute variance along specified axes."""
31

32
def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
33
    """Compute median along specified axes."""
34

35
def average(a, axis=None, weights=None, returned=False):
36
    """
37
    Compute weighted average along specified axis.
38
    
39
    Parameters:
40
    - a: array-like, input data
41
    - axis: None or int, axis to average over
42
    - weights: array-like, weights for averaging
43
    - returned: bool, return weights sum if True
44
    
45
    Returns:
46
    cupy.ndarray: Weighted average
47
    tuple: (average, sum_of_weights) if returned=True
48
    """
49
```
50

51
### Order Statistics
52

53
Statistical measures based on data ordering.
54

55
```python { .api }
56
def amax(a, axis=None, out=None, keepdims=False, initial=None, where=None):
57
    """Return maximum along axes."""
58

59
def amin(a, axis=None, out=None, keepdims=False, initial=None, where=None):
60
    """Return minimum along axes."""
61

62
def max(a, axis=None, out=None, keepdims=False, initial=None, where=None):
63
    """Return maximum along axes (alias for amax)."""
64

65
def min(a, axis=None, out=None, keepdims=False, initial=None, where=None):
66
    """Return minimum along axes (alias for amin)."""
67

68
def percentile(a, q, axis=None, out=None, overwrite_input=False, method='linear', keepdims=False):
69
    """
70
    Compute percentiles along specified axes.
71
    
72
    Parameters:
73
    - a: array-like, input data
74
    - q: float or array-like, percentile(s) to compute (0-100)
75
    - axis: None or int or tuple of ints, axes to compute over
76
    - out: cupy.ndarray, output array
77
    - overwrite_input: bool, allow input modification
78
    - method: str, interpolation method
79
    - keepdims: bool, keep reduced dimensions
80
    
81
    Returns:
82
    cupy.ndarray: Percentile values
83
    """
84

85
def quantile(a, q, axis=None, out=None, overwrite_input=False, method='linear', keepdims=False):
86
    """Compute quantiles along specified axes (0-1 scale)."""
87

88
def ptp(a, axis=None, out=None, keepdims=False):
89
    """Return range (peak-to-peak) along axes."""
90
```
91

92
### Correlation and Covariance
93

94
Statistical relationships between variables.
95

96
```python { .api }
97
def corrcoef(x, y=None, rowvar=True, bias=None, ddof=None):
98
    """
99
    Return Pearson correlation coefficients.
100
    
101
    Parameters:
102
    - x: array-like, input data
103
    - y: array-like, additional data
104
    - rowvar: bool, rows represent variables if True
105
    - bias: deprecated parameter
106
    - ddof: deprecated parameter
107
    
108
    Returns:
109
    cupy.ndarray: Correlation coefficient matrix
110
    """
111

112
def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
113
    """
114
    Estimate covariance matrix.
115
    
116
    Parameters:
117
    - m: array-like, input data
118
    - y: array-like, additional data
119
    - rowvar: bool, rows represent variables if True
120
    - bias: bool, normalization by N if True, N-1 if False
121
    - ddof: int, delta degrees of freedom
122
    - fweights: array-like, frequency weights
123
    - aweights: array-like, observation weights
124
    
125
    Returns:
126
    cupy.ndarray: Covariance matrix
127
    """
128

129
def correlate(a, v, mode='valid'):
130
    """
131
    Cross-correlation of two 1-D sequences.
132
    
133
    Parameters:
134
    - a, v: array-like, input sequences
135
    - mode: {'valid', 'same', 'full'}, output size
136
    
137
    Returns:
138
    cupy.ndarray: Cross-correlation
139
    """
140
```
141

142
### Histograms
143

144
Data distribution analysis and binning.
145

146
```python { .api }
147
def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
148
    """
149
    Compute histogram of dataset.
150
    
151
    Parameters:
152
    - a: array-like, input data
153
    - bins: int or array-like, number of bins or bin edges
154
    - range: tuple, lower and upper range of bins
155
    - normed: deprecated, use density instead
156
    - weights: array-like, weights for each value
157
    - density: bool, normalize to probability density
158
    
159
    Returns:
160
    tuple: (hist, bin_edges)
161
    """
162

163
def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, density=None):
164
    """
165
    Compute 2D histogram.
166
    
167
    Parameters:
168
    - x, y: array-like, input data
169
    - bins: int or [int, int] or array-like, bin specification
170
    - range: array-like, bin ranges [[xmin, xmax], [ymin, ymax]]
171
    - normed: deprecated, use density instead
172
    - weights: array-like, weights for each sample
173
    - density: bool, normalize to probability density
174
    
175
    Returns:
176
    tuple: (H, xedges, yedges)
177
    """
178

179
def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None):
180
    """Compute multidimensional histogram."""
181

182
def bincount(x, weights=None, minlength=0):
183
    """
184
    Count occurrences of each value in array.
185
    
186
    Parameters:
187
    - x: array-like, non-negative integer array
188
    - weights: array-like, weights for each value
189
    - minlength: int, minimum number of bins
190
    
191
    Returns:
192
    cupy.ndarray: Counts for each value
193
    """
194

195
def digitize(x, bins, right=False):
196
    """
197
    Return indices of bins to which each value belongs.
198
    
199
    Parameters:
200
    - x: array-like, input array
201
    - bins: array-like, bin edges
202
    - right: bool, if True each interval includes its right bin edge; if False (default) it includes its left bin edge
203
    
204
    Returns:
205
    cupy.ndarray: Bin indices
206
    """
207
```
208

209
### NaN-aware Statistics
210

211
Statistical functions that ignore NaN values when computing their results.
212

213
```python { .api }
214
def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
215
    """Compute mean ignoring NaNs."""
216

217
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
218
    """Compute standard deviation ignoring NaNs."""
219

220
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
221
    """Compute variance ignoring NaNs."""
222

223
def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
224
    """Compute median ignoring NaNs."""
225

226
def nanmax(a, axis=None, out=None, keepdims=False):
227
    """Return maximum ignoring NaNs."""
228

229
def nanmin(a, axis=None, out=None, keepdims=False):
230
    """Return minimum ignoring NaNs."""
231
```
232

233
## Usage Examples
234

235
### Basic Statistical Analysis
236

237
```python
238
import cupy as cp
239

240
# Sample data
241
data = cp.random.normal(100, 15, size=(10000,))
242

243
# Basic statistics
244
mean_val = cp.mean(data)
245
std_val = cp.std(data)
246
var_val = cp.var(data)
247
median_val = cp.median(data)
248

249
print(f"Mean: {mean_val:.2f}, Std: {std_val:.2f}")
250
print(f"Median: {median_val:.2f}, Range: {cp.ptp(data):.2f}")
251

252
# Percentiles
253
percentiles = cp.percentile(data, [25, 50, 75, 90, 95])
254
```
255

256
### Multi-dimensional Statistics
257

258
```python
259
# Multi-dimensional data analysis
260
matrix_data = cp.random.normal(0, 1, size=(1000, 50))
261

262
# Statistics along different axes
263
col_means = cp.mean(matrix_data, axis=0)  # Mean of each column
264
row_means = cp.mean(matrix_data, axis=1)  # Mean of each row
265
overall_mean = cp.mean(matrix_data)       # Overall mean
266

267
# Correlation analysis
268
correlation_matrix = cp.corrcoef(matrix_data.T)  # 50x50 correlation matrix
269
covariance_matrix = cp.cov(matrix_data.T)       # 50x50 covariance matrix
270
```
271

272
### Histogram Analysis
273

274
```python
275
# Distribution analysis
276
data = cp.random.exponential(2.0, size=100000)
277

278
# Basic histogram
279
counts, bin_edges = cp.histogram(data, bins=50, range=(0, 20))
280

281
# Probability density
282
density_counts, _ = cp.histogram(data, bins=50, range=(0, 20), density=True)
283

284
# 2D histogram for joint distributions
285
x = cp.random.normal(0, 1, 10000)
286
y = 2*x + cp.random.normal(0, 0.5, 10000)
287
H, xedges, yedges = cp.histogram2d(x, y, bins=50)
288
```

Version

Tile

Files

statistics.md docs/

statistics.mddocs/