# Statistics

Statistical functions and reduction operations for data analysis and aggregation. Provides comprehensive functionality for descriptive statistics, data summarization, and numerical analysis on GPU arrays.

## Capabilities

### Reduction Operations

Basic aggregation functions that reduce arrays along specified axes.

```python { .api }
def sum(a, axis=None, dtype=None, out=None, keepdims=False):
    """
    Sum of array elements over given axis.

    Parameters:
    - a: array_like, input array
    - axis: int/tuple, axis along which sum is performed
    - dtype: data type of output
    - out: ndarray, optional output array
    - keepdims: bool, keep dimensions of input

    Returns:
    cupy.ndarray: Sum of array elements
    """

def prod(a, axis=None, dtype=None, out=None, keepdims=False):
    """Return product of array elements over given axis."""

def cumsum(a, axis=None, dtype=None, out=None):
    """Return cumulative sum of elements along given axis."""

def cumprod(a, axis=None, dtype=None, out=None):
    """Return cumulative product of elements along given axis."""

def diff(a, n=1, axis=-1, prepend=None, append=None):
    """Calculate n-th discrete difference along given axis."""

def ediff1d(ary, to_end=None, to_begin=None):
    """Differences between consecutive elements of array."""

def gradient(f, *varargs, axis=None, edge_order=1):
    """Return gradient of N-dimensional array."""

def trapz(y, x=None, dx=1.0, axis=-1):
    """Integrate using composite trapezoidal rule."""
```

### Order Statistics

Functions for computing order-based statistics and extrema.

```python { .api }
def amax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """
    Return maximum of array or maximum along axis.

    Parameters:
    - a: array_like, input array
    - axis: int/tuple, axis along which maximum is computed
    - out: ndarray, optional output array
    - keepdims: bool, keep dimensions of input
    - initial: scalar, minimum value of output
    - where: array_like, elements to include in maximum

    Returns:
    cupy.ndarray: Maximum values
    """

def amin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """Return minimum of array or minimum along axis."""

def nanmax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """Return maximum along axis, ignoring NaNs."""

def nanmin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """Return minimum along axis, ignoring NaNs."""

def ptp(a, axis=None, out=None, keepdims=False):
    """Range of values (maximum - minimum) along axis."""

def percentile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False):
    """
    Compute qth percentile along specified axis.

    Parameters:
    - a: array_like, input array
    - q: float/array_like, percentile(s) to compute (0-100)
    - axis: int/tuple, axis along which percentiles are computed
    - interpolation: str, interpolation method

    Returns:
    cupy.ndarray: Percentile values
    """

def quantile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False):
    """Compute qth quantile along specified axis."""
```

### Central Tendency

Functions for measuring central tendency and spread of data.

```python { .api }
def mean(a, axis=None, dtype=None, out=None, keepdims=False, where=True):
    """
    Compute arithmetic mean along specified axis.

    Parameters:
    - a: array_like, input array
    - axis: int/tuple, axis along which mean is computed
    - dtype: data type for computation
    - out: ndarray, optional output array
    - keepdims: bool, keep dimensions of input
    - where: array_like, elements to include in mean

    Returns:
    cupy.ndarray: Arithmetic mean
    """

def average(a, axis=None, weights=None, returned=False):
    """Compute weighted average along specified axis."""

def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
    """Compute median along specified axis."""

def nanmean(a, axis=None, dtype=None, out=None, keepdims=False, where=True):
    """Compute arithmetic mean along axis, ignoring NaNs."""

def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
    """Compute median along axis, ignoring NaNs."""
```

### Variability

Functions for measuring spread and variability of data distributions.

```python { .api }
def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
    """
    Compute variance along specified axis.

    Parameters:
    - a: array_like, input array
    - axis: int/tuple, axis along which variance is computed
    - dtype: data type for computation
    - out: ndarray, optional output array
    - ddof: int, delta degrees of freedom
    - keepdims: bool, keep dimensions of input
    - where: array_like, elements to include

    Returns:
    cupy.ndarray: Variance values
    """

def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
    """Compute standard deviation along specified axis."""

def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
    """Compute variance along axis, ignoring NaNs."""

def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
    """Compute standard deviation along axis, ignoring NaNs."""
```

### Correlation Analysis

Functions for computing correlations and covariances between variables.

```python { .api }
def corrcoef(x, y=None, rowvar=True, bias=None, ddof=None, dtype=None):
    """
    Return Pearson product-moment correlation coefficients.

    Parameters:
    - x: array_like, input array
    - y: array_like, optional additional input
    - rowvar: bool, whether rows represent variables
    - ddof: int, delta degrees of freedom
    - dtype: data type for computation

    Returns:
    cupy.ndarray: Correlation coefficient matrix
    """

def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, dtype=None):
    """Estimate covariance matrix."""

def correlate(a, v, mode='valid'):
    """Cross-correlation of two 1-dimensional sequences."""
```

### Histogram Functions

Functions for binning data and creating histograms.

```python { .api }
def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute histogram of dataset.

    Parameters:
    - a: array_like, input data
    - bins: int/sequence, bin specification
    - range: tuple, range for bins
    - weights: array_like, weights for each value
    - density: bool, normalize to form probability density

    Returns:
    hist, bin_edges: ndarrays, histogram values and bin edges
    """

def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, density=None):
    """Compute 2D histogram of two datasets."""

def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None):
    """Compute multidimensional histogram of dataset."""

def bincount(x, weights=None, minlength=0):
    """Count number of occurrences of each value in array."""

def digitize(x, bins, right=False):
    """Return indices of bins to which each value belongs."""
```

### Counting Operations

Functions for counting elements that meet specific criteria.

```python { .api }
def count_nonzero(a, axis=None, keepdims=False):
    """
    Count number of nonzero elements along axis.

    Parameters:
    - a: array_like, input array
    - axis: int/tuple, axis along which to count
    - keepdims: bool, keep dimensions of input

    Returns:
    cupy.ndarray: Number of nonzero elements
    """
```

## Usage Examples

### Basic Statistics

```python
import cupy as cp

# Create sample data
data = cp.random.normal(0, 1, (1000, 100))

# Central tendency
mean_val = cp.mean(data)
median_val = cp.median(data)
mean_per_col = cp.mean(data, axis=0)

# Variability
std_val = cp.std(data)
var_val = cp.var(data)
std_per_row = cp.std(data, axis=1)

# Order statistics
min_val = cp.amin(data)
max_val = cp.amax(data)
percentiles = cp.percentile(data, [25, 50, 75])
```

### Advanced Statistical Analysis

```python
# Correlation analysis
x = cp.random.normal(0, 1, 1000)
y = 2 * x + cp.random.normal(0, 0.5, 1000)  # Correlated data

correlation_matrix = cp.corrcoef(x, y)
covariance_matrix = cp.cov(x, y)

# Multi-dimensional correlation
multi_data = cp.random.multivariate_normal([0, 0, 0],
                                           [[1, 0.5, 0.3],
                                            [0.5, 1, 0.7],
                                            [0.3, 0.7, 1]],
                                           size=10000)
multi_corr = cp.corrcoef(multi_data.T)
```

### Histogram and Distribution Analysis

```python
# Create histogram
data = cp.random.gamma(2, 2, 10000)
hist, bin_edges = cp.histogram(data, bins=50, density=True)

# 2D histogram for bivariate analysis
x = cp.random.normal(0, 1, 5000)
y = cp.random.normal(0, 1, 5000)
hist_2d, xedges, yedges = cp.histogram2d(x, y, bins=30)

# Multi-dimensional histogram
sample = cp.random.random((1000, 3))
hist_nd, edges = cp.histogramdd(sample, bins=10)
```

### Reduction Operations

```python
# Various reduction operations
matrix = cp.random.random((100, 50))

# Sums and products
total_sum = cp.sum(matrix)
row_sums = cp.sum(matrix, axis=1)
col_sums = cp.sum(matrix, axis=0)

total_prod = cp.prod(matrix)
cumulative_sum = cp.cumsum(matrix, axis=0)

# Differences and gradients
time_series = cp.sin(cp.linspace(0, 4 * cp.pi, 1000))
differences = cp.diff(time_series)
gradient_vals = cp.gradient(time_series)
```

### Handling Missing Data

```python
# Data with NaN values
data_with_nan = cp.random.random((100, 100))
data_with_nan[cp.random.random((100, 100)) < 0.1] = cp.nan

# NaN-aware statistics
nan_mean = cp.nanmean(data_with_nan)
nan_std = cp.nanstd(data_with_nan)
nan_max = cp.nanmax(data_with_nan, axis=0)
nan_min = cp.nanmin(data_with_nan, axis=1)

# Count non-NaN elements
valid_count = cp.count_nonzero(~cp.isnan(data_with_nan), axis=0)
```

### Weighted Statistics

```python
# Weighted average
values = cp.array([1, 2, 3, 4, 5])
weights = cp.array([0.1, 0.2, 0.4, 0.2, 0.1])
weighted_avg = cp.average(values, weights=weights)

# Weighted histogram
data = cp.random.exponential(2, 1000)
weights = cp.random.random(1000)
weighted_hist, bins = cp.histogram(data, bins=30, weights=weights, density=True)
```

### Statistical Tests and Analysis

```python
# Percentile-based analysis
data = cp.random.lognormal(0, 1, 10000)

# Quartiles
q1, q2, q3 = cp.percentile(data, [25, 50, 75])
iqr = q3 - q1  # Interquartile range

# Outlier detection using IQR
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
outliers = data[(data < lower_bound) | (data > upper_bound)]

# Data summary statistics
summary = {
    'count': len(data),
    'mean': cp.mean(data),
    'std': cp.std(data),
    'min': cp.min(data),
    'q1': q1,
    'median': q2,
    'q3': q3,
    'max': cp.max(data),
    'outliers': len(outliers)
}
```