# Statistics and Aggregation

Statistical functions and array aggregation operations including descriptive statistics, histograms, and correlation analysis. All operations are GPU-accelerated with NumPy-compatible interfaces for efficient data analysis.

## Capabilities

### Descriptive Statistics

Core statistical measures for data analysis and summarization.

```python { .api }
def mean(a, axis=None, dtype=None, out=None, keepdims=False):
    """
    Arithmetic mean along specified axes.

    Parameters:
    - a: array-like, input array
    - axis: int or tuple, axes for computation, optional
    - dtype: data type, result type, optional
    - out: array, output array, optional
    - keepdims: bool, keep dimensions

    Returns:
    cupy.ndarray: Mean values on GPU
    """

def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """
    Standard deviation along specified axes.

    Parameters:
    - a: array-like, input array
    - axis: int or tuple, axes for computation, optional
    - dtype: data type, result type, optional
    - out: array, output array, optional
    - ddof: int, delta degrees of freedom
    - keepdims: bool, keep dimensions

    Returns:
    cupy.ndarray: Standard deviation on GPU
    """

def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """
    Variance along specified axes.

    Parameters:
    - a: array-like, input array
    - axis: int or tuple, axes for computation, optional
    - dtype: data type, result type, optional
    - out: array, output array, optional
    - ddof: int, delta degrees of freedom
    - keepdims: bool, keep dimensions

    Returns:
    cupy.ndarray: Variance on GPU
    """

def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
    """
    Median along specified axes.

    Parameters:
    - a: array-like, input array
    - axis: int or tuple, axes for computation, optional
    - out: array, output array, optional
    - overwrite_input: bool, allow input modification
    - keepdims: bool, keep dimensions

    Returns:
    cupy.ndarray: Median values on GPU
    """

def percentile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False):
    """
    Percentile along specified axes.

    Parameters:
    - a: array-like, input array
    - q: float or array, percentile(s) to compute
    - axis: int or tuple, axes for computation, optional
    - out: array, output array, optional
    - overwrite_input: bool, allow input modification
    - interpolation: str, interpolation method
    - keepdims: bool, keep dimensions

    Returns:
    cupy.ndarray: Percentile values on GPU
    """

def quantile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False):
    """
    Quantile along specified axes.

    Parameters:
    - a: array-like, input array
    - q: float or array, quantile(s) to compute [0, 1]
    - axis: int or tuple, axes for computation, optional
    - out: array, output array, optional
    - overwrite_input: bool, allow input modification
    - interpolation: str, interpolation method
    - keepdims: bool, keep dimensions

    Returns:
    cupy.ndarray: Quantile values on GPU
    """
```

### Order Statistics

Functions for finding minimum, maximum, and order-based statistics.

```python { .api }
def amax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """
    Maximum along specified axes.

    Parameters:
    - a: array-like, input array
    - axis: int or tuple, axes for computation, optional
    - out: array, output array, optional
    - keepdims: bool, keep dimensions
    - initial: scalar, initial value, optional
    - where: array, condition, optional

    Returns:
    cupy.ndarray: Maximum values on GPU
    """

def amin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """
    Minimum along specified axes.

    Parameters:
    - a: array-like, input array
    - axis: int or tuple, axes for computation, optional
    - out: array, output array, optional
    - keepdims: bool, keep dimensions
    - initial: scalar, initial value, optional
    - where: array, condition, optional

    Returns:
    cupy.ndarray: Minimum values on GPU
    """

def ptp(a, axis=None, out=None, keepdims=False):
    """
    Peak-to-peak (maximum - minimum) along axes.

    Parameters:
    - a: array-like, input array
    - axis: int or tuple, axes for computation, optional
    - out: array, output array, optional
    - keepdims: bool, keep dimensions

    Returns:
    cupy.ndarray: Peak-to-peak values on GPU
    """
```

### Correlation Analysis

Functions for computing correlations and covariances between variables.

```python { .api }
def corrcoef(x, y=None, rowvar=True, bias=None, ddof=None, dtype=None):
    """
    Pearson correlation coefficients.

    Parameters:
    - x: array-like, input array
    - y: array-like, additional input array, optional
    - rowvar: bool, treat rows as variables
    - bias: deprecated parameter
    - ddof: deprecated parameter
    - dtype: data type, optional

    Returns:
    cupy.ndarray: Correlation coefficient matrix on GPU
    """

def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, dtype=None):
    """
    Covariance matrix.

    Parameters:
    - m: array-like, input array
    - y: array-like, additional input array, optional
    - rowvar: bool, treat rows as variables
    - bias: bool, use biased estimate
    - ddof: int, delta degrees of freedom, optional
    - fweights: array, frequency weights, optional
    - aweights: array, analytic weights, optional
    - dtype: data type, optional

    Returns:
    cupy.ndarray: Covariance matrix on GPU
    """

def correlate(a, v, mode='valid'):
    """
    Cross-correlation of two 1-dimensional sequences.

    Parameters:
    - a: array-like, first input sequence
    - v: array-like, second input sequence
    - mode: str, convolution mode ('valid', 'same', 'full')

    Returns:
    cupy.ndarray: Cross-correlation on GPU
    """
```

### Histograms

Functions for computing histograms and frequency distributions.

```python { .api }
def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute histogram of dataset.

    Parameters:
    - a: array-like, input data
    - bins: int or array, bin specification
    - range: tuple, range of bins, optional
    - normed: deprecated parameter
    - weights: array, weights for each value, optional
    - density: bool, normalize to probability density

    Returns:
    tuple: (hist, bin_edges) arrays on GPU
    """

def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute 2D histogram.

    Parameters:
    - x: array-like, first dimension data
    - y: array-like, second dimension data
    - bins: int or array, bin specification
    - range: array, bin ranges, optional
    - normed: deprecated parameter
    - weights: array, weights for each sample, optional
    - density: bool, normalize to probability density

    Returns:
    tuple: (H, xedges, yedges) arrays on GPU
    """

def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute multidimensional histogram.

    Parameters:
    - sample: array-like, input samples (N, D) or sequence of D arrays
    - bins: int or array, bin specification
    - range: sequence, bin ranges, optional
    - normed: deprecated parameter
    - weights: array, weights for each sample, optional
    - density: bool, normalize to probability density

    Returns:
    tuple: (H, edges) histogram and bin edges on GPU
    """

def bincount(x, weights=None, minlength=0):
    """
    Count occurrences of each value in array.

    Parameters:
    - x: array-like, non-negative integer array
    - weights: array, weights for each value, optional
    - minlength: int, minimum length of output

    Returns:
    cupy.ndarray: Occurrence counts on GPU
    """

def digitize(x, bins, right=False):
    """
    Return indices of bins to which each value belongs.

    Parameters:
    - x: array-like, input array
    - bins: array-like, bin edges
    - right: bool, interval closure

    Returns:
    cupy.ndarray: Bin indices on GPU
    """
```

## Usage Examples

### Basic Statistical Analysis

```python
import cupy as cp

# Generate sample data
data = cp.random.normal(10, 2, size=10000)

# Descriptive statistics
mean_val = cp.mean(data)
std_val = cp.std(data)
var_val = cp.var(data)
median_val = cp.median(data)

print(f"Mean: {mean_val}")
print(f"Standard deviation: {std_val}")
print(f"Variance: {var_val}")
print(f"Median: {median_val}")

# Percentiles
q25 = cp.percentile(data, 25)
q75 = cp.percentile(data, 75)
iqr = q75 - q25

print(f"25th percentile: {q25}")
print(f"75th percentile: {q75}")
print(f"Interquartile range: {iqr}")
```

### Multi-dimensional Statistics

```python
# Multi-dimensional data analysis
matrix_data = cp.random.normal(0, 1, size=(1000, 5))

# Statistics along different axes
column_means = cp.mean(matrix_data, axis=0)  # Mean of each column
row_means = cp.mean(matrix_data, axis=1)     # Mean of each row
overall_mean = cp.mean(matrix_data)          # Overall mean

# Standard deviations
column_stds = cp.std(matrix_data, axis=0)
row_stds = cp.std(matrix_data, axis=1)

print(f"Column means: {column_means}")
print(f"Column standard deviations: {column_stds}")
```

### Correlation Analysis

```python
# Generate correlated data
n_samples = 5000
x = cp.random.normal(0, 1, n_samples)
y = 2 * x + cp.random.normal(0, 0.5, n_samples)  # y = 2x + noise
z = cp.random.normal(0, 1, n_samples)            # Independent variable

# Combine into matrix (variables as rows)
data_matrix = cp.stack([x, y, z])

# Correlation matrix
corr_matrix = cp.corrcoef(data_matrix)
print("Correlation matrix:")
print(corr_matrix)

# Covariance matrix
cov_matrix = cp.cov(data_matrix)
print("Covariance matrix:")
print(cov_matrix)

# Pairwise correlation
xy_corr = cp.corrcoef(x, y)[0, 1]
xz_corr = cp.corrcoef(x, z)[0, 1]
print(f"X-Y correlation: {xy_corr}")
print(f"X-Z correlation: {xz_corr}")
```

### Histogram Analysis

```python
# Single variable histogram
data = cp.random.exponential(2.0, size=10000)

# Compute histogram
hist, bin_edges = cp.histogram(data, bins=50, density=True)
bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2

print(f"Histogram shape: {hist.shape}")
print(f"Bin edges shape: {bin_edges.shape}")

# 2D histogram for joint distribution
x = cp.random.normal(0, 1, 5000)
y = cp.random.normal(0, 1, 5000)

hist_2d, x_edges, y_edges = cp.histogram2d(x, y, bins=30)
print(f"2D histogram shape: {hist_2d.shape}")

# Multidimensional histogram
samples = cp.random.multivariate_normal([0, 0, 0], cp.eye(3), size=1000)
hist_nd, edges = cp.histogramdd(samples, bins=10)
print(f"ND histogram shape: {hist_nd.shape}")
```

### Advanced Statistical Operations

```python
# Weighted statistics
values = cp.array([1, 2, 3, 4, 5])
weights = cp.array([1, 2, 3, 2, 1])

# Weighted histogram
hist_weighted, _ = cp.histogram(values, bins=5, weights=weights)
print(f"Weighted histogram: {hist_weighted}")

# Time series analysis
time_series = cp.cumsum(cp.random.normal(0, 1, 1000))

# Rolling statistics (using convolution)
window_size = 50
kernel = cp.ones(window_size) / window_size
rolling_mean = cp.convolve(time_series, kernel, mode='valid')

# Moving statistics
def rolling_std(data, window):
    rolling_mean = cp.convolve(data, cp.ones(window)/window, mode='valid')
    # Pad for alignment
    padded_mean = cp.pad(rolling_mean, (window-1, 0), mode='edge')

    # Compute rolling variance
    squared_diff = (data - padded_mean)**2
    rolling_var = cp.convolve(squared_diff, cp.ones(window)/window, mode='valid')
    return cp.sqrt(rolling_var)

rolling_std_vals = rolling_std(time_series, window_size)
```

### Statistical Testing and Analysis

```python
# Outlier detection using IQR method
data = cp.random.normal(0, 1, 1000)
# Add some outliers
data = cp.concatenate([data, cp.array([5, -5, 6, -6])])

q25 = cp.percentile(data, 25)
q75 = cp.percentile(data, 75)
iqr = q75 - q25

# Define outliers as values beyond 1.5 * IQR from quartiles
lower_bound = q25 - 1.5 * iqr
upper_bound = q75 + 1.5 * iqr

outliers = data[(data < lower_bound) | (data > upper_bound)]
normal_data = data[(data >= lower_bound) & (data <= upper_bound)]

print(f"Number of outliers: {len(outliers)}")
print(f"Outlier values: {outliers}")

# Empirical CDF
def empirical_cdf(data, x):
    return cp.mean(data <= x)

# Compute CDF at specific points
test_points = cp.linspace(-3, 3, 100)
cdf_values = cp.array([empirical_cdf(data, point) for point in test_points])
```