Tessl Tile for pypi/cupy-cuda12x@12.3.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

array-operations.md cuda-interface.md custom-kernels.md fft-operations.md index.md linear-algebra.md math-functions.md random-numbers.md statistics-sorting.md

statistics-sorting.mddocs/

0
# Statistics and Sorting
1

2
Statistical analysis, sorting, searching, and counting operations on GPU arrays. Provides descriptive and order statistics, correlations, histograms, sorting and partitioning, index searching, and non-zero counting, with NaN-aware variants of the common reductions and support for axis-specific operations.
3

4
## Capabilities
5

6
### Descriptive Statistics
7

8
```python { .api }
9
def mean(a, axis=None, dtype=None, out=None, keepdims=False):
10
    """
11
    Compute arithmetic mean along specified axis.
12

13
    Parameters:
14
    - a: input array
15
    - axis: axis or axes along which to compute mean
16
    - dtype: data type for computation
17
    - out: output array
18
    - keepdims: keep dimensions of original array
19

20
    Returns:
21
    cupy.ndarray: arithmetic mean
22
    """
23

24
def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
25
    """
26
    Compute standard deviation along specified axis.
27

28
    Parameters:
29
    - a: input array
30
    - axis: axis or axes along which to compute std
31
    - dtype: data type for computation
32
    - out: output array
33
    - ddof: degrees of freedom correction
34
    - keepdims: keep dimensions
35

36
    Returns:
37
    cupy.ndarray: standard deviation
38
    """
39

40
def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
41
    """Compute variance along specified axis."""
42

43
def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
44
    """Compute median along specified axis."""
45

46
def average(a, axis=None, weights=None, returned=False):
47
    """
48
    Compute weighted average along specified axis.
49

50
    Parameters:
51
    - a: input array
52
    - axis: axis along which to average
53
    - weights: weights for averaging
54
    - returned: return sum of weights
55

56
    Returns:
57
    cupy.ndarray or tuple: weighted average
58
    """
59

60
def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
61
    """Compute mean ignoring NaNs."""
62

63
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
64
    """Compute standard deviation ignoring NaNs."""
65

66
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
67
    """Compute variance ignoring NaNs."""
68

69
def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
70
    """Compute median ignoring NaNs."""
71
```
72

73
### Order Statistics
74

75
```python { .api }
76
def amin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
77
    """Return minimum values along axis."""
78

79
def amax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
80
    """Return maximum values along axis."""
81

82
def min(a, axis=None, out=None, keepdims=False, initial=None, where=True):
83
    """Alias for amin."""
84

85
def max(a, axis=None, out=None, keepdims=False, initial=None, where=True):
86
    """Alias for amax."""
87

88
def nanmin(a, axis=None, out=None, keepdims=False):
89
    """Return minimum values ignoring NaNs."""
90

91
def nanmax(a, axis=None, out=None, keepdims=False):
92
    """Return maximum values ignoring NaNs."""
93

94
def ptp(a, axis=None, out=None, keepdims=False):
95
    """
96
    Range of values (maximum - minimum) along axis.
97

98
    Parameters:
99
    - a: input array
100
    - axis: axis along which to compute range
101
    - out: output array
102
    - keepdims: keep dimensions
103

104
    Returns:
105
    cupy.ndarray: peak-to-peak values
106
    """
107

108
def percentile(a, q, axis=None, out=None, overwrite_input=False, 
109
              method='linear', keepdims=False):
110
    """
111
    Compute qth percentile along specified axis.
112

113
    Parameters:
114
    - a: input array
115
    - q: percentile(s) to compute
116
    - axis: axis along which to compute percentiles
117
    - out: output array
118
    - overwrite_input: allow input modification
119
    - method: interpolation method
120
    - keepdims: keep dimensions
121

122
    Returns:
123
    cupy.ndarray: qth percentiles
124
    """
125

126
def quantile(a, q, axis=None, out=None, overwrite_input=False,
127
            method='linear', keepdims=False):
128
    """Compute quantiles along specified axis."""
129
```
130

131
### Correlations
132

133
```python { .api }
134
def corrcoef(x, y=None, rowvar=True, bias=None, ddof=None):
135
    """
136
    Return Pearson correlation coefficients.
137

138
    Parameters:
139
    - x: input array
140
    - y: additional input array
141
    - rowvar: whether rows represent variables
142
    - bias: bias correction (deprecated)
143
    - ddof: degrees of freedom (deprecated)
144

145
    Returns:
146
    cupy.ndarray: correlation coefficient matrix
147
    """
148

149
def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
150
    """
151
    Estimate covariance matrix.
152

153
    Parameters:
154
    - m: input array
155
    - y: additional input array
156
    - rowvar: whether rows represent variables
157
    - bias: use biased estimator
158
    - ddof: degrees of freedom correction
159
    - fweights: frequency weights
160
    - aweights: analytic weights
161

162
    Returns:
163
    cupy.ndarray: covariance matrix
164
    """
165

166
def correlate(a, v, mode='valid'):
167
    """
168
    Cross-correlation of two 1-dimensional sequences.
169

170
    Parameters:
171
    - a: first input sequence
172
    - v: second input sequence
173
    - mode: output size ('full', 'valid', 'same')
174

175
    Returns:
176
    cupy.ndarray: cross-correlation
177
    """
178
```
179

180
### Histograms
181

182
```python { .api }
183
def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
184
    """
185
    Compute histogram of a set of data.
186

187
    Parameters:
188
    - a: input data
189
    - bins: number of bins or bin edges
190
    - range: lower and upper range of bins
191
    - normed: normalize histogram (deprecated)
192
    - weights: weights for each value
193
    - density: normalize to create probability density
194

195
    Returns:
196
    tuple: (hist, bin_edges)
197
    """
198

199
def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, density=None):
200
    """
201
    Compute 2D histogram of two data samples.
202

203
    Parameters:
204
    - x, y: input data arrays
205
    - bins: number of bins or bin edges
206
    - range: array of ranges for each dimension
207
    - normed: normalize histogram (deprecated)
208
    - weights: weights for each sample
209
    - density: normalize to create probability density
210

211
    Returns:
212
    tuple: (H, xedges, yedges)
213
    """
214

215
def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None):
216
    """
217
    Compute multidimensional histogram.
218

219
    Parameters:
220
    - sample: input data array
221
    - bins: number of bins for each dimension
222
    - range: sequence of ranges for each dimension
223
    - normed: normalize histogram (deprecated)
224
    - weights: weights for each sample
225
    - density: normalize to create probability density
226

227
    Returns:
228
    tuple: (H, edges)
229
    """
230

231
def bincount(x, weights=None, minlength=0):
232
    """
233
    Count occurrences of each value in array of non-negative ints.
234

235
    Parameters:
236
    - x: input array of non-negative integers
237
    - weights: weights for each value
238
    - minlength: minimum number of bins
239

240
    Returns:
241
    cupy.ndarray: counts for each value
242
    """
243

244
def digitize(x, bins, right=False):
245
    """
246
    Return indices of bins to which each value belongs.
247

248
    Parameters:
249
    - x: input array
250
    - bins: array of bins
251
    - right: whether intervals include right edge
252

253
    Returns:
254
    cupy.ndarray: bin indices
255
    """
256
```
257

258
### Sorting
259

260
```python { .api }
261
def sort(a, axis=-1, kind=None, order=None):
262
    """
263
    Return sorted copy of array.
264

265
    Parameters:
266
    - a: input array
267
    - axis: axis along which to sort
268
    - kind: sorting algorithm (only None or 'stable' is accepted; the sort is always stable)
269
    - order: field order for structured arrays
270

271
    Returns:
272
    cupy.ndarray: sorted array
273
    """
274

275
def argsort(a, axis=-1, kind=None, order=None):
276
    """
277
    Return indices that would sort array.
278

279
    Parameters:
280
    - a: input array
281
    - axis: axis along which to sort
282
    - kind: sorting algorithm
283
    - order: field order for structured arrays
284

285
    Returns:
286
    cupy.ndarray: indices for sorted array
287
    """
288

289
def lexsort(keys, axis=-1):
290
    """
291
    Perform indirect stable sort using multiple keys.
292

293
    Parameters:
294
    - keys: sequence of arrays to use as sort keys
295
    - axis: axis along which to sort
296

297
    Returns:
298
    cupy.ndarray: indices for lexicographically sorted array
299
    """
300

301
def msort(a):
302
    """
303
    Return sorted copy along first axis.
304

305
    Parameters:
306
    - a: input array
307

308
    Returns:
309
    cupy.ndarray: sorted array
310
    """
311

312
def sort_complex(a):
313
    """
314
    Sort complex array using real part first, then imaginary part.
315

316
    Parameters:
317
    - a: input complex array
318

319
    Returns:
320
    cupy.ndarray: sorted complex array
321
    """
322

323
def partition(a, kth, axis=-1, kind='introselect', order=None):
324
    """
325
    Return partitioned copy where kth element is in correct position.
326

327
    Parameters:
328
    - a: input array
329
    - kth: element index for partitioning
330
    - axis: axis along which to partition
331
    - kind: partitioning algorithm
332
    - order: field order for structured arrays
333

334
    Returns:
335
    cupy.ndarray: partitioned array
336
    """
337

338
def argpartition(a, kth, axis=-1, kind='introselect', order=None):
339
    """Return indices that would partition array."""
340
```
341

342
### Searching
343

344
```python { .api }
345
def argmax(a, axis=None, out=None):
346
    """
347
    Return indices of maximum values along axis.
348

349
    Parameters:
350
    - a: input array
351
    - axis: axis along which to search
352
    - out: output array
353

354
    Returns:
355
    cupy.ndarray: indices of maximum values
356
    """
357

358
def argmin(a, axis=None, out=None):
359
    """Return indices of minimum values along axis."""
360

361
def nanargmax(a, axis=None):
362
    """Return indices of maximum values ignoring NaNs."""
363

364
def nanargmin(a, axis=None):
365
    """Return indices of minimum values ignoring NaNs."""
366

367
def argwhere(a):
368
    """
369
    Find indices of array elements that are non-zero.
370

371
    Parameters:
372
    - a: input array
373

374
    Returns:
375
    cupy.ndarray: indices of non-zero elements
376
    """
377

378
def nonzero(a):
379
    """
380
    Return indices of elements that are non-zero.
381

382
    Parameters:
383
    - a: input array
384

385
    Returns:
386
    tuple: arrays of indices
387
    """
388

389
def flatnonzero(a):
390
    """Return indices of flattened array that are non-zero."""
391

392
def where(condition, x=None, y=None):
393
    """
394
    Return elements chosen from x or y depending on condition.
395

396
    Parameters:
397
    - condition: boolean array
398
    - x: values where condition is True
399
    - y: values where condition is False
400

401
    Returns:
402
    cupy.ndarray: array with elements from x or y
403
    """
404

405
def searchsorted(a, v, side='left', sorter=None):
406
    """
407
    Find indices where elements should be inserted to maintain order.
408

409
    Parameters:
410
    - a: sorted input array
411
    - v: values to insert
412
    - side: insertion side ('left' or 'right')
413
    - sorter: array of indices that sort a
414

415
    Returns:
416
    cupy.ndarray: insertion indices
417
    """
418
```
419

420
### Counting
421

422
```python { .api }
423
def count_nonzero(a, axis=None, keepdims=False):
424
    """
425
    Count number of non-zero values in array.
426

427
    Parameters:
428
    - a: input array
429
    - axis: axis or axes to count along
430
    - keepdims: keep dimensions of original array
431

432
    Returns:
433
    int or cupy.ndarray: count of non-zero values
434
    """
435
```
436

437
## Usage Examples
438

439
### Basic Statistics
440

441
```python
442
import cupy as cp
443

444
# Create sample data
445
data = cp.random.normal(10, 2, (1000, 50))
446

447
# Compute basic statistics
448
mean_val = cp.mean(data)
449
std_val = cp.std(data)
450
var_val = cp.var(data)
451
median_val = cp.median(data)
452

453
print(f"Mean: {mean_val:.4f}")
454
print(f"Std: {std_val:.4f}")
455
print(f"Variance: {var_val:.4f}")
456
print(f"Median: {median_val:.4f}")
457

458
# Statistics along specific axis
459
row_means = cp.mean(data, axis=1)  # Mean of each row
460
col_stds = cp.std(data, axis=0)    # Std of each column
461

462
print(f"Row means shape: {row_means.shape}")
463
print(f"Column stds shape: {col_stds.shape}")
464
```
465

466
### Order Statistics
467

468
```python
469
import cupy as cp
470

471
# Create test data
472
data = cp.random.random((100, 100))
473

474
# Find min/max values
475
min_val = cp.min(data)
476
max_val = cp.max(data)
477
range_val = cp.ptp(data)  # peak-to-peak
478

479
print(f"Min: {min_val:.4f}")
480
print(f"Max: {max_val:.4f}")
481
print(f"Range: {range_val:.4f}")
482

483
# Percentiles
484
percentiles = cp.percentile(data, [25, 50, 75, 90, 95])
485
print(f"Percentiles (25,50,75,90,95): {percentiles}")
486

487
# Quantiles (same as percentiles but with 0-1 scale)
488
quantiles = cp.quantile(data, [0.25, 0.5, 0.75])
489
print(f"Quantiles (0.25,0.5,0.75): {quantiles}")
490
```
491

492
### Handling NaN Values
493

494
```python
495
import cupy as cp
496

497
# Create data with NaN values
498
data = cp.random.random((100, 100))
499
data[cp.random.random((100, 100)) < 0.1] = cp.nan  # 10% NaN values
500

501
# Regular statistics (will return NaN if any NaN present)
502
regular_mean = cp.mean(data)
503
regular_std = cp.std(data)
504

505
# NaN-aware statistics
506
nan_mean = cp.nanmean(data)
507
nan_std = cp.nanstd(data)
508
nan_min = cp.nanmin(data)
509
nan_max = cp.nanmax(data)
510

511
print(f"Regular mean: {regular_mean}")
512
print(f"NaN-aware mean: {nan_mean:.4f}")
513
print(f"NaN-aware std: {nan_std:.4f}")
514
print(f"NaN-aware range: {nan_min:.4f} to {nan_max:.4f}")
515
```
516

517
### Correlation Analysis
518

519
```python
520
import cupy as cp
521

522
# Create correlated data
523
n_samples = 1000
524
x = cp.random.normal(0, 1, n_samples)
525
y = 0.8 * x + 0.6 * cp.random.normal(0, 1, n_samples)  # Correlated with x
526
z = cp.random.normal(0, 1, n_samples)  # Independent
527

528
# Stack into matrix (each row is a variable)
529
data = cp.stack([x, y, z])
530

531
# Compute correlation matrix
532
corr_matrix = cp.corrcoef(data)
533
print("Correlation matrix:")
534
print(corr_matrix)
535

536
# Compute covariance matrix
537
cov_matrix = cp.cov(data)
538
print("\nCovariance matrix:")
539
print(cov_matrix)
540

541
# Cross-correlation of two sequences
542
x_seq = cp.random.random(100)
543
y_seq = cp.random.random(100)
544
cross_corr = cp.correlate(x_seq, y_seq, mode='full')
545
print(f"\nCross-correlation shape: {cross_corr.shape}")
546
```
547

548
### Histograms
549

550
```python
551
import cupy as cp
552

553
# Create sample data
554
data = cp.random.normal(0, 1, 10000)
555

556
# 1D histogram
557
hist, bin_edges = cp.histogram(data, bins=50, range=(-4, 4))
558
print(f"Histogram shape: {hist.shape}")
559
print(f"Bin edges shape: {bin_edges.shape}")
560

561
# Weighted histogram
562
weights = cp.random.random(len(data))
563
weighted_hist, _ = cp.histogram(data, bins=50, weights=weights)
564

565
# 2D histogram
566
x = cp.random.normal(0, 1, 5000)
567
y = cp.random.normal(0, 1, 5000)
568
hist_2d, x_edges, y_edges = cp.histogram2d(x, y, bins=30)
569
print(f"2D histogram shape: {hist_2d.shape}")
570

571
# Count occurrences
572
integers = cp.random.randint(0, 10, 1000)
573
counts = cp.bincount(integers)
574
print(f"Counts: {counts}")
575

576
# Digitize continuous data
577
bin_indices = cp.digitize(data, bins=cp.linspace(-3, 3, 10))
578
print(f"Bin indices range: {cp.min(bin_indices)} to {cp.max(bin_indices)}")
579
```
580

581
### Sorting Operations
582

583
```python
584
import cupy as cp
585

586
# Create unsorted data
587
data = cp.random.random((5, 10))
588

589
# Sort array
590
sorted_data = cp.sort(data, axis=1)  # Sort each row
591
print("Original data (first row):")
592
print(data[0])
593
print("Sorted data (first row):")
594
print(sorted_data[0])
595

596
# Get sorting indices
597
sort_indices = cp.argsort(data, axis=1)
598
print("Sort indices (first row):")
599
print(sort_indices[0])
600

601
# Verify sorting
602
reconstructed = data[0, sort_indices[0]]
603
print("Reconstructed (should match sorted):")
604
print(reconstructed)
605

606
# Multi-dimensional sort
607
data_3d = cp.random.random((10, 20, 30))
608
sorted_3d = cp.sort(data_3d, axis=2)  # Sort along last axis
609
```
610

611
### Advanced Sorting
612

613
```python
614
import cupy as cp
615

616
# Lexicographic sorting
617
# Sort by multiple keys (e.g., sort by y first, then by x)
618
x = cp.array([1, 3, 2, 1, 3, 2])
619
y = cp.array([3, 1, 2, 1, 3, 1])
620

621
# Sort by y first, then x (note order: primary key last)
622
lex_indices = cp.lexsort([x, y])
623
print("Lexsort indices:", lex_indices)
624
print("x sorted:", x[lex_indices])
625
print("y sorted:", y[lex_indices])
626

627
# Partial sorting (partition)
628
large_array = cp.random.random(1000)
629
k = 100  # Find 100 smallest elements
630

631
# Partition so that k smallest elements are in first k positions
632
partitioned = cp.partition(large_array, k - 1)  # kth is a 0-based index: index k-1 holds the k-th smallest
633
print(f"100th smallest element: {partitioned[k-1]}")
634
print(f"Verification - max of first 100: {cp.max(partitioned[:k])}")
635
print(f"Verification - min of last 900: {cp.min(partitioned[k:])}")
636
```
637

638
### Search Operations
639

640
```python
641
import cupy as cp
642

643
# Create test data
644
data = cp.random.random((50, 50))
645

646
# Find locations of extreme values
647
max_pos = cp.argmax(data)
648
min_pos = cp.argmin(data)
649

650
# Convert flat indices to 2D coordinates
651
max_coords = cp.unravel_index(max_pos, data.shape)
652
min_coords = cp.unravel_index(min_pos, data.shape)
653

654
print(f"Max value {cp.max(data):.4f} at position {max_coords}")
655
print(f"Min value {cp.min(data):.4f} at position {min_coords}")
656

657
# Find all positions above threshold
658
threshold = 0.9
659
high_positions = cp.argwhere(data > threshold)
660
print(f"Found {len(high_positions)} positions above {threshold}")
661

662
# Search in sorted array
663
sorted_array = cp.sort(cp.random.random(1000))
664
values_to_find = cp.array([0.1, 0.5, 0.9])
665
insertion_points = cp.searchsorted(sorted_array, values_to_find)
666
print(f"Insertion points: {insertion_points}")
667

668
# Count non-zero elements
669
sparse_data = cp.random.random((100, 100))
670
sparse_data[sparse_data < 0.9] = 0  # Make 90% zeros
671
nonzero_count = cp.count_nonzero(sparse_data)
672
print(f"Non-zero elements: {nonzero_count} out of {sparse_data.size}")
673
```
674

675
### Performance Comparison
676

677
```python
678
import cupy as cp
679
import numpy as np
680
import time
681

682
# Large dataset for performance testing
683
n = 10**7
684
data_gpu = cp.random.random(n)
685
data_cpu = cp.asnumpy(data_gpu)
686

687
# GPU sorting
688
start = time.time()
689
sorted_gpu = cp.sort(data_gpu)
690
cp.cuda.Device().synchronize()
691
gpu_time = time.time() - start
692

693
# CPU sorting
694
start = time.time()
695
sorted_cpu = np.sort(data_cpu)
696
cpu_time = time.time() - start
697

698
print(f"GPU sort time: {gpu_time:.4f}s")
699
print(f"CPU sort time: {cpu_time:.4f}s")
700
print(f"Speedup: {cpu_time/gpu_time:.2f}x")
701

702
# Verify correctness
703
gpu_result_cpu = cp.asnumpy(sorted_gpu)
704
max_diff = np.max(np.abs(gpu_result_cpu - sorted_cpu))
705
print(f"Max difference: {max_diff}")
706
```

Version

Tile

Files

statistics-sorting.md docs/

statistics-sorting.mddocs/