# Statistics and Sorting

Statistical functions, sorting algorithms, and searching operations for data analysis and processing. All functions are GPU-accelerated and maintain NumPy compatibility.

## Capabilities

### Descriptive Statistics

Functions for computing statistical measures of data distributions.

```python { .api }
def mean(a, axis=None, dtype=None, out=None, keepdims=False):
    """
    Compute arithmetic mean along specified axis.

    Parameters:
    - a: array-like, input array
    - axis: None or int or tuple, axis to compute mean over
    - dtype: data type, output data type
    - out: cupy.ndarray, output array
    - keepdims: bool, keep reduced dimensions

    Returns:
    cupy.ndarray: Mean values
    """

def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """
    Compute standard deviation along specified axis.

    Parameters:
    - a: array-like, input array
    - axis: None or int or tuple, axis to compute std over
    - dtype: data type, output data type
    - out: cupy.ndarray, output array
    - ddof: int, delta degrees of freedom
    - keepdims: bool, keep reduced dimensions

    Returns:
    cupy.ndarray: Standard deviation
    """

def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """Compute variance along specified axis."""

def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
    """
    Compute median along specified axis.

    Parameters:
    - a: array-like, input array
    - axis: None or int or tuple, axis to compute median over
    - out: cupy.ndarray, output array
    - overwrite_input: bool, whether input can be overwritten
    - keepdims: bool, keep reduced dimensions

    Returns:
    cupy.ndarray: Median values
    """

def average(a, axis=None, weights=None, returned=False):
    """
    Compute weighted average along specified axis.

    Parameters:
    - a: array-like, input array
    - axis: None or int or tuple, axis to average over
    - weights: array-like, weights for each value
    - returned: bool, return tuple (average, sum_of_weights)

    Returns:
    cupy.ndarray: Weighted average
    tuple: (average, sum_of_weights) if returned=True
    """
```

### Order Statistics

Functions for finding minimum, maximum, and quantile values.

```python { .api }
def amax(a, axis=None, out=None, keepdims=False, initial=None, where=None):
    """
    Return maximum of array or maximum along axis.

    Parameters:
    - a: array-like, input array
    - axis: None or int or tuple, axis for maximum
    - out: cupy.ndarray, output array
    - keepdims: bool, keep reduced dimensions
    - initial: scalar, minimum value of output
    - where: array-like, elements to include

    Returns:
    cupy.ndarray: Maximum values
    """

def amin(a, axis=None, out=None, keepdims=False, initial=None, where=None):
    """Return minimum of array or minimum along axis."""

def max(a, axis=None, out=None, keepdims=False, initial=None, where=None):
    """Alias for amax."""

def min(a, axis=None, out=None, keepdims=False, initial=None, where=None):
    """Alias for amin."""

def ptp(a, axis=None, out=None, keepdims=False):
    """
    Return range (maximum - minimum) along axis.

    Returns:
    cupy.ndarray: Peak-to-peak values
    """

def percentile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False):
    """
    Compute qth percentile along specified axis.

    Parameters:
    - a: array-like, input array
    - q: float or sequence, percentile(s) to compute (0-100)
    - axis: None or int or tuple, axis to compute over
    - out: cupy.ndarray, output array
    - overwrite_input: bool, whether input can be overwritten
    - interpolation: str, interpolation method
    - keepdims: bool, keep reduced dimensions

    Returns:
    cupy.ndarray: Percentile values
    """

def quantile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False):
    """
    Compute qth quantile along specified axis.

    Parameters:
    - q: float or sequence, quantile(s) to compute (0-1)

    Returns:
    cupy.ndarray: Quantile values
    """
```

### Sorting Functions

Efficient GPU-accelerated sorting algorithms.

```python { .api }
def sort(a, axis=-1, kind=None, order=None):
    """
    Return sorted copy of array.

    Parameters:
    - a: array-like, input array
    - axis: int or None, axis to sort along
    - kind: str, sorting algorithm (ignored, uses GPU-optimized method)
    - order: str or list, field order for structured arrays

    Returns:
    cupy.ndarray: Sorted array
    """

def argsort(a, axis=-1, kind=None, order=None):
    """
    Return indices that would sort array.

    Returns:
    cupy.ndarray: Indices that sort a along specified axis
    """

def lexsort(keys, axis=-1):
    """
    Perform indirect stable sort using multiple keys.

    Parameters:
    - keys: tuple of arrays, sort keys (last key is primary)
    - axis: int, axis to sort along

    Returns:
    cupy.ndarray: Indices that sort the keys
    """

def msort(a):
    """Sort array along first axis."""

def sort_complex(a):
    """Sort complex array using real part first, then imaginary."""

def partition(a, kth, axis=-1, kind='introselect', order=None):
    """
    Return partitioned copy where kth element is in final sorted position.

    Parameters:
    - a: array-like, input array
    - kth: int or sequence, element index(es) for partitioning
    - axis: int or None, axis to partition along
    - kind: str, selection algorithm
    - order: str or list, field order for structured arrays

    Returns:
    cupy.ndarray: Partitioned array
    """

def argpartition(a, kth, axis=-1, kind='introselect', order=None):
    """Return indices that partition array."""
```

### Searching Functions

Functions for finding elements in arrays.

```python { .api }
def argmax(a, axis=None, out=None):
    """
    Return indices of maximum values along axis.

    Parameters:
    - a: array-like, input array
    - axis: int or None, axis to find maximum along
    - out: cupy.ndarray, output array

    Returns:
    cupy.ndarray: Indices of maximum values
    """

def argmin(a, axis=None, out=None):
    """Return indices of minimum values along axis."""

def nonzero(a):
    """
    Return indices of non-zero elements.

    Returns:
    tuple: Tuple of arrays, one for each dimension
    """

def where(condition, x=None, y=None):
    """
    Return elements chosen from x or y depending on condition.

    Parameters:
    - condition: array-like, boolean condition
    - x, y: array-like, values to choose from

    Returns:
    cupy.ndarray: Elements from x where condition is True, y elsewhere
    tuple: If x and y are not given, equivalent to nonzero(condition)
    """

def searchsorted(a, v, side='left', sorter=None):
    """
    Find indices where elements should be inserted to maintain order.

    Parameters:
    - a: array-like, sorted input array
    - v: array-like, values to insert
    - side: {'left', 'right'}, insertion side for equal values
    - sorter: array-like, indices that sort a

    Returns:
    cupy.ndarray: Insertion indices
    """
```

### Histograms

Functions for computing histograms and frequency distributions.

```python { .api }
def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute histogram of array.

    Parameters:
    - a: array-like, input data
    - bins: int or sequence, bin specification
    - range: tuple, range of bins
    - normed: bool, deprecated, use density
    - weights: array-like, weights for each value
    - density: bool, return probability density

    Returns:
    tuple: (hist, bin_edges)
    """

def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute 2D histogram of two arrays.

    Parameters:
    - x, y: array-like, input data
    - bins: int or sequence, bin specification
    - range: array-like, bin ranges
    - normed: bool, deprecated, use density
    - weights: array-like, weights for each value
    - density: bool, return probability density

    Returns:
    tuple: (H, xedges, yedges)
    """

def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None):
    """Compute multidimensional histogram."""

def bincount(x, weights=None, minlength=0):
    """
    Count occurrences of each value in array of non-negative integers.

    Parameters:
    - x: array-like, input array of non-negative integers
    - weights: array-like, weights for each value
    - minlength: int, minimum number of bins

    Returns:
    cupy.ndarray: Number of occurrences of each value
    """

def digitize(x, bins, right=False):
    """
    Return indices of bins to which each value belongs.

    Parameters:
    - x: array-like, input array
    - bins: array-like, bin edges (monotonic)
    - right: bool, whether intervals include right edge

    Returns:
    cupy.ndarray: Bin indices for each value in x
    """
```

### Correlations

Functions for computing correlations and covariances.

```python { .api }
def corrcoef(x, y=None, rowvar=True, bias=None, ddof=None):
    """
    Return Pearson correlation coefficients.

    Parameters:
    - x: array-like, input array
    - y: array-like, additional input array
    - rowvar: bool, whether rows are variables
    - bias: deprecated
    - ddof: deprecated

    Returns:
    cupy.ndarray: Correlation coefficient matrix
    """

def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
    """
    Estimate covariance matrix.

    Parameters:
    - m: array-like, input array
    - y: array-like, additional input array
    - rowvar: bool, whether rows are variables
    - bias: bool, use biased estimator
    - ddof: int, delta degrees of freedom
    - fweights: array-like, frequency weights
    - aweights: array-like, observation weights

    Returns:
    cupy.ndarray: Covariance matrix
    """

def correlate(a, v, mode='valid'):
    """
    Cross-correlation of two 1D sequences.

    Parameters:
    - a, v: array-like, input sequences
    - mode: {'valid', 'same', 'full'}, output size

    Returns:
    cupy.ndarray: Cross-correlation result
    """
```

## Usage Examples

### Basic Statistics

```python
import cupy as cp

# Generate sample data
data = cp.random.normal(10, 2, size=10000)

# Descriptive statistics
mean_val = cp.mean(data)
std_val = cp.std(data)
var_val = cp.var(data)
median_val = cp.median(data)

# Percentiles
q25 = cp.percentile(data, 25)
q75 = cp.percentile(data, 75)
iqr = q75 - q25

print(f"Mean: {mean_val:.2f}, Std: {std_val:.2f}")
print(f"Median: {median_val:.2f}, IQR: {iqr:.2f}")
```

### Sorting and Searching

```python
import cupy as cp

# Create test array
arr = cp.random.randint(0, 100, size=1000)

# Sort array
sorted_arr = cp.sort(arr)
sort_indices = cp.argsort(arr)

# Find extreme values
max_idx = cp.argmax(arr)
min_idx = cp.argmin(arr)

# Search for values
search_values = cp.array([25, 50, 75])
insertion_points = cp.searchsorted(sorted_arr, search_values)

# Boolean indexing
mask = arr > 50
high_values = arr[mask]
high_indices = cp.nonzero(mask)[0]
```

### Histograms and Distributions

```python
import cupy as cp

# Generate data from multiple distributions
normal_data = cp.random.normal(0, 1, 5000)
uniform_data = cp.random.uniform(-3, 3, 5000)

# Compute histograms
hist_normal, bins_normal = cp.histogram(normal_data, bins=50, density=True)
hist_uniform, bins_uniform = cp.histogram(uniform_data, bins=50, density=True)

# 2D histogram
x = cp.random.normal(0, 1, 1000)
y = x + cp.random.normal(0, 0.5, 1000)  # Correlated data
hist_2d, xedges, yedges = cp.histogram2d(x, y, bins=20)
```