0
# Array Statistics and Aggregations
1
2
Statistical and reduction operations for analyzing array data. These functions compute summary statistics and perform aggregations over an entire array or along specified axes; NaN-aware variants are provided for data that contains missing values.
3
4
## Capabilities
5
6
### Basic Statistical Functions
7
8
Fundamental statistical measures for array data.
9
10
```python { .api }
11
def sum(a, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True):
12
"""
13
Sum of array elements over given axis.
14
15
Parameters:
16
- a: array_like, input array
17
- axis: None or int or tuple of ints, axis to sum over
18
- dtype: data-type, type of returned array and accumulator
19
- out: ndarray, output array to place result
20
- keepdims: bool, keep reduced dimensions as size 1
21
- initial: scalar, starting value for sum
22
- where: array_like of bool, elements to include in sum
23
24
Returns:
25
ndarray or scalar: Sum of array elements
26
"""
27
28
def prod(a, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True):
29
"""
30
Return product of array elements over given axis.
31
32
Parameters:
33
- a: array_like, input array
34
- axis: None or int or tuple of ints, axis to compute product over
35
- dtype: data-type, type of returned array and accumulator
36
- out: ndarray, output array to place result
37
- keepdims: bool, keep reduced dimensions as size 1
38
- initial: scalar, starting value for product
39
- where: array_like of bool, elements to include in product
40
41
Returns:
42
ndarray or scalar: Product of array elements
43
"""
44
45
def mean(a, axis=None, dtype=None, out=None, keepdims=False, where=True):
46
"""
47
Compute arithmetic mean along specified axis.
48
49
Parameters:
50
- a: array_like, input array
51
- axis: None or int or tuple of ints, axis to compute mean over
52
- dtype: data-type, type of returned array and accumulator
53
- out: ndarray, output array to place result
54
- keepdims: bool, keep reduced dimensions as size 1
55
- where: array_like of bool, elements to include in mean
56
57
Returns:
58
ndarray or scalar: Arithmetic mean of array elements
59
"""
60
61
def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
62
"""
63
Compute standard deviation along specified axis.
64
65
Parameters:
66
- a: array_like, input array
67
- axis: None or int or tuple of ints, axis to compute std over
68
- dtype: data-type, type of returned array and accumulator
69
- out: ndarray, output array to place result
70
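- ddof: int, delta degrees of freedom; the divisor used is N - ddof, where N is the number of elements (ddof=0 gives the population value, ddof=1 the sample value)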
71
- keepdims: bool, keep reduced dimensions as size 1
72
- where: array_like of bool, elements to include in std
73
74
Returns:
75
ndarray or scalar: Standard deviation of array elements
76
"""
77
78
def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
79
"""
80
Compute variance along specified axis.
81
82
Parameters:
83
- a: array_like, input array
84
- axis: None or int or tuple of ints, axis to compute variance over
85
- dtype: data-type, type of returned array and accumulator
86
- out: ndarray, output array to place result
87
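- ddof: int, delta degrees of freedom; the divisor used is N - ddof, where N is the number of elements (ddof=0 gives the population value, ddof=1 the sample value)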
88
- keepdims: bool, keep reduced dimensions as size 1
89
- where: array_like of bool, elements to include in variance
90
91
Returns:
92
ndarray or scalar: Variance of array elements
93
"""
94
```
95
96
### Minimum and Maximum Functions
97
98
Find minimum and maximum values in arrays.
99
100
```python { .api }
101
def min(a, axis=None, out=None, keepdims=False, initial=None, where=True):
102
"""
103
Return minimum of array or minimum along axis.
104
105
Parameters:
106
- a: array_like, input array
107
- axis: None or int or tuple of ints, axis to find minimum over
108
- out: ndarray, output array to place result
109
- keepdims: bool, keep reduced dimensions as size 1
110
- initial: scalar, upper bound on the result (the output is never larger than this value); must be given to reduce an empty slice
111
- where: array_like of bool, elements to compare for minimum
112
113
Returns:
114
ndarray or scalar: Minimum of array elements
115
"""
116
117
def max(a, axis=None, out=None, keepdims=False, initial=None, where=True):
118
"""
119
Return maximum of array or maximum along axis.
120
121
Parameters:
122
- a: array_like, input array
123
- axis: None or int or tuple of ints, axis to find maximum over
124
- out: ndarray, output array to place result
125
- keepdims: bool, keep reduced dimensions as size 1
126
- initial: scalar, lower bound on the result (the output is never smaller than this value); must be given to reduce an empty slice
127
- where: array_like of bool, elements to compare for maximum
128
129
Returns:
130
ndarray or scalar: Maximum of array elements
131
"""
132
133
def amin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
134
"""
135
Return minimum of array or minimum along axis (alias of min).
136
137
Parameters:
138
- a: array_like, input array
139
- axis: None or int or tuple of ints, axis to find minimum over
140
- out: ndarray, output array to place result
141
- keepdims: bool, keep reduced dimensions as size 1
142
- initial: scalar, maximum value of output element
143
- where: array_like of bool, elements to compare for minimum
144
145
Returns:
146
ndarray or scalar: Minimum of array elements
147
"""
148
149
def amax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
150
"""
151
Return maximum of array or maximum along axis (alias of max).
152
153
Parameters:
154
- a: array_like, input array
155
- axis: None or int or tuple of ints, axis to find maximum over
156
- out: ndarray, output array to place result
157
- keepdims: bool, keep reduced dimensions as size 1
158
- initial: scalar, minimum value of output element
159
- where: array_like of bool, elements to compare for maximum
160
161
Returns:
162
ndarray or scalar: Maximum of array elements
163
"""
164
165
def ptp(a, axis=None, out=None, keepdims=False):
166
"""
167
Range of values (maximum - minimum) along axis.
168
169
Parameters:
170
- a: array_like, input array
171
- axis: None or int or tuple of ints, axis along which to find range
172
- out: ndarray, output array to place result
173
- keepdims: bool, keep reduced dimensions as size 1
174
175
Returns:
176
ndarray or scalar: Range of array elements
177
"""
178
```
179
180
### Percentile and Quantile Functions
181
182
Statistical measures based on data distribution.
183
184
```python { .api }
185
def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
186
"""
187
Compute median along specified axis.
188
189
Parameters:
190
- a: array_like, input array
191
- axis: None or int or tuple of ints, axis to compute median over
192
- out: ndarray, output array to place result
193
- overwrite_input: bool, allow the input array to be used as scratch space to save memory; its contents are undefined after the call
194
- keepdims: bool, keep reduced dimensions as size 1
195
196
Returns:
197
ndarray or scalar: Median of array elements
198
"""
199
200
def percentile(a, q, axis=None, out=None, overwrite_input=False, method='linear', keepdims=False):
201
"""
202
Compute qth percentile along specified axis.
203
204
Parameters:
205
- a: array_like, input array
206
- q: array_like of float, percentile(s) to compute (0-100)
207
- axis: None or int or tuple of ints, axis to compute percentiles over
208
- out: ndarray, output array to place result
209
- overwrite_input: bool, allow overwriting input array
210
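- method: str, method used to estimate the percentile when it falls between two data points (e.g. 'linear', 'lower', 'higher', 'midpoint', 'nearest')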
211
- keepdims: bool, keep reduced dimensions as size 1
212
213
Returns:
214
ndarray or scalar: Percentile(s) of array elements
215
"""
216
217
def quantile(a, q, axis=None, out=None, overwrite_input=False, method='linear', keepdims=False):
218
"""
219
Compute qth quantile along specified axis.
220
221
Parameters:
222
- a: array_like, input array
223
- q: array_like of float, quantile(s) to compute (0-1)
224
- axis: None or int or tuple of ints, axis to compute quantiles over
225
- out: ndarray, output array to place result
226
- overwrite_input: bool, allow overwriting input array
227
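- method: str, method used to estimate the quantile when it falls between two data points (e.g. 'linear', 'lower', 'higher', 'midpoint', 'nearest')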
228
- keepdims: bool, keep reduced dimensions as size 1
229
230
Returns:
231
ndarray or scalar: Quantile(s) of array elements
232
"""
233
```
234
235
### Logical Aggregation Functions
236
237
Boolean reduction operations.
238
239
```python { .api }
240
def all(a, axis=None, out=None, keepdims=False, where=True):
241
"""
242
Test whether all array elements along axis evaluate to True.
243
244
Parameters:
245
- a: array_like, input array
246
- axis: None or int or tuple of ints, axis to evaluate over
247
- out: ndarray, output array to place result
248
- keepdims: bool, keep reduced dimensions as size 1
249
- where: array_like of bool, elements to include in evaluation
250
251
Returns:
252
ndarray or bool: True if all elements evaluate to True
253
"""
254
255
def any(a, axis=None, out=None, keepdims=False, where=True):
256
"""
257
Test whether any array element along axis evaluates to True.
258
259
Parameters:
260
- a: array_like, input array
261
- axis: None or int or tuple of ints, axis to evaluate over
262
- out: ndarray, output array to place result
263
- keepdims: bool, keep reduced dimensions as size 1
264
- where: array_like of bool, elements to include in evaluation
265
266
Returns:
267
ndarray or bool: True if any element evaluates to True
268
"""
269
270
def count_nonzero(a, axis=None, keepdims=False):
271
"""
272
Count number of nonzero values in array.
273
274
Parameters:
275
- a: array_like, input array
276
- axis: None or int or tuple of ints, axis to count over
277
- keepdims: bool, keep reduced dimensions as size 1
278
279
Returns:
280
ndarray or int: Number of nonzero values
281
"""
282
```
283
284
### Cumulative Functions
285
286
Cumulative operations along array axes.
287
288
```python { .api }
289
def cumsum(a, axis=None, dtype=None, out=None):
290
"""
291
Return cumulative sum of elements along axis.
292
293
Parameters:
294
- a: array_like, input array
295
- axis: None or int, axis along which cumulative sum is computed; None (default) computes over the flattened array
296
- dtype: data-type, type of returned array
297
- out: ndarray, output array to place result
298
299
Returns:
300
ndarray: Cumulative sum along specified axis
301
"""
302
303
def cumprod(a, axis=None, dtype=None, out=None):
304
"""
305
Return cumulative product of elements along axis.
306
307
Parameters:
308
- a: array_like, input array
309
- axis: None or int, axis along which cumulative product is computed; None (default) computes over the flattened array
310
- dtype: data-type, type of returned array
311
- out: ndarray, output array to place result
312
313
Returns:
314
ndarray: Cumulative product along specified axis
315
"""
316
317
def cumulative_sum(a, axis=None, dtype=None, out=None):
318
"""
319
Return cumulative sum of elements along axis (Array API-compatible alternative to cumsum).
320
321
Parameters:
322
- a: array_like, input array
323
- axis: None or int, axis along which cumulative sum is computed; None is only allowed for one-dimensional input
324
- dtype: data-type, type of returned array
325
- out: ndarray, output array to place result
326
327
Returns:
328
ndarray: Cumulative sum along specified axis
329
"""
330
331
def cumulative_prod(a, axis=None, dtype=None, out=None):
332
"""
333
Return cumulative product of elements along axis (Array API-compatible alternative to cumprod).
334
335
Parameters:
336
- a: array_like, input array
337
- axis: None or int, axis along which cumulative product is computed; None is only allowed for one-dimensional input
338
- dtype: data-type, type of returned array
339
- out: ndarray, output array to place result
340
341
Returns:
342
ndarray: Cumulative product along specified axis
343
"""
344
```
345
346
### NaN-aware Statistical Functions
347
348
Statistical functions that handle NaN values appropriately.
349
350
```python { .api }
351
def nansum(a, axis=None, dtype=None, out=None, keepdims=False, where=True):
352
"""
353
Return sum of array elements over given axis treating NaNs as zero.
354
355
Parameters:
356
- a: array_like, input array
357
- axis: None or int or tuple of ints, axis to sum over
358
- dtype: data-type, type of returned array and accumulator
359
- out: ndarray, output array to place result
360
- keepdims: bool, keep reduced dimensions as size 1
361
- where: array_like of bool, elements to include in sum
362
363
Returns:
364
ndarray or scalar: Sum ignoring NaN values
365
"""
366
367
def nanprod(a, axis=None, dtype=None, out=None, keepdims=False, where=True):
368
"""
369
Return product of array elements over given axis treating NaNs as one.
370
371
Parameters:
372
- a: array_like, input array
373
- axis: None or int or tuple of ints, axis to compute product over
374
- dtype: data-type, type of returned array and accumulator
375
- out: ndarray, output array to place result
376
- keepdims: bool, keep reduced dimensions as size 1
377
- where: array_like of bool, elements to include in product
378
379
Returns:
380
ndarray or scalar: Product ignoring NaN values
381
"""
382
383
def nanmean(a, axis=None, dtype=None, out=None, keepdims=False, where=True):
384
"""
385
Compute arithmetic mean along specified axis, ignoring NaNs.
386
387
Parameters:
388
- a: array_like, input array
389
- axis: None or int or tuple of ints, axis to compute mean over
390
- dtype: data-type, type of returned array and accumulator
391
- out: ndarray, output array to place result
392
- keepdims: bool, keep reduced dimensions as size 1
393
- where: array_like of bool, elements to include in mean
394
395
Returns:
396
ndarray or scalar: Mean ignoring NaN values
397
"""
398
399
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
400
"""
401
Compute standard deviation along specified axis, ignoring NaNs.
402
403
Parameters:
404
- a: array_like, input array
405
- axis: None or int or tuple of ints, axis to compute std over
406
- dtype: data-type, type of returned array and accumulator
407
- out: ndarray, output array to place result
408
- ddof: int, delta degrees of freedom
409
- keepdims: bool, keep reduced dimensions as size 1
410
- where: array_like of bool, elements to include in std
411
412
Returns:
413
ndarray or scalar: Standard deviation ignoring NaN values
414
"""
415
416
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, where=True):
417
"""
418
Compute variance along specified axis, ignoring NaNs.
419
420
Parameters:
421
- a: array_like, input array
422
- axis: None or int or tuple of ints, axis to compute variance over
423
- dtype: data-type, type of returned array and accumulator
424
- out: ndarray, output array to place result
425
- ddof: int, delta degrees of freedom
426
- keepdims: bool, keep reduced dimensions as size 1
427
- where: array_like of bool, elements to include in variance
428
429
Returns:
430
ndarray or scalar: Variance ignoring NaN values
431
"""
432
433
def nanmin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
434
"""
435
Return minimum of array or minimum along axis, ignoring NaNs.
436
437
Parameters:
438
- a: array_like, input array
439
- axis: None or int or tuple of ints, axis to find minimum over
440
- out: ndarray, output array to place result
441
- keepdims: bool, keep reduced dimensions as size 1
442
- initial: scalar, maximum value of output element
443
- where: array_like of bool, elements to compare for minimum
444
445
Returns:
446
ndarray or scalar: Minimum ignoring NaN values
447
"""
448
449
def nanmax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
450
"""
451
Return maximum of array or maximum along axis, ignoring NaNs.
452
453
Parameters:
454
- a: array_like, input array
455
- axis: None or int or tuple of ints, axis to find maximum over
456
- out: ndarray, output array to place result
457
- keepdims: bool, keep reduced dimensions as size 1
458
- initial: scalar, minimum value of output element
459
- where: array_like of bool, elements to compare for maximum
460
461
Returns:
462
ndarray or scalar: Maximum ignoring NaN values
463
"""
464
465
def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
466
"""
467
Compute median along specified axis, ignoring NaNs.
468
469
Parameters:
470
- a: array_like, input array
471
- axis: None or int or tuple of ints, axis to compute median over
472
- out: ndarray, output array to place result
473
- overwrite_input: bool, allow overwriting input array
474
- keepdims: bool, keep reduced dimensions as size 1
475
476
Returns:
477
ndarray or scalar: Median ignoring NaN values
478
"""
479
480
def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, method='linear', keepdims=False):
481
"""
482
Compute qth percentile along specified axis, ignoring NaNs.
483
484
Parameters:
485
- a: array_like, input array
486
- q: array_like of float, percentile(s) to compute (0-100)
487
- axis: None or int or tuple of ints, axis to compute percentiles over
488
- out: ndarray, output array to place result
489
- overwrite_input: bool, allow overwriting input array
490
- method: str, interpolation method
491
- keepdims: bool, keep reduced dimensions as size 1
492
493
Returns:
494
ndarray or scalar: Percentile(s) ignoring NaN values
495
"""
496
497
def nanquantile(a, q, axis=None, out=None, overwrite_input=False, method='linear', keepdims=False):
498
"""
499
Compute qth quantile along specified axis, ignoring NaNs.
500
501
Parameters:
502
- a: array_like, input array
503
- q: array_like of float, quantile(s) to compute (0-1)
504
- axis: None or int or tuple of ints, axis to compute quantiles over
505
- out: ndarray, output array to place result
506
- overwrite_input: bool, allow overwriting input array
507
- method: str, interpolation method
508
- keepdims: bool, keep reduced dimensions as size 1
509
510
Returns:
511
ndarray or scalar: Quantile(s) ignoring NaN values
512
"""
513
514
def nancumsum(a, axis=None, dtype=None, out=None):
515
"""
516
Return cumulative sum along axis, treating NaNs as zero.
517
518
Parameters:
519
- a: array_like, input array
520
- axis: int, axis along which cumulative sum is computed
521
- dtype: data-type, type of returned array
522
- out: ndarray, output array to place result
523
524
Returns:
525
ndarray: Cumulative sum ignoring NaN values
526
"""
527
528
def nancumprod(a, axis=None, dtype=None, out=None):
529
"""
530
Return cumulative product along axis, treating NaNs as one.
531
532
Parameters:
533
- a: array_like, input array
534
- axis: int, axis along which cumulative product is computed
535
- dtype: data-type, type of returned array
536
- out: ndarray, output array to place result
537
538
Returns:
539
ndarray: Cumulative product ignoring NaN values
540
"""
541
```
542
543
## Usage Examples
544
545
### Basic Statistics
546
547
```python
548
import numpy as np
549
550
# Sample data
551
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
552
553
# Basic statistics
554
total_sum = np.sum(data) # 45
555
mean_val = np.mean(data) # 5.0
556
std_val = np.std(data) # 2.58
557
min_val = np.min(data) # 1
558
max_val = np.max(data) # 9
559
560
# Along specific axes
561
row_sums = np.sum(data, axis=1) # [6, 15, 24]
562
col_means = np.mean(data, axis=0) # [4.0, 5.0, 6.0]
563
```
564
565
### Percentiles and Quantiles
566
567
```python
568
import numpy as np
569
570
data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
571
572
# Percentiles
573
median = np.median(data) # 5.5
574
q25 = np.percentile(data, 25) # 3.25
575
q75 = np.percentile(data, 75) # 7.75
576
577
# Multiple percentiles
578
quartiles = np.percentile(data, [25, 50, 75]) # [3.25, 5.5, 7.75]
579
```
580
581
### Handling NaN Values
582
583
```python
584
import numpy as np
585
586
# Data with NaN values
587
data_with_nan = np.array([1, 2, np.nan, 4, 5, np.nan])
588
589
# Regular functions propagate NaN: a single NaN in the input makes the result NaN
590
regular_mean = np.mean(data_with_nan) # nan
591
regular_sum = np.sum(data_with_nan) # nan
592
593
# NaN-aware functions ignore NaN
594
nan_mean = np.nanmean(data_with_nan) # 3.0
595
nan_sum = np.nansum(data_with_nan) # 12.0
596
nan_max = np.nanmax(data_with_nan) # 5.0
597
```
598
599
### Cumulative Operations
600
601
```python
602
import numpy as np
603
604
arr = np.array([1, 2, 3, 4, 5])
605
606
# Cumulative operations
607
cumsum = np.cumsum(arr) # [1, 3, 6, 10, 15]
608
cumprod = np.cumprod(arr) # [1, 2, 6, 24, 120]
609
610
# Along specific axis for multi-dimensional arrays
611
matrix = np.array([[1, 2], [3, 4]])
612
cumsum_axis0 = np.cumsum(matrix, axis=0) # [[1, 2], [4, 6]]
613
cumsum_axis1 = np.cumsum(matrix, axis=1) # [[1, 3], [3, 7]]
614
```