0
# Mathematical and Statistical Operations
1
2
A full suite of mathematical operations, including reductions, element-wise functions, linear algebra, and statistical analysis, all of which handle missing data and nested structures appropriately. These operations extend NumPy's capabilities to work seamlessly with irregular, nested data.
3
4
## Capabilities
5
6
### Reduction Operations
7
8
Functions that reduce arrays along specified axes, properly handling variable-length lists and missing values to compute aggregate statistics.
9
10
```python { .api }
11
def sum(array, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None):
12
"""
13
Sum array elements along specified axis.
14
15
Parameters:
16
- array: Array-like data to sum
17
- axis: int or None, axis along which to sum (None for all axes)
18
- keepdims: bool, if True keep reduced axes as size-one dimensions
19
- mask_identity: bool, if False return identity (0) for empty sequences, if True return None
20
- highlevel: bool, if True return Array, if False return Content layout
21
- behavior: dict, custom behavior for the result
22
- attrs: dict, metadata attributes for the result
23
24
Returns:
25
Array containing sums along specified axis
26
"""
27
28
def prod(array, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None):
29
"""
30
Product of array elements along specified axis.
31
32
Parameters:
33
- array: Array to compute product of
34
- axis: int or None, axis along which to compute product
35
- keepdims: bool, if True keep reduced axes as size-one dimensions
36
- mask_identity: bool, if False return identity (1) for empty sequences, if True return None
37
- highlevel: bool, if True return Array, if False return Content layout
38
- behavior: dict, custom behavior for the result
39
- attrs: dict, metadata attributes for the result
40
41
Returns:
42
Array containing products along specified axis
43
"""
44
45
def count(array, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None):
46
"""
47
Count non-None elements along specified axis.
48
49
Parameters:
50
- array: Array to count elements in
51
- axis: int or None, axis along which to count
52
- keepdims: bool, if True keep reduced axes as size-one dimensions
53
- mask_identity: bool, if False return identity (0) for empty sequences, if True return None
54
- highlevel: bool, if True return Array, if False return Content layout
55
- behavior: dict, custom behavior for the result
56
- attrs: dict, metadata attributes for the result
57
58
Returns:
59
Array containing counts of non-None elements
60
"""
61
62
def count_nonzero(array, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None):
63
"""
64
Count non-zero elements along specified axis.
65
66
Parameters:
67
- array: Array to count non-zero elements in
68
- axis: int or None, axis along which to count
69
- keepdims: bool, if True keep reduced axes as size-one dimensions
70
- mask_identity: bool, if False return identity (0) for empty sequences, if True return None
71
- highlevel: bool, if True return Array, if False return Content layout
72
- behavior: dict, custom behavior for the result
73
- attrs: dict, metadata attributes for the result
74
75
Returns:
76
Array containing counts of non-zero elements
77
"""
78
79
def any(array, axis=None, *, keepdims=False, mask_identity=False):
80
"""
81
Test if any elements along axis evaluate to True.
82
83
Parameters:
84
- array: Array to test
85
- axis: int or None, axis along which to test
86
- keepdims: bool, if True keep reduced axes as size-one dimensions
87
- mask_identity: bool, if False return identity (False) for empty sequences, if True return None
88
89
Returns:
90
Array of booleans indicating if any elements are True
91
"""
92
93
def all(array, axis=None, *, keepdims=False, mask_identity=False):
94
"""
95
Test if all elements along axis evaluate to True.
96
97
Parameters:
98
- array: Array to test
99
- axis: int or None, axis along which to test
100
- keepdims: bool, if True keep reduced axes as size-one dimensions
101
- mask_identity: bool, if False return identity (True) for empty sequences, if True return None
102
103
Returns:
104
Array of booleans indicating if all elements are True
105
"""
106
```
107
108
### Min/Max Operations
109
110
Functions for finding minimum and maximum values and their positions, with support for NaN handling and proper treatment of empty sequences.
111
112
```python { .api }
113
def min(array, axis=None, *, keepdims=False, initial=None, mask_identity=True, highlevel=True, behavior=None, attrs=None):
114
"""
115
Find minimum values along specified axis.
116
117
Parameters:
118
- array: Array to find minimum in
119
- axis: int or None, axis along which to find minimum
120
- keepdims: bool, if True keep reduced axes as size-one dimensions
121
- initial: value, initial value for minimum search
122
- mask_identity: bool, if True return None for empty sequences
123
- highlevel: bool, if True return Array, if False return Content layout
124
- behavior: dict, custom behavior for the result
125
- attrs: dict, metadata attributes for the result
126
127
Returns:
128
Array containing minimum values along specified axis
129
"""
130
131
def max(array, axis=None, *, keepdims=False, initial=None, mask_identity=True, highlevel=True, behavior=None, attrs=None):
132
"""
133
Find maximum values along specified axis.
134
135
Parameters:
136
- array: Array to find maximum in
137
- axis: int or None, axis along which to find maximum
138
- keepdims: bool, if True keep reduced axes as size-one dimensions
139
- initial: value, initial value for maximum search
140
- mask_identity: bool, if True return None for empty sequences
141
- highlevel: bool, if True return Array, if False return Content layout
142
- behavior: dict, custom behavior for the result
143
- attrs: dict, metadata attributes for the result
144
145
Returns:
146
Array containing maximum values along specified axis
147
"""
148
149
def argmin(array, axis=None, *, keepdims=False, mask_identity=True, highlevel=True, behavior=None, attrs=None):
150
"""
151
Find indices of minimum values along specified axis.
152
153
Parameters:
154
- array: Array to find minimum indices in
155
- axis: int or None, axis along which to find argmin
156
- keepdims: bool, if True keep reduced axes as size-one dimensions
157
- mask_identity: bool, if True return None for empty sequences
158
- highlevel: bool, if True return Array, if False return Content layout
159
- behavior: dict, custom behavior for the result
160
- attrs: dict, metadata attributes for the result
161
162
Returns:
163
Array containing indices of minimum values
164
"""
165
166
def argmax(array, axis=None, *, keepdims=False, mask_identity=True, highlevel=True, behavior=None, attrs=None):
167
"""
168
Find indices of maximum values along specified axis.
169
170
Parameters:
171
- array: Array to find maximum indices in
172
- axis: int or None, axis along which to find argmax
173
- keepdims: bool, if True keep reduced axes as size-one dimensions
174
- mask_identity: bool, if True return None for empty sequences
175
- highlevel: bool, if True return Array, if False return Content layout
176
- behavior: dict, custom behavior for the result
177
- attrs: dict, metadata attributes for the result
178
179
Returns:
180
Array containing indices of maximum values
181
"""
182
183
def nanmin(array, axis=None, keepdims=False, mask_identity=True):
184
"""
185
Find minimum values ignoring NaN along specified axis.
186
187
Parameters:
188
- array: Array to find minimum in
189
- axis: int or None, axis along which to find minimum
190
- keepdims: bool, if True keep reduced axes as size-one dimensions
191
- mask_identity: bool, if True return None for empty sequences
192
193
Returns:
194
Array containing minimum values ignoring NaN
195
"""
196
197
def nanmax(array, axis=None, keepdims=False, mask_identity=True):
198
"""
199
Find maximum values ignoring NaN along specified axis.
200
201
Parameters:
202
- array: Array to find maximum in
203
- axis: int or None, axis along which to find maximum
204
- keepdims: bool, if True keep reduced axes as size-one dimensions
205
- mask_identity: bool, if True return None for empty sequences
206
207
Returns:
208
Array containing maximum values ignoring NaN
209
"""
210
211
def nanargmin(array, axis=None, keepdims=False, mask_identity=True):
212
"""
213
Find indices of minimum values ignoring NaN.
214
215
Parameters:
216
- array: Array to find minimum indices in
217
- axis: int or None, axis along which to find argmin
218
- keepdims: bool, if True keep reduced axes as size-one dimensions
219
- mask_identity: bool, if True return None for empty sequences
220
221
Returns:
222
Array containing indices of minimum values ignoring NaN
223
"""
224
225
def nanargmax(array, axis=None, keepdims=False, mask_identity=True):
226
"""
227
Find indices of maximum values ignoring NaN.
228
229
Parameters:
230
- array: Array to find maximum indices in
231
- axis: int or None, axis along which to find argmax
232
- keepdims: bool, if True keep reduced axes as size-one dimensions
233
- mask_identity: bool, if True return None for empty sequences
234
235
Returns:
236
Array containing indices of maximum values ignoring NaN
237
"""
238
239
def ptp(array, axis=None, keepdims=False, mask_identity=True):
240
"""
241
Range (peak-to-peak) along specified axis.
242
243
Parameters:
244
- array: Array to compute range for
245
- axis: int or None, axis along which to compute range
246
- keepdims: bool, if True keep reduced axes as size-one dimensions
247
- mask_identity: bool, if True return None for empty sequences
248
249
Returns:
250
Array containing range (max - min) along specified axis
251
"""
252
```
253
254
### Statistical Functions
255
256
Comprehensive statistical operations including moments, variance, standard deviation, and correlation analysis with proper handling of nested data structures.
257
258
```python { .api }
259
def mean(x, weight=None, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None):
260
"""
261
Arithmetic mean along specified axis.
262
263
Parameters:
264
- x: Array to compute mean of
265
- weight: Array, optional weights for weighted mean
266
- axis: int or None, axis along which to compute mean
267
- keepdims: bool, if True keep reduced axes as size-one dimensions
268
- mask_identity: bool, if False return identity (0) for empty sequences, if True return None
269
- highlevel: bool, if True return Array, if False return Content layout
270
- behavior: dict, custom behavior for the result
271
- attrs: dict, metadata attributes for the result
272
273
Returns:
274
Array containing arithmetic mean along specified axis
275
"""
276
277
def var(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None):
278
"""
279
Variance along specified axis.
280
281
Parameters:
282
- x: Array to compute variance of
283
- weight: Array, optional weights for weighted variance
284
- ddof: int, delta degrees of freedom for denominator
285
- axis: int or None, axis along which to compute variance
286
- keepdims: bool, if True keep reduced axes as size-one dimensions
287
- mask_identity: bool, if False return identity (0) for empty sequences, if True return None
288
- highlevel: bool, if True return Array, if False return Content layout
289
- behavior: dict, custom behavior for the result
290
- attrs: dict, metadata attributes for the result
291
292
Returns:
293
Array containing variance along specified axis
294
"""
295
296
def std(x, weight=None, ddof=0, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None):
297
"""
298
Standard deviation along specified axis.
299
300
Parameters:
301
- x: Array to compute standard deviation of
302
- weight: Array, optional weights for weighted standard deviation
303
- ddof: int, delta degrees of freedom for denominator
304
- axis: int or None, axis along which to compute std dev
305
- keepdims: bool, if True keep reduced axes as size-one dimensions
306
- mask_identity: bool, if False return identity (0) for empty sequences, if True return None
307
- highlevel: bool, if True return Array, if False return Content layout
308
- behavior: dict, custom behavior for the result
309
- attrs: dict, metadata attributes for the result
310
311
Returns:
312
Array containing standard deviation along specified axis
313
"""
314
315
def moment(array, n, axis=None, keepdims=False, mask_identity=True):
316
"""
317
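Calculate the nth moment about the origin (the mean of the values raised to the nth power). The mean is not subtracted, so the n=2 moment is not the variance.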
318
319
Parameters:
320
- array: Array to compute moment of
321
- n: int, order of the moment
322
- axis: int or None, axis along which to compute moment
323
- keepdims: bool, if True keep reduced axes as size-one dimensions
324
- mask_identity: bool, if True return None for empty sequences
325
326
Returns:
327
Array containing nth moment along specified axis
328
"""
329
330
def nansum(array, axis=None, keepdims=False):
331
"""
332
Sum ignoring NaN values along specified axis.
333
334
Parameters:
335
- array: Array to sum
336
- axis: int or None, axis along which to sum
337
- keepdims: bool, if True keep reduced axes as size-one dimensions
338
339
Returns:
340
Array containing sums ignoring NaN values
341
"""
342
343
def nanprod(array, axis=None, keepdims=False):
344
"""
345
Product ignoring NaN values along specified axis.
346
347
Parameters:
348
- array: Array to compute product of
349
- axis: int or None, axis along which to compute product
350
- keepdims: bool, if True keep reduced axes as size-one dimensions
351
352
Returns:
353
Array containing products ignoring NaN values
354
"""
355
356
def nanmean(array, axis=None, keepdims=False):
357
"""
358
Arithmetic mean ignoring NaN values.
359
360
Parameters:
361
- array: Array to compute mean of
362
- axis: int or None, axis along which to compute mean
363
- keepdims: bool, if True keep reduced axes as size-one dimensions
364
365
Returns:
366
Array containing arithmetic mean ignoring NaN values
367
"""
368
369
def nanvar(array, axis=None, ddof=0, keepdims=False):
370
"""
371
Variance ignoring NaN values.
372
373
Parameters:
374
- array: Array to compute variance of
375
- axis: int or None, axis along which to compute variance
376
- ddof: int, delta degrees of freedom for denominator
377
- keepdims: bool, if True keep reduced axes as size-one dimensions
378
379
Returns:
380
Array containing variance ignoring NaN values
381
"""
382
383
def nanstd(array, axis=None, ddof=0, keepdims=False):
384
"""
385
Standard deviation ignoring NaN values.
386
387
Parameters:
388
- array: Array to compute standard deviation of
389
- axis: int or None, axis along which to compute std dev
390
- ddof: int, delta degrees of freedom for denominator
391
- keepdims: bool, if True keep reduced axes as size-one dimensions
392
393
Returns:
394
Array containing standard deviation ignoring NaN values
395
"""
396
```
397
398
### Linear Algebra and Regression
399
400
Functions for linear algebra operations and statistical modeling on nested data structures.
401
402
```python { .api }
403
def linear_fit(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None):
404
"""
405
Perform linear regression fit.
406
407
Parameters:
408
- x: Array of independent variable values
409
- y: Array of dependent variable values
410
- weight: Array, optional weights for weighted regression
411
- axis: int or None, axis along which to perform regression
412
- keepdims: bool, if True keep reduced axes as size-one dimensions
413
- mask_identity: bool, if False return identity for empty sequences, if True return None
414
- highlevel: bool, if True return Array, if False return Content layout
415
- behavior: dict, custom behavior for the result
416
- attrs: dict, metadata attributes for the result
417
418
Returns:
419
Record (or array of records) with fields intercept, slope, intercept_error, and slope_error
420
"""
421
422
def corr(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None):
423
"""
424
Calculate Pearson correlation coefficient.
425
426
Parameters:
427
- x: Array of first variable values
428
- y: Array of second variable values
429
- weight: Array, optional weights for weighted correlation
430
- axis: int or None, axis along which to compute correlation
431
- keepdims: bool, if True keep reduced axes as size-one dimensions
432
- mask_identity: bool, if False return identity for empty sequences, if True return None
433
- highlevel: bool, if True return Array, if False return Content layout
434
- behavior: dict, custom behavior for the result
435
- attrs: dict, metadata attributes for the result
436
437
Returns:
438
Array containing correlation coefficients
439
"""
440
441
def covar(x, y, weight=None, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None):
442
"""
443
Calculate covariance.
444
445
Parameters:
446
- x: Array of first variable values
447
- y: Array of second variable values
448
- weight: Array, optional weights for weighted covariance
449
- axis: int or None, axis along which to compute covariance
450
- keepdims: bool, if True keep reduced axes as size-one dimensions
451
- mask_identity: bool, if False return identity for empty sequences, if True return None
452
- highlevel: bool, if True return Array, if False return Content layout
453
- behavior: dict, custom behavior for the result
454
- attrs: dict, metadata attributes for the result
455
456
Returns:
457
Array containing covariance values
458
"""
459
```
460
461
### Complex Number Operations
462
463
Mathematical functions specifically for complex number arrays, including component extraction and phase calculations.
464
465
```python { .api }
466
def real(array):
467
"""
468
Extract real part of complex numbers.
469
470
Parameters:
471
- array: Array of complex numbers
472
473
Returns:
474
Array containing real parts
475
"""
476
477
def imag(array):
478
"""
479
Extract imaginary part of complex numbers.
480
481
Parameters:
482
- array: Array of complex numbers
483
484
Returns:
485
Array containing imaginary parts
486
"""
487
488
def angle(array, deg=False):
489
"""
490
Calculate phase angle of complex numbers.
491
492
Parameters:
493
- array: Array of complex numbers
494
- deg: bool, if True return angles in degrees (default radians)
495
496
Returns:
497
Array containing phase angles
498
"""
499
```
500
501
### Special Mathematical Functions
502
503
Specialized mathematical operations including rounding, special value handling, and activation functions.
504
505
```python { .api }
506
def round(array, decimals=0):
507
"""
508
Round array elements to specified number of decimals.
509
510
Parameters:
511
- array: Array to round
512
- decimals: int, number of decimal places to round to
513
514
Returns:
515
Array with rounded values
516
"""
517
518
def nan_to_none(array):
519
"""
520
Convert NaN values to None.
521
522
Parameters:
523
- array: Array to convert
524
525
Returns:
526
Array with NaN values replaced by None
527
"""
528
529
def nan_to_num(array, nan=0.0, posinf=None, neginf=None):
530
"""
531
Convert NaN and infinity values to finite numbers.
532
533
Parameters:
534
- array: Array to convert
535
- nan: float, value to replace NaN with
536
- posinf: float, value to replace positive infinity with
537
- neginf: float, value to replace negative infinity with
538
539
Returns:
540
Array with special values replaced by finite numbers
541
"""
542
543
def softmax(array, axis=None):
544
"""
545
Compute softmax activation function.
546
547
Parameters:
548
- array: Array to apply softmax to
549
- axis: int or None, axis along which to compute softmax
550
551
Returns:
552
Array with softmax values (probabilities summing to 1)
553
"""
554
```
555
556
### Array Comparison and Equality
557
558
Functions for comparing arrays and testing approximate equality with appropriate handling of nested structures and missing values.
559
560
```python { .api }
561
def array_equal(a, b, check_parameters=True, check_type=True):
562
"""
563
Test if two arrays are exactly equal.
564
565
Parameters:
566
- a: First Array to compare
567
- b: Second Array to compare
568
- check_parameters: bool, if True compare type parameters
569
- check_type: bool, if True compare type information
570
571
Returns:
572
bool indicating if arrays are equal
573
"""
574
575
def almost_equal(a, b, rtol=1e-05, atol=1e-08, check_parameters=True, check_type=True):
576
"""
577
Test if two arrays are approximately equal.
578
579
Parameters:
580
- a: First Array to compare
581
- b: Second Array to compare
582
- rtol: float, relative tolerance
583
- atol: float, absolute tolerance
584
- check_parameters: bool, if True compare type parameters
585
- check_type: bool, if True compare type information
586
587
Returns:
588
bool indicating if arrays are approximately equal
589
"""
590
591
def isclose(a, b, rtol=1e-05, atol=1e-08):
592
"""
593
Element-wise test for approximate equality.
594
595
Parameters:
596
- a: First Array to compare
597
- b: Second Array to compare
598
- rtol: float, relative tolerance
599
- atol: float, absolute tolerance
600
601
Returns:
602
Array of booleans indicating element-wise approximate equality
603
"""
604
```
605
606
### Sorting and Ordering
607
608
Functions for sorting array elements and finding element positions, with proper handling of nested structures.
609
610
```python { .api }
611
def sort(array, axis=-1, ascending=True, stable=True):
612
"""
613
Sort array elements along specified axis.
614
615
Parameters:
616
- array: Array to sort
617
- axis: int, axis along which to sort
618
- ascending: bool, if True sort in ascending order
619
- stable: bool, if True use stable sorting algorithm
620
621
Returns:
622
Array with sorted elements
623
"""
624
625
def argsort(array, axis=-1, ascending=True, stable=True):
626
"""
627
Get indices that would sort array along specified axis.
628
629
Parameters:
630
- array: Array to get sort indices for
631
- axis: int, axis along which to sort
632
- ascending: bool, if True sort in ascending order
633
- stable: bool, if True use stable sorting algorithm
634
635
Returns:
636
Array containing indices that sort the input array
637
"""
638
639
def local_index(array, axis=-1):
640
"""
641
Get local position indices within each list.
642
643
Parameters:
644
- array: Array to get local indices for
645
- axis: int, axis along which to compute local indices
646
647
Returns:
648
Array containing local position indices (0, 1, 2, ... within each list)
649
"""
650
651
def run_lengths(array):
652
"""
653
Compute the lengths of runs of consecutive identical values at the deepest level of nesting.
654
655
Parameters:
656
- array: Array to compute run lengths for
657
658
Returns:
659
Array of integer run lengths with the same nesting structure as the input (only the lengths are returned, not the run values)
660
"""
661
```
662
663
## Usage Examples
664
665
### Basic Reductions
666
667
```python
668
import awkward as ak
669
670
# Nested data with variable-length lists
671
data = ak.Array([[1, 2, 3], [4], [5, 6, 7, 8]])
672
673
# Sum along inner axis (sum each list)
674
inner_sums = ak.sum(data, axis=1) # [6, 4, 26]
675
676
# Sum all elements
677
total = ak.sum(data) # 36
678
679
# Count elements in each list
680
counts = ak.count(data, axis=1) # [3, 1, 4]
681
682
# Mean of each list
683
means = ak.mean(data, axis=1) # [2.0, 4.0, 6.5]
684
```
685
686
### Statistical Analysis
687
688
```python
689
import awkward as ak
690
import numpy as np
691
692
# Data containing NaN values (NaN is a floating-point value, not a missing/None entry)
693
data = ak.Array([[1.0, 2.0, np.nan], [4.0], [5.0, 6.0, 7.0]])
694
695
# Standard statistics
696
std_devs = ak.std(data, axis=1) # Standard deviation per list
697
variances = ak.var(data, axis=1) # Variance per list
698
699
# NaN-aware operations
700
nan_means = ak.nanmean(data, axis=1) # [1.5, 4.0, 6.0]
701
nan_sums = ak.nansum(data, axis=1) # [3.0, 4.0, 18.0]
702
```
703
704
### Complex Operations
705
706
```python
707
import awkward as ak
708
709
# Record data
710
records = ak.Array([
711
{"pt": [10.0, 20.0, 15.0], "eta": [1.0, -0.5, 2.0]},
712
{"pt": [25.0, 30.0], "eta": [0.0, 1.5]}
713
])
714
715
# Cross-field operations
716
pt_eta_corr = ak.corr(records.pt, records.eta, axis=1)
717
718
# Min/max of a single field within each record
719
min_pt = ak.min(records.pt, axis=1) # [10.0, 25.0]
720
max_eta = ak.max(records.eta, axis=1) # [2.0, 1.5]
721
```
722
723
### Comparisons and Sorting
724
725
```python
726
import awkward as ak
727
728
data = ak.Array([[3, 1, 4], [2], [1, 5, 9, 2]])
729
730
# Sort each list
731
sorted_data = ak.sort(data, axis=1) # [[1, 3, 4], [2], [1, 2, 5, 9]]
732
733
# Get sort indices
734
sort_indices = ak.argsort(data, axis=1) # [[1, 0, 2], [0], [0, 3, 1, 2]]
735
736
# Array comparison
737
other = ak.Array([[3, 1, 4], [2], [1, 5, 9, 2]])
738
are_equal = ak.array_equal(data, other) # True
739
```