0
# Type System and Metadata
1
2
Awkward Array's rich type system gives precise descriptions of nested data structures, enabling static analysis, optimization, and cross-language interoperability. It covers schema management (forms), metadata handling (type parameters), and validation of complex heterogeneous data.
3
4
## Capabilities
5
6
### Type Information and Inspection
7
8
Functions for examining and working with array type information, enabling introspection and type-driven programming patterns.
9
10
```python { .api }
11
def type(array):
12
"""
13
Get complete type information for array.
14
15
Parameters:
16
- array: Array to get type information for
17
18
Returns:
19
Type object describing the array's structure and element types
20
"""
21
22
def typeof(array):
23
"""
24
Get type information as string representation.
25
26
Parameters:
27
- array: Array to get type string for
28
29
Returns:
30
str containing human-readable type description
31
"""
32
33
def typestr(array):
34
"""
35
Get concise type string representation.
36
37
Parameters:
38
- array: Array to get type string for
39
40
Returns:
41
str containing compact type description
42
"""
43
```
44
45
### High-Level Type Classes
46
47
Core type classes that represent the structure and semantics of awkward arrays, providing a rich type system for nested, heterogeneous data.
48
49
```python { .api }
50
class Type:
51
"""
52
Base class for all awkward array types.
53
"""
54
55
@property
56
def parameters(self):
57
"""Get type parameters dict."""
58
59
def __repr__(self):
60
"""String representation of type."""
61
62
def __eq__(self, other):
63
"""Test type equality."""
64
65
class ArrayType(Type):
66
"""
67
Type representing a complete array with known length.
68
"""
69
70
def __init__(self, content_type, length, parameters=None):
71
"""
72
Parameters:
73
- content_type: Type of array elements
74
- length: int, length of array
75
- parameters: dict, optional type parameters
76
"""
77
78
@property
79
def content(self):
80
"""Get content type."""
81
82
@property
83
def length(self):
84
"""Get array length."""
85
86
class ScalarType(Type):
87
"""
88
Type wrapper for scalar (single element) values.
89
"""
90
91
def __init__(self, content_type, parameters=None):
92
"""
93
Parameters:
94
- content_type: Type of the scalar value
95
- parameters: dict, optional type parameters
96
"""
97
98
@property
99
def content(self):
100
"""Get content type."""
101
102
class ListType(Type):
103
"""
104
Type for variable-length lists.
105
"""
106
107
def __init__(self, content_type, parameters=None):
108
"""
109
Parameters:
110
- content_type: Type of list elements
111
- parameters: dict, optional type parameters
112
"""
113
114
@property
115
def content(self):
116
"""Get element type."""
117
118
class RegularType(Type):
119
"""
120
Type for fixed-length arrays/lists.
121
"""
122
123
def __init__(self, content_type, size, parameters=None):
124
"""
125
Parameters:
126
- content_type: Type of array elements
127
- size: int, fixed size of arrays
128
- parameters: dict, optional type parameters
129
"""
130
131
@property
132
def content(self):
133
"""Get element type."""
134
135
@property
136
def size(self):
137
"""Get fixed size."""
138
139
class RecordType(Type):
140
"""
141
Type for record/struct data with named fields.
142
"""
143
144
def __init__(self, contents, fields=None, parameters=None):
145
"""
146
Parameters:
147
- contents: dict mapping field names to types, or list of types
148
- fields: list of str, field names (if contents is list)
149
- parameters: dict, optional type parameters
150
"""
151
152
@property
153
def contents(self):
154
"""Get field types."""
155
156
@property
157
def fields(self):
158
"""Get field names."""
159
160
def field(self, name):
161
"""Get type of specific field."""
162
163
class OptionType(Type):
164
"""
165
Type for data that may contain None/missing values.
166
"""
167
168
def __init__(self, content_type, parameters=None):
169
"""
170
Parameters:
171
- content_type: Type of non-None values
172
- parameters: dict, optional type parameters
173
"""
174
175
@property
176
def content(self):
177
"""Get content type (when not None)."""
178
179
class UnionType(Type):
180
"""
181
Type for data that can be one of several different types.
182
"""
183
184
def __init__(self, contents, parameters=None):
185
"""
186
Parameters:
187
- contents: list of Types that can appear in the union
188
- parameters: dict, optional type parameters
189
"""
190
191
@property
192
def contents(self):
193
"""Get possible types."""
194
195
def content(self, index):
196
"""Get type at specific index."""
197
198
class NumpyType(Type):
199
"""
200
Type for NumPy primitive data types.
201
"""
202
203
def __init__(self, primitive, parameters=None):
204
"""
205
Parameters:
206
- primitive: str, NumPy dtype name (e.g., 'int64', 'float32')
207
- parameters: dict, optional type parameters
208
"""
209
210
@property
211
def primitive(self):
212
"""Get primitive type name."""
213
214
class UnknownType(Type):
215
"""
216
Type for data with undetermined or unknown structure.
217
"""
218
219
def __init__(self, parameters=None):
220
"""
221
Parameters:
222
- parameters: dict, optional type parameters
223
"""
224
```
225
226
### Type Utility Functions
227
228
Helper functions for working with types, converting between type representations, and type checking operations.
229
230
```python { .api }
231
def from_datashape(datashape_str):
232
"""
233
Create Type from datashape string representation.
234
235
Parameters:
236
- datashape_str: str, datashape type specification
237
238
Returns:
239
Type object representing the datashape
240
"""
241
242
def dtype_to_primitive(dtype):
243
"""
244
Convert NumPy dtype to primitive type name.
245
246
Parameters:
247
- dtype: numpy.dtype object
248
249
Returns:
250
str representing primitive type name
251
"""
252
253
def primitive_to_dtype(primitive):
254
"""
255
Convert primitive type name to NumPy dtype.
256
257
Parameters:
258
- primitive: str, primitive type name
259
260
Returns:
261
numpy.dtype object
262
"""
263
264
def is_primitive(type_obj):
265
"""
266
Test if type represents a primitive (non-composite) type.
267
268
Parameters:
269
- type_obj: Type object to test
270
271
Returns:
272
bool indicating if type is primitive
273
"""
274
```
275
276
### Parameter Management
277
278
Functions for managing type parameters that provide metadata and customization for array behavior and interpretation.
279
280
```python { .api }
281
def parameters(array):
282
"""
283
Get parameters from array's type.
284
285
Parameters:
286
- array: Array to get parameters from
287
288
Returns:
289
dict containing type parameters
290
"""
291
292
def with_parameter(array, key, value, highlevel=True, behavior=None):
293
"""
294
Add or modify a parameter in array's type.
295
296
Parameters:
297
- array: Array to modify
298
- key: str, parameter name
299
- value: parameter value (any JSON-serializable type)
300
- highlevel: bool, if True return Array, if False return Content layout
301
- behavior: dict, custom behavior for the result
302
303
Returns:
304
Array with parameter added to type
305
"""
306
307
def without_parameters(array, highlevel=True, behavior=None):
308
"""
309
Remove all parameters from array's type.
310
311
Parameters:
312
- array: Array to modify
313
- highlevel: bool, if True return Array, if False return Content layout
314
- behavior: dict, custom behavior for the result
315
316
Returns:
317
Array with all parameters removed from type
318
"""
319
320
def with_name(array, name, highlevel=True, behavior=None):
321
"""
322
Add a name to the array's type for semantic identification.
323
324
Parameters:
325
- array: Array to name
326
- name: str, name to assign to type
327
- highlevel: bool, if True return Array, if False return Content layout
328
- behavior: dict, custom behavior for the result
329
330
Returns:
331
Array with named type
332
"""
333
```
334
335
### Form Classes (Schema Description)
336
337
Form classes provide schema descriptions that can be serialized and used to reconstruct arrays from buffers, enabling efficient serialization and cross-language interoperability.
338
339
```python { .api }
340
class Form:
341
"""
342
Base class for describing array structure/schema.
343
"""
344
345
def to_dict(self):
346
"""Convert form to dictionary representation."""
347
348
def to_json(self):
349
"""Convert form to JSON string."""
350
351
@classmethod
352
def from_dict(cls, data):
353
"""Create form from dictionary."""
354
355
@classmethod
356
def from_json(cls, json_str):
357
"""Create form from JSON string."""
358
359
class NumpyForm(Form):
360
"""
361
Form for NumPy array structure.
362
"""
363
364
def __init__(self, primitive, shape=(), has_identifier=False, parameters=None, form_key=None):
365
"""
366
Parameters:
367
- primitive: str, NumPy dtype name
368
- shape: tuple, shape of inner dimensions
369
- has_identifier: bool, whether form has identifier
370
- parameters: dict, form parameters
371
- form_key: str, unique form identifier
372
"""
373
374
class ListForm(Form):
375
"""
376
Form for variable-length list structure.
377
"""
378
379
def __init__(self, starts, stops, content, has_identifier=False, parameters=None, form_key=None):
380
"""
381
Parameters:
382
- starts: str, index type for list starts
383
- stops: str, index type for list stops
384
- content: Form, form of list elements
385
- has_identifier: bool, whether form has identifier
386
- parameters: dict, form parameters
387
- form_key: str, unique form identifier
388
"""
389
390
class ListOffsetForm(Form):
391
"""
392
Form for offset-based list structure.
393
"""
394
395
def __init__(self, offsets, content, has_identifier=False, parameters=None, form_key=None):
396
"""
397
Parameters:
398
- offsets: str, index type for offsets
399
- content: Form, form of list elements
400
- has_identifier: bool, whether form has identifier
401
- parameters: dict, form parameters
402
- form_key: str, unique form identifier
403
"""
404
405
class RegularForm(Form):
406
"""
407
Form for regular (fixed-length) array structure.
408
"""
409
410
def __init__(self, content, size, has_identifier=False, parameters=None, form_key=None):
411
"""
412
Parameters:
413
- content: Form, form of array elements
414
- size: int, fixed size of arrays
415
- has_identifier: bool, whether form has identifier
416
- parameters: dict, form parameters
417
- form_key: str, unique form identifier
418
"""
419
420
class RecordForm(Form):
421
"""
422
Form for record/struct structure.
423
"""
424
425
def __init__(self, contents, fields=None, has_identifier=False, parameters=None, form_key=None):
426
"""
427
Parameters:
428
- contents: list of Forms for each field
429
- fields: list of str, field names (None for tuple-like records)
430
- has_identifier: bool, whether form has identifier
431
- parameters: dict, form parameters
432
- form_key: str, unique form identifier
433
"""
434
435
class IndexedForm(Form):
436
"""
437
Form for indexed array structure.
438
"""
439
440
def __init__(self, index, content, has_identifier=False, parameters=None, form_key=None):
441
"""
442
Parameters:
443
- index: str, index type
444
- content: Form, form of indexed content
445
- has_identifier: bool, whether form has identifier
446
- parameters: dict, form parameters
447
- form_key: str, unique form identifier
448
"""
449
450
class IndexedOptionForm(Form):
451
"""
452
Form for indexed array with optional/missing values.
453
"""
454
455
def __init__(self, index, content, has_identifier=False, parameters=None, form_key=None):
456
"""
457
Parameters:
458
- index: str, index type
459
- content: Form, form of non-None content
460
- has_identifier: bool, whether form has identifier
461
- parameters: dict, form parameters
462
- form_key: str, unique form identifier
463
"""
464
465
class UnionForm(Form):
466
"""
467
Form for union type structure.
468
"""
469
470
def __init__(self, tags, index, contents, has_identifier=False, parameters=None, form_key=None):
471
"""
472
Parameters:
473
- tags: str, tag index type
474
- index: str, content index type
475
- contents: list of Forms for union alternatives
476
- has_identifier: bool, whether form has identifier
477
- parameters: dict, form parameters
478
- form_key: str, unique form identifier
479
"""
480
481
class UnmaskedForm(Form):
482
"""
483
Form for unmasked optional array structure.
484
"""
485
486
def __init__(self, content, has_identifier=False, parameters=None, form_key=None):
487
"""
488
Parameters:
489
- content: Form, form of content that could be None
490
- has_identifier: bool, whether form has identifier
491
- parameters: dict, form parameters
492
- form_key: str, unique form identifier
493
"""
494
495
class ByteMaskedForm(Form):
496
"""
497
Form for byte-masked array structure.
498
"""
499
500
def __init__(self, mask, content, valid_when, has_identifier=False, parameters=None, form_key=None):
501
"""
502
Parameters:
503
- mask: str, mask array type
504
- content: Form, form of masked content
505
- valid_when: bool, mask value indicating valid data
506
- has_identifier: bool, whether form has identifier
507
- parameters: dict, form parameters
508
- form_key: str, unique form identifier
509
"""
510
511
class BitMaskedForm(Form):
512
"""
513
Form for bit-masked array structure.
514
"""
515
516
def __init__(self, mask, content, valid_when, lsb_order, has_identifier=False, parameters=None, form_key=None):
517
"""
518
Parameters:
519
- mask: str, mask array type
520
- content: Form, form of masked content
521
- valid_when: bool, mask bit value indicating valid data
522
- lsb_order: bool, bit order (LSB first if True)
523
- has_identifier: bool, whether form has identifier
524
- parameters: dict, form parameters
525
- form_key: str, unique form identifier
526
"""
527
528
class EmptyForm(Form):
529
"""
530
Form for empty array structure.
531
"""
532
533
def __init__(self, has_identifier=False, parameters=None, form_key=None):
534
"""
535
Parameters:
536
- has_identifier: bool, whether form has identifier
537
- parameters: dict, form parameters
538
- form_key: str, unique form identifier
539
"""
540
```
541
542
### Form Utility Functions
543
544
Functions for creating forms from various sources and converting between form representations.
545
546
```python { .api }
547
def from_type(type_obj):
548
"""
549
Create Form from Type object.
550
551
Parameters:
552
- type_obj: Type object to convert
553
554
Returns:
555
Form representing the type structure
556
"""
557
558
def from_dtype(dtype):
559
"""
560
Create Form from NumPy dtype.
561
562
Parameters:
563
- dtype: numpy.dtype to convert
564
565
Returns:
566
NumpyForm representing the dtype
567
"""
568
```
569
570
### Array Validation
571
572
Functions for validating array structure and detecting inconsistencies or errors in data layout, along with predicates for missing values, categorical data, and tuple-like records.
573
574
```python { .api }
575
def validity_error(array, exception=False):
576
"""
577
Check array for validity errors.
578
579
Parameters:
580
- array: Array to validate
581
- exception: bool, if True raise exception on error
582
583
Returns:
584
str describing any validity errors (empty string if valid)
585
586
Raises:
587
Exception if exception=True and array is invalid
588
"""
589
590
def is_valid(array):
591
"""
592
Test if array has valid structure.
593
594
Parameters:
595
- array: Array to test
596
597
Returns:
598
bool: True if the array's structure has no validity errors, False otherwise (a single value for the whole array, not one per element)
599
"""
600
601
def is_none(array):
602
"""
603
Test which elements are None/missing.
604
605
Parameters:
606
- array: Array to test
607
608
Returns:
609
Array of booleans indicating which elements are None
610
"""
611
612
def is_categorical(array):
613
"""
614
Test if array uses categorical representation.
615
616
Parameters:
617
- array: Array to test
618
619
Returns:
620
bool indicating if array is categorical
621
"""
622
623
def is_tuple(array):
624
"""
625
Test if array represents tuple data (records without field names).
626
627
Parameters:
628
- array: Array to test
629
630
Returns:
631
bool indicating if array contains tuples
632
"""
633
```
634
635
### Type Enforcement and Conversion
636
637
Functions for enforcing specific types and converting between compatible type representations.
638
639
```python { .api }
640
def enforce_type(array, type_obj, highlevel=True, behavior=None):
641
"""
642
Convert array to match specified type structure.
643
644
Parameters:
645
- array: Array to convert
646
- type_obj: Type or str specifying target type
647
- highlevel: bool, if True return Array, if False return Content layout
648
- behavior: dict, custom behavior for the result
649
650
Returns:
651
Array converted to match target type
652
653
Raises:
654
TypeError if conversion is not possible
655
"""
656
657
def merge_option_of_records(array, highlevel=True, behavior=None):
658
"""
659
Simplify options of records into records of options, e.g. turn `?{a: int64}` into `{a: ?int64}` so that a missing record becomes a record whose fields are all None.
660
661
Parameters:
662
- array: Array with optional records to merge
663
- highlevel: bool, if True return Array, if False return Content layout
664
- behavior: dict, custom behavior for the result
665
666
Returns:
667
Array with merged optional record structure
668
"""
669
670
def merge_union_of_records(array, highlevel=True, behavior=None):
671
"""
672
Merge records in a union type into a single record type.
673
674
Parameters:
675
- array: Array with union of records to merge
676
- highlevel: bool, if True return Array, if False return Content layout
677
- behavior: dict, custom behavior for the result
678
679
Returns:
680
Array with merged record structure
681
"""
682
```
683
684
## Usage Examples
685
686
### Type Inspection
687
688
```python
689
import awkward as ak
690
691
# Create nested array with mixed types
692
data = ak.Array([
693
{"x": [1, 2, 3], "y": 3.14, "name": "alice"},
694
{"x": [4], "y": 2.71, "name": "bob"}
695
])
696
697
# Inspect type information
698
print(ak.type(data))
699
# 2 * {x: var * int64, y: float64, name: string}
700
701
print(ak.typeof(data))
702
# "2 * {x: var * int64, y: float64, name: string}"
703
704
# Check specific properties
705
print(ak.is_tuple(data)) # False (has field names)
706
print(ak.is_categorical(data)) # False
707
print(ak.fields(data)) # ["x", "y", "name"]
708
```
709
710
### Type Parameters
711
712
```python
713
import awkward as ak
714
715
# Add semantic meaning via parameters
716
physics_data = ak.Array([[1.0, 2.0], [3.0, 4.0]])
717
momentum = ak.with_parameter(physics_data, "units", "GeV/c")
718
momentum = ak.with_parameter(momentum, "quantity", "momentum")
719
720
# Access parameters
721
print(ak.parameters(momentum))
722
# {"units": "GeV/c", "quantity": "momentum"}
723
724
# Name the type for clarity
725
named_momentum = ak.with_name(momentum, "Momentum")
726
print(ak.typeof(named_momentum))
727
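# NOTE: with_name names record types; this array holds lists of floats and no records, so "Momentum" is only expected to appear when the array contains records (verify against the Awkward Array version in use)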
728
```
729
730
### Form Serialization
731
732
```python
733
import awkward as ak
734
735
# Get form from array
736
data = ak.Array([{"a": [1, 2], "b": 3}, {"a": [4], "b": 5}])
737
form = data.layout.form
738
739
# Serialize to JSON
740
form_json = form.to_json()
741
print(form_json)
742
743
# Recreate form from JSON
744
restored_form = ak.forms.Form.from_json(form_json)
745
746
# Forms can be used with buffers to reconstruct arrays
747
```
748
749
### Type Validation
750
751
```python
752
import awkward as ak
753
import numpy as np
754
755
# Create potentially invalid data
756
data = ak.Array([[1, 2, 3], [4, np.nan], []])
757
758
# Check validity
759
validity = ak.is_valid(data)  # True (a single bool for the whole array)
760
none_check = ak.is_none(data) # [False, False, False]
761
762
# Check for structural errors
763
error_msg = ak.validity_error(data)
764
if error_msg:
765
print(f"Validation error: {error_msg}")
766
else:
767
print("Array is valid")
768
```
769
770
### Type Enforcement
771
772
```python
773
import awkward as ak
774
775
# Create array that could be regularized
776
irregular = ak.Array([[1, 2], [3, 4], [5, 6]]) # All length 2
777
778
# Convert to regular array
779
regular = ak.to_regular(irregular)
780
print(ak.type(regular)) # 3 * 2 * int64
781
782
# Enforce specific type
783
target_type = "var * float64"
784
float_array = ak.enforce_type(ak.Array([[1, 2], [3]]), target_type)
785
print(ak.type(float_array)) # 2 * var * float64
786
```
787
788
### Complex Type Structures
789
790
```python
791
import awkward as ak
792
793
# Union type (multiple possible types per element)
794
mixed = ak.Array([1, "hello", [1, 2, 3], {"x": 5}])
795
print(ak.type(mixed)) # Shows union of int64, string, list, record
796
797
# Optional records (may be None)
798
optional_records = ak.Array([{"a": 1}, None, {"a": 2}])
799
print(ak.type(optional_records)) # option type containing record
800
801
# Nested complex structures
802
nested = ak.Array([
803
[{"particles": [{"pt": 10.0, "eta": 1.0}]}, None],
804
[{"particles": []}]
805
])
806
print(ak.type(nested)) # Deep nesting with options
807
```