or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

array-creation.md, array-manipulation.md, data-conversion.md, index.md, integration.md, mathematical-operations.md, string-operations.md, type-system.md

docs/type-system.md

0

# Type System and Metadata

1

2

A rich type system provides precise descriptions of nested data structures, enabling static analysis, optimization, and cross-language interoperability. It includes schema management, metadata handling, and comprehensive validation capabilities for complex heterogeneous data.

3

4

## Capabilities

5

6

### Type Information and Inspection

7

8

Functions for examining and working with array type information, enabling introspection and type-driven programming patterns.

9

10

```python { .api }

11

def type(array):

12

"""

13

Get complete type information for array.

14

15

Parameters:

16

- array: Array to get type information for

17

18

Returns:

19

Type object describing the array's structure and element types

20

"""

21

22

def typeof(array):

23

"""

24

Get type information as string representation.

25

26

Parameters:

27

- array: Array to get type string for

28

29

Returns:

30

str containing human-readable type description

31

"""

32

33

def typestr(array):

34

"""

35

Get concise type string representation.

36

37

Parameters:

38

- array: Array to get type string for

39

40

Returns:

41

str containing compact type description

42

"""

43

```

44

45

### High-Level Type Classes

46

47

Core type classes that represent the structure and semantics of Awkward Arrays, providing a rich type system for nested, heterogeneous data.

48

49

```python { .api }

50

class Type:

51

"""

52

Base class for all awkward array types.

53

"""

54

55

@property

56

def parameters(self):

57

"""Get type parameters dict."""

58

59

def __repr__(self):

60

"""String representation of type."""

61

62

def __eq__(self, other):

63

"""Test type equality."""

64

65

class ArrayType(Type):

66

"""

67

Type representing a complete array with known length.

68

"""

69

70

def __init__(self, content_type, length, parameters=None):

71

"""

72

Parameters:

73

- content_type: Type of array elements

74

- length: int, length of array

75

- parameters: dict, optional type parameters

76

"""

77

78

@property

79

def content(self):

80

"""Get content type."""

81

82

@property

83

def length(self):

84

"""Get array length."""

85

86

class ScalarType(Type):

87

"""

88

Type wrapper for scalar (single element) values.

89

"""

90

91

def __init__(self, content_type, parameters=None):

92

"""

93

Parameters:

94

- content_type: Type of the scalar value

95

- parameters: dict, optional type parameters

96

"""

97

98

@property

99

def content(self):

100

"""Get content type."""

101

102

class ListType(Type):

103

"""

104

Type for variable-length lists.

105

"""

106

107

def __init__(self, content_type, parameters=None):

108

"""

109

Parameters:

110

- content_type: Type of list elements

111

- parameters: dict, optional type parameters

112

"""

113

114

@property

115

def content(self):

116

"""Get element type."""

117

118

class RegularType(Type):

119

"""

120

Type for fixed-length arrays/lists.

121

"""

122

123

def __init__(self, content_type, size, parameters=None):

124

"""

125

Parameters:

126

- content_type: Type of array elements

127

- size: int, fixed size of arrays

128

- parameters: dict, optional type parameters

129

"""

130

131

@property

132

def content(self):

133

"""Get element type."""

134

135

@property

136

def size(self):

137

"""Get fixed size."""

138

139

class RecordType(Type):

140

"""

141

Type for record/struct data with named fields.

142

"""

143

144

def __init__(self, contents, fields=None, parameters=None):

145

"""

146

Parameters:

147

- contents: dict mapping field names to types, or list of types

148

- fields: list of str, field names (if contents is list)

149

- parameters: dict, optional type parameters

150

"""

151

152

@property

153

def contents(self):

154

"""Get field types."""

155

156

@property

157

def fields(self):

158

"""Get field names."""

159

160

def field(self, name):

161

"""Get type of specific field."""

162

163

class OptionType(Type):

164

"""

165

Type for data that may contain None/missing values.

166

"""

167

168

def __init__(self, content_type, parameters=None):

169

"""

170

Parameters:

171

- content_type: Type of non-None values

172

- parameters: dict, optional type parameters

173

"""

174

175

@property

176

def content(self):

177

"""Get content type (when not None)."""

178

179

class UnionType(Type):

180

"""

181

Type for data that can be one of several different types.

182

"""

183

184

def __init__(self, contents, parameters=None):

185

"""

186

Parameters:

187

- contents: list of Types that can appear in the union

188

- parameters: dict, optional type parameters

189

"""

190

191

@property

192

def contents(self):

193

"""Get possible types."""

194

195

def content(self, index):

196

"""Get type at specific index."""

197

198

class NumpyType(Type):

199

"""

200

Type for NumPy primitive data types.

201

"""

202

203

def __init__(self, primitive, parameters=None):

204

"""

205

Parameters:

206

- primitive: str, NumPy dtype name (e.g., 'int64', 'float32')

207

- parameters: dict, optional type parameters

208

"""

209

210

@property

211

def primitive(self):

212

"""Get primitive type name."""

213

214

class UnknownType(Type):

215

"""

216

Type for data with undetermined or unknown structure.

217

"""

218

219

def __init__(self, parameters=None):

220

"""

221

Parameters:

222

- parameters: dict, optional type parameters

223

"""

224

```

225

226

### Type Utility Functions

227

228

Helper functions for working with types, converting between type representations, and type checking operations.

229

230

```python { .api }

231

def from_datashape(datashape_str):

232

"""

233

Create Type from datashape string representation.

234

235

Parameters:

236

- datashape_str: str, datashape type specification

237

238

Returns:

239

Type object representing the datashape

240

"""

241

242

def dtype_to_primitive(dtype):

243

"""

244

Convert NumPy dtype to primitive type name.

245

246

Parameters:

247

- dtype: numpy.dtype object

248

249

Returns:

250

str representing primitive type name

251

"""

252

253

def primitive_to_dtype(primitive):

254

"""

255

Convert primitive type name to NumPy dtype.

256

257

Parameters:

258

- primitive: str, primitive type name

259

260

Returns:

261

numpy.dtype object

262

"""

263

264

def is_primitive(type_obj):

265

"""

266

Test if type represents a primitive (non-composite) type.

267

268

Parameters:

269

- type_obj: Type object to test

270

271

Returns:

272

bool indicating if type is primitive

273

"""

274

```

275

276

### Parameter Management

277

278

Functions for managing type parameters that provide metadata and customization for array behavior and interpretation.

279

280

```python { .api }

281

def parameters(array):

282

"""

283

Get parameters from array's type.

284

285

Parameters:

286

- array: Array to get parameters from

287

288

Returns:

289

dict containing type parameters

290

"""

291

292

def with_parameter(array, key, value, highlevel=True, behavior=None):

293

"""

294

Add or modify a parameter in array's type.

295

296

Parameters:

297

- array: Array to modify

298

- key: str, parameter name

299

- value: parameter value (any JSON-serializable type)

300

- highlevel: bool, if True return Array, if False return Content layout

301

- behavior: dict, custom behavior for the result

302

303

Returns:

304

Array with parameter added to type

305

"""

306

307

def without_parameters(array, highlevel=True, behavior=None):

308

"""

309

Remove all parameters from array's type.

310

311

Parameters:

312

- array: Array to modify

313

- highlevel: bool, if True return Array, if False return Content layout

314

- behavior: dict, custom behavior for the result

315

316

Returns:

317

Array with all parameters removed from type

318

"""

319

320

def with_name(array, name, highlevel=True, behavior=None):

321

"""

322

Add a name to the array's type for semantic identification.

323

324

Parameters:

325

- array: Array to name

326

- name: str, name to assign to type

327

- highlevel: bool, if True return Array, if False return Content layout

328

- behavior: dict, custom behavior for the result

329

330

Returns:

331

Array with named type

332

"""

333

```

334

335

### Form Classes (Schema Description)

336

337

Form classes provide schema descriptions that can be serialized and used to reconstruct arrays from buffers, enabling efficient serialization and cross-language interoperability.

338

339

```python { .api }

340

class Form:

341

"""

342

Base class for describing array structure/schema.

343

"""

344

345

def to_dict(self):

346

"""Convert form to dictionary representation."""

347

348

def to_json(self):

349

"""Convert form to JSON string."""

350

351

@classmethod

352

def from_dict(cls, data):

353

"""Create form from dictionary."""

354

355

@classmethod

356

def from_json(cls, json_str):

357

"""Create form from JSON string."""

358

359

class NumpyForm(Form):

360

"""

361

Form for NumPy array structure.

362

"""

363

364

def __init__(self, primitive, shape=(), has_identifier=False, parameters=None, form_key=None):

365

"""

366

Parameters:

367

- primitive: str, NumPy dtype name

368

- shape: tuple, shape of inner dimensions

369

- has_identifier: bool, whether form has identifier

370

- parameters: dict, form parameters

371

- form_key: str, unique form identifier

372

"""

373

374

class ListForm(Form):

375

"""

376

Form for variable-length list structure.

377

"""

378

379

def __init__(self, starts, stops, content, has_identifier=False, parameters=None, form_key=None):

380

"""

381

Parameters:

382

- starts: str, index type for list starts

383

- stops: str, index type for list stops

384

- content: Form, form of list elements

385

- has_identifier: bool, whether form has identifier

386

- parameters: dict, form parameters

387

- form_key: str, unique form identifier

388

"""

389

390

class ListOffsetForm(Form):

391

"""

392

Form for offset-based list structure.

393

"""

394

395

def __init__(self, offsets, content, has_identifier=False, parameters=None, form_key=None):

396

"""

397

Parameters:

398

- offsets: str, index type for offsets

399

- content: Form, form of list elements

400

- has_identifier: bool, whether form has identifier

401

- parameters: dict, form parameters

402

- form_key: str, unique form identifier

403

"""

404

405

class RegularForm(Form):

406

"""

407

Form for regular (fixed-length) array structure.

408

"""

409

410

def __init__(self, content, size, has_identifier=False, parameters=None, form_key=None):

411

"""

412

Parameters:

413

- content: Form, form of array elements

414

- size: int, fixed size of arrays

415

- has_identifier: bool, whether form has identifier

416

- parameters: dict, form parameters

417

- form_key: str, unique form identifier

418

"""

419

420

class RecordForm(Form):

421

"""

422

Form for record/struct structure.

423

"""

424

425

def __init__(self, contents, fields=None, has_identifier=False, parameters=None, form_key=None):

426

"""

427

Parameters:

428

- contents: list of Forms for each field

429

- fields: list of str, field names (None for tuple-like records)

430

- has_identifier: bool, whether form has identifier

431

- parameters: dict, form parameters

432

- form_key: str, unique form identifier

433

"""

434

435

class IndexedForm(Form):

436

"""

437

Form for indexed array structure.

438

"""

439

440

def __init__(self, index, content, has_identifier=False, parameters=None, form_key=None):

441

"""

442

Parameters:

443

- index: str, index type

444

- content: Form, form of indexed content

445

- has_identifier: bool, whether form has identifier

446

- parameters: dict, form parameters

447

- form_key: str, unique form identifier

448

"""

449

450

class IndexedOptionForm(Form):

451

"""

452

Form for indexed array with optional/missing values.

453

"""

454

455

def __init__(self, index, content, has_identifier=False, parameters=None, form_key=None):

456

"""

457

Parameters:

458

- index: str, index type

459

- content: Form, form of non-None content

460

- has_identifier: bool, whether form has identifier

461

- parameters: dict, form parameters

462

- form_key: str, unique form identifier

463

"""

464

465

class UnionForm(Form):

466

"""

467

Form for union type structure.

468

"""

469

470

def __init__(self, tags, index, contents, has_identifier=False, parameters=None, form_key=None):

471

"""

472

Parameters:

473

- tags: str, tag index type

474

- index: str, content index type

475

- contents: list of Forms for union alternatives

476

- has_identifier: bool, whether form has identifier

477

- parameters: dict, form parameters

478

- form_key: str, unique form identifier

479

"""

480

481

class UnmaskedForm(Form):

482

"""

483

Form for unmasked optional array structure.

484

"""

485

486

def __init__(self, content, has_identifier=False, parameters=None, form_key=None):

487

"""

488

Parameters:

489

- content: Form, form of content that could be None

490

- has_identifier: bool, whether form has identifier

491

- parameters: dict, form parameters

492

- form_key: str, unique form identifier

493

"""

494

495

class ByteMaskedForm(Form):

496

"""

497

Form for byte-masked array structure.

498

"""

499

500

def __init__(self, mask, content, valid_when, has_identifier=False, parameters=None, form_key=None):

501

"""

502

Parameters:

503

- mask: str, mask array type

504

- content: Form, form of masked content

505

- valid_when: bool, mask value indicating valid data

506

- has_identifier: bool, whether form has identifier

507

- parameters: dict, form parameters

508

- form_key: str, unique form identifier

509

"""

510

511

class BitMaskedForm(Form):

512

"""

513

Form for bit-masked array structure.

514

"""

515

516

def __init__(self, mask, content, valid_when, lsb_order, has_identifier=False, parameters=None, form_key=None):

517

"""

518

Parameters:

519

- mask: str, mask array type

520

- content: Form, form of masked content

521

- valid_when: bool, mask bit value indicating valid data

522

- lsb_order: bool, bit order (LSB first if True)

523

- has_identifier: bool, whether form has identifier

524

- parameters: dict, form parameters

525

- form_key: str, unique form identifier

526

"""

527

528

class EmptyForm(Form):

529

"""

530

Form for empty array structure.

531

"""

532

533

def __init__(self, has_identifier=False, parameters=None, form_key=None):

534

"""

535

Parameters:

536

- has_identifier: bool, whether form has identifier

537

- parameters: dict, form parameters

538

- form_key: str, unique form identifier

539

"""

540

```

541

542

### Form Utility Functions

543

544

Functions for creating forms from various sources and converting between form representations.

545

546

```python { .api }

547

def from_type(type_obj):

548

"""

549

Create Form from Type object.

550

551

Parameters:

552

- type_obj: Type object to convert

553

554

Returns:

555

Form representing the type structure

556

"""

557

558

def from_dtype(dtype):

559

"""

560

Create Form from NumPy dtype.

561

562

Parameters:

563

- dtype: numpy.dtype to convert

564

565

Returns:

566

NumpyForm representing the dtype

567

"""

568

```

569

570

### Array Validation

571

572

Functions for validating array structure and detecting inconsistencies or errors in data layout.

573

574

```python { .api }

575

def validity_error(array, exception=False):

576

"""

577

Check array for validity errors.

578

579

Parameters:

580

- array: Array to validate

581

- exception: bool, if True raise exception on error

582

583

Returns:

584

str describing any validity errors (empty string if valid)

585

586

Raises:

587

Exception if exception=True and array is invalid

588

"""

589

590

def is_valid(array):

591

"""

592

Test if array has valid structure.

593

594

Parameters:

595

- array: Array to test

596

597

Returns:

598

bool, True if the whole array has a valid structure, False otherwise

599

"""

600

601

def is_none(array):

602

"""

603

Test which elements are None/missing.

604

605

Parameters:

606

- array: Array to test

607

608

Returns:

609

Array of booleans indicating which elements are None

610

"""

611

612

def is_categorical(array):

613

"""

614

Test if array uses categorical representation.

615

616

Parameters:

617

- array: Array to test

618

619

Returns:

620

bool indicating if array is categorical

621

"""

622

623

def is_tuple(array):

624

"""

625

Test if array represents tuple data (records without field names).

626

627

Parameters:

628

- array: Array to test

629

630

Returns:

631

bool indicating if array contains tuples

632

"""

633

```

634

635

### Type Enforcement and Conversion

636

637

Functions for enforcing specific types and converting between compatible type representations.

638

639

```python { .api }

640

def enforce_type(array, type_obj, highlevel=True, behavior=None):

641

"""

642

Convert array to match specified type structure.

643

644

Parameters:

645

- array: Array to convert

646

- type_obj: Type or str specifying target type

647

- highlevel: bool, if True return Array, if False return Content layout

648

- behavior: dict, custom behavior for the result

649

650

Returns:

651

Array converted to match target type

652

653

Raises:

654

TypeError if conversion is not possible

655

"""

656

657

def merge_option_of_records(array, highlevel=True, behavior=None):

658

"""

659

Merge record fields that may be None into a single optional record.

660

661

Parameters:

662

- array: Array with optional records to merge

663

- highlevel: bool, if True return Array, if False return Content layout

664

- behavior: dict, custom behavior for the result

665

666

Returns:

667

Array with merged optional record structure

668

"""

669

670

def merge_union_of_records(array, highlevel=True, behavior=None):

671

"""

672

Merge records in a union type into a single record type.

673

674

Parameters:

675

- array: Array with union of records to merge

676

- highlevel: bool, if True return Array, if False return Content layout

677

- behavior: dict, custom behavior for the result

678

679

Returns:

680

Array with merged record structure

681

"""

682

```

683

684

## Usage Examples

685

686

### Type Inspection

687

688

```python

689

import awkward as ak

690

691

# Create nested array with mixed types

692

data = ak.Array([

693

{"x": [1, 2, 3], "y": 3.14, "name": "alice"},

694

{"x": [4], "y": 2.71, "name": "bob"}

695

])

696

697

# Inspect type information

698

print(ak.type(data))

699

# 2 * {x: var * int64, y: float64, name: string}

700

701

print(ak.typeof(data))

702

# "2 * {x: var * int64, y: float64, name: string}"

703

704

# Check specific properties

705

print(ak.is_tuple(data)) # False (has field names)

706

print(ak.is_categorical(data)) # False

707

print(ak.fields(data)) # ["x", "y", "name"]

708

```

709

710

### Type Parameters

711

712

```python

713

import awkward as ak

714

715

# Add semantic meaning via parameters

716

physics_data = ak.Array([[1.0, 2.0], [3.0, 4.0]])

717

momentum = ak.with_parameter(physics_data, "units", "GeV/c")

718

momentum = ak.with_parameter(momentum, "quantity", "momentum")

719

720

# Access parameters

721

print(ak.parameters(momentum))

722

# {"units": "GeV/c", "quantity": "momentum"}

723

724

# Name the type for clarity

725

named_momentum = ak.with_name(momentum, "Momentum")

726

print(ak.typeof(named_momentum))

727

# Contains type name "Momentum"

728

```

729

730

### Form Serialization

731

732

```python

733

import awkward as ak

734

735

# Get form from array

736

data = ak.Array([{"a": [1, 2], "b": 3}, {"a": [4], "b": 5}])

737

form = data.layout.form

738

739

# Serialize to JSON

740

form_json = form.to_json()

741

print(form_json)

742

743

# Recreate form from JSON

744

restored_form = ak.forms.Form.from_json(form_json)

745

746

# Forms can be used with buffers to reconstruct arrays

747

```

748

749

### Type Validation

750

751

```python

752

import awkward as ak

753

import numpy as np

754

755

# Create potentially invalid data

756

data = ak.Array([[1, 2, 3], [4, np.nan], []])

757

758

# Check validity

759

validity = ak.is_valid(data) # True (a single bool for the whole array)

760

none_check = ak.is_none(data) # [False, False, False]

761

762

# Check for structural errors

763

error_msg = ak.validity_error(data)

764

if error_msg:

765

print(f"Validation error: {error_msg}")

766

else:

767

print("Array is valid")

768

```

769

770

### Type Enforcement

771

772

```python

773

import awkward as ak

774

775

# Create array that could be regularized

776

irregular = ak.Array([[1, 2], [3, 4], [5, 6]]) # All length 2

777

778

# Convert to regular array

779

regular = ak.to_regular(irregular)

780

print(ak.type(regular)) # 3 * 2 * int64

781

782

# Enforce specific type

783

target_type = "var * float64"

784

float_array = ak.enforce_type(ak.Array([[1, 2], [3]]), target_type)

785

print(ak.type(float_array)) # 2 * var * float64

786

```

787

788

### Complex Type Structures

789

790

```python

791

import awkward as ak

792

793

# Union type (multiple possible types per element)

794

mixed = ak.Array([1, "hello", [1, 2, 3], {"x": 5}])

795

print(ak.type(mixed)) # Shows union of int64, string, list, record

796

797

# Optional records (may be None)

798

optional_records = ak.Array([{"a": 1}, None, {"a": 2}])

799

print(ak.type(optional_records)) # option type containing record

800

801

# Nested complex structures

802

nested = ak.Array([

803

[{"particles": [{"pt": 10.0, "eta": 1.0}]}, None],

804

[{"particles": []}]

805

])

806

print(ak.type(nested)) # Deep nesting with options

807

```