or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

array-creation.md array-manipulation.md data-conversion.md index.md integration.md mathematical-operations.md string-operations.md type-system.md

docs/array-creation.md

0

# Array Creation and Construction

1

2

Comprehensive functions for creating awkward arrays from various data sources including Python iterables, NumPy arrays, JSON data, binary formats, and other array libraries. Supports both direct construction and incremental building for complex nested structures.

3

4

## Capabilities

5

6

### From Python Data

7

8

Create arrays directly from Python lists, tuples, dictionaries, and other iterables, automatically inferring the appropriate nested structure and data types.

9

10

```python { .api }

11

def from_iter(iterable, *, allow_record=True, highlevel=True, behavior=None, attrs=None, initial=1024, resize=8):

12

"""

13

Create an array from a Python iterable.

14

15

Parameters:

16

- iterable: Nested Python data structure (lists, tuples, dicts, etc.)

17

- allow_record: bool, if False, prohibit record types at the outermost level

18

- highlevel: bool, if True return Array, if False return Content layout

19

- behavior: dict, custom behavior for the array

20

- attrs: dict, metadata attributes for the array

21

- initial: int, initial size in bytes for buffers

22

- resize: float, resize multiplier for buffers (> 1.0)

23

24

Returns:

25

Array or Content layout containing the iterable data

26

"""

27

28

def from_numpy(array, highlevel=True, behavior=None):

29

"""

30

Create an array from a NumPy array.

31

32

Parameters:

33

- array: numpy.ndarray to convert

34

- highlevel: bool, if True return Array, if False return Content layout

35

- behavior: dict, custom behavior for the array

36

37

Returns:

38

Array or Content layout wrapping the NumPy data

39

"""

40

41

def from_regular(array, axis=1, highlevel=True, behavior=None):

42

"""

43

Convert a variable-length axis of an array into a regular (fixed-size) axis, if all lists along that axis have the same length.

44

45

Parameters:

46

- array: Regular array-like structure

47

- axis: int, axis along which to interpret regularity

48

- highlevel: bool, if True return Array, if False return Content layout

49

- behavior: dict, custom behavior for the array

50

51

Returns:

52

Array with RegularArray layout for the specified axis

53

"""

54

```

55

56

### From Structured Data Formats

57

58

Create arrays from structured data formats like JSON, maintaining the hierarchical structure and supporting mixed data types within the same array.

59

60

```python { .api }

61

def from_json(source, highlevel=True, behavior=None, nan_string=None,

62

infinity_string=None, minus_infinity_string=None):

63

"""

64

Parse JSON data into an array.

65

66

Parameters:

67

- source: JSON string, bytes, or file-like object

68

- highlevel: bool, if True return Array, if False return Content layout

69

- behavior: dict, custom behavior for the array

70

- nan_string: str, string to interpret as NaN

71

- infinity_string: str, string to interpret as positive infinity

72

- minus_infinity_string: str, string to interpret as negative infinity

73

74

Returns:

75

Array containing the parsed JSON data

76

"""

77

78

def from_buffers(form, length, container, buffer_key=None, highlevel=True, behavior=None):

79

"""

80

Create an array from a Form description and data buffers.

81

82

Parameters:

83

- form: Form describing the array structure

84

- length: int, length of the array

85

- container: dict-like object containing named buffers

86

- buffer_key: callable, function to generate buffer keys

87

- highlevel: bool, if True return Array, if False return Content layout

88

- behavior: dict, custom behavior for the array

89

90

Returns:

91

Array reconstructed from the form and buffers

92

"""

93

```

94

95

### From File Formats

96

97

Direct reading from various file formats commonly used in scientific computing and data analysis, with support for chunked reading and metadata preservation.

98

99

```python { .api }

100

def from_parquet(path, columns=None, row_groups=None, lazy=False,

101

lazy_cache="new", lazy_cache_key=None, highlevel=True, behavior=None):

102

"""

103

Read array data from Parquet files.

104

105

Parameters:

106

- path: str or file-like, Parquet file path or object

107

- columns: list of str, columns to read (None for all)

108

- row_groups: list of int, row groups to read (None for all)

109

- lazy: bool, if True create lazy array

110

- lazy_cache: str or dict, cache configuration for lazy arrays

111

- lazy_cache_key: str, cache key for lazy arrays

112

- highlevel: bool, if True return Array, if False return Content layout

113

- behavior: dict, custom behavior for the array

114

115

Returns:

116

Array containing the Parquet data

117

"""

118

119

def from_feather(file, columns=None, highlevel=True, behavior=None):

120

"""

121

Read array data from Feather/Arrow IPC files.

122

123

Parameters:

124

- file: str or file-like, Feather file path or object

125

- columns: list of str, columns to read (None for all)

126

- highlevel: bool, if True return Array, if False return Content layout

127

- behavior: dict, custom behavior for the array

128

129

Returns:

130

Array containing the Feather data

131

"""

132

133

def from_avro_file(file, highlevel=True, behavior=None):

134

"""

135

Read array data from Avro files.

136

137

Parameters:

138

- file: str or file-like, Avro file path or object

139

- highlevel: bool, if True return Array, if False return Content layout

140

- behavior: dict, custom behavior for the array

141

142

Returns:

143

Array containing the Avro data

144

"""

145

146

def metadata_from_parquet(path):

147

"""

148

Extract metadata from Parquet files without reading data.

149

150

Parameters:

151

- path: str, path to Parquet file

152

153

Returns:

154

dict containing Parquet metadata information

155

"""

156

```

157

158

### From Other Array Libraries

159

160

Seamless integration with popular array libraries and machine learning frameworks, preserving data structure and enabling cross-ecosystem workflows.

161

162

```python { .api }

163

def from_arrow(array, highlevel=True, behavior=None):

164

"""

165

Create an array from Apache Arrow data.

166

167

Parameters:

168

- array: pyarrow.Array or pyarrow.ChunkedArray

169

- highlevel: bool, if True return Array, if False return Content layout

170

- behavior: dict, custom behavior for the array

171

172

Returns:

173

Array containing the Arrow data

174

"""

175

176

def from_arrow_schema(schema, highlevel=True, behavior=None):

177

"""

178

Create an empty array from an Apache Arrow schema.

179

180

Parameters:

181

- schema: pyarrow.Schema describing the array structure

182

- highlevel: bool, if True return Array, if False return Content layout

183

- behavior: dict, custom behavior for the array

184

185

Returns:

186

Empty Array with the specified schema

187

"""

188

189

def from_torch(array, highlevel=True, behavior=None):

190

"""

191

Create an array from a PyTorch tensor.

192

193

Parameters:

194

- array: torch.Tensor to convert

195

- highlevel: bool, if True return Array, if False return Content layout

196

- behavior: dict, custom behavior for the array

197

198

Returns:

199

Array containing the PyTorch tensor data

200

"""

201

202

def from_tensorflow(array, highlevel=True, behavior=None):

203

"""

204

Create an array from a TensorFlow tensor.

205

206

Parameters:

207

- array: tf.Tensor to convert

208

- highlevel: bool, if True return Array, if False return Content layout

209

- behavior: dict, custom behavior for the array

210

211

Returns:

212

Array containing the TensorFlow tensor data

213

"""

214

215

def from_raggedtensor(tensor, highlevel=True, behavior=None):

216

"""

217

Create an array from a TensorFlow RaggedTensor.

218

219

Parameters:

220

- tensor: tf.RaggedTensor to convert

221

- highlevel: bool, if True return Array, if False return Content layout

222

- behavior: dict, custom behavior for the array

223

224

Returns:

225

Array containing the RaggedTensor data

226

"""

227

228

def from_jax(array, highlevel=True, behavior=None):

229

"""

230

Create an array from a JAX array.

231

232

Parameters:

233

- array: jax.numpy.ndarray to convert

234

- highlevel: bool, if True return Array, if False return Content layout

235

- behavior: dict, custom behavior for the array

236

237

Returns:

238

Array containing the JAX array data

239

"""

240

241

def from_cupy(array, highlevel=True, behavior=None):

242

"""

243

Create an array from a CuPy array.

244

245

Parameters:

246

- array: cupy.ndarray to convert

247

- highlevel: bool, if True return Array, if False return Content layout

248

- behavior: dict, custom behavior for the array

249

250

Returns:

251

Array containing the CuPy array data

252

"""

253

254

def from_dlpack(tensor, highlevel=True, behavior=None):

255

"""

256

Create an array from a DLPack tensor.

257

258

Parameters:

259

- tensor: DLPack tensor capsule

260

- highlevel: bool, if True return Array, if False return Content layout

261

- behavior: dict, custom behavior for the array

262

263

Returns:

264

Array containing the DLPack tensor data

265

"""

266

```

267

268

### Specialized Construction

269

270

Functions for creating arrays with specific patterns or from specialized data sources common in scientific computing workflows.

271

272

```python { .api }

273

def from_categorical(array, highlevel=True, behavior=None):

274

"""

275

Create an array from categorical data representation.

276

277

Parameters:

278

- array: Categorical data structure

279

- highlevel: bool, if True return Array, if False return Content layout

280

- behavior: dict, custom behavior for the array

281

282

Returns:

283

Array with categorical data structure

284

"""

285

286

def from_rdataframe(df, highlevel=True, behavior=None):

287

"""

288

Create an array from a ROOT RDataFrame.

289

290

Parameters:

291

- df: ROOT.RDataFrame object

292

- highlevel: bool, if True return Array, if False return Content layout

293

- behavior: dict, custom behavior for the array

294

295

Returns:

296

Array containing the RDataFrame data

297

"""

298

299

def zeros_like(array, highlevel=True, behavior=None):

300

"""

301

Create an array of zeros with the same structure as input.

302

303

Parameters:

304

- array: Array whose structure to copy

305

- highlevel: bool, if True return Array, if False return Content layout

306

- behavior: dict, custom behavior for the array

307

308

Returns:

309

Array filled with zeros matching input structure

310

"""

311

312

def ones_like(array, highlevel=True, behavior=None):

313

"""

314

Create an array of ones with the same structure as input.

315

316

Parameters:

317

- array: Array whose structure to copy

318

- highlevel: bool, if True return Array, if False return Content layout

319

- behavior: dict, custom behavior for the array

320

321

Returns:

322

Array filled with ones matching input structure

323

"""

324

325

def full_like(array, fill_value, highlevel=True, behavior=None):

326

"""

327

Create an array filled with a value, matching input structure.

328

329

Parameters:

330

- array: Array whose structure to copy

331

- fill_value: Value to fill the array with

332

- highlevel: bool, if True return Array, if False return Content layout

333

- behavior: dict, custom behavior for the array

334

335

Returns:

336

Array filled with fill_value matching input structure

337

"""

338

```

339

340

### Incremental Building

341

342

ArrayBuilder provides a flexible way to construct complex arrays incrementally, supporting nested structures, mixed types, and efficient memory management.

343

344

```python { .api }

345

class ArrayBuilder:

346

"""

347

Builder for incrementally constructing arrays with complex nested structures.

348

"""

349

350

def __init__(self, behavior=None):

351

"""

352

Initialize a new ArrayBuilder.

353

354

Parameters:

355

- behavior: dict, custom behavior for built arrays

356

"""

357

358

# Primitive value methods

359

def null(self):

360

"""Append a null/None value."""

361

362

def boolean(self, x):

363

"""

364

Append a boolean value.

365

366

Parameters:

367

- x: bool, boolean value to append

368

"""

369

370

def integer(self, x):

371

"""

372

Append an integer value.

373

374

Parameters:

375

- x: int, integer value to append

376

"""

377

378

def real(self, x):

379

"""

380

Append a real (float) value.

381

382

Parameters:

383

- x: float, real value to append

384

"""

385

386

def complex(self, real, imag=0):

387

"""

388

Append a complex value.

389

390

Parameters:

391

- real: float, real part

392

- imag: float, imaginary part (default 0)

393

"""

394

395

def string(self, x):

396

"""

397

Append a string value.

398

399

Parameters:

400

- x: str, string value to append

401

"""

402

403

def bytestring(self, x):

404

"""

405

Append a byte string value.

406

407

Parameters:

408

- x: bytes, byte string value to append

409

"""

410

411

def datetime(self, x):

412

"""

413

Append a datetime value.

414

415

Parameters:

416

- x: datetime, datetime value to append

417

"""

418

419

def timedelta(self, x):

420

"""

421

Append a timedelta value.

422

423

Parameters:

424

- x: timedelta, timedelta value to append

425

"""

426

427

def append(self, x):

428

"""

429

Generic method for appending various types of data.

430

431

Parameters:

432

- x: Various types (None, bool, int, float, str, Array, Record, or Python data)

433

"""

434

435

def extend(self, iterable):

436

"""

437

Append all items from an iterable.

438

439

Parameters:

440

- iterable: Iterable containing data to append

441

"""

442

443

# Nested structure methods

444

def begin_list(self):

445

"""Begin building a list (variable-length)."""

446

447

def end_list(self):

448

"""End building the current list."""

449

450

def begin_tuple(self, numfields):

451

"""

452

Begin building a tuple (fixed-length).

453

454

Parameters:

455

- numfields: int, number of fields in tuple

456

"""

457

458

def end_tuple(self):

459

"""End building the current tuple."""

460

461

def begin_record(self, name=None):

462

"""

463

Begin building a record (named fields).

464

465

Parameters:

466

- name: str, optional name for the record type

467

"""

468

469

def end_record(self):

470

"""End building the current record."""

471

472

def field(self, key):

473

"""

474

Set the field key for the next value in a record.

475

476

Parameters:

477

- key: str, field name

478

"""

479

480

def index(self, i):

481

"""

482

Set the index for the next value in a tuple.

483

484

Parameters:

485

- i: int, index position

486

"""

487

488

# Context managers for convenience

489

def list(self):

490

"""

491

Context manager for building a list.

492

493

Returns:

494

Context manager that calls begin_list/end_list

495

"""

496

497

def tuple(self, numfields):

498

"""

499

Context manager for building a tuple.

500

501

Parameters:

502

- numfields: int, number of fields in tuple

503

504

Returns:

505

Context manager that calls begin_tuple/end_tuple

506

"""

507

508

def record(self, name=None):

509

"""

510

Context manager for building a record.

511

512

Parameters:

513

- name: str, optional name for the record type

514

515

Returns:

516

Context manager that calls begin_record/end_record

517

"""

518

519

# Finalization

520

def snapshot(self):

521

"""

522

Create an Array from the current builder state.

523

524

Returns:

525

Array containing the built data

526

"""

527

```

528

529

## Usage Examples

530

531

### Basic Construction

532

533

```python

534

import awkward as ak

535

import numpy as np

536

537

# From Python lists

538

data = [[1, 2, 3], [4], [5, 6]]

539

array = ak.from_iter(data)

540

541

# From NumPy arrays

542

np_array = np.array([[1, 2], [3, 4]])

543

ak_array = ak.from_numpy(np_array)

544

545

# From JSON

546

json_data = '[{"x": [1, 2], "y": 3}, {"x": [4], "y": 5}]'

547

array = ak.from_json(json_data)

548

```

549

550

### Incremental Building

551

552

```python

553

import awkward as ak

554

555

builder = ak.ArrayBuilder()

556

557

# Build nested structure

558

with builder.list():

559

    builder.integer(1)

560

    builder.integer(2)

561

562

with builder.record():

563

    builder.field("x")

564

    builder.real(3.14)

565

    builder.field("y")

566

    with builder.list():

567

        builder.string("hello")

568

        builder.string("world")

569

570

array = builder.snapshot()

571

```

572

573

### File I/O

574

575

```python

576

import awkward as ak

577

578

# Read from Parquet

579

array = ak.from_parquet("data.parquet")

580

581

# Read from JSON file

582

with open("data.json") as f:

583

    array = ak.from_json(f)

584

585

# From Arrow

586

import pyarrow as pa

587

arrow_array = pa.array([1, 2, 3])

588

ak_array = ak.from_arrow(arrow_array)

589

```