or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

api-types.md configuration.md core-data-structures.md data-io.md data-manipulation.md data-types.md errors.md index.md plotting.md statistics-math.md time-series.md

docs/data-types.md

0

# Data Types and Missing Data

1

2

Extension data types, missing data handling, and type conversion utilities including nullable integer/boolean types, categorical data, and advanced missing value operations.

3

4

## Core Imports

5

6

```python

7

import pandas as pd

8

from pandas import isna, notna, Categorical, NA

9

```

10

11

## Capabilities

12

13

### Missing Data Detection

14

15

Functions to detect and handle missing values in pandas data structures.

16

17

```python { .api }

18

def isna(obj):

19

"""

20

Detect missing values for an array-like object.

21

22

Parameters:

23

- obj: scalar or array-like, object to check for null or missing values

24

25

Returns:

26

bool or array-like of bool, boolean mask indicating missing values

27

"""

28

29

def isnull(obj):

30

"""

31

Detect missing values for an array-like object.

32

33

Alias for isna().

34

35

Parameters:

36

- obj: scalar or array-like, object to check for null or missing values

37

38

Returns:

39

bool or array-like of bool, boolean mask indicating missing values

40

"""

41

42

def notna(obj):

43

"""

44

Detect existing (non-missing) values.

45

46

Parameters:

47

- obj: scalar or array-like, object to check for non-null values

48

49

Returns:

50

bool or array-like of bool, boolean mask indicating non-missing values

51

"""

52

53

def notnull(obj):

54

"""

55

Detect existing (non-missing) values.

56

57

Alias for notna().

58

59

Parameters:

60

- obj: scalar or array-like, object to check for non-null values

61

62

Returns:

63

bool or array-like of bool, boolean mask indicating non-missing values

64

"""

65

```

66

67

### Categorical Data

68

69

Categorical data type for efficient storage and computation of repetitive data.

70

71

```python { .api }

72

class Categorical:

73

def __init__(self, values, categories=None, ordered=None, dtype=None, fastpath=False):

74

"""

75

Represent a categorical variable in classic R / S-plus fashion.

76

77

Parameters:

78

- values: list-like, values for the categorical

79

- categories: Index-like, unique categories for this categorical

80

- ordered: bool, whether categories have meaningful order

81

- dtype: CategoricalDtype, dtype for the categorical

82

"""

83

84

def add_categories(self, new_categories, inplace=False):

85

"""Add new categories."""

86

87

def remove_categories(self, removals, inplace=False):

88

"""Remove categories."""

89

90

def rename_categories(self, new_categories, inplace=False):

91

"""Rename categories."""

92

93

def reorder_categories(self, new_categories, ordered=None, inplace=False):

94

"""Reorder categories."""

95

96

def remove_unused_categories(self, inplace=False):

97

"""Remove categories not in use."""

98

99

def set_categories(self, new_categories, ordered=None, rename=False, inplace=False):

100

"""Set categories to specified new_categories."""

101

102

def as_ordered(self, inplace=False):

103

"""Set Categorical to be ordered."""

104

105

def as_unordered(self, inplace=False):

106

"""Set Categorical to be unordered."""

107

108

@property

109

def categories(self):

110

"""The categories of this categorical."""

111

112

@property

113

def ordered(self):

114

"""Whether the categories have an ordered relationship."""

115

116

@property

117

def codes(self):

118

"""The category codes of this categorical."""

119

120

def value_counts(self, sort=True, ascending=False, dropna=True):

121

"""Return counts of each category."""

122

123

class CategoricalDtype:

124

def __init__(self, categories=None, ordered=None):

125

"""

126

Type for categorical data with categories and ordered attributes.

127

128

Parameters:

129

- categories: sequence, categories for the dtype

130

- ordered: bool, whether the categories are ordered

131

"""

132

133

@property

134

def categories(self):

135

"""Categorical categories."""

136

137

@property

138

def ordered(self):

139

"""Whether categories are ordered."""

140

```

141

142

### Extension Data Types

143

144

Specialized data types that extend pandas' capabilities beyond NumPy types.

145

146

```python { .api }

147

class StringDtype:

148

def __init__(self, storage=None):

149

"""

150

Extension dtype for string data.

151

152

Parameters:

153

- storage: str, storage type ('python' or 'pyarrow')

154

"""

155

156

class BooleanDtype:

157

def __init__(self):

158

"""Extension dtype for boolean data with missing value support."""

159

160

class Int8Dtype:

161

def __init__(self):

162

"""Extension dtype for nullable 8-bit integer data."""

163

164

class Int16Dtype:

165

def __init__(self):

166

"""Extension dtype for nullable 16-bit integer data."""

167

168

class Int32Dtype:

169

def __init__(self):

170

"""Extension dtype for nullable 32-bit integer data."""

171

172

class Int64Dtype:

173

def __init__(self):

174

"""Extension dtype for nullable 64-bit integer data."""

175

176

class UInt8Dtype:

177

def __init__(self):

178

"""Extension dtype for nullable 8-bit unsigned integer data."""

179

180

class UInt16Dtype:

181

def __init__(self):

182

"""Extension dtype for nullable 16-bit unsigned integer data."""

183

184

class UInt32Dtype:

185

def __init__(self):

186

"""Extension dtype for nullable 32-bit unsigned integer data."""

187

188

class UInt64Dtype:

189

def __init__(self):

190

"""Extension dtype for nullable 64-bit unsigned integer data."""

191

192

class Float32Dtype:

193

def __init__(self):

194

"""Extension dtype for nullable 32-bit floating point data."""

195

196

class Float64Dtype:

197

def __init__(self):

198

"""Extension dtype for nullable 64-bit floating point data."""

199

200

class PeriodDtype:

201

def __init__(self, freq=None):

202

"""

203

Extension dtype for Period data.

204

205

Parameters:

206

- freq: str or DateOffset, frequency of the Period

207

"""

208

209

class IntervalDtype:

210

def __init__(self, subtype=None, closed=None):

211

"""

212

Extension dtype for Interval data.

213

214

Parameters:

215

- subtype: str or numpy dtype, subtype of interval

216

- closed: str, whether intervals are closed ('left', 'right', 'both', 'neither')

217

"""

218

219

class DatetimeTZDtype:

220

def __init__(self, tz=None, unit='ns'):

221

"""

222

Extension dtype for timezone-aware datetime data.

223

224

Parameters:

225

- tz: str or tzinfo, timezone information

226

- unit: str, unit of precision ('ns', 'us', 'ms', 's')

227

"""

228

229

class SparseDtype:

230

def __init__(self, dtype=numpy.float64, fill_value=None):

231

"""

232

Extension dtype for sparse data.

233

234

Parameters:

235

- dtype: str, numpy.dtype, ExtensionDtype, the dtype of non-sparse values

236

- fill_value: scalar, value used for sparse locations

237

"""

238

```

239

240

### Arrow Integration

241

242

Apache Arrow-backed data types for improved performance and interoperability.

243

244

```python { .api }

245

class ArrowDtype:

246

def __init__(self, pyarrow_dtype):

247

"""

248

Extension dtype for PyArrow data types.

249

250

Parameters:

251

- pyarrow_dtype: pyarrow.DataType, PyArrow data type

252

"""

253

254

@property

255

def pyarrow_dtype(self):

256

"""Return the PyArrow data type."""

257

258

@property

259

def name(self):

260

"""Return the name of the data type."""

261

262

@property

263

def type(self):

264

"""Return the scalar type for the array."""

265

```

266

267

### Array Creation and Conversion

268

269

Functions to create pandas arrays and convert between different array types.

270

271

```python { .api }

272

def array(data, dtype=None, copy=True):

273

"""

274

Create an ExtensionArray from the input data.

275

276

Parameters:

277

- data: Sequence, 1-dimensional list, Series, Index, or ExtensionArray

278

- dtype: str, np.dtype, or ExtensionDtype, dtype for the array

279

- copy: bool, whether to copy the data

280

281

Returns:

282

ExtensionArray, newly created array

283

"""

284

285

def factorize(values, sort=False, na_sentinel=-1, use_na_sentinel=True, size_hint=None):

286

"""

287

Encode the object as an enumerated type or categorical variable.

288

289

Parameters:

290

- values: sequence, 1-d array-like

291

- sort: bool, sort uniques

292

- na_sentinel: int, value used to mark missing values in the codes (deprecated in pandas 1.5 and removed in 2.0; use use_na_sentinel instead)

293

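- use_na_sentinel: bool, if True, missing values are coded with the sentinel -1; if False, they are coded as non-negative integers and NaN is kept in the uniques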

294

- size_hint: int, hint to the hashtable sizer

295

296

Returns:

297

tuple of (codes, uniques)

298

"""

299

300

def unique(values):

301

"""

302

Return unique values based on a hash table.

303

304

Parameters:

305

- values: 1d array-like

306

307

Returns:

308

ndarray or ExtensionArray, unique values

309

"""

310

311

def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True):

312

"""

313

Compute a histogram of the 1D array values.

314

315

Parameters:

316

- values: 1d array-like

317

- sort: bool, sort by values

318

- ascending: bool, sort in ascending order

319

- normalize: bool, return relative frequencies

320

- bins: int, group values into half-open bins instead of counting each distinct value

321

- dropna: bool, don't include counts of NaN

322

323

Returns:

324

Series

325

"""

326

```

327

328

### Type Checking Functions

329

330

Functions to check data types and properties of pandas objects.

331

332

```python { .api }

333

# Available in pandas.api.types

334

def infer_dtype(value, skipna=True):

335

"""

336

Efficiently infer the type of a passed value, or of a list-like of values.

337

338

Parameters:

339

- value: object, object whose type is to be inferred

340

- skipna: bool, ignore NaN values when inferring type

341

342

Returns:

343

str, type of the object

344

"""

345

346

def is_any_real_numeric_dtype(arr_or_dtype):

347

"""Check whether the provided array or dtype is a real number data type."""

348

349

def is_bool_dtype(arr_or_dtype):

350

"""Check whether the provided array or dtype is a boolean data type."""

351

352

def is_categorical_dtype(arr_or_dtype):

353

"""Check whether the provided array or dtype is Categorical data type."""

354

355

def is_complex_dtype(arr_or_dtype):

356

"""Check whether the provided array or dtype is a complex data type."""

357

358

def is_datetime64_any_dtype(arr_or_dtype):

359

"""Check whether the provided array or dtype is a datetime64 data type, either timezone-naive or timezone-aware."""

360

361

def is_datetime64_dtype(arr_or_dtype):

362

"""Check whether the provided array or dtype is of the datetime64 data type (timezone-naive)."""

363

364

def is_datetime64_ns_dtype(arr_or_dtype):

365

"""Check whether the provided array or dtype is datetime64[ns] data type."""

366

367

def is_datetime64tz_dtype(arr_or_dtype):

368

"""Check whether the provided array or dtype has a timezone-aware datetime64 data type."""

369

370

def is_extension_array_dtype(arr_or_dtype):

371

"""Check whether the provided array or dtype is an extension data type."""

372

373

def is_float_dtype(arr_or_dtype):

374

"""Check whether the provided array or dtype is a float data type."""

375

376

def is_integer_dtype(arr_or_dtype):

377

"""Check whether the provided array or dtype is an integer data type."""

378

379

def is_interval_dtype(arr_or_dtype):

380

"""Check whether the provided array or dtype is Interval data type."""

381

382

def is_numeric_dtype(arr_or_dtype):

383

"""Check whether the provided array or dtype is a numeric data type."""

384

385

def is_object_dtype(arr_or_dtype):

386

"""Check whether the provided array or dtype is object data type."""

387

388

def is_period_dtype(arr_or_dtype):

389

"""Check whether the provided array or dtype is Period data type."""

390

391

def is_signed_integer_dtype(arr_or_dtype):

392

"""Check whether the provided array or dtype is a signed integer data type."""

393

394

def is_string_dtype(arr_or_dtype):

395

"""Check whether the provided array or dtype is a string data type."""

396

397

def is_timedelta64_dtype(arr_or_dtype):

398

"""Check whether the provided array or dtype is timedelta64 data type."""

399

400

def is_timedelta64_ns_dtype(arr_or_dtype):

401

"""Check whether the provided array or dtype is timedelta64[ns] data type."""

402

403

def is_unsigned_integer_dtype(arr_or_dtype):

404

"""Check whether the provided array or dtype is an unsigned integer data type."""

405

406

def pandas_dtype(dtype):

407

"""

408

Convert input into a pandas only dtype object or a numpy dtype object.

409

410

Parameters:

411

- dtype: object to be converted

412

413

Returns:

414

np.dtype or pandas dtype

415

"""

416

```

417

418

### Extension Arrays

419

420

Specialized array classes that provide the foundation for extension data types.

421

422

```python { .api }

423

class BooleanArray:

424

def __init__(self, values, mask, copy=False):

425

"""

426

Array of boolean (True/False) data with missing values.

427

428

Parameters:

429

- values: numpy.ndarray, boolean array

430

- mask: numpy.ndarray, boolean array indicating missing values

431

- copy: bool, copy the input arrays

432

"""

433

434

class IntegerArray:

435

def __init__(self, values, mask, copy=False):

436

"""

437

Array of integer values with missing value support.

438

439

Parameters:

440

- values: numpy.ndarray, integer array

441

- mask: numpy.ndarray, boolean array indicating missing values

442

- copy: bool, copy the input arrays

443

"""

444

445

class FloatingArray:

446

def __init__(self, values, mask, copy=False):

447

"""

448

Array of floating point values with missing value support.

449

450

Parameters:

451

- values: numpy.ndarray, float array

452

- mask: numpy.ndarray, boolean array indicating missing values

453

- copy: bool, copy the input arrays

454

"""

455

456

class StringArray:

457

def __init__(self, values, copy=False):

458

"""

459

Extension array for string data in a pandas Series or DataFrame.

460

461

Parameters:

462

- values: array-like, sequence of strings

463

- copy: bool, copy the input array

464

"""

465

466

class IntervalArray:

467

def __init__(self, data, closed=None, dtype=None, copy=False, verify_integrity=True):

468

"""

469

Pandas array for interval data that are closed on the same side.

470

471

Parameters:

472

- data: array-like (1-dimensional), array of Interval objects

473

- closed: str, whether intervals are closed ('left', 'right', 'both', 'neither')

474

- dtype: IntervalDtype, dtype for the IntervalArray

475

- copy: bool, copy the input data

476

- verify_integrity: bool, verify data integrity

477

"""

478

479

class PeriodArray:

480

def __init__(self, values, dtype=None, freq=None, copy=False):

481

"""

482

Pandas array for storing Period data.

483

484

Parameters:

485

- values: Union[PeriodArray, Series[period], ndarray[int], PeriodIndex]

486

- dtype: PeriodDtype, optional

487

- freq: str or period object, frequency

488

- copy: bool, copy the input data

489

"""

490

491

class DatetimeArray:

492

def __init__(self, values, dtype=None, freq=None, copy=False):

493

"""

494

Pandas array for datetime64 data.

495

496

Parameters:

497

- values: Series, Index, DatetimeArray, ndarray

498

- dtype: numpy.dtype or DatetimeTZDtype

499

- freq: str or Offset

500

- copy: bool, copy the input data

501

"""

502

503

class TimedeltaArray:

504

def __init__(self, values, dtype=None, freq=None, copy=False):

505

"""

506

Pandas array for timedelta64 data.

507

508

Parameters:

509

- values: array-like, sequence of timedelta-like objects

510

- dtype: numpy.dtype

511

- freq: str or Offset

512

- copy: bool, copy the input data

513

"""

514

515

class SparseArray:

516

def __init__(self, data, sparse_index=None, fill_value=None, kind='integer', dtype=None, copy=False):

517

"""

518

An ExtensionArray for storing sparse data.

519

520

Parameters:

521

- data: array-like or scalar

522

- sparse_index: SparseIndex, locations of non-fill_value entries

523

- fill_value: scalar, entries matching this value are omitted from representation

524

- kind: str, sparse index kind ('integer' or 'block')

525

- dtype: numpy.dtype

526

- copy: bool, copy the input data

527

"""

528

```

529

530

## Advanced Type Operations

531

532

### Categorical Utilities

533

534

```python { .api }

535

def union_categoricals(to_union, sort_categories=False, ignore_order=False):

536

"""

537

Combine list-like of Categorical-like into a single Categorical.

538

539

Parameters:

540

- to_union: list-like, Categorical, CategoricalIndex, or Series with categorical dtype

541

- sort_categories: bool, sort resulting categories

542

- ignore_order: bool, ignore category order

543

544

Returns:

545

Categorical

546

"""

547

548

def concat_categoricals(to_concat, axis=0, join='outer', ignore_index=False):

549

"""

550

Concatenate Categoricals.

551

552

Parameters:

553

- to_concat: list of Categoricals

554

- axis: int, axis to concatenate along

555

- join: str, join method for categories

556

- ignore_index: bool, reset index in result

557

558

Returns:

559

Categorical

560

"""

561

```

562

563

### Nullable Integer Construction

564

565

```python { .api }

566

# Constructor functions for nullable integer arrays

567

def Int8Array(values, mask=None, copy=False):

568

"""Construct Int8Array."""

569

570

def Int16Array(values, mask=None, copy=False):

571

"""Construct Int16Array."""

572

573

def Int32Array(values, mask=None, copy=False):

574

"""Construct Int32Array."""

575

576

def Int64Array(values, mask=None, copy=False):

577

"""Construct Int64Array."""

578

579

def UInt8Array(values, mask=None, copy=False):

580

"""Construct UInt8Array."""

581

582

def UInt16Array(values, mask=None, copy=False):

583

"""Construct UInt16Array."""

584

585

def UInt32Array(values, mask=None, copy=False):

586

"""Construct UInt32Array."""

587

588

def UInt64Array(values, mask=None, copy=False):

589

"""Construct UInt64Array."""

590

```

591

592

## Types

593

594

```python { .api }

595

# Missing value sentinels

596

NA: object # Pandas missing value for extension dtypes

597

NaT: object # Not-a-Time for datetime/timedelta

598

599

# Extension dtype base classes

600

class ExtensionDtype:

601

"""Base class for custom data types."""

602

603

@property

604

def name(self):

605

"""Return a string representation of the dtype."""

606

607

@property

608

def type(self):

609

"""Return the scalar type for the array."""

610

611

@classmethod

612

def construct_from_string(cls, string):

613

"""Construct this type from a string."""

614

615

# Categorical ordering

616

CategoricalOrdering = bool

617

618

# Dtype inference results

619

InferredType = Literal[

620

'boolean', 'integer', 'floating', 'complex', 'string', 'unicode',

621

'mixed', 'mixed-integer', 'mixed-integer-float', 'decimal',

622

'datetime', 'datetime64', 'timedelta', 'timedelta64',

623

'period', 'categorical', 'interval', 'bytes', 'empty'

624

]

625

626

# Arrow dtype string representations

627

ArrowDtypeStr = str # PyArrow dtype string like 'int64[pyarrow]'

628

629

# Sparse array kinds

630

SparseKind = Literal['integer', 'block']

631

```