or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

api-types.md · configuration.md · core-data-structures.md · data-io.md · data-manipulation.md · data-types.md · errors.md · index.md · plotting.md · statistics-math.md · time-series.md

api-types.mddocs/

0

# pandas.api.types - Type Checking and Data Validation

1

2

The `pandas.api.types` module provides comprehensive type checking functionality for pandas data structures and NumPy arrays. This module is essential for data validation, type inference, and conditional operations based on data types.

3

4

## Core Imports

5

6

```python

7

# Type checking functions

8

from pandas.api.types import (

9

# Data type checking

10

is_bool_dtype, is_integer_dtype, is_float_dtype, is_numeric_dtype,

11

is_object_dtype, is_string_dtype, is_complex_dtype,

12

13

# Temporal type checking

14

is_datetime64_dtype, is_datetime64_any_dtype, is_datetime64_ns_dtype,

15

is_timedelta64_dtype, is_timedelta64_ns_dtype,

16

17

# Extension type checking

18

is_categorical_dtype, is_period_dtype, is_interval_dtype,

19

is_extension_array_dtype, is_signed_integer_dtype, is_unsigned_integer_dtype,

20

21

# Value type checking

22

is_bool, is_integer, is_float, is_complex, is_number, is_scalar,

23

24

# Structure checking

25

is_array_like, is_list_like, is_dict_like, is_file_like, is_hashable,

26

is_iterator, is_named_tuple, is_re, is_re_compilable,

27

28

# Type inference and utilities

29

infer_dtype, pandas_dtype, is_dtype_equal,

30

31

# Categorical operations

32

union_categoricals,

33

34

# Extension dtypes

35

CategoricalDtype, DatetimeTZDtype, IntervalDtype, PeriodDtype

36

)

37

```

38

39

## Data Type Checking

40

41

### Numeric Type Checking

42

43

```python

44

# Basic numeric type detection

45

is_bool_dtype(arr_or_dtype) -> bool { .api }

46

is_integer_dtype(arr_or_dtype) -> bool { .api }

47

is_float_dtype(arr_or_dtype) -> bool { .api }

48

is_numeric_dtype(arr_or_dtype) -> bool { .api }

49

is_complex_dtype(arr_or_dtype) -> bool { .api }

50

51

# Specific numeric type checking

52

is_signed_integer_dtype(arr_or_dtype) -> bool { .api }

53

is_unsigned_integer_dtype(arr_or_dtype) -> bool { .api }

54

is_any_real_numeric_dtype(arr_or_dtype) -> bool { .api }

55

56

# Deprecated (pandas 2.1.0+)

57

is_int64_dtype(arr_or_dtype) -> bool { .api } # Use dtype == np.int64 instead

58

```

59

60

### Temporal Type Checking

61

62

```python

63

# DateTime type detection

64

is_datetime64_dtype(arr_or_dtype) -> bool { .api }

65

is_datetime64_any_dtype(arr_or_dtype) -> bool { .api }

66

is_datetime64_ns_dtype(arr_or_dtype) -> bool { .api }

67

68

# TimeDelta type detection

69

is_timedelta64_dtype(arr_or_dtype) -> bool { .api }

70

is_timedelta64_ns_dtype(arr_or_dtype) -> bool { .api }

71

72

# Deprecated timezone-aware datetime checking (pandas 2.1.0+)

73

is_datetime64tz_dtype(arr_or_dtype) -> bool { .api } # Use isinstance(dtype, pd.DatetimeTZDtype) instead

74

```

75

76

### Extension Type Checking

77

78

```python

79

# Pandas extension types

80

is_categorical_dtype(arr_or_dtype) -> bool { .api } # Deprecated: Use isinstance(dtype, pd.CategoricalDtype)

81

is_period_dtype(arr_or_dtype) -> bool { .api } # Deprecated: Use isinstance(dtype, pd.PeriodDtype)

82

is_interval_dtype(arr_or_dtype) -> bool { .api } # Deprecated: Use isinstance(dtype, pd.IntervalDtype)

83

is_extension_array_dtype(arr_or_dtype) -> bool { .api }

84

85

# String and object types

86

is_object_dtype(arr_or_dtype) -> bool { .api }

87

is_string_dtype(arr_or_dtype) -> bool { .api }

88

89

# Sparse arrays (deprecated pandas 2.1.0+)

90

is_sparse(arr) -> bool { .api } # Use isinstance(dtype, pd.SparseDtype) instead

91

```

92

93

## Value Type Checking

94

95

### Scalar Type Detection

96

97

```python

98

# Basic scalar type checking

99

is_bool(obj) -> bool { .api }

100

is_integer(obj) -> bool { .api }

101

is_float(obj) -> bool { .api }

102

is_complex(obj) -> bool { .api }

103

is_number(obj) -> bool { .api }

104

is_scalar(obj) -> bool { .api }

105

```

106

107

### Collection Type Detection

108

109

```python

110

# Container type checking

111

is_array_like(obj) -> bool { .api }

112

is_list_like(obj) -> bool { .api }

113

is_dict_like(obj) -> bool { .api }

114

is_iterator(obj) -> bool { .api }

115

116

# Specific structure checking

117

is_named_tuple(obj) -> bool { .api }

118

is_hashable(obj) -> bool { .api }

119

is_file_like(obj) -> bool { .api }

120

121

# Regular expression checking

122

is_re(obj) -> bool { .api }

123

is_re_compilable(obj) -> bool { .api }

124

```

125

126

## Type Inference and Utilities

127

128

### Data Type Inference

129

130

```python

131

# Infer the type of scalar or array-like data

132

infer_dtype(value, skipna: bool = True) -> str { .api }

133

"""

134

Returns string labels for detected types:

135

- 'string', 'bytes', 'floating', 'integer', 'mixed-integer', 'mixed-integer-float'

136

- 'decimal', 'complex', 'categorical', 'boolean'

137

- 'datetime64', 'datetime', 'date', 'timedelta64', 'timedelta', 'time', 'period'

138

- 'mixed', 'unknown-array'

139

"""

140

141

# Convert input to pandas/numpy dtype

142

pandas_dtype(dtype) -> DtypeObj { .api }

143

144

# Compare two dtypes for equality

145

is_dtype_equal(source, target) -> bool { .api }

146

```

147

148

## Categorical Operations

149

150

### Categorical Data Manipulation

151

152

```python

153

# Combine multiple categorical arrays

154

union_categoricals(

155

to_union,

156

sort_categories: bool = False,

157

ignore_order: bool = False

158

) -> Categorical { .api }

159

"""

160

Combine list-like of Categorical-like objects, unioning categories.

161

All categories must have the same dtype.

162

"""

163

```

164

165

## Extension Dtype Classes

166

167

### Core Extension Dtypes

168

169

```python

170

# Categorical data type

171

class CategoricalDtype(categories=None, ordered=False) { .api }

172

"""

173

Type for categorical data with categories and orderedness.

174

Parameters:

175

- categories: Index-like, optional

176

- ordered: bool, default False

177

"""

178

179

# Timezone-aware datetime type

180

class DatetimeTZDtype(unit='ns', tz=None) { .api }

181

"""

182

ExtensionDtype for timezone-aware datetime data.

183

Parameters:

184

- unit: str, default 'ns'

185

- tz: str, tzinfo, optional

186

"""

187

188

# Interval data type

189

class IntervalDtype(subtype=None, closed='right') { .api }

190

"""

191

ExtensionDtype for Interval data.

192

Parameters:

193

- subtype: numpy dtype, optional

194

- closed: {'left', 'right', 'both', 'neither'}, default 'right'

195

"""

196

197

# Period data type

198

class PeriodDtype(freq=None) { .api }

199

"""

200

ExtensionDtype for Period data.

201

Parameters:

202

- freq: str or DateOffset, optional

203

"""

204

```

205

206

## Type Definitions

207

208

```python

209

from typing import Union, Any

210

from numpy import dtype as np_dtype

211

from pandas.core.dtypes.base import ExtensionDtype

212

213

# Core type aliases

214

ArrayLike = Union[np.ndarray, 'Series', 'Index', 'ExtensionArray']

215

DtypeObj = Union[np_dtype, ExtensionDtype]

216

Dtype = Union[str, np_dtype, ExtensionDtype, type]

217

218

# Function signatures for key validation functions

219

def is_numeric_dtype(arr_or_dtype: ArrayLike | DtypeObj) -> bool: ...

220

def is_datetime64_any_dtype(arr_or_dtype: ArrayLike | DtypeObj) -> bool: ...

221

def is_categorical_dtype(arr_or_dtype: ArrayLike | DtypeObj) -> bool: ... # Deprecated

222

def infer_dtype(value: Any, skipna: bool = True) -> str: ...

223

```

224

225

## Usage Examples

226

227

### Basic Type Checking

228

229

```python

230

import pandas as pd

231

import numpy as np

232

from pandas.api.types import is_numeric_dtype, is_datetime64_any_dtype

233

234

# Check series dtypes

235

numeric_series = pd.Series([1, 2, 3])

236

string_series = pd.Series(['a', 'b', 'c'])

237

datetime_series = pd.Series(pd.date_range('2023-01-01', periods=3))

238

239

assert is_numeric_dtype(numeric_series)

240

assert not is_numeric_dtype(string_series)

241

assert is_datetime64_any_dtype(datetime_series)

242

```

243

244

### Type Inference

245

246

```python

247

from pandas.api.types import infer_dtype

248

249

# Infer types from mixed data

250

mixed_data = [1, 2.5, 3]

251

print(infer_dtype(mixed_data)) # 'mixed-integer-float'

252

253

string_data = ['a', 'b', 'c']

254

print(infer_dtype(string_data)) # 'string'

255

256

datetime_data = pd.date_range('2023-01-01', periods=3)

257

print(infer_dtype(datetime_data)) # 'datetime64'

258

```

259

260

### Extension Dtype Usage

261

262

```python

263

from pandas.api.types import CategoricalDtype, union_categoricals

264

265

# Create categorical dtype

266

cat_dtype = CategoricalDtype(['low', 'medium', 'high'], ordered=True)

267

cat_series = pd.Series(['low', 'high', 'medium'], dtype=cat_dtype)

268

269

# Combine categoricals

270

cat1 = pd.Categorical(['a', 'b'])

271

cat2 = pd.Categorical(['b', 'c'])

272

combined = union_categoricals([cat1, cat2])

273

```

274

275

### Data Validation Pipeline

276

277

```python

278

from pandas.api.types import (

279

is_numeric_dtype, is_string_dtype, is_datetime64_any_dtype,

280

is_categorical_dtype

281

)

282

283

def validate_dataframe_dtypes(df: pd.DataFrame) -> dict:

284

"""Validate and report column dtypes."""

285

report = {}

286

287

for col in df.columns:

288

if is_numeric_dtype(df[col]):

289

report[col] = 'numeric'

290

elif is_string_dtype(df[col]):

291

report[col] = 'string'

292

elif is_datetime64_any_dtype(df[col]):

293

report[col] = 'datetime'

294

elif isinstance(df[col].dtype, pd.CategoricalDtype): # Modern approach

295

report[col] = 'categorical'

296

else:

297

report[col] = 'other'

298

299

return report

300

```

301

302

## Migration Notes

303

304

Several functions in `pandas.api.types` have been deprecated in favor of explicit `isinstance` or dtype-equality checks. Replace each deprecated call with the expression shown after the arrow:

305

306

- `is_categorical_dtype()` → `isinstance(dtype, pd.CategoricalDtype)`

307

- `is_period_dtype()` → `isinstance(dtype, pd.PeriodDtype)`

308

- `is_interval_dtype()` → `isinstance(dtype, pd.IntervalDtype)`

309

- `is_datetime64tz_dtype()` → `isinstance(dtype, pd.DatetimeTZDtype)`

310

- `is_sparse()` → `isinstance(dtype, pd.SparseDtype)`

311

- `is_int64_dtype()` → `dtype == np.int64`

312

313

The modern approach provides better type safety and clearer intent.

314

315

## See Also

316

317

- [pandas.DataFrame.dtypes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dtypes.html) - Access column dtypes

318

- [pandas.Series.dtype](https://pandas.pydata.org/docs/reference/api/pandas.Series.dtype.html) - Access series dtype

319

- [pandas.core.dtypes](https://pandas.pydata.org/docs/reference/api/pandas.core.dtypes.html) - Core dtype functionality

320

- [numpy.dtype](https://numpy.org/doc/stable/reference/arrays.dtypes.html) - NumPy dtype reference