Powerful data structures for data analysis, time series, and statistics
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
The pandas.api.types module provides comprehensive type checking functionality for pandas data structures and NumPy arrays. This module is essential for data validation, type inference, and conditional operations based on data types.
# Type checking functions
from pandas.api.types import (
# Data type checking
is_bool_dtype, is_integer_dtype, is_float_dtype, is_numeric_dtype,
is_object_dtype, is_string_dtype, is_complex_dtype,
# Temporal type checking
is_datetime64_dtype, is_datetime64_any_dtype, is_datetime64_ns_dtype,
is_timedelta64_dtype, is_timedelta64_ns_dtype,
# Extension type checking
is_categorical_dtype, is_period_dtype, is_interval_dtype,
is_extension_array_dtype, is_signed_integer_dtype, is_unsigned_integer_dtype,
# Value type checking
is_bool, is_integer, is_float, is_complex, is_number, is_scalar,
# Structure checking
is_array_like, is_list_like, is_dict_like, is_file_like, is_hashable,
is_iterator, is_named_tuple, is_re, is_re_compilable,
# Type inference and utilities
infer_dtype, pandas_dtype, is_dtype_equal,
# Categorical operations
union_categoricals,
# Extension dtypes
CategoricalDtype, DatetimeTZDtype, IntervalDtype, PeriodDtype
)

# Basic numeric type detection
is_bool_dtype(arr_or_dtype) -> bool { .api }
is_integer_dtype(arr_or_dtype) -> bool { .api }
is_float_dtype(arr_or_dtype) -> bool { .api }
is_numeric_dtype(arr_or_dtype) -> bool { .api }
is_complex_dtype(arr_or_dtype) -> bool { .api }
# Specific numeric type checking
is_signed_integer_dtype(arr_or_dtype) -> bool { .api }
is_unsigned_integer_dtype(arr_or_dtype) -> bool { .api }
is_any_real_numeric_dtype(arr_or_dtype) -> bool { .api }
# Deprecated (pandas 2.1.0+)
is_int64_dtype(arr_or_dtype) -> bool { .api } # Use dtype == np.int64 instead

# DateTime type detection
is_datetime64_dtype(arr_or_dtype) -> bool { .api }
is_datetime64_any_dtype(arr_or_dtype) -> bool { .api }
is_datetime64_ns_dtype(arr_or_dtype) -> bool { .api }
# TimeDelta type detection
is_timedelta64_dtype(arr_or_dtype) -> bool { .api }
is_timedelta64_ns_dtype(arr_or_dtype) -> bool { .api }
# Deprecated timezone-aware datetime checking (pandas 2.1.0+)
is_datetime64tz_dtype(arr_or_dtype) -> bool { .api } # Use isinstance(dtype, pd.DatetimeTZDtype) instead

# Pandas extension types
is_categorical_dtype(arr_or_dtype) -> bool { .api } # Deprecated: Use isinstance(dtype, pd.CategoricalDtype)
is_period_dtype(arr_or_dtype) -> bool { .api } # Deprecated: Use isinstance(dtype, pd.PeriodDtype)
is_interval_dtype(arr_or_dtype) -> bool { .api } # Deprecated: Use isinstance(dtype, pd.IntervalDtype)
is_extension_array_dtype(arr_or_dtype) -> bool { .api }
# String and object types
is_object_dtype(arr_or_dtype) -> bool { .api }
is_string_dtype(arr_or_dtype) -> bool { .api }
# Sparse arrays (deprecated pandas 2.1.0+)
is_sparse(arr) -> bool { .api } # Use isinstance(dtype, pd.SparseDtype) instead

# Basic scalar type checking
is_bool(obj) -> bool { .api }
is_integer(obj) -> bool { .api }
is_float(obj) -> bool { .api }
is_complex(obj) -> bool { .api }
is_number(obj) -> bool { .api }
is_scalar(obj) -> bool { .api }

# Container type checking
is_array_like(obj) -> bool { .api }
is_list_like(obj) -> bool { .api }
is_dict_like(obj) -> bool { .api }
is_iterator(obj) -> bool { .api }
# Specific structure checking
is_named_tuple(obj) -> bool { .api }
is_hashable(obj) -> bool { .api }
is_file_like(obj) -> bool { .api }
# Regular expression checking
is_re(obj) -> bool { .api }
is_re_compilable(obj) -> bool { .api }

# Infer the type of scalar or array-like data
infer_dtype(value, skipna: bool = True) -> str { .api }
"""
Returns string labels for detected types:
- 'string', 'bytes', 'floating', 'integer', 'mixed-integer', 'mixed-integer-float'
- 'decimal', 'complex', 'categorical', 'boolean'
- 'datetime64', 'datetime', 'date', 'timedelta64', 'timedelta', 'time', 'period'
- 'mixed', 'unknown-array'
"""
# Convert input to pandas/numpy dtype
pandas_dtype(dtype) -> DtypeObj { .api }
# Compare two dtypes for equality
is_dtype_equal(source, target) -> bool { .api }

# Combine multiple categorical arrays
union_categoricals(
to_union,
sort_categories: bool = False,
ignore_order: bool = False
) -> Categorical { .api }
"""
Combine list-like of Categorical-like objects, unioning categories.
All categories must have the same dtype.
"""

# Categorical data type
class CategoricalDtype(categories=None, ordered=False) { .api }
"""
Type for categorical data with categories and orderedness.
Parameters:
- categories: Index-like, optional
- ordered: bool, default False
"""
# Timezone-aware datetime type
class DatetimeTZDtype(unit='ns', tz=None) { .api }
"""
ExtensionDtype for timezone-aware datetime data.
Parameters:
- unit: str, default 'ns'
- tz: str, tzinfo, optional
"""
# Interval data type
class IntervalDtype(subtype=None, closed=None) { .api }
"""
ExtensionDtype for Interval data.
Parameters:
- subtype: numpy dtype, optional
- closed: {'left', 'right', 'both', 'neither'}, optional
"""
# Period data type
class PeriodDtype(freq=None) { .api }
"""
ExtensionDtype for Period data.
Parameters:
- freq: str or DateOffset, optional
"""

from typing import Union, Any
import numpy as np
from numpy import dtype as np_dtype
from pandas.core.dtypes.base import ExtensionDtype
# Core type aliases
ArrayLike = Union[np.ndarray, 'Series', 'Index', 'ExtensionArray']
DtypeObj = Union[np_dtype, ExtensionDtype]
Dtype = Union[str, np_dtype, ExtensionDtype, type]
# Function signatures for key validation functions
def is_numeric_dtype(arr_or_dtype: ArrayLike | DtypeObj) -> bool: ...
def is_datetime64_any_dtype(arr_or_dtype: ArrayLike | DtypeObj) -> bool: ...
def is_categorical_dtype(arr_or_dtype: ArrayLike | DtypeObj) -> bool: ... # Deprecated
def infer_dtype(value: Any, skipna: bool = True) -> str: ...

import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype, is_datetime64_any_dtype
# Check series dtypes
numeric_series = pd.Series([1, 2, 3])
string_series = pd.Series(['a', 'b', 'c'])
datetime_series = pd.Series(pd.date_range('2023-01-01', periods=3))
assert is_numeric_dtype(numeric_series)
assert not is_numeric_dtype(string_series)
assert is_datetime64_any_dtype(datetime_series)

from pandas.api.types import infer_dtype
# Infer types from mixed data
mixed_data = [1, 2.5, 3]
print(infer_dtype(mixed_data)) # 'mixed-integer-float'
string_data = ['a', 'b', 'c']
print(infer_dtype(string_data)) # 'string'
datetime_data = pd.date_range('2023-01-01', periods=3)
print(infer_dtype(datetime_data)) # 'datetime64'

from pandas.api.types import CategoricalDtype, union_categoricals
# Create categorical dtype
cat_dtype = CategoricalDtype(['low', 'medium', 'high'], ordered=True)
cat_series = pd.Series(['low', 'high', 'medium'], dtype=cat_dtype)
# Combine categoricals
cat1 = pd.Categorical(['a', 'b'])
cat2 = pd.Categorical(['b', 'c'])
combined = union_categoricals([cat1, cat2])

from pandas.api.types import (
is_numeric_dtype, is_string_dtype, is_datetime64_any_dtype,
is_categorical_dtype
)
def validate_dataframe_dtypes(df: pd.DataFrame) -> dict:
    """Validate and report column dtypes."""
    report = {}
    for col in df.columns:
        if is_numeric_dtype(df[col]):
            report[col] = 'numeric'
        elif is_string_dtype(df[col]):
            report[col] = 'string'
        elif is_datetime64_any_dtype(df[col]):
            report[col] = 'datetime'
        elif isinstance(df[col].dtype, pd.CategoricalDtype): # Modern approach
            report[col] = 'categorical'
        else:
            report[col] = 'other'
    return report

Several functions in pandas.api.types have been deprecated in favor of more explicit type checking:
- is_categorical_dtype() → isinstance(dtype, pd.CategoricalDtype)
- is_period_dtype() → isinstance(dtype, pd.PeriodDtype)
- is_interval_dtype() → isinstance(dtype, pd.IntervalDtype)
- is_datetime64tz_dtype() → isinstance(dtype, pd.DatetimeTZDtype)
- is_sparse() → isinstance(dtype, pd.SparseDtype)
- is_int64_dtype() → dtype == np.int64

The modern approach provides better type safety and clearer intent.
Install with Tessl CLI
npx tessl i tessl/pypi-pandas