GPU DataFrame library for loading, joining, aggregating, filtering, and otherwise manipulating data
—
cuDF provides comprehensive type checking utilities for validating and working with GPU data types. The type system extends pandas' type checking to handle cuDF-specific types including nested data structures and GPU-accelerated dtypes.
# Main type utilities
from cudf.api.types import dtype
# Data type checking functions
from cudf.api.types import (
is_numeric_dtype, is_string_dtype, is_integer_dtype, is_float_dtype,
is_bool_dtype, is_categorical_dtype, is_datetime64_dtype, is_timedelta64_dtype
)
# cuDF-specific type checking
from cudf.api.types import (
is_decimal_dtype, is_list_dtype, is_struct_dtype, is_interval_dtype
)
# Value type checking
from cudf.api.types import is_scalar, is_list_like
Core utilities for working with cuDF data types and conversions.
def dtype(dtype_obj) -> cudf.core.dtypes.ExtensionDtype:
"""
Convert input to cuDF-compatible data type
Normalizes various dtype specifications into cuDF ExtensionDtype objects.
Handles pandas dtypes, numpy dtypes, and cuDF-specific extension types.
Parameters:
dtype_obj: str, numpy.dtype, pandas.ExtensionDtype, or cuDF ExtensionDtype
Data type specification to convert
Returns:
cudf.core.dtypes.ExtensionDtype: Normalized cuDF data type
Raises:
TypeError: If dtype cannot be converted to cuDF-compatible type
Examples:
# String dtype specifications
dt = cudf.api.types.dtype('int64')
dt = cudf.api.types.dtype('float32')
dt = cudf.api.types.dtype('category')
# NumPy dtype conversion
import numpy as np
dt = cudf.api.types.dtype(np.dtype('datetime64[ns]'))
# cuDF extension types
dt = cudf.api.types.dtype(cudf.ListDtype('int32'))
dt = cudf.api.types.dtype(cudf.Decimal64Dtype(10, 2))
# Pandas compatibility
import pandas as pd
dt = cudf.api.types.dtype(pd.CategoricalDtype(['a', 'b', 'c']))
"""
Functions for checking standard data types with GPU acceleration.
def is_numeric_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is numeric
Returns True for integer, float, complex, and decimal dtypes.
Compatible with cuDF extension types and GPU arrays.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is numeric, False otherwise
Examples:
# Check Series dtype
s_int = cudf.Series([1, 2, 3])
assert cudf.api.types.is_numeric_dtype(s_int) # True
s_str = cudf.Series(['a', 'b', 'c'])
assert not cudf.api.types.is_numeric_dtype(s_str) # False
# Check dtype directly
assert cudf.api.types.is_numeric_dtype('int64') # True
assert cudf.api.types.is_numeric_dtype('float32') # True
assert not cudf.api.types.is_numeric_dtype('object') # False
# cuDF decimal types
decimal_dtype = cudf.Decimal64Dtype(10, 2)
assert cudf.api.types.is_numeric_dtype(decimal_dtype) # True
"""
def is_string_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is string
Returns True for string/object dtypes that contain text data.
Handles cuDF string columns and object columns with string data.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype contains string data, False otherwise
Examples:
# String Series
s_str = cudf.Series(['hello', 'world'])
assert cudf.api.types.is_string_dtype(s_str) # True
# Object Series with strings
s_obj = cudf.Series(['a', 'b'], dtype='object')
assert cudf.api.types.is_string_dtype(s_obj) # True
# Non-string data
s_int = cudf.Series([1, 2, 3])
assert not cudf.api.types.is_string_dtype(s_int) # False
# Check dtype string
assert cudf.api.types.is_string_dtype('object') # True
assert not cudf.api.types.is_string_dtype('int64') # False
"""
def is_integer_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is integer
Returns True for signed and unsigned integer dtypes of all bit widths.
Excludes floating-point and other numeric types.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is integer, False otherwise
Examples:
# Integer Series
s_int32 = cudf.Series([1, 2, 3], dtype='int32')
assert cudf.api.types.is_integer_dtype(s_int32) # True
s_uint64 = cudf.Series([1, 2, 3], dtype='uint64')
assert cudf.api.types.is_integer_dtype(s_uint64) # True
# Non-integer numeric types
s_float = cudf.Series([1.0, 2.0, 3.0])
assert not cudf.api.types.is_integer_dtype(s_float) # False
# Check various integer dtypes
assert cudf.api.types.is_integer_dtype('int8') # True
assert cudf.api.types.is_integer_dtype('uint32') # True
assert not cudf.api.types.is_integer_dtype('float64') # False
"""
def is_float_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is floating point
Returns True for single and double precision floating-point dtypes.
Excludes integer, decimal, and other numeric types.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is floating point, False otherwise
Examples:
# Float Series
s_float32 = cudf.Series([1.1, 2.2, 3.3], dtype='float32')
assert cudf.api.types.is_float_dtype(s_float32) # True
s_float64 = cudf.Series([1.0, 2.0, 3.0]) # Default float64
assert cudf.api.types.is_float_dtype(s_float64) # True
# Non-float types
s_int = cudf.Series([1, 2, 3])
assert not cudf.api.types.is_float_dtype(s_int) # False
# Check dtype strings
assert cudf.api.types.is_float_dtype('float32') # True
assert cudf.api.types.is_float_dtype('float64') # True
assert not cudf.api.types.is_float_dtype('int32') # False
"""
def is_bool_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is boolean
Returns True for boolean dtypes. Handles cuDF boolean columns
and boolean masks used in filtering operations.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is boolean, False otherwise
Examples:
# Boolean Series
s_bool = cudf.Series([True, False, True])
assert cudf.api.types.is_bool_dtype(s_bool) # True
# Boolean mask from comparison
s_int = cudf.Series([1, 2, 3])
mask = s_int > 1 # Boolean Series
assert cudf.api.types.is_bool_dtype(mask) # True
# Non-boolean types
assert not cudf.api.types.is_bool_dtype(s_int) # False
# Check dtype
assert cudf.api.types.is_bool_dtype('bool') # True
assert not cudf.api.types.is_bool_dtype('int64') # False
"""
def is_categorical_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is categorical
Returns True for cuDF categorical dtypes and pandas CategoricalDtype.
Handles both ordered and unordered categorical data.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is categorical, False otherwise
Examples:
# Categorical Series
categories = ['red', 'blue', 'green']
s_cat = cudf.Series(['red', 'blue', 'red'], dtype='category')
assert cudf.api.types.is_categorical_dtype(s_cat) # True
# CategoricalIndex
idx_cat = cudf.CategoricalIndex(['a', 'b', 'c'])
assert cudf.api.types.is_categorical_dtype(idx_cat) # True
# Non-categorical
s_str = cudf.Series(['red', 'blue', 'green'])
assert not cudf.api.types.is_categorical_dtype(s_str) # False
# Check CategoricalDtype
cat_dtype = cudf.CategoricalDtype(categories)
assert cudf.api.types.is_categorical_dtype(cat_dtype) # True
"""
Specialized functions for temporal data types.
def is_datetime64_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is datetime64
Returns True for datetime64 dtypes with any time unit resolution.
Handles cuDF DatetimeIndex and datetime columns.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is datetime64, False otherwise
Examples:
# Datetime Series
dates = cudf.to_datetime(['2023-01-01', '2023-01-02'])
assert cudf.api.types.is_datetime64_dtype(dates) # True
# DatetimeIndex
date_idx = cudf.DatetimeIndex(['2023-01-01', '2023-01-02'])
assert cudf.api.types.is_datetime64_dtype(date_idx) # True
# Non-datetime types
s_str = cudf.Series(['2023-01-01', '2023-01-02']) # String, not parsed
assert not cudf.api.types.is_datetime64_dtype(s_str) # False
# Check dtype strings
assert cudf.api.types.is_datetime64_dtype('datetime64[ns]') # True
assert cudf.api.types.is_datetime64_dtype('datetime64[ms]') # True
assert not cudf.api.types.is_datetime64_dtype('int64') # False
"""
def is_timedelta64_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is timedelta64
Returns True for timedelta64 dtypes representing time durations.
Handles cuDF TimedeltaIndex and timedelta columns.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is timedelta64, False otherwise
Examples:
# Timedelta Series
deltas = cudf.Series(['1 day', '2 hours', '30 minutes'])
deltas = cudf.to_timedelta(deltas)
assert cudf.api.types.is_timedelta64_dtype(deltas) # True
# TimedeltaIndex
td_idx = cudf.TimedeltaIndex(['1D', '2H', '30min'])
assert cudf.api.types.is_timedelta64_dtype(td_idx) # True
# Computed timedeltas
date1 = cudf.to_datetime('2023-01-02')
date2 = cudf.to_datetime('2023-01-01')
diff = date1 - date2 # Timedelta
assert cudf.api.types.is_timedelta64_dtype(diff) # True
# Check dtype
assert cudf.api.types.is_timedelta64_dtype('timedelta64[ns]') # True
"""
Functions for checking cuDF-specific extension data types.
def is_decimal_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is decimal
Returns True for cuDF decimal dtypes (Decimal32, Decimal64, Decimal128).
These provide exact decimal arithmetic without floating-point errors.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is decimal, False otherwise
Examples:
# Decimal Series
decimal_dtype = cudf.Decimal64Dtype(precision=10, scale=2)
s_decimal = cudf.Series([1.23, 4.56], dtype=decimal_dtype)
assert cudf.api.types.is_decimal_dtype(s_decimal) # True
# Different decimal precisions
dec32 = cudf.Decimal32Dtype(7, 2)
dec128 = cudf.Decimal128Dtype(20, 4)
assert cudf.api.types.is_decimal_dtype(dec32) # True
assert cudf.api.types.is_decimal_dtype(dec128) # True
# Non-decimal numeric types
s_float = cudf.Series([1.23, 4.56], dtype='float64')
assert not cudf.api.types.is_decimal_dtype(s_float) # False
# Check from dtype object
assert cudf.api.types.is_decimal_dtype(decimal_dtype) # True
"""
def is_list_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is list
Returns True for cuDF list dtypes representing nested list data.
Each row contains a variable-length list of elements.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is list, False otherwise
Examples:
# List Series
list_dtype = cudf.ListDtype('int64')
s_list = cudf.Series([[1, 2, 3], [4, 5], [6]], dtype=list_dtype)
assert cudf.api.types.is_list_dtype(s_list) # True
# Nested lists with different element types
str_list_dtype = cudf.ListDtype('str')
s_str_list = cudf.Series([['a', 'b'], ['c']], dtype=str_list_dtype)
assert cudf.api.types.is_list_dtype(s_str_list) # True
# Non-list types
s_regular = cudf.Series([1, 2, 3])
assert not cudf.api.types.is_list_dtype(s_regular) # False
# Check dtype object
assert cudf.api.types.is_list_dtype(list_dtype) # True
"""
def is_struct_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is struct
Returns True for cuDF struct dtypes representing nested structured data.
Each row contains multiple named fields with potentially different types.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is struct, False otherwise
Examples:
# Struct dtype
struct_dtype = cudf.StructDtype({
'x': 'int64',
'y': 'float64',
'name': 'str'
})
s_struct = cudf.Series([
{'x': 1, 'y': 1.1, 'name': 'first'},
{'x': 2, 'y': 2.2, 'name': 'second'}
], dtype=struct_dtype)
assert cudf.api.types.is_struct_dtype(s_struct) # True
# Check dtype object directly
assert cudf.api.types.is_struct_dtype(struct_dtype) # True
# Non-struct types
s_dict = cudf.Series([{'a': 1}, {'b': 2}]) # Object, not struct
assert not cudf.api.types.is_struct_dtype(s_dict) # False
# Regular Series
s_int = cudf.Series([1, 2, 3])
assert not cudf.api.types.is_struct_dtype(s_int) # False
"""
def is_interval_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is interval
Returns True for cuDF interval dtypes representing interval data.
Intervals have left and right bounds with configurable closure.
Parameters:
arr_or_dtype: array-like, Series, Index, or data type
Object or dtype to check
Returns:
bool: True if array/dtype is interval, False otherwise
Examples:
# Interval dtype and data
interval_dtype = cudf.IntervalDtype('int64', closed='right')
intervals = cudf.interval_range(0, 10, periods=5)
assert cudf.api.types.is_interval_dtype(intervals) # True
# IntervalIndex
idx_interval = cudf.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
assert cudf.api.types.is_interval_dtype(idx_interval) # True
# Check dtype object
assert cudf.api.types.is_interval_dtype(interval_dtype) # True
# Non-interval types
s_float = cudf.Series([1.0, 2.0, 3.0])
assert not cudf.api.types.is_interval_dtype(s_float) # False
"""
Functions for checking properties of values and objects.
def is_scalar(val) -> bool:
"""
Check whether the provided value is scalar
Returns True for single values (not collections). Handles cuDF-specific
scalar types including decimal and datetime scalars.
Parameters:
val: Any
Value to check for scalar nature
Returns:
bool: True if value is scalar, False otherwise
Examples:
# Scalar values
assert cudf.api.types.is_scalar(1) # True
assert cudf.api.types.is_scalar(1.5) # True
assert cudf.api.types.is_scalar('hello') # True
assert cudf.api.types.is_scalar(True) # True
# cuDF-specific scalars
assert cudf.api.types.is_scalar(cudf.NA) # True
assert cudf.api.types.is_scalar(cudf.NaT) # True
# Date/time scalars
date_scalar = cudf.to_datetime('2023-01-01')
assert cudf.api.types.is_scalar(date_scalar) # True (single date)
# Non-scalar collections
assert not cudf.api.types.is_scalar([1, 2, 3]) # False
assert not cudf.api.types.is_scalar(cudf.Series([1, 2])) # False
assert not cudf.api.types.is_scalar({'a': 1}) # False
# Edge cases
import numpy as np
assert cudf.api.types.is_scalar(np.int64(5)) # True
assert not cudf.api.types.is_scalar(np.array([1])) # False (array)
"""
def is_list_like(obj) -> bool:
"""
Check whether the provided object is list-like
Returns True for objects that can be iterated over like lists,
excluding strings and dicts. Includes cuDF Series, Index, and arrays.
Parameters:
obj: Any
Object to check for list-like properties
Returns:
bool: True if object is list-like, False otherwise
Examples:
# List-like objects
assert cudf.api.types.is_list_like([1, 2, 3]) # True
assert cudf.api.types.is_list_like((1, 2, 3)) # True
assert cudf.api.types.is_list_like({1, 2, 3}) # True (set)
# cuDF objects
s = cudf.Series([1, 2, 3])
assert cudf.api.types.is_list_like(s) # True
idx = cudf.Index([1, 2, 3])
assert cudf.api.types.is_list_like(idx) # True
# NumPy/CuPy arrays
import numpy as np
assert cudf.api.types.is_list_like(np.array([1, 2, 3])) # True
# Non-list-like objects
assert not cudf.api.types.is_list_like('hello') # False (string)
assert not cudf.api.types.is_list_like({'a': 1}) # False (dict)
assert not cudf.api.types.is_list_like(5) # False (scalar)
assert not cudf.api.types.is_list_like(None) # False
# DataFrame (debatable, but typically False)
df = cudf.DataFrame({'A': [1, 2]})
assert not cudf.api.types.is_list_like(df) # False
"""
Common patterns for type validation in cuDF code:
def process_numeric_data(data):
    """
    Example function with type validation

    Rejects non-numeric input up front, then returns the sum of ``data``.

    Parameters:
        data: Object to validate and reduce. It is passed to
            ``cudf.api.types.is_numeric_dtype`` and must provide ``sum()``
            (presumably a cuDF Series; confirm against callers).

    Returns:
        The result of ``data.sum()``.

    Raises:
        TypeError: If ``cudf.api.types.is_numeric_dtype(data)`` is false.
    """
    if not cudf.api.types.is_numeric_dtype(data):
        raise TypeError("Input data must be numeric")
    # Safe to perform numeric operations
    return data.sum()
def process_categorical_data(data):
    """
    Handle categorical data specifically

    Parameters:
        data: Object to inspect. If it is already categorical, its
            ``.cat.categories`` is returned directly; otherwise it is first
            wrapped in ``cudf.Series(data, dtype='category')``.

    Returns:
        The ``.cat.categories`` of ``data`` or of its categorical conversion.
    """
    if cudf.api.types.is_categorical_dtype(data):
        # Use categorical-specific operations
        return data.cat.categories
    # Convert to categorical first
    return cudf.Series(data, dtype='category').cat.categories


def describe_column(series):
    """
    Provide type-aware column description

    Parameters:
        series: Column to describe. The dtype checks run in order: numeric,
            categorical, datetime64, then a generic fallback.

    Returns:
        ``series.describe()`` for numeric data, ``series.value_counts()`` for
        categorical data and for anything unmatched, or a dict with the keys
        ``'min'``, ``'max'`` and ``'range'`` for datetime64 data. Note that
        the return type therefore differs per branch.
    """
    if cudf.api.types.is_numeric_dtype(series):
        return series.describe()  # Statistical summary
    if cudf.api.types.is_categorical_dtype(series):
        return series.value_counts()  # Category frequencies
    if cudf.api.types.is_datetime64_dtype(series):
        # Run each reduction once and reuse it for the range.
        earliest = series.min()
        latest = series.max()
        return {
            'min': earliest,
            'max': latest,
            'range': latest - earliest,
        }
    return series.value_counts()  # General frequency count


def process_nested_data(series):
    """
    Handle cuDF extension types

    Parameters:
        series: Column to process. The dtype checks run in order: list,
            struct, decimal, then a generic fallback.

    Returns:
        The mean of ``series.list.len()`` for list data, the list of
        ``series.dtype.fields`` keys for struct data, ``series.sum()`` for
        decimal data, and ``series.describe()`` otherwise.
    """
    if cudf.api.types.is_list_dtype(series):
        # Process list data
        return series.list.len().mean()  # Average list length
    if cudf.api.types.is_struct_dtype(series):
        # Process struct data
        return list(series.dtype.fields.keys())  # Field names
    if cudf.api.types.is_decimal_dtype(series):
        # Exact decimal arithmetic
        return series.sum()  # No precision loss
    # Standard processing
    return series.describe()


# Install with Tessl CLI
npx tessl i tessl/pypi-cudf-cu12