CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-tables

Hierarchical datasets for Python with HDF5 library for managing extremely large amounts of data

Overview
Eval results
Files

docs/type-system-descriptions.md

Type System and Descriptions

PyTables provides a comprehensive type system with Atom types for defining individual data elements and Column types for table structure definitions. This system supports all NumPy data types plus specialized types for time, strings, and complex data structures, enabling precise control over data storage and memory usage.

Capabilities

Table Descriptions

Base classes for defining table structures with strongly-typed column definitions.

class IsDescription:
    """
    Base class for user-defined table descriptions.

    Inherit from this class and declare one class attribute per table
    column, each bound to a column instance (for example
    ``run_id = Int64Col()``). The resulting subclass is then passed as the
    description argument when creating a table.
    """
    pass

class Description:
    """
    Runtime table description created from dictionaries or existing tables.

    This is the object form of a table layout, as opposed to the declarative
    ``IsDescription`` subclasses written by users.
    """
    def __init__(self, description, validate=True):
        """
        Create description from dictionary or class.

        Parameters:
        - description (dict or class): Column definitions
        - validate (bool): Validate column definitions

        NOTE(review): the upstream constructor may name and order these
        parameters differently -- confirm against the PyTables reference.
        """

    @classmethod
    def from_dtype(cls, dtype, ptparams=None):
        """
        Create description from NumPy dtype.

        Parameters:
        - dtype (numpy.dtype): NumPy structured dtype
        - ptparams (dict): PyTables-specific parameters

        Returns:
        Description: Table description object

        NOTE(review): upstream PyTables appears to expose this conversion as
        the module-level helper ``descr_from_dtype`` -- confirm that this
        classmethod exists before relying on it.
        """

Atom Types

Atom types define the data type and storage characteristics for individual elements.

class Atom:
    """Base class for all atom types."""
    def __init__(self, type, shape=(), dflt=None):
        """
        Base atom constructor.
        
        Parameters:
        - type (str): Type identifier
        - shape (tuple): Element shape for multidimensional atoms
        - dflt (any): Default value
        """
        
    @property
    def type(self):
        """String identifier for the atom type."""
        
    @property
    def shape(self):
        """Shape tuple for multidimensional atoms."""
        
    @property
    def size(self):
        """Size in bytes of a single element."""

# String Atoms
class StringAtom(Atom):
    """Fixed-length string atom."""
    def __init__(self, itemsize, shape=(), dflt=b''):
        """
        Parameters:
        - itemsize (int): Maximum string length in bytes
        - shape (tuple): Shape for arrays of strings
        - dflt (bytes): Default value
        """

class VLStringAtom(Atom):
    """Variable-length string atom (raw bytes)."""
    def __init__(self, dflt=b''):
        """
        Parameters:
        - dflt (bytes): Default value
        """

class VLUnicodeAtom(Atom):  
    """Variable-length Unicode string atom."""
    def __init__(self, dflt=''):
        """
        Parameters:
        - dflt (str): Default value
        """

# Boolean Atoms
class BoolAtom(Atom):
    """Boolean atom (True/False)."""
    def __init__(self, shape=(), dflt=False):
        """
        Parameters:
        - shape (tuple): Shape for arrays of booleans
        - dflt (bool): Default value
        """

# Integer Atoms  
class IntAtom(Atom):
    """Generic signed integer atom (platform-dependent size)."""
    def __init__(self, shape=(), dflt=0):
        """
        Parameters:
        - shape (tuple): Shape for arrays of integers
        - dflt (int): Default value
        """

class UIntAtom(Atom):
    """Generic unsigned integer atom (platform-dependent size)."""
    def __init__(self, shape=(), dflt=0): ...

class Int8Atom(Atom):
    """8-bit signed integer atom (-128 to 127)."""
    def __init__(self, shape=(), dflt=0): ...

class UInt8Atom(Atom):
    """8-bit unsigned integer atom (0 to 255)."""
    def __init__(self, shape=(), dflt=0): ...

class Int16Atom(Atom):
    """16-bit signed integer atom (-32768 to 32767)."""
    def __init__(self, shape=(), dflt=0): ...

class UInt16Atom(Atom):
    """16-bit unsigned integer atom (0 to 65535)."""
    def __init__(self, shape=(), dflt=0): ...

class Int32Atom(Atom):
    """32-bit signed integer atom."""
    def __init__(self, shape=(), dflt=0): ...

class UInt32Atom(Atom):
    """32-bit unsigned integer atom."""
    def __init__(self, shape=(), dflt=0): ...

class Int64Atom(Atom):
    """64-bit signed integer atom."""
    def __init__(self, shape=(), dflt=0): ...

class UInt64Atom(Atom):
    """64-bit unsigned integer atom."""
    def __init__(self, shape=(), dflt=0): ...

# Floating Point Atoms
class FloatAtom(Atom):
    """Generic floating point atom (platform-dependent precision)."""
    def __init__(self, shape=(), dflt=0.0): ...

class Float32Atom(Atom):
    """32-bit floating point atom (IEEE 754 single precision)."""
    def __init__(self, shape=(), dflt=0.0): ...

class Float64Atom(Atom):
    """64-bit floating point atom (IEEE 754 double precision)."""
    def __init__(self, shape=(), dflt=0.0): ...

class Float16Atom(Atom):
    """16-bit floating point atom (IEEE 754 half precision)."""
    def __init__(self, shape=(), dflt=0.0):
        """
        Note: Available when NumPy supports float16 type
        """

class Float96Atom(Atom):
    """96-bit extended precision floating point atom."""
    def __init__(self, shape=(), dflt=0.0):
        """
        Note: Platform-dependent availability
        """

class Float128Atom(Atom):
    """128-bit quadruple precision floating point atom."""
    def __init__(self, shape=(), dflt=0.0):
        """
        Note: Platform-dependent availability
        """

# Complex Number Atoms
class ComplexAtom(Atom):
    """Generic complex number atom (platform-dependent precision)."""
    def __init__(self, shape=(), dflt=0.0+0j): ...

class Complex32Atom(Atom):
    """32-bit complex atom (two 16-bit floats)."""
    def __init__(self, shape=(), dflt=0.0+0j): ...

class Complex64Atom(Atom):
    """64-bit complex atom (two 32-bit floats)."""
    def __init__(self, shape=(), dflt=0.0+0j): ...

class Complex128Atom(Atom):
    """128-bit complex atom (two 64-bit floats)."""
    def __init__(self, shape=(), dflt=0.0+0j): ...

class Complex192Atom(Atom):
    """192-bit complex atom (two 96-bit floats)."""
    def __init__(self, shape=(), dflt=0.0+0j):
        """
        Note: Platform-dependent availability
        """

class Complex256Atom(Atom):
    """256-bit complex atom (two 128-bit floats)."""
    def __init__(self, shape=(), dflt=0.0+0j):
        """
        Note: Platform-dependent availability
        """

# Time Atoms
class TimeAtom(Atom):
    """Generic time atom (platform-dependent precision)."""
    def __init__(self, shape=(), dflt=0.0): ...

class Time32Atom(Atom):
    """32-bit time atom (seconds since epoch)."""
    def __init__(self, shape=(), dflt=0.0): ...

class Time64Atom(Atom):
    """64-bit time atom (seconds since epoch, stored as a 64-bit float).

    The fractional part of the value carries sub-second precision; the value
    is NOT an integer count of microseconds.
    """
    def __init__(self, shape=(), dflt=0.0): ...

# Special Atoms
class EnumAtom(Atom):
    """Enumerated type atom with named values."""
    def __init__(self, enum, dflt, base=None, shape=()):
        """
        Parameters:
        - enum (Enum): Enumeration definition
        - dflt (any): Default enumeration value
        - base (Atom): Base atom type for storage
        - shape (tuple): Shape for arrays of enums
        """

class PseudoAtom(Atom):
    """Pseudo-atom for complex data types."""
    def __init__(self, kind, shape=(), dflt=None): ...

class ObjectAtom(Atom):
    """Object atom for Python object storage (with pickle)."""
    def __init__(self, shape=(), dflt=None): ...

Column Types

Column types are used in table descriptions to define the structure and data types for table columns.

class Col:
    """Base class for all column types."""
    def __init__(self, type=None, itemsize=None, shape=(), dflt=None, pos=None):
        """
        Base column constructor.
        
        Parameters:
        - type (str): Column type identifier
        - itemsize (int): Size specification for variable types
        - shape (tuple): Shape for multidimensional columns
        - dflt (any): Default value
        - pos (int): Column position in table
        """

# String Columns
class StringCol(Col):
    """Fixed-length string column."""
    def __init__(self, itemsize, shape=(), dflt=b'', pos=None):
        """
        Parameters:
        - itemsize (int): Maximum string length
        - shape (tuple): Shape for string arrays
        - dflt (bytes): Default value
        - pos (int): Column position
        """

# Boolean Columns
class BoolCol(Col):
    """Boolean column."""
    def __init__(self, shape=(), dflt=False, pos=None): ...

# Integer Columns
class IntCol(Col):
    """Generic signed integer column."""
    def __init__(self, shape=(), dflt=0, pos=None): ...

class UIntCol(Col):
    """Generic unsigned integer column."""
    def __init__(self, shape=(), dflt=0, pos=None): ...

class Int8Col(Col):
    """8-bit signed integer column."""
    def __init__(self, shape=(), dflt=0, pos=None): ...

class UInt8Col(Col):
    """8-bit unsigned integer column."""  
    def __init__(self, shape=(), dflt=0, pos=None): ...

class Int16Col(Col):
    """16-bit signed integer column."""
    def __init__(self, shape=(), dflt=0, pos=None): ...

class UInt16Col(Col):
    """16-bit unsigned integer column."""
    def __init__(self, shape=(), dflt=0, pos=None): ...

class Int32Col(Col):
    """32-bit signed integer column."""
    def __init__(self, shape=(), dflt=0, pos=None): ...

class UInt32Col(Col):
    """32-bit unsigned integer column."""
    def __init__(self, shape=(), dflt=0, pos=None): ...

class Int64Col(Col):
    """64-bit signed integer column."""
    def __init__(self, shape=(), dflt=0, pos=None): ...

class UInt64Col(Col):
    """64-bit unsigned integer column."""
    def __init__(self, shape=(), dflt=0, pos=None): ...

# Floating Point Columns
class FloatCol(Col):
    """Generic floating point column."""
    def __init__(self, shape=(), dflt=0.0, pos=None): ...

class Float32Col(Col):
    """32-bit floating point column."""
    def __init__(self, shape=(), dflt=0.0, pos=None): ...

class Float64Col(Col):
    """64-bit floating point column."""
    def __init__(self, shape=(), dflt=0.0, pos=None): ...

class Float16Col(Col):
    """16-bit floating point column."""
    def __init__(self, shape=(), dflt=0.0, pos=None):
        """
        Note: Available when NumPy supports float16 type
        """

class Float96Col(Col):
    """96-bit extended precision floating point column."""
    def __init__(self, shape=(), dflt=0.0, pos=None):
        """
        Note: Platform-dependent availability
        """

class Float128Col(Col):
    """128-bit quadruple precision floating point column."""
    def __init__(self, shape=(), dflt=0.0, pos=None):
        """
        Note: Platform-dependent availability
        """

# Complex Number Columns
class ComplexCol(Col):
    """Generic complex number column."""
    def __init__(self, shape=(), dflt=0.0+0j, pos=None): ...

class Complex32Col(Col):
    """32-bit complex column."""
    def __init__(self, shape=(), dflt=0.0+0j, pos=None): ...

class Complex64Col(Col):
    """64-bit complex column."""
    def __init__(self, shape=(), dflt=0.0+0j, pos=None): ...

class Complex128Col(Col):
    """128-bit complex column."""
    def __init__(self, shape=(), dflt=0.0+0j, pos=None): ...

class Complex192Col(Col):
    """192-bit complex column (two 96-bit floats)."""
    def __init__(self, shape=(), dflt=0.0+0j, pos=None):
        """
        Note: Platform-dependent availability
        """

class Complex256Col(Col):
    """256-bit complex column (two 128-bit floats)."""
    def __init__(self, shape=(), dflt=0.0+0j, pos=None):
        """
        Note: Platform-dependent availability
        """

# Time Columns
class TimeCol(Col):
    """Generic time column."""
    def __init__(self, shape=(), dflt=0.0, pos=None): ...

class Time32Col(Col):
    """32-bit time column."""
    def __init__(self, shape=(), dflt=0.0, pos=None): ...

class Time64Col(Col):
    """64-bit time column."""
    def __init__(self, shape=(), dflt=0.0, pos=None): ...

# Special Columns
class EnumCol(Col):
    """Enumerated type column."""
    def __init__(self, enum, dflt, base=None, shape=(), pos=None):
        """
        Parameters:
        - enum (Enum): Enumeration definition
        - dflt (any): Default enumeration value  
        - base (Col): Base column type for storage
        - shape (tuple): Shape for enum arrays
        - pos (int): Column position
        """

Type Utilities

def split_type(type):
    """
    Split a type specification into components.

    Parameters:
    - type (str): Type specification string

    Returns:
    tuple: (kind, itemsize) components of the type

    NOTE(review): upstream PyTables documents the item size as a byte count
    (e.g. 'int32' -> ('int', 4)) and as None when the type string carries no
    precision -- confirm and state the units explicitly here.
    """

Usage Examples

Defining Table Structures

import tables as tb

# Enumerations are declared at module level: every non-special attribute of
# an IsDescription subclass is interpreted as a column definition, so a bare
# tb.Enum attribute inside the class body would be rejected.
Status = tb.Enum(['active', 'paused', 'stopped'])

# Method 1: Class-based description
class Experiment(tb.IsDescription):
    # Basic types
    run_id = tb.Int64Col()                    # 64-bit integer
    timestamp = tb.Time64Col()                # Seconds since epoch (float64)
    temperature = tb.Float32Col()             # 32-bit float
    active = tb.BoolCol()                     # Boolean

    # String types
    name = tb.StringCol(50)                   # Fixed-length string (50 bytes)
    notes = tb.StringCol(200, dflt=b'')       # With default value

    # Array types
    coordinates = tb.Float64Col(shape=(3,))   # 3D position vector
    measurements = tb.Int16Col(shape=(10,))   # Array of 10 measurements

    # Complex types
    signal = tb.Complex64Col()                # Complex number

    # Enumerated type: default is given by *name*, storage type by string
    status = tb.EnumCol(Status, 'active', base='uint8')

# Method 2: Dictionary-based description
experiment_desc = {
    'run_id': tb.Int64Col(),
    'timestamp': tb.Time64Col(),
    'temperature': tb.Float32Col(),
    'name': tb.StringCol(50),
    'coordinates': tb.Float64Col(shape=(3,)),
    'measurements': tb.Int16Col(shape=(10,)),
}

# Create table with either approach
with tb.open_file("experiment.h5", "w") as h5file:
    table1 = h5file.create_table("/", "exp_class", Experiment)
    table2 = h5file.create_table("/", "exp_dict", experiment_desc)

Working with Atoms for Arrays

import tables as tb
import numpy as np

with tb.open_file("atoms.h5", "w") as h5file:
    # Create arrays with specific atom types

    # String array
    string_atom = tb.StringAtom(20)  # 20-byte strings
    string_array = h5file.create_carray("/", "strings", string_atom,
                                        shape=(100,),
                                        filters=tb.Filters(complevel=1))

    # Time series data
    time_atom = tb.Time64Atom()  # float64 seconds since epoch
    time_array = h5file.create_earray("/", "timestamps", time_atom,
                                      shape=(0,), expectedrows=100000)

    # Complex signal data
    complex_atom = tb.Complex128Atom()
    signal_array = h5file.create_carray("/", "signal", complex_atom,
                                        shape=(1000, 1000))

    # Multidimensional atoms: each of the 100 elements is a 3-component
    # float32 vector. Plain Array nodes only hold homogeneous data and
    # reject structured (record) NumPy arrays, so the atom is used with a
    # CArray instead.
    vector_atom = tb.Float32Atom(shape=(3,))  # 3D vectors
    vector_array = h5file.create_carray("/", "vectors", vector_atom,
                                        shape=(100,))
    vector_array[:] = np.zeros((100, 3), dtype=np.float32)

Advanced Type Usage

import tables as tb
from enum import Enum

# Custom enumeration
class Priority(Enum):
    LOW = 1
    MEDIUM = 2
    HIGH = 3
    CRITICAL = 4

# PyTables has its own enumeration type; build it from the Python Enum's
# name -> value mapping so both stay in sync.
PriorityEnum = tb.Enum({member.name: member.value for member in Priority})

# Table with mixed advanced types
class TaskDescription(tb.IsDescription):
    task_id = tb.UInt32Col()
    created = tb.Time64Col()          # float64 seconds since epoch

    # Fixed-length byte strings (longer values are truncated)
    title = tb.StringCol(100)
    description = tb.StringCol(500, dflt=b'No description')

    # Custom enumeration: the default is given by name, storage is uint8
    priority = tb.EnumCol(PriorityEnum, Priority.MEDIUM.name, base='uint8')

    # Multi-dimensional data
    progress_history = tb.Float32Col(shape=(10,))  # Last 10 progress values

with tb.open_file("tasks.h5", "w") as h5file:
    table = h5file.create_table("/", "tasks", TaskDescription)

    # Arbitrary Python objects cannot be table columns; ObjectAtom is only
    # valid in a VLArray, where each row holds one pickled object. Row i of
    # this array carries the metadata for row i of the table.
    metadata = h5file.create_vlarray("/", "task_metadata", tb.ObjectAtom())

    # Add sample data
    row = table.row
    row['task_id'] = 1
    row['created'] = 1640995200.0  # 2022-01-01 00:00:00 UTC, in seconds
    row['title'] = b'Implement feature X'
    row['priority'] = Priority.HIGH.value  # enum columns store the concrete value
    row['progress_history'] = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    row.append()
    table.flush()

    metadata.append({'tags': ['urgent', 'backend'], 'assignee': 'developer'})

Install with Tessl CLI

npx tessl i tessl/pypi-tables

docs

arrays-homogeneous-data.md

compression-filtering.md

file-operations.md

groups-navigation.md

index.md

querying-indexing.md

tables-structured-data.md

transactions-undo-redo.md

type-system-descriptions.md

tile.json