PyTables provides hierarchical datasets for Python, using the HDF5 library to manage extremely large amounts of data.
This module covers core file management for PyTables: opening, creating, copying, and validating files, with extensive configuration options for optimization and data integrity.
`open_file` opens existing PyTables/HDF5 files or creates new ones with specified access modes, compression settings, and configuration options.
def open_file(filename, mode="r", title="", root_uep="/", filters=None, **kwargs):
    """Open a PyTables (HDF5) file.

    Parameters
    ----------
    filename : str
        Path to the file.
    mode : str
        File access mode: "r" (read), "w" (write), "a" (append),
        "r+" (read/write).
    title : str
        User-defined title for the root group.
    root_uep : str
        Root user entry point path (default "/").
    filters : Filters, optional
        Default compression filters for new nodes.
    **kwargs
        Additional parameters (driver, libver, swmr, etc.).

    Returns
    -------
    File
        PyTables File object.
    """
    # NOTE(review): documentation stub -- no implementation in this chunk.


# Copies PyTables files with optional filtering, optimization, and format
# conversion capabilities.
def copy_file(srcfilename, dstfilename, overwrite=False, **kwargs):
    """Copy a PyTables file, possibly converting between different formats.

    Parameters
    ----------
    srcfilename : str
        Source file path.
    dstfilename : str
        Destination file path.
    overwrite : bool
        Whether to overwrite an existing destination file.
    **kwargs
        Additional options (filters, upgrade, etc.).

    Returns
    -------
    None
    """
    # NOTE(review): documentation stub -- no implementation in this chunk.


# Tests whether files are valid HDF5 or PyTables files.
def is_hdf5_file(filename):
    """Determine whether *filename* refers to a valid HDF5 file.

    Parameters
    ----------
    filename : str
        Path of the file to check.

    Returns
    -------
    bool
        True when the file is valid HDF5, False otherwise.
    """
def is_pytables_file(filename):
    """Test if a file is a valid PyTables file.

    Parameters
    ----------
    filename : str
        Path to the file to test.

    Returns
    -------
    bool
        True if the file is valid PyTables, False otherwise.
    """
    # NOTE(review): documentation stub -- no implementation in this chunk.


# Retrieves version information for underlying libraries.
def which_lib_version(name):
    """Get version information for libraries used by PyTables.

    Parameters
    ----------
    name : str
        Library name ("hdf5", "blosc", "blosc2", etc.).

    Returns
    -------
    str
        Version string for the specified library.
    """
    # NOTE(review): documentation stub -- no implementation in this chunk.


class File:
    """Handle to an open PyTables file (documentation stub).

    NOTE(review): this doc artifact redeclares ``class File`` in several
    later chunks; the declarations should be merged into one class.
    """
class File:
    """Open-file handle: lifecycle and context-manager API (documentation
    stub).

    NOTE(review): methods in this doc chunk had lost their class
    indentation, and ``class File`` is redeclared by later chunks; the
    declarations should be merged into a single class definition.
    """

    def close(self):
        """Close the file and flush all pending data."""

    def flush(self):
        """Flush all pending data to disk."""

    def __enter__(self):
        """Context manager entry."""

    def __exit__(self, *args):
        """Context manager exit with automatic cleanup."""
class File:
    """Open-file handle: node-creation API (documentation stub).

    NOTE(review): methods in this doc chunk had lost their class
    indentation, and ``class File`` is redeclared across chunks; the
    declarations should be merged into a single class definition.
    """

    def create_group(self, where, name, title="", filters=None, createparents=False):
        """Create a new group in the hierarchy.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Name for the new group.
        title : str
            Descriptive title.
        filters : Filters, optional
            Default filters for child nodes.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        Group
            The created group object.
        """

    def create_table(self, where, name, description, title="", filters=None,
                     expectedrows=10000, createparents=False, **kwargs):
        """Create a new table for structured data.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Table name.
        description : Description or dict
            Table structure definition.
        title : str
            Descriptive title.
        filters : Filters, optional
            Compression and filtering options.
        expectedrows : int
            Expected number of rows, used for optimization.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        Table
            The created table object.
        """

    # NOTE(review): the ``object`` parameter name shadows the builtin;
    # kept unchanged for interface compatibility with this documented API.
    def create_array(self, where, name, object, title="", byteorder=None,
                     createparents=False):
        """Create a new array for homogeneous data.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Array name.
        object : array-like
            Initial data or array shape.
        title : str
            Descriptive title.
        byteorder : str, optional
            Byte order specification.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        Array
            The created array object.
        """

    def create_carray(self, where, name, atom, shape, title="", filters=None,
                      chunkshape=None, byteorder=None, createparents=False,
                      **kwargs):
        """Create a chunked array for large datasets.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Array name.
        atom : Atom
            Data type specification.
        shape : tuple
            Array dimensions.
        title : str
            Descriptive title.
        filters : Filters, optional
            Compression options.
        chunkshape : tuple, optional
            Chunk dimensions for optimization.
        byteorder : str, optional
            Byte order specification.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        CArray
            The created chunked array object.
        """

    def create_earray(self, where, name, atom, shape, title="", filters=None,
                      expectedrows=1000, chunkshape=None, byteorder=None,
                      createparents=False):
        """Create an enlargeable array.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Array name.
        atom : Atom
            Data type specification.
        shape : tuple
            Initial shape (the first dimension can be 0).
        title : str
            Descriptive title.
        filters : Filters, optional
            Compression options.
        expectedrows : int
            Expected final size, used for optimization.
        chunkshape : tuple, optional
            Chunk dimensions.
        byteorder : str, optional
            Byte order specification.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        EArray
            The created enlargeable array object.
        """

    def create_vlarray(self, where, name, atom, title="", filters=None,
                       expectedrows=1000, chunkshape=None, byteorder=None,
                       createparents=False):
        """Create a variable-length array.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Array name.
        atom : Atom
            Data type for array elements.
        title : str
            Descriptive title.
        filters : Filters, optional
            Compression options.
        expectedrows : int
            Expected number of rows.
        chunkshape : int, optional
            Chunk size.
        byteorder : str, optional
            Byte order specification.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        VLArray
            The created variable-length array object.
        """
class File:
    """Open-file handle: node-management API (documentation stub).

    NOTE(review): methods in this doc chunk had lost their class
    indentation, and ``class File`` is redeclared across chunks; the
    declarations should be merged into a single class definition.
    """

    def get_node(self, where, name=None, classname=None):
        """Retrieve a node from the hierarchy.

        Parameters
        ----------
        where : str
            Path to the node, or to its parent location.
        name : str, optional
            Node name (when *where* is the parent).
        classname : str, optional
            Expected node class name, used for validation.

        Returns
        -------
        Node
            The retrieved node object.
        """

    def remove_node(self, where, name=None, recursive=False):
        """Remove a node from the hierarchy.

        Parameters
        ----------
        where : str
            Path to the node, or to its parent location.
        name : str, optional
            Node name (when *where* is the parent).
        recursive : bool
            Remove children recursively for Groups.
        """

    def move_node(self, where, newparent=None, newname=None, name=None,
                  overwrite=False, createparents=False):
        """Move a node to a different location in the hierarchy.

        Parameters
        ----------
        where : str
            Current path to the node, or to its parent location.
        newparent : str, optional
            New parent location.
        newname : str, optional
            New node name.
        name : str, optional
            Node name (when *where* is the parent).
        overwrite : bool
            Overwrite an existing node at the destination.
        createparents : bool
            Create intermediate groups if needed.
        """

    def copy_node(self, where, newparent=None, newname=None, name=None,
                  overwrite=False, recursive=False, createparents=False,
                  **kwargs):
        """Copy a node to a different location.

        Parameters
        ----------
        where : str
            Current path to the node, or to its parent location.
        newparent : str, optional
            New parent location.
        newname : str, optional
            New node name.
        name : str, optional
            Node name (when *where* is the parent).
        overwrite : bool
            Overwrite an existing node at the destination.
        recursive : bool
            Copy children recursively for Groups.
        createparents : bool
            Create intermediate groups if needed.
        **kwargs
            Additional copy options (filters, etc.).

        Returns
        -------
        Node
            The copied node object.
        """
class File:
    """Open-file handle: hierarchy-traversal API (documentation stub).

    NOTE(review): methods in this doc chunk had lost their class
    indentation, and ``class File`` is redeclared across chunks; the
    declarations should be merged into a single class definition.
    """

    def walk_nodes(self, where="/", classname=None):
        """Iterate over all nodes in the hierarchy.

        Parameters
        ----------
        where : str
            Starting location for the traversal.
        classname : str, optional
            Filter by node class name.

        Yields
        ------
        Node
            Each node in traversal order.
        """

    def walk_groups(self, where="/"):
        """Iterate over all groups in the hierarchy.

        Parameters
        ----------
        where : str
            Starting location for the traversal.

        Yields
        ------
        Group
            Each group in traversal order.
        """


# NOTE(review): the original line here fused ``import tables as tb`` onto
# the docstring close; the import belongs with the usage example below.
import numpy as np
import tables as tb


def _demo():
    """Usage example: create, populate, copy, and validate a PyTables file.

    Wrapped in a function with a ``__main__`` guard so that importing this
    module does not perform file I/O as a side effect.
    """

    # The original example referenced an undefined ``MyDescription``;
    # define a minimal table-row description here.
    class MyDescription(tb.IsDescription):
        value = tb.Float64Col()

    # Create a new file; it is closed automatically when the context exits.
    with tb.open_file("data.h5", mode="w", title="Research Data") as h5file:
        # Create hierarchical structure.
        group = h5file.create_group("/", "experiment1", "First Experiment")
        # Create different types of data storage.
        table = h5file.create_table(group, "measurements", MyDescription)
        array = h5file.create_array(group, "raw_data", np.random.random((100, 100)))

    # Copy the file, applying blosc compression to all nodes.
    tb.copy_file("data.h5", "compressed_data.h5",
                 filters=tb.Filters(complevel=6, complib="blosc"))

    # Validate the result.
    if tb.is_pytables_file("data.h5"):
        print("Valid PyTables file")


if __name__ == "__main__":
    _demo()

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-tables