PyTables provides hierarchical datasets for Python, using the HDF5 library to manage extremely large amounts of data.
This module covers core file management for PyTables: opening, creating, copying, and validating files, with extensive configuration options for optimization and data integrity.
`open_file` opens existing PyTables/HDF5 files or creates new ones with specified access modes, compression settings, and configuration options.
def open_file(filename, mode="r", title="", root_uep="/", filters=None, **kwargs):
    """Open a PyTables (HDF5) file.

    Parameters
    ----------
    filename : str
        Path to the file.
    mode : str
        File access mode: "r" (read), "w" (write), "a" (append),
        "r+" (read/write).
    title : str
        User-defined title for the root group.
    root_uep : str
        Root user entry point path (default "/").
    filters : Filters, optional
        Default compression filters for new nodes.
    **kwargs
        Additional parameters (driver, libver, swmr, etc.).

    Returns
    -------
    File
        PyTables File object.
    """
    # NOTE(review): documentation stub -- no implementation in this chunk.


# Copies PyTables files with optional filtering, optimization, and format
# conversion capabilities.
def copy_file(srcfilename, dstfilename, overwrite=False, **kwargs):
    """Copy a PyTables file, possibly converting between different formats.

    Parameters
    ----------
    srcfilename : str
        Source file path.
    dstfilename : str
        Destination file path.
    overwrite : bool
        Whether to overwrite an existing destination file.
    **kwargs
        Additional options (filters, upgrade, etc.).

    Returns
    -------
    None
    """
    # NOTE(review): documentation stub -- no implementation in this chunk.


# Tests whether files are valid HDF5 or PyTables files.
def is_hdf5_file(filename):
    """Determine whether *filename* refers to a valid HDF5 file.

    Parameters
    ----------
    filename : str
        Path of the file to check.

    Returns
    -------
    bool
        True when the file is valid HDF5, False otherwise.
    """
def is_pytables_file(filename):
    """Test if a file is a valid PyTables file.

    Parameters
    ----------
    filename : str
        Path to the file to test.

    Returns
    -------
    bool
        True if the file is valid PyTables, False otherwise.
    """
    # NOTE(review): documentation stub -- no implementation in this chunk.


# Retrieves version information for underlying libraries.
def which_lib_version(name):
    """Get version information for libraries used by PyTables.

    Parameters
    ----------
    name : str
        Library name ("hdf5", "blosc", "blosc2", etc.).

    Returns
    -------
    str
        Version string for the specified library.
    """
    # NOTE(review): documentation stub -- no implementation in this chunk.


class File:
    """Handle to an open PyTables file (documentation stub).

    NOTE(review): this doc artifact redeclares ``class File`` in several
    later chunks; the declarations should be merged into one class.
    """
class File:
    """Open-file handle: lifecycle and context-manager API (documentation
    stub).

    NOTE(review): methods in this doc chunk had lost their class
    indentation, and ``class File`` is redeclared by later chunks; the
    declarations should be merged into a single class definition.
    """

    def close(self):
        """Close the file and flush all pending data."""

    def flush(self):
        """Flush all pending data to disk."""

    def __enter__(self):
        """Context manager entry."""

    def __exit__(self, *args):
        """Context manager exit with automatic cleanup."""
class File:
    """Open-file handle: node-creation API (documentation stub).

    NOTE(review): methods in this doc chunk had lost their class
    indentation, and ``class File`` is redeclared across chunks; the
    declarations should be merged into a single class definition.
    """

    def create_group(self, where, name, title="", filters=None, createparents=False):
        """Create a new group in the hierarchy.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Name for the new group.
        title : str
            Descriptive title.
        filters : Filters, optional
            Default filters for child nodes.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        Group
            The created group object.
        """

    def create_table(self, where, name, description, title="", filters=None,
                     expectedrows=10000, createparents=False, **kwargs):
        """Create a new table for structured data.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Table name.
        description : Description or dict
            Table structure definition.
        title : str
            Descriptive title.
        filters : Filters, optional
            Compression and filtering options.
        expectedrows : int
            Expected number of rows, used for optimization.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        Table
            The created table object.
        """

    # NOTE(review): the ``object`` parameter name shadows the builtin;
    # kept unchanged for interface compatibility with this documented API.
    def create_array(self, where, name, object, title="", byteorder=None,
                     createparents=False):
        """Create a new array for homogeneous data.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Array name.
        object : array-like
            Initial data or array shape.
        title : str
            Descriptive title.
        byteorder : str, optional
            Byte order specification.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        Array
            The created array object.
        """

    def create_carray(self, where, name, atom, shape, title="", filters=None,
                      chunkshape=None, byteorder=None, createparents=False,
                      **kwargs):
        """Create a chunked array for large datasets.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Array name.
        atom : Atom
            Data type specification.
        shape : tuple
            Array dimensions.
        title : str
            Descriptive title.
        filters : Filters, optional
            Compression options.
        chunkshape : tuple, optional
            Chunk dimensions for optimization.
        byteorder : str, optional
            Byte order specification.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        CArray
            The created chunked array object.
        """

    def create_earray(self, where, name, atom, shape, title="", filters=None,
                      expectedrows=1000, chunkshape=None, byteorder=None,
                      createparents=False):
        """Create an enlargeable array.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Array name.
        atom : Atom
            Data type specification.
        shape : tuple
            Initial shape (the first dimension can be 0).
        title : str
            Descriptive title.
        filters : Filters, optional
            Compression options.
        expectedrows : int
            Expected final size, used for optimization.
        chunkshape : tuple, optional
            Chunk dimensions.
        byteorder : str, optional
            Byte order specification.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        EArray
            The created enlargeable array object.
        """

    def create_vlarray(self, where, name, atom, title="", filters=None,
                       expectedrows=1000, chunkshape=None, byteorder=None,
                       createparents=False):
        """Create a variable-length array.

        Parameters
        ----------
        where : str or Group
            Parent location.
        name : str
            Array name.
        atom : Atom
            Data type for array elements.
        title : str
            Descriptive title.
        filters : Filters, optional
            Compression options.
        expectedrows : int
            Expected number of rows.
        chunkshape : int, optional
            Chunk size.
        byteorder : str, optional
            Byte order specification.
        createparents : bool
            Create intermediate groups if needed.

        Returns
        -------
        VLArray
            The created variable-length array object.
        """
class File:
    """Open-file handle: node-management API (documentation stub).

    NOTE(review): methods in this doc chunk had lost their class
    indentation, and ``class File`` is redeclared across chunks; the
    declarations should be merged into a single class definition.
    """

    def get_node(self, where, name=None, classname=None):
        """Retrieve a node from the hierarchy.

        Parameters
        ----------
        where : str
            Path to the node, or to its parent location.
        name : str, optional
            Node name (when *where* is the parent).
        classname : str, optional
            Expected node class name, used for validation.

        Returns
        -------
        Node
            The retrieved node object.
        """

    def remove_node(self, where, name=None, recursive=False):
        """Remove a node from the hierarchy.

        Parameters
        ----------
        where : str
            Path to the node, or to its parent location.
        name : str, optional
            Node name (when *where* is the parent).
        recursive : bool
            Remove children recursively for Groups.
        """

    def move_node(self, where, newparent=None, newname=None, name=None,
                  overwrite=False, createparents=False):
        """Move a node to a different location in the hierarchy.

        Parameters
        ----------
        where : str
            Current path to the node, or to its parent location.
        newparent : str, optional
            New parent location.
        newname : str, optional
            New node name.
        name : str, optional
            Node name (when *where* is the parent).
        overwrite : bool
            Overwrite an existing node at the destination.
        createparents : bool
            Create intermediate groups if needed.
        """

    def copy_node(self, where, newparent=None, newname=None, name=None,
                  overwrite=False, recursive=False, createparents=False,
                  **kwargs):
        """Copy a node to a different location.

        Parameters
        ----------
        where : str
            Current path to the node, or to its parent location.
        newparent : str, optional
            New parent location.
        newname : str, optional
            New node name.
        name : str, optional
            Node name (when *where* is the parent).
        overwrite : bool
            Overwrite an existing node at the destination.
        recursive : bool
            Copy children recursively for Groups.
        createparents : bool
            Create intermediate groups if needed.
        **kwargs
            Additional copy options (filters, etc.).

        Returns
        -------
        Node
            The copied node object.
        """
class File:
    """Open-file handle: hierarchy-traversal API (documentation stub).

    NOTE(review): methods in this doc chunk had lost their class
    indentation, and ``class File`` is redeclared across chunks; the
    declarations should be merged into a single class definition.
    """

    def walk_nodes(self, where="/", classname=None):
        """Iterate over all nodes in the hierarchy.

        Parameters
        ----------
        where : str
            Starting location for the traversal.
        classname : str, optional
            Filter by node class name.

        Yields
        ------
        Node
            Each node in traversal order.
        """

    def walk_groups(self, where="/"):
        """Iterate over all groups in the hierarchy.

        Parameters
        ----------
        where : str
            Starting location for the traversal.

        Yields
        ------
        Group
            Each group in traversal order.
        """


# NOTE(review): the original line here fused ``import tables as tb`` onto
# the docstring close; the import belongs with the usage example below.
import numpy as np
import tables as tb


def _demo():
    """Usage example: create, populate, copy, and validate a PyTables file.

    Wrapped in a function with a ``__main__`` guard so that importing this
    module does not perform file I/O as a side effect.
    """

    # The original example referenced an undefined ``MyDescription``;
    # define a minimal table-row description here.
    class MyDescription(tb.IsDescription):
        value = tb.Float64Col()

    # Create a new file; it is closed automatically when the context exits.
    with tb.open_file("data.h5", mode="w", title="Research Data") as h5file:
        # Create hierarchical structure.
        group = h5file.create_group("/", "experiment1", "First Experiment")
        # Create different types of data storage.
        table = h5file.create_table(group, "measurements", MyDescription)
        array = h5file.create_array(group, "raw_data", np.random.random((100, 100)))

    # Copy the file, applying blosc compression to all nodes.
    tb.copy_file("data.h5", "compressed_data.h5",
                 filters=tb.Filters(complevel=6, complib="blosc"))

    # Validate the result.
    if tb.is_pytables_file("data.h5"):
        print("Valid PyTables file")


if __name__ == "__main__":
    _demo()

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-tables