Hierarchical datasets for Python, using the HDF5 library to manage extremely large amounts of data.
PyTables uses a hierarchical group structure similar to a filesystem for organizing data nodes. Groups serve as containers that can hold other groups and data nodes (tables, arrays), providing logical organization and namespace management for complex datasets.
# API reference stubs: each definition below carries only its signature and
# docstring; no implementation is shown here.
class Group:
    """Container node that can hold other groups and data nodes (tables, arrays)."""

    def __init__(self, parentnode, name, title="", new=False, filters=None):
        """Group constructor (typically called via File.create_group)."""

    def _f_walknodes(self, classname=None):
        """
        Walk all nodes in this group and its subgroups.

        Parameters:
        - classname (str): Filter by node class name

        Yields:
        Node: Each node in traversal order
        """

    def _f_list_nodes(self, classname=None):
        """
        List immediate child nodes.

        Parameters:
        - classname (str): Filter by node class name

        Returns:
        list: Child nodes of specified class
        """

    def __contains__(self, name):
        """
        Check if child node exists.

        Parameters:
        - name (str): Child node name

        Returns:
        bool: True if child exists
        """

    def __getitem__(self, name):
        """
        Get child node by name.

        Parameters:
        - name (str): Child node name

        Returns:
        Node: Child node object
        """

    def __iter__(self):
        """Iterate over immediate child nodes."""


def get_node(file, where, name=None, classname=None):
    """
    Retrieve node by path.

    The usage example in this file invokes this as a method of the open
    file object (``h5file.get_node(path)``), so ``file`` is the object the
    call is made on rather than an extra positional argument.

    Parameters:
    - file (File): PyTables file object
    - where (str): Path to node or parent location
    - name (str): Node name if where is parent
    - classname (str): Expected class for validation

    Returns:
    Node: Retrieved node object
    """


def walk_nodes(file, where="/", classname=None):
    """
    Walk all nodes in hierarchy.

    The usage example in this file invokes this as a method of the open
    file object (``h5file.walk_nodes("/")``), so ``file`` is the object the
    call is made on rather than an extra positional argument.

    Parameters:
    - file (File): PyTables file object
    - where (str): Starting location
    - classname (str): Filter by class name

    Yields:
    Node: Each node in traversal order
    """


import tables as tb
class MyDescription(tb.IsDescription):
    """Illustrative row layout for the example ``measurements`` table."""

    # The example needs some table description to be runnable; these two
    # columns are placeholders only. Substitute the real schema of the data
    # being stored.
    name = tb.StringCol(16)
    value = tb.Float64Col()


# Build a small hierarchy in a new file, then navigate it. The file is opened
# in "w" mode, so an existing "hierarchy.h5" is overwritten.
with tb.open_file("hierarchy.h5", "w") as h5file:
    # Create nested group structure
    exp_group = h5file.create_group("/", "experiment", "Main Experiment")
    data_group = h5file.create_group(exp_group, "data", "Raw Data")
    analysis_group = h5file.create_group(exp_group, "analysis", "Analysis Results")

    # Create data in different groups
    raw_table = h5file.create_table(data_group, "measurements", MyDescription)
    results_array = h5file.create_array(analysis_group, "summary", [1, 2, 3])

    # Navigate hierarchy
    node = h5file.get_node("/experiment/data/measurements")

    # List children
    children = exp_group._f_list_nodes()

    # Walk entire tree
    for node in h5file.walk_nodes("/"):
        print(f"Found: {node._v_pathname}")

# Install with Tessl CLI:
#     npx tessl i tessl/pypi-tables