A Python utility belt containing simple tools, a stdlib-like feel, and extra batteries.
Hashing of arbitrary data and files, plus dynamic module importing and path-resolution utilities, for data integrity and module management.
Functions for computing secure hashes of arbitrary Python data structures and files.
def hash_data(data, hasher=NoParam, base=NoParam, types=False, convert=False, extensions=None):
    """
    Compute a stable hash of an arbitrary (possibly nested) Python object.

    Args:
        data: the object to hash (dict, list, str, numbers, numpy arrays, ...)
        hasher: hash algorithm to use; NoParam selects 'sha512'
        base: encoding of the output digest; NoParam selects 'hex'
        types (bool): if True, mix type information into the hash
        convert (bool): if True, coerce the data into a hashable form first
        extensions: optional custom handlers for otherwise unsupported types

    Returns:
        str: the encoded hash digest

    Note:
        Data is normalized before hashing, so equal structures produce the
        same digest across runs. Nested containers, numpy arrays, and custom
        objects (via extensions) are supported.
        NoParam defaults: hasher='sha512', base='hex'
    """
def hash_file(fpath, blocksize=1048576, stride=1, maxbytes=None, hasher=NoParam, base=NoParam):
    """
    Hash file contents efficiently.

    Args:
        fpath (str | Path): Path to file
        blocksize (int): Read block size in bytes (default: 1MB)
        stride (int): Read every nth block (default: 1 = all blocks)
        maxbytes (int | None): Maximum bytes to read (None = entire file)
        hasher: Hash algorithm (NoParam uses 'sha512')
        base: Output encoding (NoParam uses 'hex')

    Returns:
        str: File hash digest

    Raises:
        FileNotFoundError: File does not exist
        IOError: Cannot read file

    Note:
        NoParam defaults: hasher='sha512', base='hex'
    """


# Dynamic module importing and path resolution for runtime module loading.
def import_module_from_name(name, **kwargs):
    """
    Import a module given its dotted name, with error handling.

    Args:
        name (str): dotted module name, e.g. 'os.path' or 'numpy'
        **kwargs: extra options forwarded to the import machinery

    Returns:
        module: the imported module object

    Raises:
        ImportError: the module cannot be imported
    """
def import_module_from_path(modpath, index=-1):
    """
    Import module from file path.

    Args:
        modpath (str | Path): Path to Python file
        index (int): Module index for namespace packages

    Returns:
        module: Imported module object

    Raises:
        ImportError: Cannot import from path
        FileNotFoundError: File does not exist
    """


# Functions for converting between module names and file paths.
def modname_to_modpath(modname, **kwargs):
    """
    Resolve a dotted module name to the path of its source file.

    Args:
        modname (str): dotted module name, e.g. 'os.path'
        **kwargs: extra resolution options

    Returns:
        str | None: path to the module file, or None when it cannot be found
    """
def modpath_to_modname(fpath, **kwargs):
    """
    Derive the dotted module name corresponding to a Python file path.

    Args:
        fpath (str | Path): path to a Python file
        **kwargs: extra conversion options

    Returns:
        str: the dotted module name
    """
def split_modpath(fpath, **kwargs):
    """
    Split module path into components.

    Args:
        fpath (str | Path): Path to Python file
        **kwargs: Additional options

    Returns:
        dict: Dictionary with path components
    """


import ubelt as ub
# Hash simple data
data = {'name': 'Alice', 'age': 30, 'scores': [95, 87, 92]}
hash_value = ub.hash_data(data)
print(f"Data hash: {hash_value}")

# Different hash algorithms
sha256_hash = ub.hash_data(data, hasher='sha256')
md5_hash = ub.hash_data(data, hasher='md5')
print(f"SHA256: {sha256_hash}")
print(f"MD5: {md5_hash}")

# Different output encodings
hex_hash = ub.hash_data(data, base='hex')
b64_hash = ub.hash_data(data, base='base64')
print(f"Hex: {hex_hash}")
print(f"Base64: {b64_hash}")

# Truncated hashes: slice the digest rather than passing a 'hashlen'
# argument, which is not part of the documented hash_data signature.
short_hash = ub.hash_data(data)[:8]
print(f"Short hash: {short_hash}")

import ubelt as ub
import numpy as np

# Hash complex nested structures
complex_data = {
    'metadata': {
        'version': '1.0',
        'created': '2023-01-01'
    },
    'arrays': [
        np.array([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4]])
    ],
    'config': {
        'learning_rate': 0.001,
        'batch_size': 32,
        'layers': [128, 64, 32]
    }
}
hash_value = ub.hash_data(complex_data)
print(f"Complex data hash: {hash_value}")

# Hashing is consistent across runs
hash2 = ub.hash_data(complex_data)
assert hash_value == hash2  # Same data produces same hash

# Order-independent hashing for dicts
data1 = {'a': 1, 'b': 2}
data2 = {'b': 2, 'a': 1}
hash1 = ub.hash_data(data1)
hash2 = ub.hash_data(data2)
assert hash1 == hash2  # Dict order doesn't matter

import ubelt as ub
# Hash file contents
file_path = 'example.txt'
with open(file_path, 'w') as f:
    f.write('Hello, World!')
file_hash = ub.hash_file(file_path)
print(f"File hash: {file_hash}")

# Hash large files efficiently (uses chunks)
large_file_hash = ub.hash_file('large_file.bin', blocksize=65536)

# Verify file integrity
def verify_file(fpath, expected_hash):
    actual_hash = ub.hash_file(fpath)
    return actual_hash == expected_hash

is_valid = verify_file(file_path, file_hash)
print(f"File is valid: {is_valid}")

# Quick hash for caching: truncate by slicing the digest ('hashlen' is
# not part of the documented hash_file signature).
cache_key = ub.hash_file('config.json')[:8]
print(f"Cache key: {cache_key}")

import ubelt as ub
# Import module by name
os_module = ub.import_module_from_name('os')
print(f"OS name: {os_module.name}")

# Import submodules
path_module = ub.import_module_from_name('os.path')
print(f"Current dir: {path_module.abspath('.')}")

# Safe importing with error handling
try:
    numpy = ub.import_module_from_name('numpy')
    print("NumPy is available")
except ImportError:
    print("NumPy not installed")

# Import from file path
script_path = 'my_script.py'
with open(script_path, 'w') as f:
    # NOTE: the body of greet must be indented in the written script,
    # otherwise the generated module would be invalid Python.
    f.write('''
def greet(name):
    return f"Hello, {name}!"
VERSION = "1.0"
''')
my_module = ub.import_module_from_path(script_path)
print(my_module.greet("World"))
print(f"Version: {my_module.VERSION}")

import ubelt as ub
# Convert module name to path
os_path = ub.modname_to_modpath('os')
print(f"OS module path: {os_path}")
json_path = ub.modname_to_modpath('json')
print(f"JSON module path: {json_path}")

# Convert path to module name
if json_path:
    module_name = ub.modpath_to_modname(json_path)
    print(f"Module name: {module_name}")

# Split module path into components
if json_path:
    components = ub.split_modpath(json_path)
    print(f"Path components: {components}")

# Find package modules
import sys
for path in sys.path:
    if 'site-packages' in path:
        print(f"Site packages: {path}")
        break

import ubelt as ub
import json

# Cache with data integrity
def cached_computation(data):
    """Cache expensive computation with data hash as key"""
    # Truncate by slicing the digest ('hashlen' is not part of the
    # documented hash_data signature).
    data_hash = ub.hash_data(data)[:16]
    cache_file = f'cache_{data_hash}.json'
    try:
        with open(cache_file, 'r') as f:
            cached_result = json.load(f)
        print("Using cached result")
        return cached_result
    except FileNotFoundError:
        print("Computing new result")
        # Expensive computation
        result = sum(x**2 for x in data.get('values', []))
        # Cache the result
        with open(cache_file, 'w') as f:
            json.dump(result, f)
        return result

# Test caching
data1 = {'values': [1, 2, 3, 4, 5], 'metadata': 'test'}
result1 = cached_computation(data1)  # Computes new
result2 = cached_computation(data1)  # Uses cache

# Different data gets different cache
data2 = {'values': [1, 2, 3, 4, 6], 'metadata': 'test'}  # Changed last value
result3 = cached_computation(data2)  # Computes new
print(f"Results: {result1}, {result2}, {result3}")

import ubelt as ub
# Create checksums for multiple files
files_to_check = ['file1.txt', 'file2.txt', 'file3.txt']

# Create test files
for i, fname in enumerate(files_to_check):
    with open(fname, 'w') as f:
        f.write(f'Content of file {i+1}')

# Generate checksums, sliced to 16 chars ('hashlen' is not part of the
# documented hash_file signature).
checksums = {}
for fpath in files_to_check:
    checksums[fpath] = ub.hash_file(fpath, hasher='sha256')[:16]
print("File checksums:")
for fpath, checksum in checksums.items():
    print(f"{fpath}: {checksum}")

# Verify files later
def verify_files(expected_checksums):
    """Verify files haven't changed"""
    for fpath, expected in expected_checksums.items():
        try:
            actual = ub.hash_file(fpath, hasher='sha256')[:16]
            if actual == expected:
                print(f"✓ {fpath} is valid")
            else:
                print(f"✗ {fpath} has changed!")
        except FileNotFoundError:
            print(f"✗ {fpath} is missing!")

verify_files(checksums)

# Modify a file and check again
with open('file2.txt', 'a') as f:
    f.write(' - modified')
print("\nAfter modification:")
verify_files(checksums)

# Install with Tessl CLI
npx tessl i tessl/pypi-ubelt