Deep Difference and Search of any Python object/data with delta and hash capabilities.
—
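Content-based hashing system that generates consistent hash values for Python objects from their content rather than their identity. Dictionary key order does not affect the hash, iterable order is ignored by default (`ignore_iterable_order=True`), and options such as `ignore_numeric_type_changes` and `ignore_string_type_changes` can additionally treat equivalent values of different types as the same. DeepHash provides stable, reproducible hashes that can be used for caching, deduplication, and change detection across complex data structures.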
Generate deep hash values for any Python object with extensive customization options for handling different data types and structures.
class DeepHash:
def __init__(
self,
obj: Any,
*,
apply_hash: bool = True,
custom_operators: Optional[List[Any]] = None,
default_timezone: Union[datetime.timezone, "BaseTzInfo"] = datetime.timezone.utc,
encodings: Optional[List[str]] = None,
exclude_obj_callback: Optional[Callable[[Any, str], bool]] = None,
exclude_paths: Optional[PathType] = None,
exclude_regex_paths: Optional[RegexType] = None,
exclude_types: Optional[Union[List[type], Set[type], Tuple[type, ...]]] = None,
hasher: Optional[Callable[[Union[str, bytes]], str]] = None,
hashes: Optional[Union[Dict[Any, Any], "DeepHash"]] = None,
ignore_encoding_errors: bool = False,
ignore_iterable_order: bool = True,
ignore_numeric_type_changes: bool = False,
ignore_private_variables: bool = True,
ignore_repetition: bool = True,
ignore_string_case: bool = False,
ignore_string_type_changes: bool = False,
ignore_type_in_groups: Any = None,
ignore_type_subclasses: bool = False,
ignore_uuid_types: bool = False,
include_paths: Optional[PathType] = None,
number_format_notation: str = "f",
number_to_string_func: Optional[NumberToStringFunc] = None,
parent: str = "root",
significant_digits: Optional[int] = None,
truncate_datetime: Optional[str] = None,
use_enum_value: bool = False,
**kwargs
):
"""
Deep hash of objects based on their content.
Parameters:
- obj: Object to hash
- hasher: Hash function to use (default: SHA256)
- ignore_type_in_groups: List of type groups to treat as equivalent for hashing
- ignore_encoding_errors: Ignore encoding errors when hashing strings
- ignore_numeric_type_changes: Ignore type differences between numeric types
- ignore_type_subclasses: Ignore type differences between subclasses
- ignore_string_type_changes: Ignore differences between string types
- exclude_types: List of types to exclude from hashing
- exclude_paths: List of paths to exclude from hashing
- exclude_regex_paths: List of regex patterns for paths to exclude
- significant_digits: Number of digits after the decimal point to keep when hashing numbers (formatted according to number_format_notation)
- apply_hash: Whether to apply the hash function to the result
- encodings: List of encodings to try for string hashing
- ignore_private_variables: Ignore private attributes (starting with _)
- parent: Path label (string) used for the top-level object when building paths; defaults to "root", as in "root['timestamp']"
- ignore_repetition: Ignore repetitive elements when hashing
- number_format_notation: Notation for number formatting in hashes
- exclude_obj_callback: Callback function to exclude objects from hashing
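- exclude_obj_callback_strict: Strict mode for exclude callback (note: not listed in the signature above; confirm it is accepted by your deepdiff version)
- number_to_string_func: Custom function for number to string conversion
- ignore_nan_equality: Ignore NaN values when hashing (note: not listed in the signature above; confirm it is accepted by your deepdiff version)
- math_epsilon: Epsilon for floating point hash comparisons (note: not listed in the signature above; confirm it is accepted by your deepdiff version)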
"""

Methods for retrieving hash values from hashed objects with optional path-based access.

def get(self, path: str = None) -> str:
"""
Get hash value for the object or a specific path within it.
Parameters:
- path: Optional path to get hash for specific part of object
Returns:
Hash string for the specified object or path.
"""
def hexdigest(self) -> str:
"""
Get hexadecimal representation of the object's hash.
Returns:
Hexadecimal hash string.
"""

DeepHash objects support dictionary-like access for retrieving hash values.

def __getitem__(self, obj: Any, extract_index: Optional[int] = 0) -> Any:
"""
Get hash value for an object using dictionary-like access.
Parameters:
- obj: Object to get hash for
- extract_index: Index for extracting from the hash result
Returns:
Hash value for the object
"""
def __contains__(self, obj: Any) -> bool:
"""
Check if an object has been hashed.
Parameters:
- obj: Object to check
Returns:
True if object has been hashed, False otherwise
"""
def keys(self) -> Any:
"""
Get all keys (objects) that have been hashed.
Returns:
Keys from the hash result
"""
def values(self) -> Generator[Any, None, None]:
"""
Get all hash values.
Returns:
Generator yielding hash values
"""
def items(self) -> Generator[Tuple[Any, Any], None, None]:
"""
Get all (object, hash) pairs.
Returns:
Generator yielding (object, hash) tuples
"""
def __eq__(self, other: Any) -> bool:
"""
Compare two DeepHash objects for equality.
Parameters:
- other: Other DeepHash object to compare
Returns:
True if hashes are equal, False otherwise
"""
def __bool__(self) -> bool:
"""
Check if the DeepHash object contains any hashes.
Returns:
True if hashes exist, False otherwise
"""

Built-in static hash functions available for use with DeepHash.

@staticmethod
def sha256hex(obj: Union[str, bytes]) -> str:
"""
Generate SHA256 hash in hexadecimal format.
Parameters:
- obj: Data to hash
Returns:
SHA256 hash as hexadecimal string.
"""
@staticmethod
def sha1hex(obj: Union[str, bytes]) -> str:
"""
Generate SHA1 hash in hexadecimal format.
Parameters:
- obj: Data to hash
Returns:
SHA1 hash as hexadecimal string.
"""
@staticmethod
def _getitem(hashes: Dict[Any, Any], obj: Any, extract_index: Optional[int] = 0, use_enum_value: bool = False) -> Any:
"""
Static method to get item from hashes dictionary.
Parameters:
- hashes: Dictionary of hashes
- obj: Object to get hash for
- extract_index: Index for extraction
- use_enum_value: Whether to use enum values
Returns:
Hash value for the object
"""
@staticmethod
def get_key(hashes: Dict[Any, Any], key: Any, default: Any = None, extract_index: Optional[int] = 0, use_enum_value: bool = False) -> Any:
"""
Static method to get key from hashes dictionary with default.
Parameters:
- hashes: Dictionary of hashes
- key: Key to get hash for
- default: Default value if key not found
- extract_index: Index for extraction
- use_enum_value: Whether to use enum values
Returns:
Hash value for the key or default value
"""

from deepdiff import DeepHash
# Hash simple objects
obj1 = {"name": "John", "age": 30}
obj2 = {"age": 30, "name": "John"} # Same content, different order
hash1 = DeepHash(obj1)
hash2 = DeepHash(obj2)
print(hash1 == hash2) # True - same content produces same hash
print(hash1.hexdigest()) # Get hex representation

# Hash with type normalization
data1 = {"value": 42}
data2 = {"value": 42.0}
# Without type normalization - different hashes
hash1 = DeepHash(data1)
hash2 = DeepHash(data2)
print(hash1 == hash2) # False
# With type normalization - same hashes
hash1 = DeepHash(data1, ignore_numeric_type_changes=True)
hash2 = DeepHash(data2, ignore_numeric_type_changes=True)
print(hash1 == hash2) # True

import hashlib
# Use custom hash function
def custom_hasher(data):
return hashlib.md5(str(data).encode()).hexdigest()
hash_obj = DeepHash(data1, hasher=custom_hasher)
print(hash_obj.hexdigest())

# Hash while excluding certain paths
data = {
"user_id": 123,
"name": "John",
"timestamp": "2023-01-01T00:00:00Z",
"metadata": {"created_by": "system"}
}
# Exclude timestamp for stable hashing
hash_obj = DeepHash(
data,
exclude_paths=["root['timestamp']", "root['metadata']['created_by']"]
)

# Control floating point precision
data = {"pi": 3.141592653589793}
# Hash with different precision levels
hash1 = DeepHash(data, significant_digits=2) # 3.14
hash2 = DeepHash(data, significant_digits=4) # 3.1416 (digits are counted after the decimal point)
print(hash1.hexdigest())
print(hash2.hexdigest())

nested_data = {
"level1": {
"level2": {
"data": [1, 2, 3],
"other": "value"
}
}
}
hash_obj = DeepHash(nested_data)
# Get hash for entire object
full_hash = hash_obj.get()
# Get hash for specific path
partial_hash = hash_obj.get("root['level1']['level2']['data']")
print(f"Full object hash: {full_hash}")
print(f"Array hash: {partial_hash}")

class Person:
def __init__(self, name, age):
self.name = name
self.age = age
self._private = "secret"
person1 = Person("John", 30)
person2 = Person("John", 30)
# Hash custom objects
hash1 = DeepHash(person1)
hash2 = DeepHash(person2)
print(hash1 == hash2) # True - same attribute values
# Include private variables
hash1 = DeepHash(person1, ignore_private_variables=False)
hash2 = DeepHash(person2, ignore_private_variables=False)

import numpy as np
import pandas as pd
# Hash NumPy arrays
arr = np.array([1, 2, 3, 4, 5])
hash_arr = DeepHash(arr)
# Hash pandas DataFrames
df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
hash_df = DeepHash(df)
# Hash with type exclusions
mixed_data = {
"numpy_array": arr,
"dataframe": df,
"regular_list": [1, 2, 3]
}
# Exclude NumPy types
hash_obj = DeepHash(mixed_data, exclude_types=[np.ndarray])

# Hash function type
HashFunction = Callable[[Any], str]
# Common hash functions available
SHA256_HASHER: HashFunction
SHA1_HASHER: HashFunction
# DeepHash result access patterns
class DeepHashResult:
def __getitem__(self, key: str) -> str: ... # Access by path
def __eq__(self, other: 'DeepHashResult') -> bool: ... # Compare hashes
def keys(self) -> List[str]: ... # Get all hashed paths
def values(self) -> List[str]: ... # Get all hash values
# Type groups for equivalent hashing
NumericTypes = (int, float, complex)
StringTypes = (str, bytes, bytearray)

Install with Tessl CLI
npx tessl i tessl/pypi-deepdiff