CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-deepdiff

Deep Difference and Search of any Python object/data with delta and hash capabilities.

Pending
Overview
Eval results
Files

docs/hashing.md

Deep Hashing

Content-based hashing system that generates consistent hash values for Python objects: two objects with the same content hash the same regardless of dictionary key order and, by default (`ignore_iterable_order=True`), regardless of the order of items in iterables. DeepHash provides stable, reproducible hashes that can be used for caching, deduplication, and change detection across complex data structures.

Capabilities

Object Hashing

Generate deep hash values for any Python object with extensive customization options for handling different data types and structures.

class DeepHash:
    def __init__(
        self,
        obj: Any,
        *,
        apply_hash: bool = True,
        custom_operators: Optional[List[Any]] = None,
        default_timezone: Union[datetime.timezone, "BaseTzInfo"] = datetime.timezone.utc,
        encodings: Optional[List[str]] = None,
        exclude_obj_callback: Optional[Callable[[Any, str], bool]] = None,
        exclude_paths: Optional[PathType] = None,
        exclude_regex_paths: Optional[RegexType] = None,
        exclude_types: Optional[Union[List[type], Set[type], Tuple[type, ...]]] = None,
        hasher: Optional[Callable[[Union[str, bytes]], str]] = None,
        hashes: Optional[Union[Dict[Any, Any], "DeepHash"]] = None,
        ignore_encoding_errors: bool = False,
        ignore_iterable_order: bool = True,
        ignore_numeric_type_changes: bool = False,
        ignore_private_variables: bool = True,
        ignore_repetition: bool = True,
        ignore_string_case: bool = False,
        ignore_string_type_changes: bool = False,
        ignore_type_in_groups: Any = None,
        ignore_type_subclasses: bool = False,
        ignore_uuid_types: bool = False,
        include_paths: Optional[PathType] = None,
        number_format_notation: str = "f",
        number_to_string_func: Optional[NumberToStringFunc] = None,
        parent: str = "root",
        significant_digits: Optional[int] = None,
        truncate_datetime: Optional[str] = None,
        use_enum_value: bool = False,
        **kwargs
    ):
    """
    Deep hash of objects based on their content.
    
    Parameters:
    - obj: Object to hash
    - hasher: Hash function to use (default: SHA256)
    - ignore_type_in_groups: List of type groups to treat as equivalent for hashing
    - ignore_encoding_errors: Ignore encoding errors when hashing strings
    - ignore_numeric_type_changes: Ignore type differences between numeric types
    - ignore_type_subclasses: Ignore type differences between subclasses
    - ignore_string_type_changes: Ignore differences between string types
    - exclude_types: List of types to exclude from hashing
    - exclude_paths: List of paths to exclude from hashing
    - exclude_regex_paths: List of regex patterns for paths to exclude
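    - significant_digits: Number of digits kept when numbers are converted for hashing; with the default number_format_notation="f" this counts digits after the decimal point (e.g. 2 turns 3.14159 into 3.14)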
    - apply_hash: Whether to apply the hash function to the result
    - encodings: List of encodings to try for string hashing
    - ignore_private_variables: Ignore private attributes (starting with _)
    - parent: Path string used as the prefix for the top-level object in paths such as "root['key']" (default: "root")
    - ignore_repetition: Ignore repetitive elements when hashing
    - number_format_notation: Notation for number formatting in hashes
    - exclude_obj_callback: Callback function to exclude objects from hashing
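    - exclude_obj_callback_strict: Strict mode for exclude callback (not a named parameter in the signature above; it could only be passed through **kwargs — verify it is accepted before relying on it)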
    - number_to_string_func: Custom function for number to string conversion
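    - ignore_nan_equality: Ignore NaN values when hashing (not a named parameter in the signature above; it could only be passed through **kwargs — verify it is accepted before relying on it)
    - math_epsilon: Epsilon for floating point hash comparisons (not a named parameter in the signature above; it could only be passed through **kwargs — verify it is accepted before relying on it)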
    """

Hash Retrieval

Methods for retrieving hash values from hashed objects with optional path-based access.

def get(self, path: str = None) -> str:
    """
    Get hash value for the object or a specific path within it.
    
    Parameters:
    - path: Optional path to get hash for specific part of object
    
    Returns:
    Hash string for the specified object or path.
    """

def hexdigest(self) -> str:
    """
    Get hexadecimal representation of the object's hash.
    
    Returns:
    Hexadecimal hash string.
    """

Dictionary-like Access

DeepHash objects support dictionary-like access for retrieving hash values.

def __getitem__(self, obj: Any, extract_index: Optional[int] = 0) -> Any:
    """
    Get hash value for an object using dictionary-like access.
    
    Parameters:
    - obj: Object to get hash for
    - extract_index: Index for extracting from the hash result
    
    Returns:
    Hash value for the object
    """

def __contains__(self, obj: Any) -> bool:
    """
    Check if an object has been hashed.
    
    Parameters:
    - obj: Object to check
    
    Returns:
    True if object has been hashed, False otherwise
    """

def keys(self) -> Any:
    """
    Get all keys (objects) that have been hashed.
    
    Returns:
    Keys from the hash result
    """

def values(self) -> Generator[Any, None, None]:
    """
    Get all hash values.
    
    Returns:
    Generator yielding hash values
    """

def items(self) -> Generator[Tuple[Any, Any], None, None]:
    """
    Get all (object, hash) pairs.
    
    Returns:
    Generator yielding (object, hash) tuples
    """

def __eq__(self, other: Any) -> bool:
    """
    Compare two DeepHash objects for equality.
    
    Parameters:
    - other: Other DeepHash object to compare
    
    Returns:
    True if hashes are equal, False otherwise
    """

def __bool__(self) -> bool:
    """
    Check if the DeepHash object contains any hashes.
    
    Returns:
    True if hashes exist, False otherwise
    """

Static Hash Functions

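Built-in static methods on DeepHash: the hash functions (`sha256hex`, `sha1hex`) that can be passed as `hasher`, and the static lookup helpers (`_getitem`, `get_key`) for reading values out of a hashes dictionary.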

@staticmethod
def sha256hex(obj: Union[str, bytes]) -> str:
    """
    Generate SHA256 hash in hexadecimal format.
    
    Parameters:
    - obj: Data to hash
    
    Returns:
    SHA256 hash as hexadecimal string.
    """

@staticmethod
def sha1hex(obj: Union[str, bytes]) -> str:
    """
    Generate SHA1 hash in hexadecimal format.
    
    Parameters:
    - obj: Data to hash
    
    Returns:
    SHA1 hash as hexadecimal string.
    """

@staticmethod
def _getitem(hashes: Dict[Any, Any], obj: Any, extract_index: Optional[int] = 0, use_enum_value: bool = False) -> Any:
    """
    Static method to get item from hashes dictionary.
    
    Parameters:
    - hashes: Dictionary of hashes
    - obj: Object to get hash for
    - extract_index: Index for extraction
    - use_enum_value: Whether to use enum values
    
    Returns:
    Hash value for the object
    """

@staticmethod
def get_key(hashes: Dict[Any, Any], key: Any, default: Any = None, extract_index: Optional[int] = 0, use_enum_value: bool = False) -> Any:
    """
    Static method to get key from hashes dictionary with default.
    
    Parameters:
    - hashes: Dictionary of hashes
    - key: Key to get hash for
    - default: Default value if key not found
    - extract_index: Index for extraction
    - use_enum_value: Whether to use enum values
    
    Returns:
    Hash value for the key or default value
    """

Usage Examples

Basic Hashing

from deepdiff import DeepHash

# Hash simple objects
obj1 = {"name": "John", "age": 30}
obj2 = {"age": 30, "name": "John"}  # Same content, different order

hash1 = DeepHash(obj1)
hash2 = DeepHash(obj2)

# NOTE(review): upstream deepdiff documents comparing content hashes as
# DeepHash(obj1)[obj1] == DeepHash(obj2)[obj2]; confirm that comparing the
# DeepHash objects directly and calling hexdigest() behave as shown below
# in the version you target.
print(hash1 == hash2)  # True - same content produces same hash
print(hash1.hexdigest())  # Get hex representation

Ignoring Type Differences

# Hash with type normalization
data1 = {"value": 42}  # int
data2 = {"value": 42.0}  # float

# Without type normalization - the int and the float hash differently
hash1 = DeepHash(data1)
hash2 = DeepHash(data2)
print(hash1 == hash2)  # False

# With type normalization - the numeric type is ignored, so the hashes match
hash1 = DeepHash(data1, ignore_numeric_type_changes=True)
hash2 = DeepHash(data2, ignore_numeric_type_changes=True)
print(hash1 == hash2)  # True

Custom Hash Functions

import hashlib

# Use custom hash function
def custom_hasher(data):
    """Return the MD5 hex digest of *data*, for use as a DeepHash ``hasher``.

    Parameters:
    - data: Content to hash. ``str`` input is encoded as UTF-8, bytes-like
      input is hashed as-is, and any other object is hashed via ``str(data)``.

    Returns:
    MD5 digest as a hexadecimal string.
    """
    if isinstance(data, (bytes, bytearray, memoryview)):
        # str(b"abc") is the repr "b'abc'", so hash the raw bytes instead;
        # this makes equal text and bytes content produce the same digest.
        payload = bytes(data)
    else:
        payload = str(data).encode("utf-8")
    return hashlib.md5(payload).hexdigest()

# Define the input here: no earlier snippet binds the name `data`, so the
# call below would otherwise fail with NameError.
data = {"name": "John", "age": 30}

# Every value DeepHash needs hashed is passed through custom_hasher.
hash_obj = DeepHash(data, hasher=custom_hasher)
print(hash_obj.hexdigest())

Excluding Paths

# Hash while excluding certain paths
data = {
    "user_id": 123,
    "name": "John",
    "timestamp": "2023-01-01T00:00:00Z",
    "metadata": {"created_by": "system"}
}

# Exclude the timestamp and the nested metadata['created_by'] entry so that
# neither of them affects the resulting hash
hash_obj = DeepHash(
    data,
    exclude_paths=["root['timestamp']", "root['metadata']['created_by']"]
)

Precision Control

# Control floating point precision
data = {"pi": 3.141592653589793}

# Hash with different precision levels
# (with the default number_format_notation="f", significant_digits counts
# digits after the decimal point)
hash1 = DeepHash(data, significant_digits=2)  # 3.14
hash2 = DeepHash(data, significant_digits=4)  # 3.1416

print(hash1.hexdigest())
print(hash2.hexdigest())

Path-Based Hash Retrieval

nested_data = {
    "level1": {
        "level2": {
            "data": [1, 2, 3],
            "other": "value"
        }
    }
}

hash_obj = DeepHash(nested_data)

# NOTE(review): upstream deepdiff documents DeepHash.get as a lookup keyed by
# the hashed object with a default (get(key, default=None, ...)); confirm that
# the no-argument call and the path-string lookup below are supported in the
# version you target.

# Get hash for entire object
full_hash = hash_obj.get()

# Get hash for specific path
partial_hash = hash_obj.get("root['level1']['level2']['data']")

print(f"Full object hash: {full_hash}")
print(f"Array hash: {partial_hash}")

Working with Custom Objects

class Person:
    """Small example object with two public attributes and one private one."""

    def __init__(self, name, age):
        # Public attributes, set from the constructor arguments.
        self.name, self.age = name, age
        # Underscore-prefixed attribute with a fixed value; used by the
        # surrounding example to show private-variable handling.
        self._private = "secret"

person1 = Person("John", 30)
person2 = Person("John", 30)

# Hash custom objects
hash1 = DeepHash(person1)
hash2 = DeepHash(person2)

print(hash1 == hash2)  # True - same attribute values

# Include private variables: attributes starting with "_" (such as
# Person._private) are skipped by default (ignore_private_variables=True)
# and are only hashed when the flag is set to False
hash1 = DeepHash(person1, ignore_private_variables=False)
hash2 = DeepHash(person2, ignore_private_variables=False)

Handling Complex Data Types

import numpy as np
import pandas as pd

# Hash NumPy arrays
arr = np.array([1, 2, 3, 4, 5])
hash_arr = DeepHash(arr)

# Hash pandas DataFrames
df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
hash_df = DeepHash(df)

# Hash with type exclusions
mixed_data = {
    "numpy_array": arr,
    "dataframe": df,
    "regular_list": [1, 2, 3]
}

# Exclude values of type np.ndarray from hashing
hash_obj = DeepHash(mixed_data, exclude_types=[np.ndarray])

Types

# Hash function type: takes the text or bytes to hash and returns the digest
# as a string. Matches the `hasher` parameter of DeepHash.__init__ and the
# sha256hex / sha1hex signatures, which accept Union[str, bytes].
HashFunction = Callable[[Union[str, bytes]], str]

# Common hash functions available
# NOTE(review): presumably these correspond to the DeepHash.sha256hex and
# DeepHash.sha1hex static methods listed under "Static Hash Functions";
# confirm that constants with these names are actually exported.
SHA256_HASHER: HashFunction
SHA1_HASHER: HashFunction

# DeepHash result access patterns
# NOTE(review): these annotations differ from the DeepHash method listing
# earlier in this document, where __getitem__ takes the hashed object (not a
# path string), keys() returns Any and values() returns a Generator; confirm
# which description is correct before relying on this stub.
class DeepHashResult:
    def __getitem__(self, key: str) -> str: ...  # Access by path
    def __eq__(self, other: 'DeepHashResult') -> bool: ...  # Compare hashes
    def keys(self) -> List[str]: ...  # Get all hashed paths
    def values(self) -> List[str]: ...  # Get all hash values

# Type groups for equivalent hashing
# NOTE(review): presumably meant as entries for the ignore_type_in_groups
# parameter (a list of type groups treated as equivalent) — confirm the
# expected format against the deepdiff documentation.
NumericTypes = (int, float, complex)
StringTypes = (str, bytes, bytearray)

Install with Tessl CLI

npx tessl i tessl/pypi-deepdiff

docs

cli.md

delta.md

difference.md

extract.md

hashing.md

index.md

search.md

tile.json