tessl/pypi-deepdiff

Deep Difference and Search of any Python object/data with delta and hash capabilities.

—

Pending

Overview

Eval results

Files

Deep Difference Analysis

Name: tessl/pypi-deepdiff
Author: tessl

Comprehensive comparison of Python objects with detailed change reporting and extensive customization options. DeepDiff goes beyond basic equality checks to provide precise analysis of what changed between two objects, including nested structures, lists, dictionaries, and custom objects.

Capabilities

Object Comparison

Creates detailed difference reports between any two Python objects with customizable comparison behavior and output formatting.

class DeepDiff:
    def __init__(
        self,
        t1: Any,
        t2: Any,
        cache_purge_level: int = 1,
        cache_size: int = 0,
        cache_tuning_sample_size: int = 0,
        custom_operators: Optional[List[Any]] = None,
        cutoff_distance_for_pairs: float = 0.6,
        cutoff_intersection_for_pairs: float = 0.6,
        default_timezone: Union[datetime.timezone, "BaseTzInfo"] = datetime.timezone.utc,
        encodings: Optional[List[str]] = None,
        exclude_obj_callback: Optional[Callable] = None,
        exclude_obj_callback_strict: Optional[Callable] = None,
        exclude_paths: Union[str, List[str], Set[str], FrozenSet[str], None] = None,
        exclude_regex_paths: Union[str, List[str], Pattern[str], List[Pattern[str]], None] = None,
        exclude_types: Optional[List[type]] = None,
        get_deep_distance: bool = False,
        group_by: Union[str, Tuple[str, str], None] = None,
        group_by_sort_key: Union[str, Callable, None] = None,
        hasher: Optional[Callable] = None,
        hashes: Optional[Dict[Any, Any]] = None,
        ignore_encoding_errors: bool = False,
        ignore_nan_inequality: bool = False,
        ignore_numeric_type_changes: bool = False,
        ignore_order: bool = False,
        ignore_order_func: Optional[Callable] = None,
        ignore_private_variables: bool = True,
        ignore_string_case: bool = False,
        ignore_string_type_changes: bool = False,
        ignore_type_in_groups: Optional[List[Tuple[Any, ...]]] = None,
        ignore_type_subclasses: bool = False,
        ignore_uuid_types: bool = False,
        include_obj_callback: Optional[Callable] = None,
        include_obj_callback_strict: Optional[Callable] = None,
        include_paths: Union[str, List[str], None] = None,
        iterable_compare_func: Optional[Callable] = None,
        log_frequency_in_sec: int = 0,
        log_scale_similarity_threshold: float = 0.1,
        log_stacktrace: bool = False,
        math_epsilon: Optional[float] = None,
        max_diffs: Optional[int] = None,
        max_passes: int = 10000000,
        number_format_notation: Literal["f", "e"] = "f",
        number_to_string_func: Optional[Callable] = None,
        progress_logger: Callable[[str], None] = logger.info,
        report_repetition: bool = False,
        significant_digits: Optional[int] = None,
        threshold_to_diff_deeper: float = 0.33,
        truncate_datetime: Optional[str] = None,
        use_enum_value: bool = False,
        use_log_scale: bool = False,
        verbose_level: int = 1,
        view: str = 'text',
        zip_ordered_iterables: bool = False,
        **kwargs
    ):
    """
    Deep difference of dictionaries, iterables, strings and other objects.
    
    Parameters:
    - t1, t2: Objects to compare
    - cache_purge_level: Level of cache purging (0-2)
    - cache_size: Size of LRU cache for comparison operations
    - cache_tuning_sample_size: Sample size for cache tuning
    - custom_operators: List of custom comparison operators
    - cutoff_distance_for_pairs: Distance threshold for pairing items
    - cutoff_intersection_for_pairs: Intersection threshold for pairing
    - default_timezone: Default timezone for datetime comparisons
    - encodings: List of encodings to try for string comparison
    - exclude_obj_callback: Callback function to exclude objects
    - exclude_obj_callback_strict: Callback function to exclude objects with strict checking
    - exclude_paths: Paths to exclude from comparison
    - exclude_regex_paths: Regex patterns for paths to exclude
    - exclude_types: List of types to exclude from comparison
    - get_deep_distance: Calculate distance metric between objects
    - group_by: Group similar changes together
    - group_by_sort_key: Sort key for grouping
    - hasher: Hash function to use for hashing
    - hashes: Pre-computed hashes to use
    - ignore_encoding_errors: Ignore encoding errors when comparing strings
    - ignore_nan_inequality: Ignore NaN inequality comparisons
    - ignore_numeric_type_changes: Ignore type changes between numeric types
    - ignore_order: Ignore order of elements in iterables
    - ignore_order_func: Function to determine when to ignore order
    - ignore_private_variables: Ignore private attributes (starting with _)
    - ignore_string_case: Ignore case when comparing strings
    - ignore_string_type_changes: Ignore changes between string types
    - ignore_type_in_groups: List of type groups to treat as equivalent
    - ignore_type_subclasses: Ignore type changes between subclasses
    - ignore_uuid_types: Ignore type differences between UUID and string
    - include_obj_callback: Callback function to include objects
    - include_obj_callback_strict: Callback function to include objects with strict checking
    - include_paths: List of paths to include (exclude all others)
    - iterable_compare_func: Custom function for comparing iterables
    - log_frequency_in_sec: Frequency of progress logging in seconds
    - log_scale_similarity_threshold: Threshold for log scale similarity
    - log_stacktrace: Log stack traces on errors
    - math_epsilon: Epsilon for floating point comparisons
    - max_diffs: Maximum number of differences to find
    - max_passes: Maximum number of passes for comparison
    - number_format_notation: Notation for number formatting ("f" or "e")
    - number_to_string_func: Custom function for number to string conversion
    - progress_logger: Function for logging progress
    - report_repetition: Report repetitive elements
    - significant_digits: Number of significant digits for float comparison
    - threshold_to_diff_deeper: Threshold for deeper comparison
    - truncate_datetime: Truncate datetime to specified precision
    - use_enum_value: Use enum values instead of enum objects
    - use_log_scale: Use logarithmic scale for similarity
    - verbose_level: Level of detail in output (0-2)
    - view: Output view format ('text' or 'tree')
    - zip_ordered_iterables: Zip iterables when comparing ordered sequences
    """

Result Analysis

Methods for analyzing and extracting information from comparison results.

def get_stats(self) -> Dict[str, Any]:
    """
    Get statistics about the comparison results.
    
    Returns:
    Dict containing comparison statistics including counts of different types of changes.
    """

def custom_report_result(self, report_type: str, level: Any, extra_info: Any = None) -> None:
    """
    Custom report result method for advanced reporting.
    
    Parameters:
    - report_type: Type of report to generate
    - level: Level object containing comparison information
    - extra_info: Optional extra information for the report
    """

@property
def affected_paths(self) -> List[str]:
    """
    List of paths that were affected by changes.
    
    Returns:
    List of path strings indicating where changes occurred.
    """

@property
def affected_root_keys(self) -> List[str]:
    """
    List of root keys that were affected by changes.
    
    Returns:
    List of root key strings that had changes.
    """

Usage Examples

Basic Comparison

from deepdiff import DeepDiff

# Compare dictionaries
t1 = {"name": "John", "age": 30}
t2 = {"name": "Jane", "age": 30}

diff = DeepDiff(t1, t2)
print(diff)
# {'values_changed': {"root['name']": {'old_value': 'John', 'new_value': 'Jane'}}}

Ignoring Order

# Compare lists ignoring order
list1 = [1, 2, 3]
list2 = [3, 2, 1]

diff = DeepDiff(list1, list2, ignore_order=True)
print(diff)
# {} (no differences when ignoring order)

Excluding Paths

# Exclude specific paths from comparison
t1 = {"name": "John", "age": 30, "id": 1}
t2 = {"name": "John", "age": 31, "id": 2}

diff = DeepDiff(t1, t2, exclude_paths=["root['id']"])
print(diff)
# Only shows age difference, id difference is excluded

Custom Operators

from deepdiff.operator import BaseOperator

class CustomOperator(BaseOperator):
    def match(self, level):
        return "custom_field" in level.t1
    
    def give_up_diffing(self, level, diff_instance):
        # Custom comparison logic
        return True

diff = DeepDiff(t1, t2, custom_operators=[CustomOperator()])

Working with Results

diff = DeepDiff(t1, t2)

# Get statistics
stats = diff.get_stats()
print(f"Total changes: {stats['total_changes']}")

# Get affected paths
paths = diff.get_affected_paths()
print(f"Changed paths: {paths}")

# Serialize to JSON
json_diff = diff.to_json()
# Store or transmit json_diff

# Recreate from JSON
restored_diff = DeepDiff.from_json(json_diff)

Types

# Custom operator base class
class BaseOperator:
    def match(self, level: 'DiffLevel') -> bool: ...
    def give_up_diffing(self, level: 'DiffLevel', diff_instance: 'DeepDiff') -> bool: ...

# Level representation for custom operators
class DiffLevel:
    t1: Any  # First object being compared
    t2: Any  # Second object being compared
    path: List[str]  # Path to current comparison location
    
# Type groups for treating different types as equivalent
TypeGroup = Tuple[type, ...]

# Callback types
ExcludeCallback = Callable[[Any, str], bool]
NumberToStringFunc = Callable[[Any], str]

Install with Tessl CLI