CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-diffsync

Library to easily sync/diff/update 2 different data sources

Pending
Overview
Eval results
Files

docs/diff-calculation.md

Diff Calculation

Comprehensive difference calculation between datasets, supporting hierarchical data structures, customizable comparison logic, and detailed change tracking with multiple output formats.

Capabilities

Diff Generation

Primary methods for calculating differences between two Adapter instances.

def diff_from(self, source: "Adapter", diff_class: Type[Diff] = Diff,
              flags: DiffSyncFlags = DiffSyncFlags.NONE,
              callback: Optional[Callable[[str, int, int], None]] = None) -> Diff:
    """
    Generate a Diff describing the difference from another Adapter (``source``) to this one.
    
    The resulting Diff describes what would have to change for this adapter's
    data to look like the data held by ``source``.
    
    Args:
        source: Adapter to diff against
        diff_class: Diff or subclass thereof to use for diff calculation and storage
        flags: Flags influencing the behavior of this diff operation
        callback: Function with parameters (stage, current, total), called at intervals as calculation proceeds
    
    Returns:
        Diff object containing all differences found
    """
def diff_to(self, target: "Adapter", diff_class: Type[Diff] = Diff,
            flags: DiffSyncFlags = DiffSyncFlags.NONE,
            callback: Optional[Callable[[str, int, int], None]] = None) -> Diff:
    """
    Generate a Diff describing the difference from this Adapter to another one (``target``).
    
    NOTE(review): presumably the mirror image of ``target.diff_from(self, ...)`` — confirm
    against the implementation.
    
    Args:
        target: Adapter to diff against
        diff_class: Diff or subclass thereof to use for diff calculation and storage
        flags: Flags influencing the behavior of this diff operation
        callback: Function with parameters (stage, current, total), called at intervals as calculation proceeds
    
    Returns:
        Diff object containing all differences found
    """

Basic Diff Example

from diffsync import Adapter, DiffSyncModel

# NOTE: NetworkAdapter is neither defined nor imported in this snippet; it is
# presumably a user-defined Adapter subclass that implements load() — define
# or import it before running the example.

# Create two adapters with different data
source = NetworkAdapter(name="source")
target = NetworkAdapter(name="target")

# Load data into both adapters
source.load()
target.load()

# Calculate differences - what changes would make target look like source
diff = target.diff_from(source)

# Print summary
print(f"Found {len(diff)} differences")
print(diff.str())

# Get detailed summary (a dict of integer values keyed by action name)
summary = diff.summary()
print(f"Create: {summary['create']}")
print(f"Update: {summary['update']}")
print(f"Delete: {summary['delete']}")

Diff Object

Container for storing and organizing differences between datasets.

class Diff:
    """Diff object, designed to store multiple DiffElement objects and organize them in groups."""
    
    def __init__(self) -> None:
        """Initialize a new, empty Diff object."""
    
    # Stored DiffElements, organized by group key (object type) and then by name.
    children: OrderedDefaultDict[str, Dict[str, DiffElement]]
    # NOTE(review): presumably a running count of models examined while the diff
    # is being calculated — confirm against the implementation.
    models_processed: int
def add(self, element: "DiffElement") -> None:
    """
    Add a new DiffElement to the changeset of this Diff.
    
    Args:
        element: DiffElement to store in this Diff
    
    Raises:
        ObjectAlreadyExists: if an element of the same type and same name is already stored
    """
def has_diffs(self) -> bool:
    """
    Indicate if at least one of the child elements contains some diff.
    
    Returns:
        True if at least one child element contains some diff
    """
def summary(self) -> Dict[str, int]:
    """
    Build a dict summary of this Diff and its child DiffElements.
    
    Returns:
        Dictionary mapping an action name (for example "create", "update" or "delete")
        to an integer value for that action
    """
def groups(self) -> List[str]:
    """Get the list of all group keys in self.children."""
def get_children(self) -> Iterator["DiffElement"]:
    """
    Iterate over all child elements in all groups in self.children.
    
    For each group of children, check whether an order method is defined;
    otherwise use the default method.
    """
def complete(self) -> None:
    """
    Method to call when this Diff has been fully populated with data and is "complete".
    
    The default implementation does nothing, but a subclass could use this,
    for example, to save the completed Diff to a file or database record.
    """

DiffElement

Individual difference item representing a single object that may or may not have changes.

class DiffElement:
    """DiffElement object, designed to represent a single item/object that may or may not have any diffs."""
    
    def __init__(self, obj_type: str, name: str, keys: Dict,
                 source_name: str = "source", dest_name: str = "dest",
                 diff_class: Type[Diff] = Diff):
        """
        Instantiate a DiffElement.
        
        Args:
            obj_type: Name of the object type being described, as in DiffSyncModel.get_type()
            name: Human-readable name of the object being described, as in DiffSyncModel.get_shortname()
            keys: Primary keys and values uniquely describing this object, as in DiffSyncModel.get_identifiers()
            source_name: Name of the source DiffSync object
            dest_name: Name of the destination DiffSync object
            diff_class: Diff or subclass thereof to use to calculate the diffs to use for synchronization
        """
    
    # Name of the object type being described (the obj_type constructor argument).
    type: str
    # Human-readable name of the object being described.
    name: str  
    # Primary keys and values uniquely describing this object.
    keys: Dict
    # Names of the source and destination data sets this element compares.
    source_name: str
    dest_name: str
    # Attributes of the source / destination item, populated via add_attrs();
    # None when that side has not been set.
    source_attrs: Optional[Dict]
    dest_attrs: Optional[Dict]
    # Diff holding the child DiffElements attached via add_child().
    child_diff: Diff
@property
def action(self) -> Optional[str]:
    """
    Action, if any, that should be taken to remediate the diffs described by this element.
    
    Returns:
        "create", "update", "delete", or None
    """
def add_attrs(self, source: Optional[Dict] = None, dest: Optional[Dict] = None) -> None:
    """
    Set additional attributes of a source and/or destination item that may result in diffs.
    
    Args:
        source: Attributes of the source item, if any
        dest: Attributes of the destination item, if any
    """
def get_attrs_keys(self) -> Iterable[str]:
    """
    Get the list of shared attrs between source and dest, or the attrs of source or dest if only one is present.
    
    Returns:
        - If source_attrs is not set, return the keys of dest_attrs
        - If dest_attrs is not set, return the keys of source_attrs  
        - If both are defined, return the intersection of both keys
    """
def get_attrs_diffs(self) -> Dict[str, Dict[str, Any]]:
    """
    Get the dict of actual attribute diffs between source_attrs and dest_attrs.
    
    Returns:
        Dictionary of the form {"-": {key1: <value>, key2: ...}, "+": {key1: <value>, key2: ...}},
        where the "-" or "+" dicts may be absent
    """
def has_diffs(self, include_children: bool = True) -> bool:
    """
    Check whether this element (or optionally any of its children) has some diffs.
    
    Args:
        include_children: If True, recursively check children for diffs as well
    
    Returns:
        True if this element has diffs or, when include_children is True,
        if any of its children has diffs
    """
def add_child(self, element: "DiffElement") -> None:
    """
    Attach a child object of type DiffElement.
    
    Children are saved in a Diff object and are organized by type and name.
    
    Args:
        element: Child DiffElement to attach to this element
    """
def get_children(self) -> Iterator["DiffElement"]:
    """Iterate over all child DiffElements of this one."""

DiffElement Usage Example

# Examine individual diff elements
for element in diff.get_children():
    print(f"Element: {element.type} - {element.name}")
    print(f"Action: {element.action}")
    
    if element.action == "update":
        attrs_diff = element.get_attrs_diffs()
        if "+" in attrs_diff:
            print(f"New values: {attrs_diff['+']}")
        if "-" in attrs_diff:
            print(f"Old values: {attrs_diff['-']}")
    
    # Check for child differences.
    # element.has_diffs(include_children=True) is also True when only the
    # element's own attributes differ, so query the child Diff directly to
    # avoid reporting child differences that do not exist.
    if element.child_diff.has_diffs():
        print("Has child differences")
        for child in element.get_children():
            print(f"  Child: {child.type} - {child.name} ({child.action})")

Diff Serialization and Display

Methods for converting diff objects to various output formats.

def str(self, indent: int = 0) -> str:
    """
    Build a detailed string representation of this Diff and its child DiffElements.
    
    Args:
        indent: NOTE(review): presumably the indentation applied to the rendered
            output — confirm against the implementation
    
    Returns:
        Human-readable, multi-element description of the diff
    """
def dict(self) -> Dict[str, Dict[str, Dict]]:
    """
    Build a dictionary representation of this Diff.
    
    Returns:
        Nested dictionary keyed by model type, then by object name; each
        innermost dictionary may contain "+" and "-" entries
    """

Diff Output Example

# String representation - human readable
print(diff.str())

# Dictionary representation - programmatic access
diff_data = diff.dict()
for model_type, objects in diff_data.items():
    print(f"Model type: {model_type}")
    for obj_name, changes in objects.items():
        print(f"  Object: {obj_name}")
        if "+" in changes:
            print(f"    Added: {changes['+']}")
        if "-" in changes:
            print(f"    Removed: {changes['-']}")

Advanced Diff Features

Custom Diff Classes

class CustomDiff(Diff):
    """Diff subclass that saves itself to a JSON file and orders "device" children by name."""

    def complete(self):
        """Save the fully populated diff to a timestamped JSON file in the working directory."""
        # Imported locally so the example runs as written; the snippet has no
        # module-level imports for these names.
        import json
        from datetime import datetime

        # Avoid isoformat(): it puts ":" in the name, which Windows rejects.
        # Microseconds are kept so that diffs completed in quick succession
        # do not overwrite each other.
        timestamp = datetime.now().strftime("%Y%m%dT%H%M%S%f")
        with open(f"diff_{timestamp}.json", "w", encoding="utf-8") as f:
            json.dump(self.dict(), f, indent=2)

    def order_children_device(self, children):
        """
        Return the elements of the "device" group sorted by name.

        NOTE(review): presumably picked up by Diff.get_children() as the order
        method for the "device" group — confirm the naming convention.

        Args:
            children: Mapping of element name to DiffElement for this group

        Returns:
            List of the group's DiffElements ordered by their name attribute
        """
        return sorted(children.values(), key=lambda x: x.name)

# Use custom diff class
diff = target.diff_from(source, diff_class=CustomDiff)

Progress Callbacks

def progress_callback(stage, current, total):
    """
    Print a one-line progress report for the given stage.

    Args:
        stage: Label of the stage being reported
        current: Number of items handled so far
        total: Total number of items; a non-positive total is reported as 0.0%
    """
    if total > 0:
        percentage = (current / total) * 100
    else:
        # Guard against division by zero when nothing is being processed.
        percentage = 0
    print(f"{stage}: {current}/{total} ({percentage:.1f}%)")

# Monitor diff calculation progress
diff = target.diff_from(source, callback=progress_callback)

Filtering with Flags

from diffsync import DiffSyncFlags

# Skip objects that only exist in source
diff = target.diff_from(source, flags=DiffSyncFlags.SKIP_UNMATCHED_SRC)

# Skip objects that only exist in target  
diff = target.diff_from(source, flags=DiffSyncFlags.SKIP_UNMATCHED_DST)

# Skip objects that only exist in either source or target
diff = target.diff_from(source, flags=DiffSyncFlags.SKIP_UNMATCHED_BOTH)

Utility Functions

Helper functions used internally by the diff calculation engine, also available for advanced usage scenarios.

def intersection(lst1: List[T], lst2: List[T]) -> List[T]:
    """
    Calculate the intersection of two lists, with ordering based on the first list.
    
    Args:
        lst1: First list (determines ordering)
        lst2: Second list
    
    Returns:
        List containing elements common to both lists, in lst1 order
    """
def symmetric_difference(lst1: List[T], lst2: List[T]) -> List[T]:
    """
    Calculate the symmetric difference of two lists.
    
    Args:
        lst1: First list
        lst2: Second list
    
    Returns:
        Sorted list containing elements that exist in either list but not both
    """
class OrderedDefaultDict(OrderedDict, Generic[K, V]):
    """A combination of collections.OrderedDict and collections.defaultdict behavior."""
    
    def __init__(self, dict_type: Callable[[], V]) -> None:
        """
        Create a new OrderedDefaultDict.
        
        Args:
            dict_type: Factory function to create default values for missing keys
        """
    
    def __missing__(self, key: K) -> V:
        """
        When trying to access a nonexistent key, initialize the key value based on the internal factory.
        
        Args:
            key: The key that was looked up but is not present
        
        Returns:
            The value initialized for ``key``
        """

Utility Usage Examples

from diffsync.utils import intersection, symmetric_difference, OrderedDefaultDict

# Find common model types between two adapters
common_types = intersection(adapter1.top_level, adapter2.top_level)
print(f"Common model types: {common_types}")

# Find model types that exist in only one adapter
unique_types = symmetric_difference(adapter1.top_level, adapter2.top_level)
print(f"Unique model types: {unique_types}")

# Create an ordered dictionary with default factory
diff_data = OrderedDefaultDict(dict)
diff_data["device"]["router1"] = {"action": "create"}
diff_data["interface"]["eth0"] = {"action": "update"}
print(diff_data)  # Maintains insertion order with auto-initialization

Types

from collections import OrderedDict
from typing import Any, Callable, Dict, Generic, Iterable, Iterator, List, Optional, Type, TypeVar

from diffsync.utils import OrderedDefaultDict

# Type variables for utility functions
T = TypeVar("T")
K = TypeVar("K")
V = TypeVar("V")

# Callback function type for progress monitoring
ProgressCallback = Callable[[str, int, int], None]

Install with Tessl CLI

npx tessl i tessl/pypi-diffsync

docs

data-management.md

diff-calculation.md

flags-configuration.md

index.md

model-definition.md

storage-backends.md

synchronization.md

tile.json