Library to easily sync/diff/update 2 different data sources
npx @tessl/cli install tessl/pypi-diffsync@2.1.0

DiffSync is a Python utility library designed to compare and synchronize different datasets. It serves as an intermediate translation layer between multiple data sources, enabling developers to define data models and adapters to translate between each base data source and a unified data model. The library excels in scenarios requiring repeated synchronization as data changes over time, accounting for creation, modification, and deletion of records, especially when data forms hierarchical relationships.
pip install diffsync
pip install diffsync[redis]
import diffsync
For main classes:
from diffsync import DiffSyncModel, Adapter
For complete API access:
from diffsync import (
DiffSyncModel, Adapter, Diff,
DiffSyncFlags, DiffSyncModelFlags, DiffSyncStatus,
LocalStore, BaseStore,
# Exceptions
ObjectAlreadyExists, ObjectNotFound, ObjectStoreWrongType,
DiffClassMismatch
)
from diffsync.diff import DiffElement
from diffsync.store.redis import RedisStore
from diffsync.exceptions import (
ObjectNotCreated, ObjectNotUpdated, ObjectNotDeleted
)
from diffsync.enum import DiffSyncActions

from diffsync import DiffSyncModel, Adapter
# Define a data model
class Device(DiffSyncModel):
    """Example model named "device".

    Instances are identified by ``name``; ``os_version`` and ``vendor`` are
    the attributes listed for comparison.
    """

    _modelname = "device"
    _identifiers = ("name",)
    _attributes = ("os_version", "vendor")

    # Every name listed in _identifiers / _attributes is declared as a typed field.
    name: str
    os_version: str
    vendor: str
# Create adapters for different data sources
class NetworkAdapter(Adapter):
    """Example adapter that stores ``Device`` objects."""

    # The attribute name matches Device._modelname ("device"), and the same
    # name is listed in top_level.
    device = Device
    top_level = ["device"]

    def load(self):
        """Add two hard-coded example devices to this adapter."""
        # Load data from your source (database, API, etc.)
        device1 = Device(name="router1", os_version="15.1", vendor="cisco")
        device2 = Device(name="switch1", os_version="12.2", vendor="juniper")
        self.add(device1)
        self.add(device2)
# Create two adapter instances. NOTE: both are NetworkAdapter, so in this
# example they load identical data; real adapters would each load from their own source.
source = NetworkAdapter(name="source")
target = NetworkAdapter(name="target")
# Load their respective data
source.load()
target.load()
# Calculate differences (diff_from returns a Diff object)
diff = target.diff_from(source)
print(diff.str())
# Synchronize data from source to target
sync_diff = target.sync_from(source)

DiffSync uses a hierarchical model-based approach with several key components:
This design enables systematic comparison and synchronization of complex, hierarchical data structures between disparate systems while maintaining data integrity and providing detailed change tracking.
Core functionality for defining data models that represent your domain objects. Models specify unique identifiers, trackable attributes, and parent-child relationships between different object types.
class DiffSyncModel(BaseModel):
_modelname: ClassVar[str]
_identifiers: ClassVar[Tuple[str, ...]]
_attributes: ClassVar[Tuple[str, ...]]
_children: ClassVar[Dict[str, str]]
model_flags: DiffSyncModelFlags
adapter: Optional["Adapter"]

Adapter functionality for managing collections of models, loading data from various sources, and providing query and storage operations through configurable storage backends.
class Adapter:
top_level: ClassVar[List[str]]
def __init__(self, name: Optional[str] = None,
internal_storage_engine: Union[Type[BaseStore], BaseStore] = LocalStore): ...
def load(self): ...
def add(self, obj: DiffSyncModel): ...
def get(self, obj: Union[str, DiffSyncModel, Type[DiffSyncModel]],
identifier: Union[str, Dict]) -> DiffSyncModel: ...
def get_all(self, obj: Union[str, DiffSyncModel, Type[DiffSyncModel]]) -> List[DiffSyncModel]: ...

Comprehensive difference calculation between datasets, supporting hierarchical data structures, customizable comparison logic, and detailed change tracking with multiple output formats.
def diff_from(
    self,
    source: "Adapter",
    diff_class: Type[Diff] = Diff,
    flags: DiffSyncFlags = DiffSyncFlags.NONE,
    callback: Optional[Callable[[str, int, int], None]] = None,
) -> Diff:
    """Signature stub: calculate the differences against ``source``.

    Returns a ``Diff`` (an instance of ``diff_class``, presumably). The
    implementation is elided in this API summary.
    """
    ...
class Diff:
def __init__(self): ...
def add(self, element: "DiffElement"): ...
def has_diffs(self) -> bool: ...
def summary(self) -> Dict[str, int]: ...

Automated synchronization operations that apply calculated differences to update target datasets. Supports creation, modification, and deletion of records with comprehensive error handling and status tracking.
def sync_from(
    self,
    source: "Adapter",
    diff_class: Type[Diff] = Diff,
    flags: DiffSyncFlags = DiffSyncFlags.NONE,
    callback: Optional[Callable[[str, int, int], None]] = None,
    diff: Optional[Diff] = None,
) -> Diff:
    """Signature stub: synchronize data from ``source`` into this adapter.

    Returns a ``Diff``. The implementation is elided in this API summary.
    """
    ...
def sync_to(self, target: "Adapter", diff_class: Type[Diff] = Diff,
flags: DiffSyncFlags = DiffSyncFlags.NONE,
callback: Optional[Callable[[str, int, int], None]] = None,
diff: Optional[Diff] = None) -> Diff: ...

Pluggable storage backend implementations for different persistence requirements, from in-memory storage for temporary operations to Redis-based storage for distributed scenarios.
class BaseStore:
    """Storage-backend interface (signatures only; bodies are elided here).

    Every method takes its arguments keyword-only (note the bare ``*``).
    """

    def get(self, *, model: Union[str, "DiffSyncModel", Type["DiffSyncModel"]],
            identifier: Union[str, Dict]) -> "DiffSyncModel": ...

    def add(self, *, obj: "DiffSyncModel"): ...

    def remove(self, *, obj: "DiffSyncModel", remove_children: bool = False): ...
# Store implementation derived from BaseStore; its body is elided ("...") in this API summary.
class LocalStore(BaseStore): ...
class RedisStore(BaseStore): ...

Behavioral control flags and configuration options for customizing diff calculation and synchronization behavior, including error handling, skipping patterns, and logging verbosity.
class DiffSyncFlags(enum.Flag):
    """Bit flags accepted by the ``flags`` parameter of diff/sync calls.

    Each member occupies its own bit, so members can be combined with ``|``.
    """

    NONE = 0
    CONTINUE_ON_FAILURE = 0b1
    SKIP_UNMATCHED_SRC = 0b10
    SKIP_UNMATCHED_DST = 0b100
    LOG_UNCHANGED_RECORDS = 0b1000
class DiffSyncModelFlags(enum.Flag):
    """Bit flags for a model (the type of ``DiffSyncModel.model_flags``).

    Each member occupies its own bit, so members can be combined with ``|``.
    """

    NONE = 0
    IGNORE = 0b1
    SKIP_CHILDREN_ON_DELETE = 0b10
    SKIP_UNMATCHED_SRC = 0b100
    SKIP_UNMATCHED_DST = 0b1000
    NATURAL_DELETION_ORDER = 0b10000