CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-pycrdt

Python bindings for Yrs CRDT library providing collaborative data structures for real-time synchronization.

Pending
Overview
Eval results
Files

xml-support.mddocs/

XML Support

Overview

pycrdt provides comprehensive XML document editing capabilities through XmlFragment, XmlElement, and XmlText types. These allow collaborative editing of structured XML documents with automatic conflict resolution, supporting elements, attributes, text content, and nested structures. XML types integrate with the broader CRDT ecosystem and support rich change tracking.

Core Types

XmlFragment

XML document fragment that can contain elements and text nodes.

class XmlFragment:
    """
    XML document fragment that can contain elements and text nodes.

    Child nodes are managed through the ``children`` view; changes are
    reported to callbacks registered with ``observe`` / ``observe_deep``.
    """

    def __init__(
        self,
        init: Iterable[XmlFragment | XmlElement | XmlText] | None = None,
        *,
        _doc: Doc | None = None,
        _integrated: _XmlFragment | None = None,
    ) -> None:
        """
        Create a new XML fragment.

        Args:
            init: Optional iterable of initial child nodes
            _doc (Doc, optional): Parent document (keyword-only)
            _integrated: Native fragment instance (keyword-only)
        """

    @property
    def parent(self) -> XmlFragment | XmlElement | XmlText | None:
        """Get the parent node of this fragment, or None."""

    @property
    def children(self) -> XmlChildrenView:
        """Get the list-like view used to read and modify child nodes."""

    def __str__(self) -> str:
        """Get string representation of the XML fragment."""

    def __eq__(self, other: object) -> bool:
        """Check equality with another XML node."""

    def __hash__(self) -> int:
        """Get hash of the XML fragment."""

    def observe(self, callback: Callable[[XmlEvent], None]) -> Subscription:
        """
        Observe XML fragment changes.

        Args:
            callback: Function called with a single XmlEvent when the
                fragment changes

        Returns:
            Subscription: Handle to pass to ``unobserve``
        """

    def observe_deep(self, callback: Callable[[list[XmlEvent]], None]) -> Subscription:
        """
        Observe deep changes including nested structures.

        Args:
            callback: Function called with a list of XmlEvent objects

        Returns:
            Subscription: Handle to pass to ``unobserve``
        """

    def unobserve(self, subscription: Subscription) -> None:
        """
        Remove an event observer.

        Args:
            subscription: Handle returned by ``observe`` or ``observe_deep``
        """

XmlElement

XML element with tag, attributes, and children.

class XmlElement:
    """
    XML element with a tag, attributes, and children.

    Attributes are managed through the ``attributes`` view and child nodes
    through the ``children`` view.
    """

    def __init__(
        self,
        tag: str | None = None,
        attributes: dict[str, str] | Iterable[tuple[str, str]] | None = None,
        contents: Iterable[XmlFragment | XmlElement | XmlText] | None = None,
        *,
        _doc: Doc | None = None,
        _integrated: _XmlElement | None = None,
    ) -> None:
        """
        Create a new XML element.

        Args:
            tag (str, optional): Element tag name
            attributes: Initial attributes, either a dict or an iterable of
                (name, value) tuples
            contents: Optional iterable of initial child nodes
            _doc (Doc, optional): Parent document (keyword-only)
            _integrated: Native element instance (keyword-only)
        """

    @property
    def tag(self) -> str | None:
        """Get the element tag name (may be None)."""

    @tag.setter
    def tag(self, value: str | None) -> None:
        """Set the element tag name."""

    @property
    def attributes(self) -> XmlAttributesView:
        """Get the attributes view for managing element attributes."""

    @property
    def children(self) -> XmlChildrenView:
        """Get the list-like view used to read and modify child nodes."""

    @property
    def parent(self) -> XmlFragment | XmlElement | XmlText | None:
        """Get the parent node of this element, or None."""

    def __str__(self) -> str:
        """Get string representation of the XML element."""

    def __eq__(self, other: object) -> bool:
        """Check equality with another XML node."""

    def __hash__(self) -> int:
        """Get hash of the XML element."""

    def observe(self, callback: Callable[[XmlEvent], None]) -> Subscription:
        """
        Observe XML element changes.

        Args:
            callback: Function called with a single XmlEvent when the
                element changes

        Returns:
            Subscription: Handle to pass to ``unobserve``
        """

    def observe_deep(self, callback: Callable[[list[XmlEvent]], None]) -> Subscription:
        """
        Observe deep changes including nested structures.

        Args:
            callback: Function called with a list of XmlEvent objects

        Returns:
            Subscription: Handle to pass to ``unobserve``
        """

    def unobserve(self, subscription: Subscription) -> None:
        """
        Remove an event observer.

        Args:
            subscription: Handle returned by ``observe`` or ``observe_deep``
        """

XmlText

Text content within XML elements with formatting attributes.

class XmlText:
    """
    Text content within XML elements, with formatting attributes.

    Offers text editing methods (``insert``, ``format``, ``clear``, ``+=``)
    alongside the observation API shared with the other XML node types.
    """

    def __init__(
        self,
        text: str | None = None,
        *,
        _doc: Doc | None = None,
        _integrated: _XmlText | None = None,
    ) -> None:
        """
        Create a new XML text node.

        Args:
            text (str, optional): Initial text content
            _doc (Doc, optional): Parent document (keyword-only)
            _integrated: Native text instance (keyword-only)
        """

    @property
    def attributes(self) -> XmlAttributesView:
        """Get the attributes view for managing text formatting."""

    @property
    def parent(self) -> XmlFragment | XmlElement | XmlText | None:
        """Get the parent node of this text node, or None."""

    # Text manipulation interface (similar to Text)
    def __len__(self) -> int:
        """Get the length of the text content."""

    def __str__(self) -> str:
        """Get the text content as a string."""

    def __iadd__(self, value: str) -> XmlText:
        """Append text using the += operator and return this node."""

    def insert(self, index: int, value: str, attrs: Mapping[str, Any] | None = None) -> None:
        """
        Insert text at the specified index.

        Args:
            index (int): Position to insert text
            value (str): Text to insert
            attrs: Optional formatting attributes for the inserted text
        """

    def insert_embed(self, index: int, value: Any, attrs: dict[str, Any] | None = None) -> None:
        """
        Insert an embedded object at the specified index.

        Args:
            index (int): Position to insert object
            value: Object to embed
            attrs: Optional formatting attributes for the embedded object
        """

    def format(self, start: int, stop: int, attrs: dict[str, Any]) -> None:
        """
        Apply formatting attributes to a text range.

        Args:
            start (int): Start index of the range
            stop (int): End index of the range
            attrs: Formatting attributes to apply
        """

    def diff(self) -> list[tuple[Any, dict[str, Any] | None]]:
        """
        Get the formatted text as a list of (content, attributes) tuples.

        Returns:
            list: (content, attributes) pairs; attributes may be None
        """

    def clear(self) -> None:
        """Remove all text content."""

    def to_py(self) -> str:
        """
        Convert text to a Python string.

        Returns:
            str: Text content as string
        """

    def observe(self, callback: Callable[[XmlEvent], None]) -> Subscription:
        """
        Observe XML text changes.

        Args:
            callback: Function called with a single XmlEvent when the
                text changes

        Returns:
            Subscription: Handle to pass to ``unobserve``
        """

    def observe_deep(self, callback: Callable[[list[XmlEvent]], None]) -> Subscription:
        """
        Observe deep changes including nested structures.

        Args:
            callback: Function called with a list of XmlEvent objects

        Returns:
            Subscription: Handle to pass to ``unobserve``
        """

    def unobserve(self, subscription: Subscription) -> None:
        """
        Remove an event observer.

        Args:
            subscription: Handle returned by ``observe`` or ``observe_deep``
        """

View Types

XmlAttributesView

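Dict-like view for managing XML element/text attributes. Unlike a plain dict, iterating the view yields (key, value) pairs rather than keys, and `get` takes only the key (no default argument).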

class XmlAttributesView:
    """Dict-like view over the attributes of an XML element or text node."""

    def get(self, key: str) -> Any | None:
        """
        Get attribute value by key.

        Takes only the key: there is no ``default`` parameter.

        Args:
            key (str): Attribute name

        Returns:
            Any | None: Attribute value or None if not found
        """

    def __getitem__(self, key: str) -> Any:
        """Get attribute value by key."""

    def __setitem__(self, key: str, value: Any) -> None:
        """Set attribute value."""

    def __delitem__(self, key: str) -> None:
        """Delete attribute by key."""

    def __contains__(self, key: str) -> bool:
        """Check if attribute exists."""

    def __len__(self) -> int:
        """Get number of attributes."""

    def __iter__(self) -> Iterable[tuple[str, Any]]:
        """Iterate over attribute (key, value) pairs, not bare keys."""

XmlChildrenView

List-like view for managing XML element/fragment children.

class XmlChildrenView:
    """List-like view over the children of an XML element or fragment."""

    def __len__(self) -> int:
        """Get number of child nodes."""

    def __getitem__(self, index: int) -> XmlElement | XmlFragment | XmlText:
        """Get child node by index."""

    def __setitem__(self, key: int, value: str | XmlText | XmlElement) -> None:
        """Set child node at index (string, XmlText, or XmlElement)."""

    def __delitem__(self, key: int | slice) -> None:
        """Delete child node(s) by index or slice."""

    def __iter__(self) -> Iterator[XmlText | XmlElement | XmlFragment]:
        """Iterate over child nodes."""

    def insert(self, index: int, element: str | XmlText | XmlElement) -> XmlText | XmlElement:
        """
        Insert a child node at the specified index.

        Args:
            index (int): Position to insert node
            element: Node to insert (string, XmlText, or XmlElement)

        Returns:
            XmlText | XmlElement: The inserted node
        """

    def append(self, element: str | XmlText | XmlElement) -> XmlText | XmlElement:
        """
        Append a child node to the end.

        Args:
            element: Node to append (string, XmlText, or XmlElement)

        Returns:
            XmlText | XmlElement: The appended node
        """

XmlEvent

Event emitted when XML structure changes.

class XmlEvent:
    """Event passed to XML observers, describing what changed on a node."""

    @property
    def children_changed(self) -> bool:
        """Check if children were modified."""

    @property
    def target(self) -> XmlElement | XmlFragment | XmlText:
        """Get the XML node that changed."""

    @property
    def path(self) -> list[int | str]:
        """Get the path to the changed node within the document structure."""

    @property
    def delta(self) -> list[dict[str, Any]]:
        """Get the delta describing content changes (for XmlText)."""

    @property
    def keys(self) -> list[str]:
        """Get the list of attribute keys that changed."""

Usage Examples

Basic XML Document Creation

from pycrdt import Doc, XmlFragment, XmlElement, XmlText

doc = Doc()
xml_doc = doc.get("document", type=XmlFragment)

# Attach every node to its parent as soon as it is created and keep working
# with the node returned by append(). Each later operation then runs on a node
# that already belongs to the document, instead of on a detached one.

# Create root element (initial attributes go through the constructor)
root = xml_doc.children.append(XmlElement("document", {"version": "1.0"}))

# Add nested elements
header = root.children.append(XmlElement("header"))
title = header.children.append(XmlElement("title"))
title_text = title.children.append(XmlText("My Document"))

# Add content section
content = root.children.append(XmlElement("content"))
paragraph = content.children.append(XmlElement("p"))
para_text = paragraph.children.append(XmlText("This is the first paragraph."))

print(str(xml_doc))
# Output: <document version="1.0"><header><title>My Document</title></header><content><p>This is the first paragraph.</p></content></document>

Working with Attributes

from pycrdt import Doc, XmlElement, XmlFragment

doc = Doc()

# Attach the element to a fragment of the document first; the attribute
# operations below then run on a node that belongs to `doc`.
fragment = doc.get("document", type=XmlFragment)
element = fragment.children.append(XmlElement("div"))

# Set attributes
element.attributes["id"] = "main-content"
element.attributes["class"] = "container"
element.attributes["style"] = "color: blue;"

# Get attributes: get() returns None for a missing key, [] looks the key up directly
element_id = element.attributes.get("id")
element_class = element.attributes["class"]

# Check attributes
has_style = "style" in element.attributes
print(f"Has style: {has_style}")

# Iterate attributes (the view yields (key, value) pairs)
for key, value in element.attributes:
    print(f"{key}: {value}")

# Remove attributes
del element.attributes["style"]

Rich Text in XML

from pycrdt import Doc, XmlElement, XmlFragment, XmlText

doc = Doc()
fragment = doc.get("document", type=XmlFragment)

# Attach the paragraph and its text node to the document before editing them,
# so that insert()/format() operate on a node that belongs to `doc`.
paragraph = fragment.children.append(XmlElement("p"))
text = paragraph.children.append(XmlText())

# Create rich text content; each index is the running length of the text so far
text.insert(0, "This is ", None)
text.insert(8, "bold", {"font-weight": "bold"})
text.insert(12, " and this is ", None)
text.insert(25, "italic", {"font-style": "italic"})
text.insert(31, " text.", None)

# Format existing text
text.format(0, 4, {"color": "red"})  # Make "This" red

# Get formatted content
diff = text.diff()
for content, attrs in diff:
    print(f"'{content}' with attributes: {attrs}")

Dynamic XML Construction

from pycrdt import Doc, XmlFragment, XmlElement, XmlText

def create_article(
    title: str,
    author: str,
    content: list[str],
    created: str = "2024-01-01",
) -> XmlElement:
    """
    Create an article XML structure.

    The whole tree is described through constructor arguments, so no node is
    modified before the caller attaches the returned element to a document.

    Args:
        title: Text placed inside the <title> element
        author: Value of the "author" attribute
        content: One string per <p> paragraph inside <content>
        created: Value of the "created" attribute

    Returns:
        XmlElement: The <article> element, ready to be appended to a parent
    """
    paragraphs = [
        XmlElement("p", contents=[XmlText(paragraph_text)])
        for paragraph_text in content
    ]
    return XmlElement(
        "article",
        {"author": author, "created": created},
        [
            XmlElement("title", contents=[XmlText(title)]),
            XmlElement("content", contents=paragraphs),
        ],
    )

doc = Doc()
xml_doc = doc.get("document", type=XmlFragment)

# Create articles
article1 = create_article(
    title="Introduction to CRDTs",
    author="Alice",
    content=[
        "CRDTs are data structures...",
        "They provide conflict resolution...",
    ],
)

article2 = create_article(
    title="XML in Collaborative Editing",
    author="Bob",
    content=[
        "XML documents can be edited...",
        "Multiple users can collaborate...",
    ],
)

# Add both articles to the shared fragment, in order
for article in (article1, article2):
    xml_doc.children.append(article)

XML Document Navigation

from pycrdt import Doc, XmlFragment, XmlElement, XmlText

def find_elements_by_tag(parent, tag_name: str) -> list:
    """
    Collect every descendant element of ``parent`` whose tag is ``tag_name``.

    Matches are returned in depth-first order: a matching element comes
    before any matches found among its own descendants.
    """
    matches = []

    for node in parent.children:
        if isinstance(node, XmlElement):
            if node.tag == tag_name:
                matches.append(node)
        elif not isinstance(node, XmlFragment):
            # Text nodes have nothing to search inside
            continue

        # Elements and fragments are both searched recursively
        matches += find_elements_by_tag(node, tag_name)

    return matches

def get_text_content(node) -> str:
    """
    Return the concatenated text of ``node`` and all of its descendants.

    A text node contributes its own string; any other node contributes the
    text of its children, or an empty string if it has no ``children``.
    """
    if isinstance(node, XmlText):
        return str(node)

    descendants = getattr(node, 'children', ())
    return ''.join(get_text_content(child) for child in descendants)

# Example usage
doc = Doc()
xml_doc = doc.get("document", type=XmlFragment)

# Build a small <book> with two chapters. Each node is attached to its parent
# first and then populated through the node returned by append().
root = xml_doc.children.append(XmlElement("book"))

chapter1 = root.children.append(XmlElement("chapter", {"title": "Introduction"}))
chapter1_text = chapter1.children.append(XmlText("This is the introduction chapter."))

chapter2 = root.children.append(XmlElement("chapter", {"title": "Advanced Topics"}))
chapter2_text = chapter2.children.append(XmlText("This covers advanced material."))

# Navigate and query
chapters = find_elements_by_tag(xml_doc, "chapter")
print(f"Found {len(chapters)} chapters")

for chapter in chapters:
    # XmlAttributesView.get() takes only the key and returns None when the
    # attribute is missing, so the fallback is applied explicitly.
    title = chapter.attributes.get("title")
    if title is None:
        title = "Untitled"
    content = get_text_content(chapter)
    print(f"Chapter: {title} - {content}")

Event Observation

from pycrdt import Doc, XmlFragment, XmlElement, XmlEvent

# Document and root XML fragment that the observer below is registered on
doc = Doc()
xml_doc = doc.get("document", type=XmlFragment)

def on_xml_change(event: XmlEvent):
    """
    Print a summary of a single XML change event.

    The target, children flag and path are always printed; changed attribute
    keys and the content delta are printed only when they are non-empty.
    """
    print(f"XML changed: {event.target}")
    print(f"Children changed: {event.children_changed}")
    print(f"Path: {event.path}")

    changed_keys = event.keys
    if changed_keys:
        print(f"Attribute keys changed: {changed_keys}")

    content_delta = event.delta
    if content_delta:
        print(f"Content delta: {content_delta}")

# Subscribe to changes
subscription = xml_doc.observe(on_xml_change)

# Make changes to trigger events. Each element is attached to its parent first
# (initial attributes go through the constructor) and then used through the
# node returned by append(), so no detached node is ever modified.
root = xml_doc.children.append(XmlElement("root", {"id": "main"}))

# NOTE(review): this change is nested inside `root`; observers that need
# nested changes presumably have to use observe_deep() — confirm.
child_elem = root.children.append(XmlElement("child", {"class": "item"}))

# Clean up
xml_doc.unobserve(subscription)

Deep Event Observation

from pycrdt import Doc, XmlFragment, XmlElement, XmlText

# Document and root XML fragment that the deep observer below is registered on
doc = Doc()
xml_doc = doc.get("document", type=XmlFragment)

def on_deep_change(events):
    """
    Print a summary of a batch of deep XML change events.

    For each event: the target's type name and path, plus a line when
    children were modified and a line when attribute keys changed.
    """
    print(f"Deep XML changes: {len(events)} events")
    for change in events:
        node_type = type(change.target).__name__
        print(f"  {node_type} at path {change.path}")
        if change.children_changed:
            print("    Children modified")
        changed_keys = change.keys
        if changed_keys:
            print(f"    Attributes changed: {changed_keys}")

# Subscribe to deep changes
subscription = xml_doc.observe_deep(on_deep_change)

# Create and add nested structure, outermost node first
# (triggers multiple events)
root = xml_doc.children.append(XmlElement("document"))
section = root.children.append(XmlElement("section"))
paragraph = section.children.append(XmlElement("p"))
text = paragraph.children.append(XmlText("Hello, world!"))

# Modify nested content
text.insert(7, "XML ")  # index 7 is just after "Hello, "
paragraph.attributes["class"] = "intro"

# Clean up
xml_doc.unobserve(subscription)

Collaborative XML Editing

from pycrdt import Doc, XmlFragment, XmlElement, XmlText

# Simulate two clients editing the same XML document
doc1 = Doc(client_id=1)
doc2 = Doc(client_id=2)

xml_doc1 = doc1.get("shared_xml", type=XmlFragment)
xml_doc2 = doc2.get("shared_xml", type=XmlFragment)

# In every transaction below, a new node is attached to its parent first and
# then used through the node returned by append()/insert(); initial attributes
# are passed to the constructor. No detached node is modified.

# Client 1 creates initial structure
with doc1.transaction(origin="client1"):
    root = xml_doc1.children.append(XmlElement("document", {"version": "1.0"}))
    content = root.children.append(XmlElement("content"))

# Sync to client 2
update = doc1.get_update()
doc2.apply_update(update)

# Client 2 adds content
with doc2.transaction(origin="client2"):
    root2 = xml_doc2.children[0]  # Get root element
    content2 = root2.children[0]  # Get content element

    paragraph = content2.children.append(XmlElement("p", {"id": "p1"}))
    text = paragraph.children.append(XmlText("This is from client 2."))

# Client 1 adds more content concurrently
with doc1.transaction(origin="client1"):
    root1 = xml_doc1.children[0]
    content1 = root1.children[0]

    header = content1.children.insert(0, XmlElement("h1"))  # Insert at beginning
    header_text = header.children.append(XmlText("Document Title"))

# Sync changes: each update is computed against the other client's state
update1 = doc1.get_update(doc2.get_state())
update2 = doc2.get_update(doc1.get_state())

doc2.apply_update(update1)
doc1.apply_update(update2)

# Both clients now have consistent XML
print(f"Client 1 XML: {str(xml_doc1)}")
print(f"Client 2 XML: {str(xml_doc2)}")

XML Transformation and Processing

from pycrdt import Doc, XmlFragment, XmlElement, XmlText

def xml_to_html(xml_node) -> str:
    """
    Convert XML structure to HTML string.

    Args:
        xml_node: An XmlText, XmlElement or XmlFragment

    Returns:
        str: The text of a text node; ``<tag attrs>children</tag>`` for an
        element (tag falls back to "div" when unset); the concatenated
        children for a fragment

    Raises:
        TypeError: If ``xml_node`` is none of the supported node types
    """
    import html  # local import keeps the snippet self-contained

    if isinstance(xml_node, XmlText):
        # NOTE(review): returned as-is; whether str() of a text node is already
        # markup-safe is not visible here — confirm before serving untrusted text.
        return str(xml_node)

    if not isinstance(xml_node, (XmlElement, XmlFragment)):
        # Previously fell through and returned None despite the `-> str` contract
        raise TypeError(f"Unsupported XML node type: {type(xml_node).__name__}")

    # Elements and fragments both render their children in order
    children_html = "".join(xml_to_html(child) for child in xml_node.children)

    if not isinstance(xml_node, XmlElement):
        # A fragment has no tag of its own
        return children_html

    tag = xml_node.tag or "div"

    # Escape attribute values so quotes or angle brackets cannot break the markup
    attrs_str = "".join(
        f' {key}="{html.escape(str(value), quote=True)}"'
        for key, value in xml_node.attributes
    )

    return f"<{tag}{attrs_str}>{children_html}</{tag}>"

def count_elements(xml_node) -> dict:
    """
    Count elements by tag name in ``xml_node`` and its descendants.

    An element counts itself (under "unknown" when it has no tag) plus its
    descendants; a fragment counts only its descendants; any other node
    yields an empty dict.
    """
    totals = {}

    if isinstance(xml_node, XmlElement):
        totals[xml_node.tag or "unknown"] = 1
    elif not isinstance(xml_node, XmlFragment):
        # Text nodes (and anything else) contain no elements
        return totals

    # Fold each child's counts into the running totals
    for child in xml_node.children:
        for name, amount in count_elements(child).items():
            totals[name] = totals.get(name, 0) + amount

    return totals

# Example usage
doc = Doc()
xml_doc = doc.get("document", type=XmlFragment)

# Build sample document. Each node is attached to its parent first and then
# populated through the node returned by append(); initial attributes are
# passed to the constructor, so no detached node is modified.
article = xml_doc.children.append(XmlElement("article", {"class": "blog-post"}))

title = article.children.append(XmlElement("h1"))
title.children.append(XmlText("My Blog Post"))

content = article.children.append(XmlElement("div", {"class": "content"}))

p1 = content.children.append(XmlElement("p"))
p1.children.append(XmlText("First paragraph."))

p2 = content.children.append(XmlElement("p"))
p2.children.append(XmlText("Second paragraph."))

# Transform and analyze
html_output = xml_to_html(xml_doc)
print(f"HTML output: {html_output}")

element_counts = count_elements(xml_doc)
print(f"Element counts: {element_counts}")

Error Handling

from pycrdt import Doc, XmlElement, XmlFragment, XmlText

doc = Doc()

# Work on nodes that belong to the document, so that any failure below comes
# from the operation itself rather than from using a detached node.
fragment = doc.get("document", type=XmlFragment)
element = fragment.children.append(XmlElement("div"))
text = element.children.append(XmlText("Hello"))


def delete_missing_attribute() -> None:
    """Invalid attribute operation."""
    del element.attributes["nonexistent"]  # May raise KeyError


def read_out_of_range_child() -> None:
    """Invalid child operation."""
    element.children[100]  # May raise IndexError


def insert_at_negative_index() -> None:
    """Invalid text operation."""
    text.insert(-1, "Invalid")  # May raise ValueError


# Each operation gets its own try block: with a single shared block the first
# failure would skip the remaining operations entirely.
for operation in (
    delete_missing_attribute,
    read_out_of_range_child,
    insert_at_negative_index,
):
    try:
        operation()
    except (KeyError, IndexError, ValueError) as e:
        print(f"XML operation failed: {e}")

Install with Tessl CLI

npx tessl i tessl/pypi-pycrdt

docs

array-operations.md

awareness.md

document-management.md

index.md

map-operations.md

position-undo.md

synchronization.md

text-operations.md

xml-support.md

tile.json