CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-pycrdt

Python bindings for Yrs CRDT library providing collaborative data structures for real-time synchronization.

Pending
Overview
Eval results
Files

docs/text-operations.md

Text Operations

Overview

The Text type in pycrdt provides collaborative text editing capabilities similar to Python strings, but with automatic conflict resolution across multiple clients. It supports rich text formatting with attributes, embedded objects, and comprehensive change tracking through delta operations.

Core Types

Text

Collaborative text editing with string-like interface and rich formatting support.

class Text:
    """Collaborative text with a string-like interface and rich formatting support."""

    def __init__(
        self,
        init: str | None = None,
        *,
        _doc: Doc | None = None,
        _integrated: _Text | None = None,
    ) -> None:
        """
        Create a new collaborative text object.

        Args:
            init (str, optional): Initial text content
            _doc (Doc, optional): Parent document
            _integrated (_Text, optional): Native text instance
        """

    # String-like interface
    def __len__(self) -> int:
        """Return the length of the text."""

    def __str__(self) -> str:
        """Return the text content as a string."""

    def __iter__(self) -> Iterator[str]:
        """Iterate over the characters in the text."""

    def __contains__(self, item: str) -> bool:
        """Return whether `item` occurs as a substring of the text."""

    def __getitem__(self, key: int | slice) -> str:
        """Return the character or substring selected by an index or slice."""

    def __setitem__(self, key: int | slice, value: str) -> None:
        """Replace the character or substring selected by an index or slice."""

    def __delitem__(self, key: int | slice) -> None:
        """Delete the character or substring selected by an index or slice."""

    def __iadd__(self, value: str) -> Text:
        """Append text using the `+=` operator."""

    # Text manipulation methods
    def insert(self, index: int, value: str, attrs: dict[str, Any] | None = None) -> None:
        """
        Insert text at the specified index.

        Args:
            index (int): Position to insert text
            value (str): Text to insert
            attrs (dict, optional): Formatting attributes for the inserted text
        """

    def insert_embed(self, index: int, value: Any, attrs: dict[str, Any] | None = None) -> None:
        """
        Insert an embedded object at the specified index.

        Args:
            index (int): Position to insert object
            value: Object to embed
            attrs (dict, optional): Formatting attributes for the embedded object
        """

    def format(self, start: int, stop: int, attrs: dict[str, Any]) -> None:
        """
        Apply formatting attributes to a text range.

        Args:
            start (int): Start index of the range
            stop (int): End index of the range
            attrs (dict): Formatting attributes to apply
        """

    def diff(self) -> list[tuple[Any, dict[str, Any] | None]]:
        """
        Get the formatted text as a list of (content, attributes) tuples.

        Returns:
            list: List of (content, attributes) pairs representing formatted text
        """

    def clear(self) -> None:
        """Remove all text content."""

    def to_py(self) -> str | None:
        """
        Convert text to a Python string.

        Returns:
            str | None: Text content as string, or None if empty
        """

    def observe(self, callback: Callable[[TextEvent], None]) -> Subscription:
        """
        Observe text changes.

        Args:
            callback: Function called when text changes occur

        Returns:
            Subscription: Handle for unsubscribing
        """

    def observe_deep(self, callback: Callable[[list[TextEvent]], None]) -> Subscription:
        """
        Observe deep changes including nested structures.

        Args:
            callback: Function called with list of change events

        Returns:
            Subscription: Handle for unsubscribing
        """

    def unobserve(self, subscription: Subscription) -> None:
        """
        Remove an event observer.

        Args:
            subscription: Subscription handle to remove
        """

    # NOTE: this is a plain (non-async) method. The stream it returns is what
    # gets used asynchronously, e.g. `async with text.events() as stream:`;
    # declaring it `async def` would hand callers a coroutine instead, which
    # cannot be used as an async context manager.
    def events(
        self,
        deep: bool = False,
        max_buffer_size: float = float("inf"),
    ) -> MemoryObjectReceiveStream:
        """
        Get an async stream of text events.

        Args:
            deep (bool): Include deep change events
            max_buffer_size (float): Maximum event buffer size

        Returns:
            MemoryObjectReceiveStream: Async event stream, to be entered with
            `async with` and consumed with `async for`
        """

    def sticky_index(self, index: int, assoc: Assoc = Assoc.AFTER) -> StickyIndex:
        """
        Create a sticky index that maintains its position during edits.

        Args:
            index (int): Initial index position
            assoc (Assoc): Association type (BEFORE or AFTER)

        Returns:
            StickyIndex: Persistent position tracker
        """

TextEvent

Event emitted when text changes occur.

class TextEvent:
    """Event emitted when text changes occur."""

    @property
    def target(self) -> Text:
        """Get the text object that changed."""

    @property
    def delta(self) -> list[dict[str, Any]]:
        """
        Get the delta describing the changes.

        Each entry is a dict holding one operation. Delta format:
        - {"retain": n} - Keep n characters unchanged
        - {"insert": "text", "attributes": {...}} - Insert text with attributes
        - {"delete": n} - Delete n characters
        """

    @property
    def path(self) -> list[int | str]:
        """Get the path to the changed text within the document structure."""

Usage Examples

Basic Text Operations

from pycrdt import Doc, Text

# Obtain the shared Text stored under the "content" key of a new document
doc = Doc()
text = doc.get("content", type=Text)

# Basic string operations
text.insert(0, "Hello, world!")
print(str(text))  # "Hello, world!"
print(len(text))  # 13

# String-like access
print(text[0])      # "H"
print(text[0:5])    # "Hello"
print("world" in text)  # True

# Modification: indices 7-11 hold "world", so this slice assignment
# swaps it for "Python" and leaves the trailing "!" in place
text[7:12] = "Python"
print(str(text))  # "Hello, Python!"

# Append text
text += " How are you?"
print(str(text))  # "Hello, Python! How are you?"

Rich Text Formatting

from pycrdt import Doc, Text

doc = Doc()
text = doc.get("document", type=Text)

# Insert text with formatting. Each insertion index is the running length
# of the text so far, so the three pieces end up back to back.
text.insert(0, "Bold Text", {"bold": True})        # occupies [0, 9)
text.insert(9, " and ", None)                      # occupies [9, 14)
text.insert(14, "Italic Text", {"italic": True})   # occupies [14, 25)

# Apply formatting to existing text
text.format(0, 4, {"color": "red"})  # Make "Bold" red
# "Italic" starts at index 14 and is 6 characters long, so it spans [14, 20)
text.format(14, 20, {"underline": True})  # Underline "Italic"

# Get formatted content
diff = text.diff()
for content, attrs in diff:
    print(f"Content: {content}, Attributes: {attrs}")

Embedded Objects

from pycrdt import Doc, Text

doc = Doc()
text = doc.get("rich_content", type=Text)

# Insert text and embedded objects.
# The lead-in sentence is 22 characters long, so the first embed goes at
# index 22; the next insertion at 23 shows an embed takes one index position.
text.insert(0, "Check out this image: ")
text.insert_embed(22, {"type": "image", "src": "photo.jpg"}, {"width": 300})
# " and this link: " is 16 characters: 23 + 16 == 39
text.insert(23, " and this link: ")
text.insert_embed(39, {"type": "link", "url": "https://example.com"}, {"color": "blue"})

# Process mixed content: in this example embeds are the dict-valued chunks,
# everything else is treated as plain text
diff = text.diff()
for content, attrs in diff:
    if isinstance(content, dict):
        print(f"Embedded object: {content}")
    else:
        print(f"Text: {content}")

Position Tracking

from pycrdt import Doc, Text, Assoc

doc = Doc()
text = doc.get("content", type=Text)

text.insert(0, "Hello, world!")

# Create sticky indices around "world", which occupies [7, 12)
start_pos = text.sticky_index(7, Assoc.BEFORE)  # Before "world"
end_pos = text.sticky_index(12, Assoc.AFTER)    # After "world"

# Insert text before the tracked region
text.insert(0, "Well, ")
# NOTE(review): presumably both indices shift by len("Well, ") == 6,
# i.e. to 13 and 18 - confirm against the StickyIndex documentation
print(f"Start position: {start_pos.get_index()}")  # Adjusted position
print(f"End position: {end_pos.get_index()}")      # Adjusted position

# Extract text using sticky positions, resolving both ends inside one
# transaction before slicing
with doc.transaction() as txn:
    start_idx = start_pos.get_index(txn)
    end_idx = end_pos.get_index(txn)
    tracked_text = text[start_idx:end_idx]
    print(f"Tracked text: {tracked_text}")  # "world"

Event Observation

from pycrdt import Doc, Text, TextEvent

doc = Doc()
text = doc.get("content", type=Text)

def on_text_change(event: TextEvent):
    """Print the changed target, the raw delta, and one line per delta operation.

    Each operation dict is classified by the first matching key, checked in
    the order "retain", "insert", "delete"; operations with none of these
    keys produce no per-operation line.
    """
    print(f"Text changed in: {event.target}")
    changes = event.delta
    print(f"Delta: {changes}")

    for change in changes:
        if "retain" in change:
            kept = change["retain"]
            print(f"  Retain {kept} characters")
            continue
        if "insert" in change:
            # Missing attributes are reported as an empty dict.
            attrs = change.get("attributes", {})
            inserted = change["insert"]
            print(f"  Insert '{inserted}' with {attrs}")
            continue
        if "delete" in change:
            removed = change["delete"]
            print(f"  Delete {removed} characters")

# Subscribe to changes; keep the returned handle so the observer can be removed later
subscription = text.observe(on_text_change)

# Make changes to trigger events
text.insert(0, "Hello")
text.insert(5, ", world!")  # 5 == len("Hello"), i.e. append
text.format(0, 5, {"bold": True})  # format "Hello"

# Clean up: detach the observer registered above
text.unobserve(subscription)

Async Event Streaming

import anyio
from pycrdt import Doc, Text

async def monitor_text_changes(text: Text):
    """Print the delta of every event received from the text's event stream."""
    async with text.events() as stream:
        async for change in stream:
            print(f"Text event: {change.delta}")

doc = Doc()
text = doc.get("content", type=Text)

# Start monitoring in background
async def main():
    """Run the monitor in the background, make two edits, then shut down.

    Two details matter here:

    * The append uses ``text.insert(len(text), ...)`` rather than ``text += ...``.
      An augmented assignment inside this function would make ``text`` a local
      name, so the earlier ``tg.start_soon(..., text)`` would raise
      ``UnboundLocalError`` instead of reading the module-level ``text``.
    * The monitor task loops over the event stream indefinitely, so the task
      group is cancelled explicitly once the edits are done; otherwise the
      ``async with`` block would never exit.
    """
    async with anyio.create_task_group() as tg:
        tg.start_soon(monitor_text_changes, text)

        # Make changes
        await anyio.sleep(0.1)
        text.insert(0, "Hello")
        await anyio.sleep(0.1)
        text.insert(len(text), ", World!")

        # Give the monitor a moment to print the last event, then stop it.
        await anyio.sleep(0.1)
        tg.cancel_scope.cancel()

anyio.run(main)

Collaborative Editing Simulation

from pycrdt import Doc, Text

# Simulate two clients editing the same document
doc1 = Doc(client_id=1)
doc2 = Doc(client_id=2)

# Both clients use the same key so the two Text objects refer to the same shared value
text1 = doc1.get("shared_text", type=Text)
text2 = doc2.get("shared_text", type=Text)

# Client 1 makes changes
with doc1.transaction(origin="client1") as txn:
    text1.insert(0, "Hello from client 1")

# Get update and apply to client 2
update = doc1.get_update()
doc2.apply_update(update)

print(str(text2))  # "Hello from client 1"

# Client 2 now edits on top of the state it just received. These edits are
# sequential rather than truly concurrent: doc2 already contains client 1's text.
with doc2.transaction(origin="client2") as txn:
    text2.insert(0, "Hi! ")
    text2.insert(len(text2), " - and client 2")

# Sync back to client 1
# NOTE(review): passing doc1's state presumably limits the update to what
# doc1 is missing - confirm against the Doc.get_update documentation
update = doc2.get_update(doc1.get_state())
doc1.apply_update(update)

print(str(text1))  # "Hi! Hello from client 1 - and client 2"

Complex Text Processing

from pycrdt import Doc, Text

doc = Doc()
text = doc.get("document", type=Text)

# Build a document with mixed content.
# Every insertion index must equal the length of the text built so far;
# the trailing comment on each line gives the length after that call.
# An embedded object takes one index position.
text.insert(0, "Document Title", {"heading": 1, "bold": True})                 # -> 14
text.insert(14, "\n\n")                                                        # -> 16
text.insert(16, "This is the first paragraph with ", {"paragraph": True})      # -> 49
text.insert(49, "bold text", {"bold": True})                                   # -> 58
text.insert(58, " and ")                                                       # -> 63
text.insert(63, "italic text", {"italic": True})                               # -> 74
text.insert(74, ".")                                                           # -> 75

text.insert(75, "\n\n")                                                        # -> 77
text.insert(77, "Second paragraph with a ")                                    # -> 101
text.insert_embed(101, {"type": "link", "url": "example.com", "text": "link"})  # -> 102
text.insert(102, " and an ")                                                   # -> 110
text.insert_embed(110, {"type": "image", "src": "diagram.png", "alt": "Diagram"})  # -> 111
text.insert(111, ".")                                                          # -> 112

# Process the document
def analyze_content(text: Text):
    """Split the text's diff into labelled text chunks and embedded objects.

    Dict-valued chunks are collected as embeds. Every other chunk is labelled
    "heading" or "paragraph" when its attributes contain that key (heading
    takes precedence), and "text" otherwise.

    Returns:
        tuple: (text_parts, embeds), where text_parts is a list of
        (label, content) pairs and embeds is a list of embedded objects.
    """
    labelled = []
    embedded = []

    for chunk, attrs in text.diff():
        if isinstance(chunk, dict):
            embedded.append(chunk)
            continue

        # Missing or empty attributes behave like "no formatting keys present".
        formatting = attrs or {}
        label = next(
            (kind for kind in ("heading", "paragraph") if kind in formatting),
            "text",
        )
        labelled.append((label, chunk))

    return labelled, embedded

parts, objects = analyze_content(text)
print(f"Text parts: {len(parts)}")
print(f"Embedded objects: {len(objects)}")

Delta Operations

Text changes are represented as delta operations that describe insertions, deletions, and retains:

# Example delta operations
delta_examples = [
    {"retain": 5},                          # Keep 5 characters
    {"insert": "Hello", "attributes": {"bold": True}},  # Insert formatted text
    {"delete": 3},                          # Delete 3 characters
    {"insert": {"type": "image", "src": "photo.jpg"}},  # Insert embed
]

# Processing deltas
def apply_delta(text: Text, delta: list[dict]):
    """Apply a delta to text (conceptual example).

    Walks the operations in order while tracking a cursor position:

    - "retain": advance the cursor; if the operation also carries
      "attributes", apply them to the retained range as formatting.
    - "insert": insert a string (or an embed for non-string content) at the
      cursor, with optional "attributes", then advance past it. An embed
      advances the cursor by one.
    - "delete": remove that many characters at the cursor; the cursor stays
      put because the following content shifts left.

    Args:
        text: Target text, modified in place.
        delta: List of operation dicts, each keyed by "retain", "insert"
            or "delete".
    """
    pos = 0
    for op in delta:
        if "retain" in op:
            span = op["retain"]
            attrs = op.get("attributes")
            if attrs:
                # A retain with attributes is a formatting change over the range.
                text.format(pos, pos + span, attrs)
            pos += span
        elif "insert" in op:
            content = op["insert"]
            attrs = op.get("attributes")
            if isinstance(content, str):
                text.insert(pos, content, attrs)
                pos += len(content)
            else:
                text.insert_embed(pos, content, attrs)
                pos += 1
        elif "delete" in op:
            del text[pos:pos + op["delete"]]

Error Handling

from pycrdt import Doc, Text

doc = Doc()
text = doc.get("content", type=Text)

# NOTE(review): the three calls below share one `try` body, so as soon as one
# of them raises, the remaining ones are skipped. To see each failure mode,
# run the calls one at a time (or wrap each in its own try/except).
try:
    # Invalid index operations
    text.insert(-1, "Invalid")  # May raise ValueError

    # Invalid slice operations
    text[100:200] = "Out of bounds"  # May raise ValueError

    # Invalid formatting
    text.format(10, 5, {"invalid": "range"})  # start > stop

except (ValueError, IndexError) as e:
    # NOTE(review): the exact exception types raised by these calls are not
    # shown here - confirm against the library before relying on this clause
    print(f"Text operation failed: {e}")

Install with Tessl CLI

npx tessl i tessl/pypi-pycrdt

docs

array-operations.md

awareness.md

document-management.md

index.md

map-operations.md

position-undo.md

synchronization.md

text-operations.md

xml-support.md

tile.json