0
# XML Support
1
2
## Overview
3
4
pycrdt provides comprehensive XML document editing capabilities through `XmlFragment`, `XmlElement`, and `XmlText` types. These allow collaborative editing of structured XML documents with automatic conflict resolution, supporting elements, attributes, text content, and nested structures. XML types integrate with the broader CRDT ecosystem and support rich change tracking.
5
6
## Core Types
7
8
### XmlFragment
9
10
XML document fragment that can contain elements and text nodes.
11
12
```python { .api }
13
class XmlFragment:
14
def __init__(
15
self,
16
init: Iterable[XmlFragment | XmlElement | XmlText] | None = None,
17
*,
18
_doc: Doc | None = None,
19
_integrated: _XmlFragment | None = None,
20
) -> None:
21
"""
22
Create a new XML fragment.
23
24
Args:
25
init: Initial child nodes
26
_doc (Doc, optional): Parent document
27
_integrated: Native fragment instance
28
"""
29
30
@property
31
def parent(self) -> XmlFragment | XmlElement | XmlText | None:
32
"""Get the parent node of this fragment."""
33
34
@property
35
def children(self) -> XmlChildrenView:
36
"""Get the children view for managing child nodes."""
37
38
def __str__(self) -> str:
39
"""Get string representation of the XML fragment."""
40
41
def __eq__(self, other: object) -> bool:
42
"""Check equality with another XML node."""
43
44
def __hash__(self) -> int:
45
"""Get hash of the XML fragment."""
46
47
def observe(self, callback: Callable[[XmlEvent], None]) -> Subscription:
48
"""
49
Observe XML fragment changes.
50
51
Args:
52
callback: Function called when fragment changes occur
53
54
Returns:
55
Subscription: Handle for unsubscribing
56
"""
57
58
def observe_deep(self, callback: Callable[[list[XmlEvent]], None]) -> Subscription:
59
"""
60
Observe deep changes including nested structures.
61
62
Args:
63
callback: Function called with list of change events
64
65
Returns:
66
Subscription: Handle for unsubscribing
67
"""
68
69
def unobserve(self, subscription: Subscription) -> None:
70
"""
71
Remove an event observer.
72
73
Args:
74
subscription: Subscription handle to remove
75
"""
76
```
77
78
### XmlElement
79
80
XML element with tag, attributes, and children.
81
82
```python { .api }
83
class XmlElement:
84
def __init__(
85
self,
86
tag: str | None = None,
87
attributes: dict[str, str] | Iterable[tuple[str, str]] | None = None,
88
contents: Iterable[XmlFragment | XmlElement | XmlText] | None = None,
89
*,
90
_doc: Doc | None = None,
91
_integrated: _XmlElement | None = None,
92
) -> None:
93
"""
94
Create a new XML element.
95
96
Args:
97
tag (str, optional): Element tag name
98
attributes: Initial attributes as dict or iterable of tuples
99
contents: Initial child contents
100
_doc (Doc, optional): Parent document
101
_integrated: Native element instance
102
"""
103
104
@property
105
def tag(self) -> str | None:
106
"""Get or set the element tag name."""
107
108
@tag.setter
109
def tag(self, value: str | None) -> None:
110
"""Set the element tag name."""
111
112
@property
113
def attributes(self) -> XmlAttributesView:
114
"""Get the attributes view for managing element attributes."""
115
116
@property
117
def children(self) -> XmlChildrenView:
118
"""Get the children view for managing child nodes."""
119
120
@property
121
def parent(self) -> XmlFragment | XmlElement | XmlText | None:
122
"""Get the parent node of this element."""
123
124
def __str__(self) -> str:
125
"""Get string representation of the XML element."""
126
127
def __eq__(self, other: object) -> bool:
128
"""Check equality with another XML node."""
129
130
def __hash__(self) -> int:
131
"""Get hash of the XML element."""
132
133
def observe(self, callback: Callable[[XmlEvent], None]) -> Subscription:
134
"""
135
Observe XML element changes.
136
137
Args:
138
callback: Function called when element changes occur
139
140
Returns:
141
Subscription: Handle for unsubscribing
142
"""
143
144
def observe_deep(self, callback: Callable[[list[XmlEvent]], None]) -> Subscription:
145
"""
146
Observe deep changes including nested structures.
147
148
Args:
149
callback: Function called with list of change events
150
151
Returns:
152
Subscription: Handle for unsubscribing
153
"""
154
155
def unobserve(self, subscription: Subscription) -> None:
156
"""
157
Remove an event observer.
158
159
Args:
160
subscription: Subscription handle to remove
161
"""
162
```
163
164
### XmlText
165
166
Text content within XML elements with formatting attributes.
167
168
```python { .api }
169
class XmlText:
170
def __init__(
171
self,
172
text: str | None = None,
173
*,
174
_doc: Doc | None = None,
175
_integrated: _XmlText | None = None,
176
) -> None:
177
"""
178
Create a new XML text node.
179
180
Args:
181
text (str, optional): Initial text content
182
_doc (Doc, optional): Parent document
183
_integrated: Native text instance
184
"""
185
186
@property
187
def attributes(self) -> XmlAttributesView:
188
"""Get the attributes view for managing text formatting."""
189
190
@property
191
def parent(self) -> XmlFragment | XmlElement | XmlText | None:
192
"""Get the parent node of this text node."""
193
194
# Text manipulation interface (similar to Text)
195
def __len__(self) -> int:
196
"""Get the length of the text content."""
197
198
def __str__(self) -> str:
199
"""Get the text content as a string."""
200
201
def __iadd__(self, value: str) -> XmlText:
202
"""Append text using += operator."""
203
204
def insert(self, index: int, value: str, attrs: Mapping[str, Any] | None = None) -> None:
205
"""
206
Insert text at the specified index.
207
208
Args:
209
index (int): Position to insert text
210
value (str): Text to insert
211
attrs: Formatting attributes for the inserted text
212
"""
213
214
def insert_embed(self, index: int, value: Any, attrs: dict[str, Any] | None = None) -> None:
215
"""
216
Insert an embedded object at the specified index.
217
218
Args:
219
index (int): Position to insert object
220
value: Object to embed
221
attrs: Formatting attributes for the embedded object
222
"""
223
224
def format(self, start: int, stop: int, attrs: dict[str, Any]) -> None:
225
"""
226
Apply formatting attributes to a text range.
227
228
Args:
229
start (int): Start index of the range
230
stop (int): End index of the range
231
attrs: Formatting attributes to apply
232
"""
233
234
def diff(self) -> list[tuple[Any, dict[str, Any] | None]]:
235
"""
236
Get the formatted text as a list of (content, attributes) tuples.
237
238
Returns:
239
list: List of (content, attributes) pairs representing formatted text
240
"""
241
242
def clear(self) -> None:
243
"""Remove all text content."""
244
245
def to_py(self) -> str:
246
"""
247
Convert text to a Python string.
248
249
Returns:
250
str: Text content as string
251
"""
252
253
def observe(self, callback: Callable[[XmlEvent], None]) -> Subscription:
254
"""
255
Observe XML text changes.
256
257
Args:
258
callback: Function called when text changes occur
259
260
Returns:
261
Subscription: Handle for unsubscribing
262
"""
263
264
def observe_deep(self, callback: Callable[[list[XmlEvent]], None]) -> Subscription:
265
"""
266
Observe deep changes including nested structures.
267
268
Args:
269
callback: Function called with list of change events
270
271
Returns:
272
Subscription: Handle for unsubscribing
273
"""
274
275
def unobserve(self, subscription: Subscription) -> None:
276
"""
277
Remove an event observer.
278
279
Args:
280
subscription: Subscription handle to remove
281
"""
282
```
283
284
## View Types
285
286
### XmlAttributesView
287
288
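Dict-like view for managing XML element/text attributes. Unlike a built-in `dict`, iterating over the view yields `(key, value)` pairs rather than keys alone, and `get()` takes only the key (it returns `None` when the attribute is missing).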
289
290
```python { .api }
291
class XmlAttributesView:
292
def get(self, key: str) -> Any | None:
293
"""
294
Get attribute value by key.
295
296
Args:
297
key (str): Attribute name
298
299
Returns:
300
Any | None: Attribute value or None if not found
301
"""
302
303
def __getitem__(self, key: str) -> Any:
304
"""Get attribute value by key."""
305
306
def __setitem__(self, key: str, value: Any) -> None:
307
"""Set attribute value."""
308
309
def __delitem__(self, key: str) -> None:
310
"""Delete attribute by key."""
311
312
def __contains__(self, key: str) -> bool:
313
"""Check if attribute exists."""
314
315
def __len__(self) -> int:
316
"""Get number of attributes."""
317
318
def __iter__(self) -> Iterable[tuple[str, Any]]:
319
"""Iterate over attribute key-value pairs."""
320
```
321
322
### XmlChildrenView
323
324
List-like view for managing XML element/fragment children.
325
326
```python { .api }
327
class XmlChildrenView:
328
def __len__(self) -> int:
329
"""Get number of child nodes."""
330
331
def __getitem__(self, index: int) -> XmlElement | XmlFragment | XmlText:
332
"""Get child node by index."""
333
334
def __setitem__(self, key: int, value: str | XmlText | XmlElement) -> None:
335
"""Set child node at index."""
336
337
def __delitem__(self, key: int | slice) -> None:
338
"""Delete child node(s) by index or slice."""
339
340
def __iter__(self) -> Iterator[XmlText | XmlElement | XmlFragment]:
341
"""Iterate over child nodes."""
342
343
def insert(self, index: int, element: str | XmlText | XmlElement) -> XmlText | XmlElement:
344
"""
345
Insert a child node at the specified index.
346
347
Args:
348
index (int): Position to insert node
349
element: Node to insert (string, XmlText, or XmlElement)
350
351
Returns:
352
XmlText | XmlElement: The inserted node
353
"""
354
355
def append(self, element: str | XmlText | XmlElement) -> XmlText | XmlElement:
356
"""
357
Append a child node to the end.
358
359
Args:
360
element: Node to append (string, XmlText, or XmlElement)
361
362
Returns:
363
XmlText | XmlElement: The appended node
364
"""
365
```
366
367
### XmlEvent
368
369
Event emitted when an XML node changes: its children, its attributes, or (for `XmlText`) its text content.
370
371
```python { .api }
372
class XmlEvent:
373
@property
374
def children_changed(self) -> bool:
375
"""Check if children were modified."""
376
377
@property
378
def target(self) -> XmlElement | XmlFragment | XmlText:
379
"""Get the XML node that changed."""
380
381
@property
382
def path(self) -> list[int | str]:
383
"""Get the path to the changed node within the document structure."""
384
385
@property
386
def delta(self) -> list[dict[str, Any]]:
387
"""Get the delta describing content changes (for XmlText)."""
388
389
@property
390
def keys(self) -> list[str]:
391
"""Get the list of attribute keys that changed."""
392
```
393
394
## Usage Examples
395
396
### Basic XML Document Creation
397
398
```python
399
from pycrdt import Doc, XmlFragment, XmlElement, XmlText
400
401
doc = Doc()
402
xml_doc = doc.get("document", type=XmlFragment)
403
404
# Create root element
405
root = XmlElement("document")
406
root.attributes["version"] = "1.0"
407
xml_doc.children.append(root)
408
409
# Add nested elements
410
header = XmlElement("header")
411
title = XmlElement("title")
412
title_text = XmlText("My Document")
413
title.children.append(title_text)
414
header.children.append(title)
415
root.children.append(header)
416
417
# Add content section
418
content = XmlElement("content")
419
paragraph = XmlElement("p")
420
para_text = XmlText("This is the first paragraph.")
421
paragraph.children.append(para_text)
422
content.children.append(paragraph)
423
root.children.append(content)
424
425
print(str(xml_doc))
426
# Output: <document version="1.0"><header><title>My Document</title></header><content><p>This is the first paragraph.</p></content></document>
427
```
428
429
### Working with Attributes
430
431
```python
432
from pycrdt import Doc, XmlElement
433
434
doc = Doc()
435
element = XmlElement("div")
436
437
# Set attributes
438
element.attributes["id"] = "main-content"
439
element.attributes["class"] = "container"
440
element.attributes["style"] = "color: blue;"
441
442
# Get attributes
443
element_id = element.attributes.get("id")
444
element_class = element.attributes["class"]
445
446
# Check attributes
447
has_style = "style" in element.attributes
448
print(f"Has style: {has_style}")
449
450
# Iterate attributes
451
for key, value in element.attributes:
452
print(f"{key}: {value}")
453
454
# Remove attributes
455
del element.attributes["style"]
456
```
457
458
### Rich Text in XML
459
460
```python
461
from pycrdt import Doc, XmlElement, XmlText
462
463
doc = Doc()
464
paragraph = XmlElement("p")
465
466
# Create rich text content
467
text = XmlText()
468
text.insert(0, "This is ", None)
469
text.insert(8, "bold", {"font-weight": "bold"})
470
text.insert(12, " and this is ", None)
471
text.insert(25, "italic", {"font-style": "italic"})
472
text.insert(31, " text.", None)
473
474
paragraph.children.append(text)
475
476
# Format existing text
477
text.format(0, 4, {"color": "red"}) # Make "This" red
478
479
# Get formatted content
480
diff = text.diff()
481
for content, attrs in diff:
482
print(f"'{content}' with attributes: {attrs}")
483
```
484
485
### Dynamic XML Construction
486
487
```python
488
from pycrdt import Doc, XmlFragment, XmlElement, XmlText
489
490
def create_article(title: str, author: str, content: list[str]) -> XmlElement:
491
"""Create an article XML structure."""
492
article = XmlElement("article")
493
article.attributes["author"] = author
494
article.attributes["created"] = "2024-01-01"
495
496
# Add title
497
title_elem = XmlElement("title")
498
title_text = XmlText(title)
499
title_elem.children.append(title_text)
500
article.children.append(title_elem)
501
502
# Add content paragraphs
503
content_elem = XmlElement("content")
504
for paragraph_text in content:
505
p_elem = XmlElement("p")
506
p_text = XmlText(paragraph_text)
507
p_elem.children.append(p_text)
508
content_elem.children.append(p_elem)
509
510
article.children.append(content_elem)
511
return article
512
513
doc = Doc()
514
xml_doc = doc.get("document", type=XmlFragment)
515
516
# Create articles
517
article1 = create_article(
518
"Introduction to CRDTs",
519
"Alice",
520
["CRDTs are data structures...", "They provide conflict resolution..."]
521
)
522
523
article2 = create_article(
524
"XML in Collaborative Editing",
525
"Bob",
526
["XML documents can be edited...", "Multiple users can collaborate..."]
527
)
528
529
xml_doc.children.append(article1)
530
xml_doc.children.append(article2)
531
```
532
533
### XML Document Navigation
534
535
```python
536
from pycrdt import Doc, XmlFragment, XmlElement, XmlText
537
538
def find_elements_by_tag(parent, tag_name: str) -> list:
539
"""Find all elements with a specific tag name."""
540
results = []
541
542
for child in parent.children:
543
if isinstance(child, XmlElement) and child.tag == tag_name:
544
results.append(child)
545
546
# Recursively search in child elements and fragments
547
if isinstance(child, (XmlElement, XmlFragment)):
548
results.extend(find_elements_by_tag(child, tag_name))
549
550
return results
551
552
def get_text_content(node) -> str:
553
"""Extract all text content from a node."""
554
text_parts = []
555
556
if isinstance(node, XmlText):
557
return str(node)
558
559
if hasattr(node, 'children'):
560
for child in node.children:
561
text_parts.append(get_text_content(child))
562
563
return ''.join(text_parts)
564
565
# Example usage
566
doc = Doc()
567
xml_doc = doc.get("document", type=XmlFragment)
568
569
# Build a small sample document to navigate
570
root = XmlElement("book")
571
chapter1 = XmlElement("chapter")
572
chapter1.attributes["title"] = "Introduction"
573
chapter1_text = XmlText("This is the introduction chapter.")
574
chapter1.children.append(chapter1_text)
575
576
chapter2 = XmlElement("chapter")
577
chapter2.attributes["title"] = "Advanced Topics"
578
chapter2_text = XmlText("This covers advanced material.")
579
chapter2.children.append(chapter2_text)
580
581
root.children.append(chapter1)
582
root.children.append(chapter2)
583
xml_doc.children.append(root)
584
585
# Navigate and query
586
chapters = find_elements_by_tag(xml_doc, "chapter")
587
print(f"Found {len(chapters)} chapters")
588
589
for chapter in chapters:
590
title = chapter.attributes.get("title") or "Untitled"
591
content = get_text_content(chapter)
592
print(f"Chapter: {title} - {content}")
593
```
594
595
### Event Observation
596
597
```python
598
from pycrdt import Doc, XmlFragment, XmlElement, XmlEvent
599
600
doc = Doc()
601
xml_doc = doc.get("document", type=XmlFragment)
602
603
def on_xml_change(event: XmlEvent):
604
print(f"XML changed: {event.target}")
605
print(f"Children changed: {event.children_changed}")
606
print(f"Path: {event.path}")
607
608
if event.keys:
609
print(f"Attribute keys changed: {event.keys}")
610
611
if event.delta:
612
print(f"Content delta: {event.delta}")
613
614
# Subscribe to changes
615
subscription = xml_doc.observe(on_xml_change)
616
617
# Make changes to trigger events
618
root = XmlElement("root")
619
root.attributes["id"] = "main"
620
xml_doc.children.append(root)
621
622
child_elem = XmlElement("child")
623
child_elem.attributes["class"] = "item"
624
root.children.append(child_elem)
625
626
# Clean up
627
xml_doc.unobserve(subscription)
628
```
629
630
### Deep Event Observation
631
632
```python
633
from pycrdt import Doc, XmlFragment, XmlElement, XmlText
634
635
doc = Doc()
636
xml_doc = doc.get("document", type=XmlFragment)
637
638
def on_deep_change(events):
639
print(f"Deep XML changes: {len(events)} events")
640
for event in events:
641
print(f" {type(event.target).__name__} at path {event.path}")
642
if event.children_changed:
643
print(" Children modified")
644
if event.keys:
645
print(f" Attributes changed: {event.keys}")
646
647
# Subscribe to deep changes
648
subscription = xml_doc.observe_deep(on_deep_change)
649
650
# Create nested structure
651
root = XmlElement("document")
652
section = XmlElement("section")
653
paragraph = XmlElement("p")
654
text = XmlText("Hello, world!")
655
656
# Add nested structure (triggers multiple events)
657
xml_doc.children.append(root)
658
root.children.append(section)
659
section.children.append(paragraph)
660
paragraph.children.append(text)
661
662
# Modify nested content
663
text.insert(7, "XML ")
664
paragraph.attributes["class"] = "intro"
665
666
# Clean up
667
xml_doc.unobserve(subscription)
668
```
669
670
### Collaborative XML Editing
671
672
```python
673
from pycrdt import Doc, XmlFragment, XmlElement, XmlText
674
675
# Simulate two clients editing the same XML document
676
doc1 = Doc(client_id=1)
677
doc2 = Doc(client_id=2)
678
679
xml_doc1 = doc1.get("shared_xml", type=XmlFragment)
680
xml_doc2 = doc2.get("shared_xml", type=XmlFragment)
681
682
# Client 1 creates initial structure
683
with doc1.transaction(origin="client1"):
684
root = XmlElement("document")
685
root.attributes["version"] = "1.0"
686
xml_doc1.children.append(root)
687
688
content = XmlElement("content")
689
root.children.append(content)
690
691
# Sync to client 2
692
update = doc1.get_update()
693
doc2.apply_update(update)
694
695
# Client 2 adds content
696
with doc2.transaction(origin="client2"):
697
root2 = xml_doc2.children[0] # Get root element
698
content2 = root2.children[0] # Get content element
699
700
paragraph = XmlElement("p")
701
paragraph.attributes["id"] = "p1"
702
text = XmlText("This is from client 2.")
703
paragraph.children.append(text)
704
content2.children.append(paragraph)
705
706
# Client 1 adds more content concurrently
707
with doc1.transaction(origin="client1"):
708
root1 = xml_doc1.children[0]
709
content1 = root1.children[0]
710
711
header = XmlElement("h1")
712
header_text = XmlText("Document Title")
713
header.children.append(header_text)
714
content1.children.insert(0, header) # Insert at beginning
715
716
# Sync changes
717
update1 = doc1.get_update(doc2.get_state())
718
update2 = doc2.get_update(doc1.get_state())
719
720
doc2.apply_update(update1)
721
doc1.apply_update(update2)
722
723
# Both clients now have consistent XML
724
print(f"Client 1 XML: {str(xml_doc1)}")
725
print(f"Client 2 XML: {str(xml_doc2)}")
726
```
727
728
### XML Transformation and Processing
729
730
```python
731
from pycrdt import Doc, XmlFragment, XmlElement, XmlText
732
733
def xml_to_html(xml_node) -> str:
734
"""Convert XML structure to HTML string."""
735
if isinstance(xml_node, XmlText):
736
return str(xml_node)
737
738
if isinstance(xml_node, XmlElement):
739
tag = xml_node.tag or "div"
740
741
# Build attributes string
742
attrs = []
743
for key, value in xml_node.attributes:
744
attrs.append(f'{key}="{value}"')
745
attrs_str = " " + " ".join(attrs) if attrs else ""
746
747
# Process children
748
children_html = ""
749
for child in xml_node.children:
750
children_html += xml_to_html(child)
751
752
return f"<{tag}{attrs_str}>{children_html}</{tag}>"
753
754
if isinstance(xml_node, XmlFragment):
755
result = ""
756
for child in xml_node.children:
757
result += xml_to_html(child)
758
return result
759
760
def count_elements(xml_node) -> dict:
761
"""Count elements by tag name."""
762
counts = {}
763
764
if isinstance(xml_node, XmlElement):
765
tag = xml_node.tag or "unknown"
766
counts[tag] = counts.get(tag, 0) + 1
767
768
# Count in children
769
for child in xml_node.children:
770
child_counts = count_elements(child)
771
for tag, count in child_counts.items():
772
counts[tag] = counts.get(tag, 0) + count
773
774
elif isinstance(xml_node, XmlFragment):
775
for child in xml_node.children:
776
child_counts = count_elements(child)
777
for tag, count in child_counts.items():
778
counts[tag] = counts.get(tag, 0) + count
779
780
return counts
781
782
# Example usage
783
doc = Doc()
784
xml_doc = doc.get("document", type=XmlFragment)
785
786
# Build sample document
787
article = XmlElement("article")
788
article.attributes["class"] = "blog-post"
789
790
title = XmlElement("h1")
791
title.children.append(XmlText("My Blog Post"))
792
article.children.append(title)
793
794
content = XmlElement("div")
795
content.attributes["class"] = "content"
796
797
p1 = XmlElement("p")
798
p1.children.append(XmlText("First paragraph."))
799
content.children.append(p1)
800
801
p2 = XmlElement("p")
802
p2.children.append(XmlText("Second paragraph."))
803
content.children.append(p2)
804
805
article.children.append(content)
806
xml_doc.children.append(article)
807
808
# Transform and analyze
809
html_output = xml_to_html(xml_doc)
810
print(f"HTML output: {html_output}")
811
812
element_counts = count_elements(xml_doc)
813
print(f"Element counts: {element_counts}")
814
```
815
816
## Error Handling
817
818
```python
819
from pycrdt import Doc, XmlElement, XmlText
820
821
doc = Doc()
822
823
try:
824
# Invalid attribute operations
825
element = XmlElement("div")
826
del element.attributes["nonexistent"] # May raise KeyError
827
828
# Invalid child operations
829
invalid_index = element.children[100] # May raise IndexError
830
831
# Invalid text operations
832
text = XmlText("Hello")
833
text.insert(-1, "Invalid") # May raise ValueError
834
835
except (KeyError, IndexError, ValueError) as e:
836
print(f"XML operation failed: {e}")
837
```