Python Telegraph API wrapper for creating and managing Telegraph pages and accounts
Utility functions for converting between HTML and Telegraph's internal node format. These functions handle HTML parsing, validation, and conversion while respecting Telegraph's allowed tag restrictions.
Convert HTML content to Telegraph's internal node format.
def html_to_nodes(html_content: str) -> list:
"""
Convert HTML content to Telegraph nodes format.
Parameters:
- html_content (str): HTML string to convert
Returns:
list: Telegraph nodes representation of the HTML
Raises:
NotAllowedTag: HTML contains tags not allowed by Telegraph
InvalidHTML: HTML is malformed or has mismatched tags
"""

Usage examples:
from telegraph.utils import html_to_nodes
# Simple HTML conversion
html = '<p>Hello <strong>world</strong>!</p>'
nodes = html_to_nodes(html)
print(nodes)
# Output: [{'tag': 'p', 'children': ['Hello ', {'tag': 'strong', 'children': ['world']}, '!']}]
# Complex HTML with attributes
html = '<p><a href="https://example.com">Link</a></p>'
nodes = html_to_nodes(html)
print(nodes)
# Output: [{'tag': 'p', 'children': [{'tag': 'a', 'attrs': {'href': 'https://example.com'}, 'children': ['Link']}]}]
# HTML with images
html = '<figure><img src="/file/image.jpg" alt="Photo"><figcaption>Caption</figcaption></figure>'
nodes = html_to_nodes(html)

Convert Telegraph nodes back to HTML format.
def nodes_to_html(nodes: list) -> str:
"""
Convert Telegraph nodes to HTML format.
Parameters:
- nodes (list): Telegraph nodes to convert
Returns:
str: HTML representation of the nodes
"""

Usage examples:
from telegraph.utils import nodes_to_html
# Convert nodes to HTML
nodes = [
{'tag': 'p', 'children': ['Hello ', {'tag': 'em', 'children': ['world']}, '!']}
]
html = nodes_to_html(nodes)
print(html)
# Output: '<p>Hello <em>world</em>!</p>'
# Complex nodes with attributes
nodes = [
{'tag': 'p', 'children': [
{'tag': 'a', 'attrs': {'href': 'https://example.com'}, 'children': ['Visit site']}
]}
]
html = nodes_to_html(nodes)
print(html)
# Output: '<p><a href="https://example.com">Visit site</a></p>'

You can convert HTML to nodes and back to HTML:
from telegraph.utils import html_to_nodes, nodes_to_html
original_html = '<p>Test <strong>content</strong> with <em>formatting</em>.</p>'
nodes = html_to_nodes(original_html)
converted_html = nodes_to_html(nodes)
print(converted_html)
# Output: '<p>Test <strong>content</strong> with <em>formatting</em>.</p>'

Telegraph nodes use a specific JSON structure:
Plain strings represent text content:
"Hello world"

Dictionaries represent HTML elements:
{
'tag': 'p', # Required: HTML tag name
'attrs': {'id': 'content'}, # Optional: attributes dict
'children': ['Text content'] # Optional: child nodes list
}

# Paragraph with text
{'tag': 'p', 'children': ['Simple paragraph']}
# Bold text
{'tag': 'strong', 'children': ['Bold text']}
# Link with attributes
{'tag': 'a', 'attrs': {'href': 'https://example.com'}, 'children': ['Link text']}
# Image (void element)
{'tag': 'img', 'attrs': {'src': '/file/image.jpg', 'alt': 'Description'}}
# Nested elements
{'tag': 'p', 'children': [
'Text with ',
{'tag': 'strong', 'children': ['bold']},
' and ',
{'tag': 'em', 'children': ['italic']},
' formatting.'
]}

Telegraph supports a restricted set of HTML tags:
Text formatting: b, strong, i, em, u, s, code
Structure: p, br, h3, h4, hr, blockquote, pre
Lists: ul, ol, li
Media: img, video, iframe, figure, figcaption
Links: a
Semantic: aside
Whitespace inside <pre> tags is preserved exactly; elsewhere, runs of whitespace are collapsed:

# Multiple spaces collapsed
html = '<p>Multiple    spaces    here</p>'
nodes = html_to_nodes(html)
result = nodes_to_html(nodes)
print(result) # '<p>Multiple spaces here</p>'
# Preformatted text preserved
html = '<pre> Code with spaces </pre>'
nodes = html_to_nodes(html)
result = nodes_to_html(nodes)
print(result) # '<pre> Code with spaces </pre>'

HTML tag names are automatically converted to lowercase:
html = '<P><STRONG>Upper case tags</STRONG></P>'
nodes = html_to_nodes(html)
result = nodes_to_html(nodes)
print(result) # '<p><strong>Upper case tags</strong></p>'

HTML utility functions raise specific exceptions for different error conditions:
from telegraph.utils import html_to_nodes
from telegraph.exceptions import NotAllowedTag, InvalidHTML
# Handle disallowed tags
try:
html = '<script>alert("bad")</script>'
nodes = html_to_nodes(html)
except NotAllowedTag as e:
print(f"Tag not allowed: {e}")
# Handle malformed HTML
try:
html = '<p><strong>Unclosed tags</p>'
nodes = html_to_nodes(html)
except InvalidHTML as e:
print(f"Invalid HTML: {e}")
# Handle missing start tags
try:
html = '</div><p>Content</p>'
nodes = html_to_nodes(html)
except InvalidHTML as e:
print(f"Missing start tag: {e}")

Use utilities to work with different content formats:
from telegraph import Telegraph
from telegraph.utils import html_to_nodes, nodes_to_html
telegraph = Telegraph(access_token='your_token')
# Create page with HTML, retrieve as nodes
html_content = '<p>Original <strong>HTML</strong> content.</p>'
response = telegraph.create_page(
title='HTML Example',
html_content=html_content
)
# Get page content as nodes
page = telegraph.get_page(response['path'], return_html=False)
nodes = page['content']
# Modify nodes programmatically
nodes.append({'tag': 'p', 'children': ['Added paragraph.']})
# Convert back to HTML and update page
updated_html = nodes_to_html(nodes)
telegraph.edit_page(
    response['path'],
    title='Updated HTML Example',
    html_content=updated_html
)


def process_nodes(nodes):
    """Process nodes recursively to modify content.

    Builds a new node list: text nodes are upper-cased, element nodes are
    rebuilt with the same tag and attributes and recursively processed
    children.

    Parameters:
    - nodes (list): Telegraph nodes (strings and element dicts)

    Returns:
    list: Newly built node list; entries that are neither str nor dict
    are left out
    """
    processed = []
    for node in nodes:
        if isinstance(node, str):
            # Text node: apply the text transformation
            processed.append(node.upper())
        elif isinstance(node, dict):
            # Element node: rebuild it, carrying over only the known keys
            new_node = {'tag': node['tag']}
            if 'attrs' in node:
                # Attributes are passed through unchanged (same dict object)
                new_node['attrs'] = node['attrs']
            if 'children' in node:
                new_node['children'] = process_nodes(node['children'])
            processed.append(new_node)
    return processed
# Apply custom processing
original_nodes = html_to_nodes('<p>Process <em>this</em> text.</p>')
modified_nodes = process_nodes(original_nodes)
result_html = nodes_to_html(modified_nodes)
print(result_html) # '<p>PROCESS <em>THIS</em> TEXT.</p>'

Utility function for Telegraph-compatible JSON serialization.
def json_dumps(*args, **kwargs) -> str:
"""
Serialize object to JSON string with Telegraph-compatible formatting.
Uses compact separators and ensures proper Unicode handling.
Arguments passed through to json.dumps() with optimized defaults.
Returns:
str: JSON string with compact formatting
"""

Usage example:
from telegraph.utils import json_dumps
# Serialize nodes for Telegraph API
nodes = [{'tag': 'p', 'children': ['Hello, world!']}]
json_string = json_dumps(nodes)
print(json_string) # Compact JSON output

Context manager for handling file uploads with proper resource management.
class FilesOpener:
"""
Context manager for opening and managing file objects for upload.
Parameters:
- paths (str|list): File path(s) or file-like object(s)
- key_format (str): Format string for file keys, defaults to 'file{}'
"""
def __init__(self, paths, key_format: str = 'file{}'):
pass
def __enter__(self) -> list:
"""
Open files and return list of (key, (filename, file_object, mimetype)) tuples.
"""
pass
def __exit__(self, type, value, traceback):
"""
Close all opened files.
"""
pass

Usage example:
from telegraph.utils import FilesOpener
# Handle single file
with FilesOpener('image.jpg') as files:
print(files) # [('file0', ('file0', <file_object>, 'image/jpeg'))]
# Handle multiple files
with FilesOpener(['img1.png', 'img2.jpg']) as files:
for key, (filename, file_obj, mimetype) in files:
print(f"{key}: {filename} ({mimetype})")

Important constants for HTML processing and validation.
ALLOWED_TAGS: set = {
'a', 'aside', 'b', 'blockquote', 'br', 'code', 'em', 'figcaption', 'figure',
'h3', 'h4', 'hr', 'i', 'iframe', 'img', 'li', 'ol', 'p', 'pre', 's',
'strong', 'u', 'ul', 'video'
}
VOID_ELEMENTS: set = {
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen',
'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr'
}
BLOCK_ELEMENTS: set = {
'address', 'article', 'aside', 'blockquote', 'canvas', 'dd', 'div', 'dl',
'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2',
'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'main', 'nav',
'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table', 'tfoot', 'ul',
'video'
}

These constants can be imported and used for validation:
from telegraph.utils import ALLOWED_TAGS, VOID_ELEMENTS, BLOCK_ELEMENTS
def validate_tag(tag_name: str) -> bool:
    """Check if a tag is allowed by Telegraph.

    The name is lower-cased before the lookup, so the check is
    case-insensitive.

    Parameters:
    - tag_name (str): HTML tag name

    Returns:
    bool: True if the lower-cased name is in ALLOWED_TAGS
    """
    return tag_name.lower() in ALLOWED_TAGS
def is_void_element(tag_name: str) -> bool:
    """Check if a tag is a void element (self-closing).

    The name is lower-cased before the lookup, so the check is
    case-insensitive.

    Parameters:
    - tag_name (str): HTML tag name

    Returns:
    bool: True if the lower-cased name is in VOID_ELEMENTS
    """
    return tag_name.lower() in VOID_ELEMENTS
def is_block_element(tag_name: str) -> bool:
    """Check if a tag is a block-level element.

    The name is lower-cased before the lookup, so the check is
    case-insensitive.

    Parameters:
    - tag_name (str): HTML tag name

    Returns:
    bool: True if the lower-cased name is in BLOCK_ELEMENTS
    """
    return tag_name.lower() in BLOCK_ELEMENTS
# Usage
print(validate_tag('p')) # True
print(validate_tag('script')) # False
print(is_void_element('br')) # True
print(is_block_element('p')) # True


def validate_content(html):
    """Validate HTML content for Telegraph compatibility.

    Parameters:
    - html (str): HTML string to check

    Returns:
    tuple: (True, message) when html_to_nodes accepts the content,
    otherwise (False, message) describing the disallowed tag or the
    structural error
    """
    try:
        # Only the raised exceptions matter; the parsed nodes are discarded
        html_to_nodes(html)
    except NotAllowedTag as e:
        return False, f"Contains disallowed tag: {e}"
    except InvalidHTML as e:
        return False, f"Invalid HTML structure: {e}"
    return True, "Content is valid"
# Validate before creating page
html = '<p>Valid content with <strong>formatting</strong>.</p>'
is_valid, message = validate_content(html)
if is_valid:
telegraph.create_page(title='Validated Content', html_content=html)
else:
print(f"Invalid content: {message}")

Install with Tessl CLI
npx tessl i tessl/pypi-telegraph