CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-rdflib

RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information.

Pending
Overview
Eval results
Files

docs/parsers-serializers.md

Parsers and Serializers

Pluggable parsers and serializers supporting RDF/XML, Turtle, N-Triples, N-Quads, TriG, JSON-LD, and other RDF formats through a unified interface with the plugin system.

Capabilities

Plugin Registration and Management

Functions for registering and managing parser and serializer plugins.

def register(name: str, kind: type, module_path: str, class_name: str):
    """
    Register a plugin.
    
    Parameters:
    - name: Plugin name (format identifier)
    - kind: Plugin type. NOTE(review): rdflib.plugin appears to key plugins
      by their base class (e.g. rdflib.parser.Parser,
      rdflib.serializer.Serializer, rdflib.store.Store) rather than by a
      string such as 'parser' -- confirm against the rdflib.plugin
      documentation.
    - module_path: Python module path
    - class_name: Class name within module
    """

def get(name: str, kind: type):
    """
    Get a registered plugin.
    
    Parameters:
    - name: Plugin name
    - kind: Plugin type. NOTE(review): expected to be the same base class
      that was used when the plugin was registered (e.g.
      rdflib.parser.Parser), not a string -- confirm.
    
    Returns:
    The plugin. NOTE(review): rdflib.plugin.get appears to return the
    plugin *class*, which the caller then instantiates, rather than an
    instance -- confirm against the rdflib.plugin documentation.
    """

def plugins(kind: type = None) -> Iterator:
    """
    List available plugins.
    
    Parameters:
    - kind: Plugin type to filter by; None lists every plugin
    
    Returns:
    Iterator over the matching plugins.

    NOTE(review): rdflib.plugin.plugins appears to be declared as
    plugins(name=None, kind=None) and to yield Plugin objects exposing
    .name and .kind rather than (name, kind) tuples. If so, pass the kind
    by keyword (plugins(kind=Parser)) -- confirm against the rdflib.plugin
    documentation.
    """

Parser - Base Parser Interface

Base interface for RDF format parsers.

class Parser:
    """Base interface that RDF format parsers implement."""

    def __init__(self):
        """Initialize parser."""
    
    def parse(self, source, graph: Graph, encoding: str = None, **kwargs):
        """
        Parse RDF data into graph.
        
        Parameters:
        - source: Input source (file-like object, string, or URL)
        - graph: Target graph to parse into
        - encoding: Character encoding
        - **kwargs: Additional keyword arguments; this stub accepts them
          without specifying their meaning
        """

Serializer - Base Serializer Interface

Base interface for RDF format serializers.

class Serializer:
    """Base interface that RDF format serializers implement."""

    def __init__(self, graph: Graph):
        """
        Initialize serializer.
        
        Parameters:
        - graph: Graph to serialize
        """
    
    def serialize(self, stream, base: str = None, encoding: str = None, **kwargs):
        """
        Serialize graph to stream.
        
        Parameters:
        - stream: Output stream (file-like object)
        - base: Base URI for relative references
        - encoding: Character encoding
        - **kwargs: Additional keyword arguments; this stub accepts them
          without specifying their meaning
        """

Built-in Parsers

RDFLib includes parsers for major RDF formats:

# NOTE(review): the class names in this listing are illustrative. rdflib's
# concrete parsers are believed to live under rdflib.plugins.parsers and some
# carry different names (e.g. RDFXMLParser, NTParser, TrigParser,
# JsonLDParser) -- confirm before importing any of them directly. Selecting a
# parser by format name through Graph.parse() avoids depending on these names.

# RDF/XML Parser
class XMLParser(Parser):
    """Parse RDF/XML format."""

# Turtle/N3 Parser  
class TurtleParser(Parser):
    """Parse Turtle and N3 formats."""

# N-Triples Parser
class NTriplesParser(Parser):
    """Parse N-Triples format."""

# N-Quads Parser
class NQuadsParser(Parser):
    """Parse N-Quads format."""

# TriX Parser
class TriXParser(Parser):
    """Parse TriX XML format."""

# TriG Parser
class TriGParser(Parser):
    """Parse TriG format."""

# JSON-LD Parser
class JsonLdParser(Parser):
    """Parse JSON-LD format."""

# HexTuples Parser
class HextuplesParser(Parser):
    """Parse HexTuples format."""

Built-in Serializers

RDFLib includes serializers for major RDF formats:

# NOTE(review): the class names in this listing are illustrative. rdflib's
# concrete serializers are believed to live under rdflib.plugins.serializers
# and some carry different names (e.g. NTSerializer, TrigSerializer,
# JsonLDSerializer) -- confirm before importing any of them directly.
# Selecting a serializer by format name through Graph.serialize() avoids
# depending on these names.

# RDF/XML Serializer
class XMLSerializer(Serializer):
    """Serialize to RDF/XML format."""

# Turtle Serializer
class TurtleSerializer(Serializer):
    """Serialize to Turtle format."""

# N-Triples Serializer
class NTriplesSerializer(Serializer):
    """Serialize to N-Triples format."""

# N-Quads Serializer
class NQuadsSerializer(Serializer):
    """Serialize to N-Quads format."""

# TriX Serializer
class TriXSerializer(Serializer):
    """Serialize to TriX XML format."""

# TriG Serializer
class TriGSerializer(Serializer):
    """Serialize to TriG format."""

# JSON-LD Serializer
class JsonLdSerializer(Serializer):
    """Serialize to JSON-LD format."""

# HexTuples Serializer
class HextuplesSerializer(Serializer):
    """Serialize to HexTuples format."""

Format Identification

Supported Format Names

Common format identifiers used with parse() and serialize() methods:

# Format name mappings: each row lists every identifier that selects a format,
# followed by the description those identifiers share.
PARSER_FORMATS = {
    alias: description
    for aliases, description in (
        (('xml', 'rdf'), 'RDF/XML format'),
        (('turtle', 'ttl'), 'Turtle format'),
        (('n3',), 'Notation3 format'),
        (('nt', 'ntriples'), 'N-Triples format'),
        (('nquads', 'nq'), 'N-Quads format'),
        (('trix',), 'TriX XML format'),
        (('trig',), 'TriG format'),
        (('json-ld', 'jsonld'), 'JSON-LD format'),
        (('hext',), 'HexTuples format'),
    )
    for alias in aliases
}

# Serializer identifiers: each row lists every identifier that selects an
# output format, followed by the description those identifiers share.
SERIALIZER_FORMATS = {
    alias: description
    for aliases, description in (
        (('xml',), 'RDF/XML format'),
        (('pretty-xml',), 'Pretty RDF/XML format'),
        (('turtle', 'ttl'), 'Turtle format'),
        (('longturtle',), 'Long-form Turtle format'),
        (('n3',), 'Notation3 format'),
        (('nt', 'ntriples'), 'N-Triples format'),
        (('nquads', 'nq'), 'N-Quads format'),
        (('trix',), 'TriX XML format'),
        (('trig',), 'TriG format'),
        (('json-ld', 'jsonld'), 'JSON-LD format'),
        (('hext',), 'HexTuples format'),
    )
    for alias in aliases
}

Usage Examples

Basic Parsing

from rdflib import Graph

g = Graph()

# Parse from file with format detection (no format argument is passed,
# so the format is detected from the extension)
g.parse("data.ttl")

# Parse with explicit format, one (file, format) pair at a time
for path, fmt in (
    ("data.rdf", "xml"),
    ("data.n3", "n3"),
    ("data.jsonld", "json-ld"),
):
    g.parse(path, format=fmt)

# Parse from URL
g.parse("http://example.org/data.rdf")

# Parse from an in-memory Turtle string
rdf_data = """
@prefix ex: <http://example.org/> .
ex:person1 ex:name "John Doe" .
"""
g.parse(data=rdf_data, format="turtle")

# Parse the same string again, this time supplying a base URI
g.parse(data=rdf_data, format="turtle", publicID="http://example.org/base")

Basic Serialization

from rdflib import Graph

g = Graph()
# ... populate graph ...

# Serialize to different formats; with no destination the serialized
# document is the return value
turtle_data, xml_data, ntriples_data, jsonld_data = (
    g.serialize(format=fmt) for fmt in ("turtle", "xml", "nt", "json-ld")
)

# Serialize to file
g.serialize(destination="output.ttl", format="turtle")
g.serialize(destination="output.rdf", format="xml")

# Serialize with base URI
turtle_with_base = g.serialize(base="http://example.org/", format="turtle")

# Pretty print XML
pretty_xml = g.serialize(format="pretty-xml")

Working with Datasets

from rdflib import Dataset

ds = Dataset()
# ... populate dataset ...

# Parse dataset formats (TriG first, then N-Quads)
for path, fmt in (("data.trig", "trig"), ("data.nq", "nquads")):
    ds.parse(path, format=fmt)

# Serialize dataset formats
trig_data, nquads_data = (
    ds.serialize(format=fmt) for fmt in ("trig", "nquads")
)

Custom Parser Registration

from rdflib import Graph
from rdflib.parser import Parser
from rdflib.plugin import register

class CustomParser(Parser):
    """Skeleton parser; must be importable as mymodule.parsers.CustomParser."""

    def parse(self, source, graph, encoding=None, **kwargs):
        """Custom parser implementation."""
        # Read source and parse into graph
        pass

# Register custom parser.
# `kind` must be the Parser base class, not the string "parser": plugins are
# stored and looked up under the (name, kind) pair, and Graph.parse() looks
# parsers up with the Parser class. A string kind would never be found.
register(
    name="custom",
    kind=Parser,
    module_path="mymodule.parsers",
    class_name="CustomParser",
)

# Use custom parser
g = Graph()
g.parse("data.custom", format="custom")

Custom Serializer Registration

from rdflib import Graph
from rdflib.serializer import Serializer
from rdflib.plugin import register

class CustomSerializer(Serializer):
    """Skeleton serializer; must be importable as mymodule.serializers.CustomSerializer."""

    def serialize(self, stream, base=None, encoding=None, **kwargs):
        """Custom serializer implementation."""
        # Serialize graph to stream
        pass

# Register custom serializer.
# `kind` must be the Serializer base class, not the string "serializer":
# plugins are stored and looked up under the (name, kind) pair, and
# Graph.serialize() looks serializers up with the Serializer class.
register(
    name="custom",
    kind=Serializer,
    module_path="mymodule.serializers",
    class_name="CustomSerializer",
)

# Use custom serializer
g = Graph()
# ... populate graph ...
custom_data = g.serialize(format="custom")

Format-Specific Options

from rdflib import Graph

g = Graph()
# ... populate graph ...

# Turtle with custom indentation: options collected first, then applied
turtle_options = {
    "indent": "  ",  # Custom indent
    "base": "http://example.org/",
}
turtle_data = g.serialize(format="turtle", **turtle_options)

# JSON-LD with context: short term names mapped to FOAF property IRIs
foaf_context = {
    "name": "http://xmlns.com/foaf/0.1/name",
    "age": "http://xmlns.com/foaf/0.1/age",
}
jsonld_data = g.serialize(format="json-ld", context=foaf_context, indent=2)

# RDF/XML with pretty printing
xml_data = g.serialize(format="pretty-xml", max_depth=3, untyped_literals=True)

Parsing with Error Handling

from rdflib import Graph
from rdflib.exceptions import ParserError
from rdflib.plugins.parsers.notation3 import BadSyntax

g = Graph()

try:
    g.parse("invalid_data.ttl", format="turtle")
except (ParserError, BadSyntax) as e:
    # The Turtle/N3 parser reports malformed input as BadSyntax, which is a
    # SyntaxError subclass and not a ParserError. Catching only ParserError
    # would let Turtle syntax errors fall through to the generic handler.
    print(f"Parse error: {e}")
except FileNotFoundError:
    print("File not found")
except Exception as e:
    # Last-resort handler so the example reports anything else instead of
    # crashing (network errors, unknown format names, ...).
    print(f"Unexpected error: {e}")

Batch Processing

import os
from rdflib import Graph

def process_rdf_files(directory):
    """Parse every recognised RDF file in `directory` into one graph.

    Files are recognised by suffix (.ttl, .rdf, .n3, .nt); anything else is
    skipped. A file that fails to parse is reported and the loop carries on.
    Returns the graph holding everything that parsed successfully.
    """
    # (suffix, format name) pairs, tried in this order
    suffix_formats = (
        ('.ttl', 'turtle'),
        ('.rdf', 'xml'),
        ('.n3', 'n3'),
        ('.nt', 'nt'),
    )
    merged = Graph()

    for entry in os.listdir(directory):
        # Determine format from extension
        format_type = next(
            (fmt for suffix, fmt in suffix_formats if entry.endswith(suffix)),
            None,
        )
        if format_type is None:
            continue  # Skip unknown formats

        full_path = os.path.join(directory, entry)
        try:
            merged.parse(full_path, format=format_type)
            print(f"Parsed {entry} ({format_type})")
        except Exception as e:
            print(f"Error parsing {entry}: {e}")

    return merged

# Process all files and serialize result
combined_graph = process_rdf_files("rdf_data/")
combined_graph.serialize("combined.ttl", format="turtle")

Streaming Large Files

from rdflib import Graph
import gzip

# Parse compressed RDF.
# The stream is opened in binary mode: byte streams are accepted as a parse
# source and the parser takes care of decoding.
g = Graph()
with gzip.open('large_data.ttl.gz', 'rb') as f:
    g.parse(f, format="turtle")

# Serialize to compressed file.
# Serializers write encoded bytes to the destination stream, so the gzip file
# must be opened in binary mode; a text-mode ('wt') stream rejects bytes.
with gzip.open('output.ttl.gz', 'wb') as f:
    g.serialize(f, format="turtle", encoding="utf-8")

Format Detection

from rdflib import Graph
from rdflib.util import guess_format

# Automatic format detection from the file name
filename = "data.ttl"
format_type = guess_format(filename)  # None when the extension is not recognised
print(f"Detected format: {format_type}")

g = Graph()  
g.parse(filename, format=format_type)

# guess_format() only looks at the file name/extension; it takes no `data`
# argument and cannot inspect content. For in-memory data, state the format
# explicitly.
data = """<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
</rdf:RDF>"""

g.parse(data=data, format="xml")

Plugin Introspection

from rdflib.parser import Parser
from rdflib.plugin import plugins
from rdflib.serializer import Serializer

# plugins() takes (name=None, kind=None) and yields Plugin objects, so the
# kind filter is passed by keyword as a base class, and each result is read
# through its .name / .kind attributes rather than unpacked as a tuple.

# List all parsers
print("Available parsers:")
for plugin in plugins(kind=Parser):
    print(f"  {plugin.name}")

# List all serializers  
print("Available serializers:")
for plugin in plugins(kind=Serializer):
    print(f"  {plugin.name}")

# List all plugins
print("All plugins:")
for plugin in plugins():
    # .kind is the base class the plugin was registered under
    print(f"  {plugin.name} ({plugin.kind.__name__})")

Install with Tessl CLI

npx tessl i tessl/pypi-rdflib

docs

dataset-named-graphs.md

graph-operations.md

index.md

namespace-management.md

parsers-serializers.md

rdf-containers-collections.md

rdf-terms.md

sparql-queries-updates.md

utilities-helpers.md

tile.json