tessl/pypi-rdflib

RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information.

—

Pending

Overview

Eval results

Files

Utilities and Helpers

Name: tessl/pypi-rdflib
Author: tessl

Utility functions for RDF data manipulation, format detection, term conversion, and graph comparison operations that support common RDF processing tasks.

Capabilities

Term Conversion and Creation

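Functions for creating RDF terms from string notations (short term patterns and N3/Turtle syntax). To wrap native Python values such as ints, floats, booleans, or datetimes, construct a `Literal` directly.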

def to_term(s, default: URIRef = None) -> Node:
    """
    Build an RDF term from a short string pattern.

    This is a string-pattern helper, not a general Python-object converter;
    use Literal(value) to wrap ints, floats, booleans or datetimes.

    Parameters:
    - s: Term string: '<iri>' gives a URIRef, '"text"' gives a Literal,
      '_id' gives a BNode
    - default: Value returned when s is empty or None

    Returns:
    Node: URIRef, Literal or BNode matching the pattern (or default)

    Raises:
    Exception: If s is non-empty and matches none of the patterns
    """

def from_n3(s: str, default: str = None, backend: str = None, nsm: NamespaceManager = None) -> Node:
    """
    Create the RDF term denoted by an N3/Turtle string.

    Parameters:
    - s: N3 string representation, e.g. '<http://example.org/a>',
      '"text"@en', '"42"^^<datatype-iri>' or '_:b1'
    - default: Value returned when s is empty
    - backend: Store used when s denotes a quoted graph or graph
      (NOTE(review): confirm against the RDFLib release in use)
    - nsm: Namespace manager used to expand prefixed names

    Returns:
    Node: Parsed RDF term (or default for empty input)
    """

Collection Utilities

General-purpose collection manipulation functions.

def first(seq):
    """
    Return the first item produced by an iterable.

    Parameters:
    - seq: Any iterable (list, generator, graph query result, ...)

    Returns:
    First item, or None if the iterable yields nothing
    """

def uniq(seq) -> set:
    """
    Collapse duplicates into a set.

    Parameters:
    - seq: Input iterable of hashable items

    Returns:
    set: The distinct items. Iteration order is NOT guaranteed to match the
    input order; sort the result if a stable order is needed.
    """

def more_than(seq, n: int) -> int:
    """
    Check if an iterable has more than n items.

    Parameters:
    - seq: Iterable to check
    - n: Threshold count

    Returns:
    int: 1 if seq has more than n items, otherwise 0 (truthy/falsy, so it
    can be used directly in a condition; wrap in bool() for True/False)
    """

Format Detection

Functions for guessing the RDF serialization format from a file name's extension.

def guess_format(fpath: str, fmap: dict = None) -> str:
    """
    Guess the RDF serialization format from a file name's suffix.

    Only the extension is inspected; file content is never analyzed.

    Parameters:
    - fpath: File path or name (the extension is matched case-insensitively)
    - fmap: Optional suffix -> format mapping that replaces the built-in one

    Returns:
    str: Format identifier ('turtle', 'xml', 'json-ld', etc.), or None when
    the suffix is not recognized
    """

Graph Analysis

Functions for analyzing graph structure and finding patterns.

def find_roots(graph: Graph, prop: URIRef, roots: Set[Node] = None) -> Set[Node]:
    """
    Find the root nodes of a transitive hierarchy.

    Triples are read as (child, prop, parent), i.e. the direction of
    rdfs:subClassOf or skos:broader.

    Parameters:
    - graph: Graph to analyze
    - prop: Property defining hierarchy (e.g., rdfs:subClassOf)
    - roots: Existing root set to extend

    Returns:
    Set: Nodes that occur as a parent of prop but never as a child
    """

Date and Time Utilities

Functions for working with date/time values in RDF.

def date_time(t: float = None, local_time_zone: bool = False) -> str:
    """
    Format a timestamp as a W3C (ISO 8601) datetime string.

    Parameters:
    - t: Unix timestamp (current time if None)
    - local_time_zone: Use local timezone offset instead of UTC ('Z')

    Returns:
    str: Formatted datetime, e.g. '2005-09-11T23:54:10Z'. Wrap the result in
    Literal(..., datatype=XSD.dateTime) when a typed literal is needed.
    """

def parse_date_time(val: str) -> int:
    """
    Parse a W3C (ISO 8601) datetime string into a Unix timestamp.

    Parameters:
    - val: Datetime string such as '2021-01-01T12:00:00Z'

    Returns:
    int: Seconds since the Unix epoch, always expressed in UTC
    """

Graph Comparison

Functions for comparing graphs and analyzing their differences. These functions are provided by the `rdflib.compare` module.

def isomorphic(graph1: Graph, graph2: Graph) -> bool:
    """
    Test if two graphs are isomorphic.

    Import from rdflib.compare. Blank nodes are taken into account
    structurally, so graphs that differ only in blank-node labels compare
    as isomorphic.

    Parameters:
    - graph1: First graph
    - graph2: Second graph

    Returns:
    bool: True if graphs are isomorphic
    """

def graph_diff(g1: Graph, g2: Graph) -> Tuple[Graph, Graph, Graph]:
    """
    Compute the shared and differing triples of two graphs.

    Import from rdflib.compare.

    Parameters:
    - g1: First graph
    - g2: Second graph

    Returns:
    Tuple: (in_both, in_first_only, in_second_only) graphs -- three values,
    so unpack into three names
    """

def to_canonical_graph(graph: Graph) -> Graph:
    """
    Convert graph to canonical form for comparison.

    Import from rdflib.compare.

    Parameters:
    - graph: Input graph

    Returns:
    Graph: Read-only canonical graph whose blank-node identifiers are
    derived deterministically from the graph contents
    """

URI and Path Utilities

Functions for working with URIs and file paths.

def defrag(uriref: URIRef) -> URIRef:
    """
    Remove fragment from URI.

    NOTE(review): RDFLib exposes this operation as the URIRef.defrag()
    method (uriref.defrag()); confirm a module-level function exists before
    importing one from rdflib.util.

    Parameters:
    - uriref: URI reference

    Returns:
    URIRef: URI without fragment (unchanged if there is no '#')
    """

def file_uri_to_path(uri: str) -> str:
    """
    Convert file:// URI to file path.

    NOTE(review): this name is not among rdflib.util's documented public
    functions; confirm the import location for the RDFLib release in use.

    Parameters:
    - uri: File URI string

    Returns:
    str: Local file path
    """

def path_to_file_uri(path: str) -> str:
    """
    Convert file path to file:// URI.

    NOTE(review): this name is not among rdflib.util's documented public
    functions; confirm the import location for the RDFLib release in use.
    The standard library's pathlib.Path(path).as_uri() is an alternative for
    absolute paths.

    Parameters:
    - path: Local file path

    Returns:
    str: File URI
    """

Namespace Utilities

Additional namespace manipulation functions. `split_uri` is imported from `rdflib.namespace`.

def split_uri(uri: URIRef) -> Tuple[str, str]:
    """
    Split URI into namespace and local name.

    Import from rdflib.namespace.

    Parameters:
    - uri: URI to split

    Returns:
    Tuple: (namespace, local_name)

    Raises:
    ValueError: If no valid split point can be found in the URI
    """

Usage Examples

Term Conversion

from rdflib import Graph, Literal
from rdflib.util import to_term, from_n3
from rdflib.namespace import XSD
from datetime import datetime

# Wrap native Python values with Literal; the datatype is inferred from the
# Python type. (to_term only understands term *strings*, so passing it an
# int, float, bool or datetime would raise.)
string_term = Literal("Hello World")  # plain literal, no datatype
int_term = Literal(42)                # datatype XSD.integer
float_term = Literal(3.14)            # datatype XSD.double
bool_term = Literal(True)             # datatype XSD.boolean
date_term = Literal(datetime.now())   # datatype XSD.dateTime

print(f"String: {string_term}")
print(f"Integer: {int_term}")
print(f"Float: {float_term}")
print(f"Boolean: {bool_term}")
print(f"Date: {date_term}")

# to_term maps short string patterns onto term types
uri_term = to_term("<http://example.org/person/1>")  # -> URIRef
quoted_term = to_term('"Hello World"')               # -> Literal("Hello World")
bnode_term = to_term("_b1")                          # -> BNode

print(f"to_term URI: {uri_term}")
print(f"to_term Literal: {quoted_term}")
print(f"to_term BNode: {bnode_term}")

# Parse N3 notation
uri_from_n3 = from_n3("<http://example.org/person/1>")
literal_from_n3 = from_n3('"John Doe"')
typed_literal = from_n3('"42"^^<http://www.w3.org/2001/XMLSchema#integer>')

print(f"URI: {uri_from_n3}")
print(f"Literal: {literal_from_n3}")
print(f"Typed: {typed_literal}")

Collection Utilities

from rdflib.util import first, uniq, more_than

# Get first item
data = [1, 2, 3, 4, 5]
first_item = first(data)
print(f"First: {first_item}")

# Remove duplicates. uniq returns a set, which has no guaranteed order,
# so sort the result to get a stable, readable list.
duplicated = [1, 2, 2, 3, 3, 3, 4, 4, 5]
unique_items = sorted(uniq(duplicated))
print(f"Unique: {unique_items}")

# Check if more than threshold. more_than returns 1 or 0; bool() turns
# that into the True/False flag the message suggests.
has_many = bool(more_than(data, 3))
print(f"More than 3 items: {has_many}")

# Handle empty sequences
empty_first = first([])
print(f"First of empty: {empty_first}")  # None

Format Detection

from rdflib import Graph
from rdflib.util import guess_format

# Detect from file extension
format1 = guess_format("data.ttl")     # 'turtle'
format2 = guess_format("data.rdf")     # 'xml'
format3 = guess_format("data.jsonld")  # 'json-ld'

print(f"TTL format: {format1}")
print(f"RDF format: {format2}")
print(f"JSON-LD format: {format3}")

# guess_format only looks at the file suffix; it has no `data` argument and
# cannot sniff content. For in-memory data, state the format explicitly
# when parsing.
turtle_content = """
@prefix ex: <http://example.org/> .
ex:person1 ex:name "John" .
"""

xml_content = """<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
</rdf:RDF>"""

turtle_graph = Graph().parse(data=turtle_content, format="turtle")
xml_graph = Graph().parse(data=xml_content, format="xml")

print(f"Turtle triples parsed: {len(turtle_graph)}")
print(f"XML triples parsed: {len(xml_graph)}")

Graph Comparison

from rdflib import Graph, URIRef, Literal
from rdflib.compare import isomorphic, graph_diff  # comparison helpers live in rdflib.compare
from rdflib.namespace import FOAF

person = URIRef("http://example.org/person/1")

# Create two similar graphs
g1 = Graph()
g1.add((person, FOAF.name, Literal("John")))
g1.add((person, FOAF.age, Literal(30)))

g2 = Graph()
g2.add((person, FOAF.name, Literal("John")))
g2.add((person, FOAF.age, Literal(30)))

# Test isomorphism
are_isomorphic = isomorphic(g1, g2)
print(f"Graphs are isomorphic: {are_isomorphic}")

# Add different data. FOAF has no `email` term; the FOAF property for an
# e-mail address is foaf:mbox, whose value is a mailto: IRI.
g2.add((person, FOAF.mbox, URIRef("mailto:john@example.com")))
g1.add((person, FOAF.phone, Literal("555-1234")))

# Find differences. graph_diff returns THREE graphs: the shared triples
# first, then the triples unique to each input.
in_both, only_in_g1, only_in_g2 = graph_diff(g1, g2)

print("In both:")
for triple in in_both:
    print(f"  {triple}")

print("Only in g1:")
for triple in only_in_g1:
    print(f"  {triple}")

print("Only in g2:")
for triple in only_in_g2:
    print(f"  {triple}")

Graph Analysis

from rdflib import Graph, URIRef
from rdflib.namespace import RDFS
from rdflib.util import find_roots

g = Graph()

# IRIs of the classes that make up the example hierarchy
animal, mammal, dog, cat = map(
    URIRef,
    (
        "http://example.org/Animal",
        "http://example.org/Mammal",
        "http://example.org/Dog",
        "http://example.org/Cat",
    ),
)

# Each pair is (subclass, superclass); Animal sits at the top
for child, parent in ((mammal, animal), (dog, mammal), (cat, mammal)):
    g.add((child, RDFS.subClassOf, parent))

# Collect the classes that have no superclass of their own
roots = find_roots(g, RDFS.subClassOf)
print("Root classes:")
for root in roots:
    print(f"  {root}")

Date and Time Utilities

from rdflib.util import date_time, parse_date_time
from datetime import datetime
import time

# Format timestamps as W3C datetime strings (date_time returns a plain str,
# not a Literal)
current_time = date_time()
specific_time = date_time(1609459200)  # '2021-01-01T00:00:00Z'
local_time = date_time(local_time_zone=True)

print(f"Current: {current_time}")
print(f"Specific: {specific_time}")
print(f"Local: {local_time}")

# Parse a datetime string back into seconds since the Unix epoch (an int,
# not a datetime object); this input yields 1609502400
dt_string = "2021-01-01T12:00:00Z"
parsed_dt = parse_date_time(dt_string)
print(f"Parsed: {parsed_dt}")

URI Utilities

from pathlib import Path

from rdflib import URIRef
from rdflib.namespace import split_uri  # split_uri lives in rdflib.namespace
import os

# Split URI into namespace and local name
uri = URIRef("http://xmlns.com/foaf/0.1/name")
namespace, local_name = split_uri(uri)
print(f"Namespace: {namespace}")
print(f"Local name: {local_name}")

# Remove fragment from URI (defrag is a method on URIRef)
fragmented_uri = URIRef("http://example.org/resource#section1")
defragged = fragmented_uri.defrag()
print(f"Original: {fragmented_uri}")
print(f"Defragged: {defragged}")

# Work with file URIs. Path.as_uri() percent-encodes special characters and
# handles Windows drive letters, which plain "file://" + path does not.
file_path = "/path/to/data.ttl"
file_uri = Path(os.path.abspath(file_path)).as_uri()
print(f"File URI: {file_uri}")

Advanced Utilities

from rdflib import Graph, URIRef, Literal, BNode
from rdflib.util import first, uniq, more_than
from rdflib.namespace import FOAF, RDF

g = Graph()

# Build graph with various data types
people_data = [
    ("John", 30, "john@example.com"),
    ("Jane", 25, "jane@example.com"),
    ("Bob", 35, "bob@example.com"),
    ("Alice", 30, "alice@example.com"),  # Duplicate age
]

for i, (name, age, email) in enumerate(people_data, 1):
    person = URIRef(f"http://example.org/person/{i}")
    g.add((person, RDF.type, FOAF.Person))
    # Literal infers the datatype from the Python value (str -> plain
    # literal, int -> xsd:integer); to_term only parses term strings.
    g.add((person, FOAF.name, Literal(name)))
    g.add((person, FOAF.age, Literal(age)))
    g.add((person, FOAF.mbox, URIRef(f"mailto:{email}")))

# Find unique ages (uniq returns an unordered set, so sort for display)
ages = [obj.toPython() for obj in g.objects(None, FOAF.age)]
unique_ages = sorted(uniq(ages))
print(f"Ages: {ages}")
print(f"Unique ages: {unique_ages}")

# Get first person
first_person = first(g.subjects(RDF.type, FOAF.Person))
print(f"First person: {first_person}")

# Check if there are many people. more_than stops iterating as soon as the
# threshold is passed, so the generator can be handed over directly.
many_people = bool(more_than(g.subjects(RDF.type, FOAF.Person), 2))
print(f"More than 2 people: {many_people}")

Error Handling with Utilities

from rdflib.util import from_n3, to_term, guess_format

# Handle parsing errors
# NOTE(review): whether this particular input raises depends on the RDFLib
# release; the guard is kept so the example is safe either way.
try:
    bad_n3 = from_n3("<<invalid>>")
except Exception as e:
    print(f"N3 parsing error: {e}")

# Handle unsupported types
class CustomObject:
    pass

# to_term expects a term string; other objects fail inside it. The broad
# catch is deliberate: to_term signals unrecognised input with a plain
# Exception, so there is no narrower type to catch.
try:
    custom_term = to_term(CustomObject())
except Exception as e:
    print(f"Term conversion error: {e}")

# Handle unknown formats: guess_format does not raise for an unrecognised
# suffix, it returns None, so test the result instead of catching.
unknown_format = guess_format("data.unknown")
print(f"Unknown format result: {unknown_format}")
if unknown_format is None:
    unknown_format = "turtle"  # explicit fallback chosen by the caller
    print(f"Falling back to format: {unknown_format}")

Install with Tessl CLI