tessl/pypi-rdflib

RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information.

—

Pending

Overview

Eval results

Files

SPARQL Queries and Updates

Name: tessl/pypi-rdflib
Author: tessl

Complete SPARQL 1.1 implementation supporting SELECT, CONSTRUCT, ASK, DESCRIBE queries and INSERT, DELETE, LOAD, CLEAR updates with extension mechanisms for custom functions.

Capabilities

Query Preparation and Execution

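Functions for preparing SPARQL queries and updates ahead of execution. Namespace bindings are supplied when the query or update is prepared; the prepared object is then executed later (for example via `Graph.query`), optionally with initial variable bindings.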

def prepareQuery(queryString: str, initNs: Optional[Dict[str, Namespace]] = None, base: Optional[str] = None):
    """
    Prepare a SPARQL query for later execution.

    The returned object can be passed wherever a query string is accepted
    (e.g. ``graph.query(prepared, initBindings=...)``), which lets the same
    query text be reused with different variable bindings.

    Parameters:
    - queryString: SPARQL query text.
    - initNs: Optional mapping of prefix name to namespace. Prefixes supplied
      here can be used in the query text without a PREFIX declaration
      (e.g. ``initNs={"foaf": FOAF}``). Defaults to None.
    - base: Optional base URI for relative references. Defaults to None.

    Returns:
    PreparedQuery: Prepared query object.
    NOTE(review): the class name is taken from this summary; confirm the
    concrete return class against the installed rdflib version.
    """

def prepareUpdate(updateString: str, initNs: Optional[Dict[str, Namespace]] = None, base: Optional[str] = None):
    """
    Prepare a SPARQL update request for later execution.

    The returned object can be passed wherever an update string is accepted
    by an update processor.

    Parameters:
    - updateString: SPARQL update text.
    - initNs: Optional mapping of prefix name to namespace made available to
      the update text. Defaults to None.
    - base: Optional base URI for relative references. Defaults to None.

    Returns:
    PreparedUpdate: Prepared update object.
    NOTE(review): the class name is taken from this summary; confirm the
    concrete return class against the installed rdflib version.
    """

Result - Query Result Container

Container for SPARQL query results with iteration and serialization capabilities.

class Result:
    """
    Container for the outcome of a SPARQL query.

    Which accessors are meaningful depends on the result type: ``bindings``
    and ``vars`` apply to SELECT results, ``graph`` applies to
    CONSTRUCT/DESCRIBE results, and truth-testing the object is the natural
    way to read an ASK result.
    """

    def __init__(self, type: str):
        """
        Create a result container.

        Parameters:
        - type: Result type, one of 'SELECT', 'CONSTRUCT', 'ASK', 'DESCRIBE'.
        """

    def __iter__(self) -> Iterator:
        """
        Iterate over the result contents.

        Returns:
        Iterator: ResultRow objects for SELECT results, triples for
        CONSTRUCT/DESCRIBE results.
        """

    def __len__(self) -> int:
        """
        Get the number of results.

        Returns:
        int: Result count (rows for SELECT; ``len()`` is also used on
        CONSTRUCT results to count triples).
        """

    def __bool__(self) -> bool:
        """
        Convert the result to a boolean (useful for ASK queries).

        Returns:
        bool: True if results exist or the ASK answer is true.
        """

    def serialize(self, format: str = 'xml', encoding: str = 'utf-8') -> str:
        """
        Serialize the results to the specified format.

        Parameters:
        - format: Output format ('xml', 'json', 'csv', 'tsv'). Defaults to 'xml'.
        - encoding: Character encoding. Defaults to 'utf-8'.

        Returns:
        str: Serialized results.
        NOTE(review): rdflib's implementation is documented as returning
        encoded bytes when serializing in memory, and the set of registered
        formats may differ from the list above -- confirm against the
        installed version before writing the output to a text-mode file.
        """

    @property
    def type(self) -> str:
        """
        Get the result type.

        Returns:
        str: 'SELECT', 'CONSTRUCT', 'ASK', or 'DESCRIBE'.
        """

    @property
    def bindings(self) -> List[Dict[str, Node]]:
        """
        Get the variable bindings of a SELECT result.

        Returns:
        List: One binding dictionary per solution.
        """

    @property
    def vars(self) -> List[str]:
        """
        Get the query variables of a SELECT result.

        Returns:
        List: Variable names.
        """

    @property
    def graph(self) -> Graph:
        """
        Get the graph produced by a CONSTRUCT/DESCRIBE query.

        Returns:
        Graph: Result graph.
        """

ResultRow - Individual Result Row

Individual row from SELECT query results.

class ResultRow:
    """
    One solution (row) of a SELECT result.

    Bound values can be read by position (``row[0]``), by variable name
    (``row["name"]``), or as attributes (``row.name``).
    """

    def __init__(self, values: Dict[str, Node], labels: List[str]):
        """
        Create a result row.

        Parameters:
        - values: Variable bindings for this solution.
        - labels: Variable names.
        """

    def __getitem__(self, key: Union[str, int]) -> Node:
        """
        Get a value by variable name or by index.

        Parameters:
        - key: Variable name (str) or index (int).

        Returns:
        Node: Bound value.
        """

    def __getattr__(self, name: str) -> Node:
        """
        Get a value by variable name using attribute access
        (e.g. ``row.name`` for the variable ``?name``).

        Parameters:
        - name: Variable name.

        Returns:
        Node: Bound value.
        """

    def __iter__(self) -> Iterator[Node]:
        """
        Iterate over the bound values.

        Returns:
        Iterator: Bound values in order.
        """

    def asdict(self) -> Dict[str, Node]:
        """
        Get the row as a dictionary.

        Returns:
        Dict: Variable name to value mapping.
        """

Processor - Query Processor Interface

Base interface for SPARQL query processors.

class Processor:
    """Base interface for SPARQL query processors."""

    def query(self, query, initBindings: Optional[Dict] = None, initNs: Optional[Dict[str, Namespace]] = None, base: Optional[str] = None, **kwargs) -> Result:
        """
        Execute a SPARQL query.

        Parameters:
        - query: Query string or prepared query.
        - initBindings: Optional initial variable bindings. Defaults to None.
        - initNs: Optional initial namespace bindings. Defaults to None.
        - base: Optional base URI. Defaults to None.
        - **kwargs: Additional keyword arguments; their meaning is not
          described in this summary.

        Returns:
        Result: Query results.
        """

UpdateProcessor - Update Processor Interface

Base interface for SPARQL update processors.

class UpdateProcessor:
    """Base interface for SPARQL update processors."""

    def update(self, update, initBindings: Optional[Dict] = None, initNs: Optional[Dict[str, Namespace]] = None, base: Optional[str] = None, **kwargs):
        """
        Execute a SPARQL update.

        No return value is declared for this method.

        Parameters:
        - update: Update string or prepared update.
        - initBindings: Optional initial variable bindings. Defaults to None.
        - initNs: Optional initial namespace bindings. Defaults to None.
        - base: Optional base URI. Defaults to None.
        - **kwargs: Additional keyword arguments; their meaning is not
          described in this summary.
        """

Usage Examples

SELECT Queries

from rdflib import Graph
from rdflib.namespace import FOAF
from rdflib.plugins.sparql import prepareQuery

# Load the data set the query runs against.
people = Graph()
people.parse("people.ttl", format="turtle")

# Names and ages of everyone older than 25, youngest first.
adults_query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT ?name ?age WHERE {
        ?person foaf:name ?name .
        ?person foaf:age ?age .
        FILTER(?age > 25)
    }
    ORDER BY ?age
"""

matches = people.query(adults_query)

# 1) Attribute access: each SELECT variable is readable as row.<variable>.
for row in matches:
    print(f"Name: {row.name}, Age: {row.age}")

# 2) Positional access: values follow the order of the SELECT clause.
for row in matches:
    print(f"Name: {row[0]}, Age: {row[1]}")

# 3) Dictionary form: variable name -> bound value.
for data in (row.asdict() for row in matches):
    print(f"Data: {data}")

Prepared Queries

from rdflib import Graph, Literal
from rdflib.namespace import FOAF
from rdflib.plugins.sparql import prepareQuery

g = Graph()
# ... populate graph ...

# The query text has no PREFIX line: the foaf prefix is supplied through
# initNs when the query is prepared.
query_string = """
    SELECT ?name ?email WHERE {
        ?person foaf:name ?name .
        ?person foaf:mbox ?email .
        FILTER(?name = ?target_name)
    }
"""

namespaces = {"foaf": FOAF}
prepared = prepareQuery(query_string, initNs=namespaces)

# ?target_name is bound at execution time, so the prepared query can be
# reused for other names.
bindings = {"target_name": Literal("John Doe")}
for row in g.query(prepared, initBindings=bindings):
    print(f"Name: {row.name}, Email: {row.email}")

CONSTRUCT Queries

from rdflib import Graph

g = Graph()
g.parse("full_data.ttl", format="turtle")

# CONSTRUCT query to extract subset
construct_query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    CONSTRUCT {
        ?person foaf:name ?name .
        ?person foaf:age ?age .
    }
    WHERE {
        ?person foaf:name ?name .
        ?person foaf:age ?age .
        FILTER(?age >= 18)
    }
"""

# g.query() returns a Result wrapper; the constructed triples live in its
# .graph attribute (same access pattern as for DESCRIBE results).
result = g.query(construct_query)
result_graph = result.graph
print(f"Constructed graph has {len(result_graph)} triples")

# Serialize the result graph
adult_data = result_graph.serialize(format="turtle")

ASK Queries

from rdflib import Graph

g = Graph()
g.parse("data.ttl", format="turtle")

# Is there a "John Doe" whose age is above 30?
ask_query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    ASK {
        ?person foaf:name "John Doe" .
        ?person foaf:age ?age .
        FILTER(?age > 30)
    }
"""

result = g.query(ask_query)

# An ASK result is read by truth-testing the Result object.
print("Found John Doe over 30" if result else "No match found")

# The same answer as a plain bool.
has_match = bool(result)

DESCRIBE Queries

from rdflib import Graph, URIRef

g = Graph()
g.parse("data.ttl", format="turtle")

# Ask for a description of a single resource.
describe_query = """
    DESCRIBE <http://example.org/person/1>
"""

# The description is delivered as a graph on the query result.
result = g.query(describe_query)
described_graph = result.graph

print(f"Description has {len(described_graph)} triples")
for statement in described_graph:
    print(statement)

SPARQL Updates

from rdflib import Graph, Literal, URIRef
from rdflib.namespace import FOAF

g = Graph()

# INSERT DATA update: add ground triples (no variables, no WHERE clause)
insert_update = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    INSERT DATA {
        <http://example.org/person/1> foaf:name "John Doe" ;
                                     foaf:age 30 ;
                                     foaf:mbox <mailto:john@example.com> .
    }
"""

g.update(insert_update)

# INSERT/DELETE with WHERE clause: replace John Doe's age with age + 1
modify_update = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    DELETE { ?person foaf:age ?old_age }
    INSERT { ?person foaf:age ?new_age }
    WHERE {
        ?person foaf:name "John Doe" .
        ?person foaf:age ?old_age .
        BIND(?old_age + 1 AS ?new_age)
    }
"""

g.update(modify_update)

# DELETE ... WHERE update with a FILTER.
# The "DELETE WHERE { ... }" shorthand accepts only plain triple/quad
# patterns, so a FILTER requires the full DELETE { template } WHERE { pattern }
# form used here.
delete_update = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    DELETE { ?person foaf:mbox ?email }
    WHERE {
        ?person foaf:mbox ?email .
        FILTER(CONTAINS(STR(?email), "spam"))
    }
"""

g.update(delete_update)

Complex Queries with SPARQL 1.1 Features

from rdflib import Graph

g = Graph()
g.parse("complex_data.ttl", format="turtle")

# Head count and average age per organization, restricted by a subquery to
# organizations with more than five members, largest first.
org_stats_query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX org: <http://www.w3.org/ns/org#>
    
    SELECT ?org_name (COUNT(?person) AS ?employee_count) (AVG(?age) AS ?avg_age) WHERE {
        ?person foaf:name ?name .
        ?person foaf:age ?age .
        ?person org:memberOf ?org .
        ?org foaf:name ?org_name .
        
        # Subquery to filter organizations with > 5 employees
        {
            SELECT ?org WHERE {
                ?person org:memberOf ?org .
            }
            GROUP BY ?org
            HAVING(COUNT(?person) > 5)
        }
    }
    GROUP BY ?org_name
    ORDER BY DESC(?employee_count)
"""

# Aggregates are exposed under their AS aliases on each row.
for row in g.query(org_stats_query):
    print(f"Org: {row.org_name}, Employees: {row.employee_count}, Avg Age: {row.avg_age}")

Result Serialization

from rdflib import Graph

g = Graph()
# ... populate and query ...

results = g.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10")

# Serialize to different formats
xml_results = results.serialize(format='xml')
json_results = results.serialize(format='json')
csv_results = results.serialize(format='csv')
# NOTE(review): confirm that a 'tsv' result *serializer* is registered in the
# installed rdflib version; some versions register 'tsv' only as a parser.
tsv_results = results.serialize(format='tsv')

# Save to file.
# serialize() returns encoded bytes when no destination is given, so the file
# must be opened in binary mode; text mode ('w') would raise TypeError.
with open('results.json', 'wb') as f:
    f.write(json_results)

Custom SPARQL Functions

from rdflib import Graph, Literal, URIRef
from rdflib.plugins.sparql.operators import register_custom_function
from rdflib.namespace import XSD

def custom_upper(arg):
    """Custom SPARQL function to uppercase strings.

    Literal arguments are returned as an upper-cased xsd:string literal;
    any other term is returned unchanged.
    """
    if isinstance(arg, Literal):
        # register_custom_function has no return-type option, so the
        # datatype is declared on the returned literal itself.
        return Literal(str(arg).upper(), datatype=XSD.string)
    return arg

# Register custom function.
# The function IRI must be a URIRef (a plain str would not match the IRI the
# query parser produces), and the callable receives the evaluated arguments.
register_custom_function(
    URIRef("http://example.org/functions#upper"),
    custom_upper,
)

g = Graph()
# ... populate graph ...

# Use custom function in query (every prefix used must be declared)
query = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX fn: <http://example.org/functions#>
    SELECT ?name ?upper_name WHERE {
        ?person foaf:name ?name .
        BIND(fn:upper(?name) AS ?upper_name)
    }
"""

results = g.query(query)
for row in results:
    print(f"Original: {row.name}, Upper: {row.upper_name}")

Install with Tessl CLI