Python implementation of the JSON-LD API for processing Linked Data in JSON format
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Configurable document loaders for fetching remote JSON-LD contexts and documents via HTTP. PyLD supports both synchronous and asynchronous loading with pluggable HTTP client implementations.
Global document loader configuration for all JSON-LD processing operations.
def set_document_loader(load_document_):
"""
Sets the global default JSON-LD document loader.
Args:
load_document_: Document loader function that takes (url, options)
and returns RemoteDocument
"""
def get_document_loader():
"""
Gets the current global document loader.
Returns:
function: Current document loader function
"""
def load_document(url, options, base=None, profile=None, requestProfile=None):
"""
Loads a document from a URL using the current document loader.
Args:
url (str): The URL (relative or absolute) of the remote document
options (dict): Loading options including documentLoader
base (str): The absolute URL to use for making url absolute
profile (str): Profile for selecting JSON-LD script elements from HTML
requestProfile (str): One or more IRIs for request profile parameter
Returns:
RemoteDocument: Loaded document with content and metadata
Raises:
JsonLdError: If document loading fails
"""

Synchronous HTTP document loader using the popular Requests library.
def requests_document_loader(secure=False, **kwargs):
"""
Creates a document loader using the Requests library.
Args:
secure (bool): Require all requests to use HTTPS (default: False)
**kwargs: Additional keyword arguments passed to requests.get()
Common kwargs:
timeout (float or tuple): Request timeout in seconds
verify (bool or str): SSL certificate verification
cert (str or tuple): Client certificate for authentication
headers (dict): Custom HTTP headers
proxies (dict): Proxy configuration
allow_redirects (bool): Follow redirects (default: True)
stream (bool): Stream download (default: False)
Returns:
function: Document loader function compatible with PyLD
Raises:
ImportError: If requests library is not available
"""

from pyld import jsonld
# Basic requests loader with timeout
loader = jsonld.requests_document_loader(timeout=10)
jsonld.set_document_loader(loader)
# Advanced requests loader with SSL and authentication
secure_loader = jsonld.requests_document_loader(
secure=True, # Force HTTPS
timeout=(5, 30), # 5s connect, 30s read timeout
verify='/path/to/cacert.pem', # Custom CA bundle
cert=('/path/to/client.crt', '/path/to/client.key'), # Client cert
headers={'User-Agent': 'MyApp/1.0'},
proxies={'https': 'https://proxy.example.com:8080'}
)
jsonld.set_document_loader(secure_loader)
# Use in JSON-LD processing
doc = jsonld.expand('https://example.org/context.jsonld')

Asynchronous HTTP document loader using aiohttp for high-performance concurrent operations.
def aiohttp_document_loader(loop=None, secure=False, **kwargs):
"""
Creates an asynchronous document loader using aiohttp.
Args:
loop: Event loop for async operations (default: current loop)
secure (bool): Require all requests to use HTTPS (default: False)
**kwargs: Additional keyword arguments passed to aiohttp session
Common kwargs:
timeout (aiohttp.ClientTimeout): Request timeout configuration
connector (aiohttp.BaseConnector): Custom connector for connection pooling
headers (dict): Default headers for all requests
cookies (dict): Default cookies
auth (aiohttp.BasicAuth): Authentication credentials
trust_env (bool): Use environment proxy settings
connector_kwargs: Additional arguments for TCPConnector
Returns:
function: Async document loader function compatible with PyLD
Raises:
ImportError: If aiohttp library is not available
"""

import asyncio
from pyld import jsonld
import aiohttp
# Basic aiohttp loader
loader = jsonld.aiohttp_document_loader()
jsonld.set_document_loader(loader)
# Advanced aiohttp loader with custom configuration
timeout = aiohttp.ClientTimeout(total=30, connect=5)
connector = aiohttp.TCPConnector(
limit=100, # Total connection pool size
ttl_dns_cache=300, # DNS cache TTL
use_dns_cache=True
)
advanced_loader = jsonld.aiohttp_document_loader(
secure=True,
timeout=timeout,
connector=connector,
headers={'User-Agent': 'MyApp/1.0'},
auth=aiohttp.BasicAuth('user', 'pass')
)
jsonld.set_document_loader(advanced_loader)
# Process documents asynchronously
# NOTE: neither jsonld.expand() call below is awaited, so the two documents
# are expanded one after the other; declaring the function async does not
# make the calls concurrent.
async def process_documents():
doc1 = jsonld.expand('https://example.org/doc1.jsonld')
doc2 = jsonld.expand('https://example.org/doc2.jsonld')
return doc1, doc2
# Note: aiohttp loader only provides async loading;
# JSON-LD processing itself remains synchronous

Fallback loader that raises exceptions for all requests, used when no HTTP libraries are available.
def dummy_document_loader(**kwargs):
"""
Creates a dummy document loader that raises exceptions on use.
Args:
**kwargs: Extra keyword arguments (ignored)
Returns:
function: Document loader that always fails
Raises:
JsonLdError: Always raises with 'loading document failed' error
"""

Document loaders return RemoteDocument objects with this structure:
# RemoteDocument format
{
"document": {...}, # The loaded JSON-LD document
"documentUrl": "string", # Final URL after redirects
"contextUrl": "string" # Context URL if Link header present
}

PyLD automatically processes HTTP Link headers to discover JSON-LD contexts:
def parse_link_header(header):
"""
Parses HTTP Link header for JSON-LD context discovery.
Args:
header (str): HTTP Link header value
Returns:
dict: Parsed links keyed by their "rel" value; each entry holds the
target URL and attributes (a list of entries when a "rel" repeats)
"""

from pyld import jsonld
# Link header parsing
header = '<https://example.org/context.jsonld>; rel="http://www.w3.org/ns/json-ld#context"'
links = jsonld.parse_link_header(header)
# Result: {"http://www.w3.org/ns/json-ld#context": {"target": "https://example.org/context.jsonld", "rel": "http://www.w3.org/ns/json-ld#context"}}

Create custom document loaders for specialized requirements:
def custom_document_loader(url, options=None):
    """
    Custom document loader implementation.

    Dispatches on the URL prefix: ``file://`` URLs are read from the local
    filesystem, ``cache://`` URLs are looked up with ``get_from_cache``, and
    every other URL is handed to ``default_http_loader``.

    Args:
        url (str): Document URL to load
        options (dict): Loading options (forwarded to the HTTP fallback)

    Returns:
        dict: RemoteDocument with document, documentUrl, contextUrl

    Raises:
        JsonLdError: With code 'loading document failed' when the document
            cannot be loaded. A JsonLdError raised by the fallback loader
            is propagated unchanged.
    """
    try:
        if url.startswith('file://'):
            # Handle file:// URLs
            import json

            # JSON text is UTF-8; do not depend on the platform's locale
            # encoding when reading it.
            with open(url[len('file://'):], 'r', encoding='utf-8') as f:
                document = json.load(f)
        elif url.startswith('cache://'):
            # Handle cached documents
            document = get_from_cache(url)
        else:
            # Fallback to default HTTP loading
            return default_http_loader(url, options)
    except Exception as e:
        from pyld.jsonld import JsonLdError

        # Errors that are already JSON-LD errors keep their own type/code.
        if isinstance(e, JsonLdError):
            raise
        # 'loading document failed' is the JSON-LD error *code*; it must be
        # passed as ``code`` so that handlers checking ``e.code`` match it.
        raise JsonLdError(
            f'Could not load document: {url}',
            'jsonld.LoadDocumentError',
            {'url': url},
            code='loading document failed',
            cause=e
        )
    return {
        'document': document,
        'documentUrl': url,
        'contextUrl': None
    }
# Register custom loader
jsonld.set_document_loader(custom_document_loader)

# Force HTTPS for all requests
loader = jsonld.requests_document_loader(secure=True)
jsonld.set_document_loader(loader)

# Custom CA bundle
loader = jsonld.requests_document_loader(
verify='/path/to/custom-cacert.pem'
)
# Disable verification (not recommended for production)
loader = jsonld.requests_document_loader(verify=False)

# Requests timeouts
loader = jsonld.requests_document_loader(
timeout=(5, 30) # 5s connect, 30s read
)
# Aiohttp timeouts
import aiohttp
timeout = aiohttp.ClientTimeout(total=30, connect=5)
loader = jsonld.aiohttp_document_loader(timeout=timeout)


def filtered_document_loader(url, options=None):
    """Document loader with URL filtering.

    Rejects URLs whose host is a private, loopback or link-local IP literal,
    then rejects any host that is not on the allow-list, and finally
    delegates to ``standard_loader``.

    Args:
        url (str): Document URL to load
        options (dict): Loading options, forwarded to ``standard_loader``

    Returns:
        The value returned by ``standard_loader(url, options)``

    Raises:
        JsonLdError: With code 'loading document failed' when the URL is
            rejected by either filter
    """
    import ipaddress
    import urllib.parse

    # Compare the parsed hostname, not the raw URL or netloc: netloc also
    # carries the port and userinfo, and prefix checks miss https and IPv6.
    host = urllib.parse.urlparse(url).hostname or ''

    # Block private networks
    try:
        address = ipaddress.ip_address(host)
    except ValueError:
        address = None  # host is a name, not an IP literal
    if address is not None and (
        address.is_private or address.is_loopback or address.is_link_local
    ):
        raise JsonLdError(
            'Private network access denied',
            'jsonld.LoadDocumentError',
            code='loading document failed'
        )

    # Allow only specific domains
    allowed_domains = ('example.org', 'w3.org', 'schema.org')
    if host not in allowed_domains:
        raise JsonLdError(
            'Domain not allowed',
            'jsonld.LoadDocumentError',
            code='loading document failed'
        )

    # Use standard loader for allowed URLs
    return standard_loader(url, options)
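jsonld.set_document_loader(filtered_document_loader)

PyLD automatically selects document loaders in this priority order:
1. requests_document_loader (if the Requests library is installed)
2. aiohttp_document_loader (if aiohttp is installed)
3. dummy_document_loader (fallback when neither library is available)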
Override with explicit loader selection:
# Force aiohttp even if requests is available
jsonld.set_document_loader(jsonld.aiohttp_document_loader())
# Or force requests
jsonld.set_document_loader(jsonld.requests_document_loader())

Document loaders may raise JsonLdError with these error types:
Handle loading errors gracefully:
from pyld.jsonld import JsonLdError
try:
result = jsonld.expand('https://example.org/doc.jsonld')
except JsonLdError as e:
if e.code == 'loading document failed':
print(f"Could not load document: {e.details}")
# Handle network error
else:
# Handle other JSON-LD errors
raise

Install with Tessl CLI
npx tessl i tessl/pypi-pyld