Python implementation of the JSON-LD API for processing Linked Data in JSON format
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Utility functions for URL parsing, IRI manipulation, and base URL resolution following RFC 3986 standards. These functions support JSON-LD's IRI processing requirements and URL normalization.
Functions for resolving relative IRIs against base IRIs and converting absolute IRIs back to relative form.
def prepend_base(base, iri):
"""
Prepends a base IRI to a relative IRI to create an absolute IRI.
Args:
base (str): The base IRI to resolve against
iri (str): The relative IRI to resolve
Returns:
str: The absolute IRI
Raises:
JsonLdError: If base IRI is invalid or resolution fails
"""
def remove_base(base, iri):
"""
Removes a base IRI from an absolute IRI to create a relative IRI.
Args:
base (str): The base IRI to remove
iri (str): The absolute IRI to make relative
Returns:
str: The relative IRI if the IRI starts with base, otherwise the
original absolute IRI
Raises:
JsonLdError: If base IRI is invalid
"""
from pyld import jsonld
# Resolve relative IRI against base
base = "https://example.org/data/"
relative = "document.jsonld"
absolute = jsonld.prepend_base(base, relative)
print(absolute) # "https://example.org/data/document.jsonld"
# Resolve with path traversal
relative_path = "../other/doc.jsonld"
resolved = jsonld.prepend_base(base, relative_path)
print(resolved) # "https://example.org/other/doc.jsonld"
# Make absolute IRI relative to base
absolute_iri = "https://example.org/data/context.jsonld"
relative_result = jsonld.remove_base(base, absolute_iri)
print(relative_result) # "context.jsonld"
# IRI not relative to base remains absolute
other_iri = "https://other.org/context.jsonld"
unchanged = jsonld.remove_base(base, other_iri)
print(unchanged) # "https://other.org/context.jsonld"
RFC 3986 compliant URL parsing and reconstruction utilities.
def parse_url(url):
"""
Parses a URL into its component parts following RFC 3986.
Args:
url (str): The URL to parse
Returns:
ParsedUrl: Named tuple with components (scheme, authority, path, query, fragment)
Components:
scheme (str): URL scheme (http, https, etc.)
authority (str): Authority component (host:port)
path (str): Path component
query (str): Query string component
fragment (str): Fragment identifier component
"""
def unparse_url(parsed):
"""
Reconstructs a URL from its parsed components.
Args:
parsed (ParsedUrl, dict, list, or tuple): URL components
Returns:
str: The reconstructed URL
Raises:
TypeError: If parsed components are in invalid format
"""
from pyld import jsonld
# Parse URL into components
url = "https://example.org:8080/path/to/doc.jsonld?param=value#section"
parsed = jsonld.parse_url(url)
print(parsed.scheme) # "https"
print(parsed.authority) # "example.org:8080" (non-default port is preserved; only default ports are removed)
print(parsed.path) # "/path/to/doc.jsonld"
print(parsed.query) # "param=value"
print(parsed.fragment) # "section"
# Reconstruct URL from components
reconstructed = jsonld.unparse_url(parsed)
print(reconstructed) # "https://example.org:8080/path/to/doc.jsonld?param=value#section"
# Modify components and reconstruct
modified_parsed = parsed._replace(path="/new/path.jsonld", query="new=param")
new_url = jsonld.unparse_url(modified_parsed)
print(new_url) # "https://example.org:8080/new/path.jsonld?new=param#section"
# Parse URLs with missing components
simple_url = "https://example.org/doc"
simple_parsed = jsonld.parse_url(simple_url)
print(simple_parsed.query) # None
print(simple_parsed.fragment) # None
Utility for normalizing URL paths by removing dot segments according to RFC 3986.
def remove_dot_segments(path):
"""
Removes dot segments from a URL path according to RFC 3986.
Resolves '.' and '..' segments in URL paths to create normalized paths.
Args:
path (str): The path to normalize
Returns:
str: The normalized path with dot segments removed
"""
from pyld import jsonld
# Remove current directory references
path1 = "/a/b/./c"
normalized1 = jsonld.remove_dot_segments(path1)
print(normalized1) # "/a/b/c"
# Remove parent directory references
path2 = "/a/b/../c"
normalized2 = jsonld.remove_dot_segments(path2)
print(normalized2) # "/a/c"
# Complex path with multiple dot segments
path3 = "/a/b/c/./../../g"
normalized3 = jsonld.remove_dot_segments(path3)
print(normalized3) # "/a/g"
# Leading dot segments
path4 = "../../../g"
normalized4 = jsonld.remove_dot_segments(path4)
print(normalized4) # "g"
The parse_url() function returns a ParsedUrl named tuple with these fields:
# ParsedUrl named tuple structure
ParsedUrl = namedtuple('ParsedUrl', ['scheme', 'authority', 'path', 'query', 'fragment'])
# Example ParsedUrl instance
ParsedUrl(
scheme='https',
authority='example.org:8080',
path='/path/to/resource',
query='param=value',
fragment='section'
)
PyLD automatically removes default ports from the authority component:
# Default ports are removed
url1 = "https://example.org:443/path"
parsed1 = jsonld.parse_url(url1)
print(parsed1.authority) # "example.org" (443 removed)
url2 = "http://example.org:80/path"
parsed2 = jsonld.parse_url(url2)
print(parsed2.authority) # "example.org" (80 removed)
# Non-default ports are preserved
url3 = "https://example.org:8080/path"
parsed3 = jsonld.parse_url(url3)
print(parsed3.authority) # "example.org:8080" (8080 preserved)
These utilities work with both URLs and IRIs (Internationalized Resource Identifiers):
# ASCII URLs
ascii_url = "https://example.org/path"
parsed_ascii = jsonld.parse_url(ascii_url)
# International IRIs
iri = "https://例え.テスト/パス"
parsed_iri = jsonld.parse_url(iri)
# Both work with the same parsing logic
# Resolve context relative to document base
document_url = "https://example.org/data/document.jsonld"
context_ref = "../contexts/main.jsonld"
# Extract base from document URL
base = jsonld.remove_base("", document_url).rsplit('/', 1)[0] + "/"
context_url = jsonld.prepend_base(base, context_ref)
print(context_url)  # "https://example.org/contexts/main.jsonld"


def canonicalize_url(url):
    """Canonicalize a URL by parsing it, normalizing its path, and reconstructing it.

    Only the path component is changed: its '.' and '..' segments are
    removed. Every other component is passed through as returned by
    ``jsonld.parse_url``.

    Args:
        url (str): The URL to canonicalize

    Returns:
        str: The reconstructed URL with a normalized path
    """
    parsed = jsonld.parse_url(url)
    # Normalize path
    normalized_path = jsonld.remove_dot_segments(parsed.path)
    # ParsedUrl is a named tuple, so _replace returns a modified copy.
    canonical_parsed = parsed._replace(path=normalized_path)
    return jsonld.unparse_url(canonical_parsed)
# Canonicalize URLs for comparison
url1 = "https://example.org/a/b/../c"
url2 = "https://example.org/a/c"
canonical1 = canonicalize_url(url1)
canonical2 = canonicalize_url(url2)
print(canonical1 == canonical2)  # True


def resolve_links(base_url, links):
    """Resolve a list of relative links against a base URL.

    Args:
        base_url (str): The base URL each link is resolved against
        links (iterable of str): The relative links to resolve

    Returns:
        list: One resolved IRI per input link, in input order
    """
    return [jsonld.prepend_base(base_url, link) for link in links]
base = "https://example.org/docs/"
relative_links = ["intro.html", "../images/logo.png", "section/details.html"]
absolute_links = resolve_links(base, relative_links)
# Result: ["https://example.org/docs/intro.html",
# "https://example.org/images/logo.png",
# "https://example.org/docs/section/details.html"]
These utilities implement RFC 3986 URL processing standards:
The implementation handles edge cases like:
URL utility functions may raise JsonLdError for:
Handle URL errors appropriately:
from pyld.jsonld import JsonLdError
try:
result = jsonld.prepend_base("invalid-base", "relative")
except JsonLdError as e:
print(f"URL resolution failed: {e}")
# Handle invalid base IRI
Install with Tessl CLI
npx tessl i tessl/pypi-pyld