Python library to parse, validate and create SPDX documents
—
Comprehensive parsing functionality for SPDX documents supporting all major formats with automatic format detection, robust error handling, and encoding support.
Parse SPDX documents from files with automatic format detection based on file extension.
def parse_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse SPDX file in any supported format.

    Automatically detects format from file extension:
    - .spdx, .tag -> Tag/Value format
    - .json -> JSON format
    - .yaml, .yml -> YAML format
    - .xml -> XML format
    - .rdf, .rdf.xml -> RDF/XML format

    Args:
        file_name: Path to SPDX file
        encoding: File encoding (default: utf-8, recommended)

    Returns:
        Document: Parsed SPDX document object

    Raises:
        SPDXParsingError: If parsing fails with detailed error messages
        FileNotFoundError: If file doesn't exist
    """

Parse SPDX documents from JSON format files.
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse SPDX document from JSON file.

    Args:
        file_name: Path to JSON file
        encoding: File encoding

    Returns:
        Document: Parsed SPDX document

    Raises:
        SPDXParsingError: If JSON parsing fails
        JSONDecodeError: If JSON is invalid
    """

Parse SPDX documents from XML format files.
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse SPDX document from XML file.

    Args:
        file_name: Path to XML file
        encoding: File encoding

    Returns:
        Document: Parsed SPDX document

    Raises:
        SPDXParsingError: If XML parsing fails
        ExpatError: If XML is malformed
    """

Parse SPDX documents from YAML format files.
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse SPDX document from YAML file.

    Args:
        file_name: Path to YAML file
        encoding: File encoding

    Returns:
        Document: Parsed SPDX document

    Raises:
        SPDXParsingError: If YAML parsing fails
        ScannerError: If YAML is invalid
    """

Parse SPDX documents from Tag-Value format files (the original SPDX format).
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse SPDX document from Tag-Value file.

    Args:
        file_name: Path to Tag-Value file (.spdx extension)
        encoding: File encoding

    Returns:
        Document: Parsed SPDX document

    Raises:
        SPDXParsingError: If Tag-Value parsing fails
    """

Parse SPDX documents from RDF/XML format files.
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse SPDX document from RDF/XML file.

    Args:
        file_name: Path to RDF file (.rdf or .rdf.xml extension)
        encoding: File encoding

    Returns:
        Document: Parsed SPDX document

    Raises:
        SPDXParsingError: If RDF parsing fails
        SAXParseException: If RDF/XML is malformed
    """

Determine SPDX file format from filename extension.
def file_name_to_format(file_name: str) -> FileFormat:
    """
    Detect SPDX file format from filename extension.

    Supported extensions:
    - .rdf, .rdf.xml -> RDF_XML
    - .tag, .spdx -> TAG_VALUE
    - .json -> JSON
    - .xml -> XML
    - .yaml, .yml -> YAML

    Args:
        file_name: File path or name

    Returns:
        FileFormat: Detected format enum value

    Raises:
        SPDXParsingError: If file extension is not supported
    """

Comprehensive error handling with detailed error messages for parsing failures.
class SPDXParsingError(Exception):
    """
    Exception raised when SPDX parsing fails.
    Contains detailed error messages about parsing failures.
    """

    def get_messages(self) -> List[str]:
        """
        Get list of detailed parsing error messages.

        Returns:
            List of error message strings
        """

from spdx_tools.spdx.parser.parse_anything import parse_file
# Parse any supported format
try:
    document = parse_file("example.spdx")
    print(f"Parsed document: {document.creation_info.name}")
    print(f"SPDX version: {document.creation_info.spdx_version}")
    print(f"Packages: {len(document.packages)}")
    print(f"Files: {len(document.files)}")
except Exception as e:
    print(f"Parsing failed: {e}")

from spdx_tools.spdx.parser.json import json_parser
from spdx_tools.spdx.parser.xml import xml_parser
from spdx_tools.spdx.parser.yaml import yaml_parser
# Parse specific formats
json_doc = json_parser.parse_from_file("document.json")
xml_doc = xml_parser.parse_from_file("document.xml")
yaml_doc = yaml_parser.parse_from_file("document.yaml")

from spdx_tools.spdx.parser.parse_anything import parse_file
from spdx_tools.spdx.parser.error import SPDXParsingError
from json import JSONDecodeError
from xml.parsers.expat import ExpatError
from xml.sax import SAXParseException
from yaml.scanner import ScannerError
try:
    document = parse_file("problematic.spdx")
except SPDXParsingError as e:
    print("SPDX parsing errors:")
    for message in e.get_messages():
        print(f" - {message}")
except JSONDecodeError as e:
    print(f"Invalid JSON: {e}")
except ExpatError as e:
    print(f"Invalid XML: {e}")
except SAXParseException as e:
    print(f"Invalid RDF/XML: {e}")
except ScannerError as e:
    print(f"Invalid YAML: {e}")
except FileNotFoundError as e:
    print(f"File not found: {e.filename}")

# Parse file with specific encoding
document = parse_file("document.spdx", encoding="latin-1")

from enum import Enum
from typing import List
class FileFormat(Enum):
    """Supported SPDX file formats for parsing."""
    JSON = "json"
    YAML = "yaml"
    XML = "xml"
    TAG_VALUE = "tag_value"
    RDF_XML = "rdf_xml"

class SPDXParsingError(Exception):
    """Exception for SPDX parsing failures."""
    def get_messages(self) -> List[str]: ...

Install with Tessl CLI
npx tessl i tessl/pypi-spdx-tools