Python library to parse, validate and create SPDX documents
npx @tessl/cli install tessl/pypi-spdx-tools@0.8.0

A comprehensive Python library for working with SPDX (Software Package Data Exchange) documents. It enables parsing, validation, creation, and conversion of SPDX files across multiple formats, including Tag/Value, RDF, JSON, YAML, and XML. It provides full validation of SPDX document versions 2.2 and 2.3 against the official specification, includes command-line tools for file processing, and offers experimental support for the upcoming SPDX v3.0 specification.
pip install spdx-tools

import spdx_tools

For parsing and writing documents:
from spdx_tools.spdx.parser.parse_anything import parse_file
from spdx_tools.spdx.writer.write_anything import write_file

For working with data models:
from spdx_tools.spdx.model import Document, Package, File, Relationship

For validation:
from spdx_tools.spdx.validation.document_validator import validate_full_spdx_document

from spdx_tools.spdx.parser.parse_anything import parse_file
from spdx_tools.spdx.writer.write_anything import write_file
from spdx_tools.spdx.validation.document_validator import validate_full_spdx_document
# Parse an SPDX document from any supported format
document = parse_file("input.spdx")

# Validate the document; an empty result means no problems were reported
validation_messages = validate_full_spdx_document(document)
if validation_messages:
    for message in validation_messages:
        print(f"Validation error: {message.validation_message}")
else:
    print("Document is valid")

# Convert to different format
write_file(document, "output.json")  # Converts to JSON

# Access document components
print(f"Document name: {document.creation_info.name}")
print(f"Number of packages: {len(document.packages)}")
print(f"Number of files: {len(document.files)}")

SPDX Tools is organized around core concepts:
The library supports SPDX specification versions 2.2 and 2.3, with experimental support for the upcoming SPDX 3.0 specification. All major SPDX serialization formats are supported: Tag/Value, RDF/XML, JSON, YAML, and XML.
Parse SPDX documents from files in any supported format with automatic format detection based on file extension.
def parse_file(file_name: str, encoding: str = "utf-8") -> Document:
"""
Parse SPDX file in any supported format.
Args:
file_name: Path to SPDX file (.spdx, .json, .yaml, .xml, .rdf)
encoding: File encoding (default: utf-8)
Returns:
Document: Parsed SPDX document object
Raises:
SPDXParsingError: If parsing fails
"""

Write SPDX documents to files in any supported format with automatic format detection and optional validation.
def write_file(document: Document, file_name: str, validate: bool = True) -> None:
"""
Write SPDX document to file in any supported format.
Args:
document: SPDX document to write
file_name: Output file path (format determined by extension)
validate: Whether to validate document before writing
"""

Comprehensive validation of SPDX documents against the official specification with detailed error reporting.
def validate_full_spdx_document(document: Document, spdx_version: str = None) -> List[ValidationMessage]:
"""
Validate complete SPDX document against specification.
Args:
document: SPDX document to validate
spdx_version: SPDX version to validate against ("SPDX-2.2" or "SPDX-2.3")
Returns:
List of validation messages (empty if valid)
"""

Complete object model representing all SPDX specification elements including documents, packages, files, relationships, and metadata.
class Document:
    """Main SPDX document containing all elements."""

    # Document-level creation metadata.
    creation_info: CreationInfo
    # Element collections carried by the document.
    packages: List[Package]
    files: List[File]
    snippets: List[Snippet]
    extracted_licensing_infos: List[ExtractedLicensingInfo]
    relationships: List[Relationship]
    annotations: List[Annotation]
class CreationInfo:
"""Document creation metadata."""
spdx_version: str
spdx_id: str
name: str
document_namespace: str
creators: List[Actor]
created: datetime

CLI tools for parsing, validating, converting, and analyzing SPDX documents with support for all major formats and comprehensive error reporting.
def main(infile: str, outfile: str, version: str, novalidation: bool, graph: bool) -> None:
"""
CLI entry point for pyspdxtools command.
Args:
infile: Input SPDX file path
outfile: Output file path (or "-" for stdout)
version: SPDX version to validate against
novalidation: Skip validation if True
graph: Generate relationship graph if True
"""

Experimental support for the upcoming SPDX 3.0 specification including new data models, profile support, and enhanced capabilities.
class SpdxDocument:
    """SPDX 3.0 document root."""

    # Creation metadata for the document.
    creation_info: CreationInfo
    # Elements contained in the document.
    elements: List[Element]
class Element:
"""Base class for all SPDX 3.0 elements."""
spdx_id: str
name: Optional[str]
summary: Optional[str]

from enum import Enum
from datetime import datetime
from typing import List, Optional, Union
class FileFormat(Enum):
    """Supported SPDX file formats."""

    JSON = "json"
    YAML = "yaml"
    XML = "xml"
    TAG_VALUE = "tag_value"
    RDF_XML = "rdf_xml"
class ActorType(Enum):
    """Types of actors/creators."""

    PERSON = "Person"
    ORGANIZATION = "Organization"
    TOOL = "Tool"
class RelationshipType(Enum):
    """Types of relationships between SPDX elements."""

    DESCRIBES = "DESCRIBES"
    DESCRIBED_BY = "DESCRIBED_BY"
    CONTAINS = "CONTAINS"
    CONTAINED_BY = "CONTAINED_BY"
    DEPENDS_ON = "DEPENDS_ON"
    DEPENDENCY_OF = "DEPENDENCY_OF"
    # ... many more relationship types
class ChecksumAlgorithm(Enum):
    """Supported checksum algorithms."""

    SHA1 = "SHA1"
    SHA224 = "SHA224"
    SHA256 = "SHA256"
    SHA384 = "SHA384"
    SHA512 = "SHA512"
    MD5 = "MD5"
    # The member name is upper-case; the value keeps the mixed-case spelling.
    BLAKE2B_256 = "BLAKE2b-256"
    # ... more algorithms
class ValidationMessage:
    """Validation error or warning message."""

    # Text of the validation finding.
    validation_message: str
    # Context attached to the finding (type declared elsewhere).
    context: ValidationContext
class SPDXParsingError(Exception):
    """Exception raised during SPDX parsing."""

    def get_messages(self) -> List[str]:
        """Get list of parsing error messages."""