Python library to parse, validate and create SPDX documents
—
Comprehensive validation of SPDX documents against the official specification, with detailed error reporting and support for multiple SPDX versions.
Validate complete SPDX documents against the official specification with comprehensive error reporting.
def validate_full_spdx_document(document: Document, spdx_version: Optional[str] = None) -> List[ValidationMessage]:
    """
    Validate complete SPDX document against SPDX specification.

    Performs comprehensive validation including:
    - SPDX version compatibility
    - Document structure validation
    - Package information validation
    - File information validation
    - Relationship validation
    - License information validation
    - Checksum validation
    - SPDX ID validation

    Args:
        document: SPDX document to validate
        spdx_version: SPDX version to validate against ("SPDX-2.2" or "SPDX-2.3").
            If None, uses version from document

    Returns:
        List[ValidationMessage]: List of validation errors/warnings.
        Empty list if document is valid
    """

Validate document creation information and metadata.
def validate_creation_info(creation_info: CreationInfo, spdx_version: str) -> List[ValidationMessage]:
    """
    Validate document creation information.

    Validates:
    - SPDX version format
    - SPDX ID format
    - Document namespace URI format
    - Creator information
    - License list version compatibility

    Args:
        creation_info: Document creation info to validate
        spdx_version: SPDX version for validation rules

    Returns:
        List of validation messages
    """

Validate package information and metadata.
def validate_packages(packages: List[Package], spdx_version: str, document: Optional[Document] = None) -> List[ValidationMessage]:
    """
    Validate all packages in document.

    Validates:
    - Package SPDX IDs
    - Download locations
    - Verification codes
    - License information
    - External package references
    - Package relationships

    Args:
        packages: List of packages to validate
        spdx_version: SPDX version for validation rules
        document: Optional document the packages belong to; defaults to None.
            NOTE(review): how it is used is not described in this reference;
            presumably it supplies document-wide context. Confirm against
            the library.

    Returns:
        List of validation messages
    """

Validate file information and metadata.
def validate_files(files: List[File], spdx_version: str, document: Optional[Document] = None) -> List[ValidationMessage]:
    """
    Validate all files in document.

    Validates:
    - File SPDX IDs
    - File paths
    - Checksums and algorithms
    - License information
    - File types
    - Copyright information

    Args:
        files: List of files to validate
        spdx_version: SPDX version for validation rules
        document: Optional document the files belong to; defaults to None.
            NOTE(review): how it is used is not described in this reference;
            presumably it supplies document-wide context. Confirm against
            the library.

    Returns:
        List of validation messages
    """

Validate relationships between SPDX elements.
def validate_relationships(relationships: List[Relationship], spdx_version: str) -> List[ValidationMessage]:
    """
    Validate all relationships in document.

    Validates:
    - Relationship types
    - SPDX element references
    - Relationship consistency
    - Required relationships

    Args:
        relationships: List of relationships to validate
        spdx_version: SPDX version for validation rules

    Returns:
        List of validation messages
    """

Validate license information and expressions.
def validate_extracted_licensing_infos(
    extracted_licensing_infos: List[ExtractedLicensingInfo],
    spdx_version: str
) -> List[ValidationMessage]:
    """
    Validate extracted licensing information.

    Validates:
    - License IDs format
    - License text content
    - License references
    - License expressions

    Args:
        extracted_licensing_infos: List of extracted licenses to validate
        spdx_version: SPDX version for validation rules

    Returns:
        List of validation messages
    """

Validate checksums and algorithms.
def validate_checksums(checksums: List[Checksum]) -> List[ValidationMessage]:
    """
    Validate checksum information.

    Validates:
    - Checksum algorithm support
    - Checksum format
    - Checksum value format

    Args:
        checksums: List of checksums to validate

    Returns:
        List of validation messages
    """

Validate SPDX identifier formats and references.
def get_list_of_all_spdx_ids(document: Document) -> List[str]:
    """
    Get all SPDX IDs present in document.

    Args:
        document: SPDX document

    Returns:
        List of all SPDX IDs in document
    """
def validate_spdx_id_format(spdx_id: str) -> List[ValidationMessage]:
    """
    Validate SPDX ID format.

    Args:
        spdx_id: SPDX ID to validate

    Returns:
        List of validation messages
    """

Validate URI formats used in SPDX documents.
def validate_uri_format(uri: str) -> List[ValidationMessage]:
    """
    Validate URI format.

    Args:
        uri: URI to validate

    Returns:
        List of validation messages
    """

from spdx_tools.spdx.validation.document_validator import validate_full_spdx_document
from spdx_tools.spdx.parser.parse_anything import parse_file
# Load the example file, then run the full validator over the parsed document.
document = parse_file("example.spdx")
validation_messages = validate_full_spdx_document(document)
if not validation_messages:
    # No messages were returned, so the document is reported as valid.
    print("Document is valid!")
else:
    print("Document validation failed:")
    for message in validation_messages:
        print(f" - {message.validation_message}")
        # Only print the context line when the message carries one.
        if hasattr(message, 'context'):
            print(f" Context: {message.context}")

# Validate against specific SPDX version
validation_messages = validate_full_spdx_document(document, "SPDX-2.3")
# Show the version the document itself declares.
print(f"Document version: {document.creation_info.spdx_version}")
from spdx_tools.spdx.validation.package_validator import validate_packages
from spdx_tools.spdx.validation.file_validator import validate_files
from spdx_tools.spdx.validation.relationship_validator import validate_relationships
# Validate specific components
# Each validator is called on one section of the document, with "SPDX-2.3"
# passed as the version whose rules apply.
package_errors = validate_packages(document.packages, "SPDX-2.3")
file_errors = validate_files(document.files, "SPDX-2.3")
relationship_errors = validate_relationships(document.relationships, "SPDX-2.3")
# Report how many messages each validator returned.
print(f"Package validation errors: {len(package_errors)}")
print(f"File validation errors: {len(file_errors)}")
print(f"Relationship validation errors: {len(relationship_errors)}")
from spdx_tools.spdx.writer.write_anything import write_file
# Validation is enabled by default in write_file
try:
    write_file(document, "output.json")  # validate=True by default
    print("Document written successfully (validation passed)")
except Exception as e:
    # Failures from validation and from writing are reported the same way.
    print(f"Validation or writing failed: {e}")
# Skip validation for faster writing (not recommended)
write_file(document, "output.json", validate=False)


def analyze_validation_errors(validation_messages):
    """Group validation messages by category and print a per-category report.

    A message is assigned to the first category whose keyword appears in its
    text: "SPDX ID" (case-sensitive match), then "License" and "Checksum"
    (case-insensitive matches), otherwise "Other".

    Args:
        validation_messages: Iterable of objects exposing a
            ``validation_message`` string attribute.

    Returns:
        None. The report is written to standard output.
    """
    grouped = {}
    for entry in validation_messages:
        text = entry.validation_message
        lowered = text.lower()
        # Pick the category first, then file the message under it once.
        if "SPDX ID" in text:
            category = "SPDX ID"
        elif "license" in lowered:
            category = "License"
        elif "checksum" in lowered:
            category = "Checksum"
        else:
            category = "Other"
        grouped.setdefault(category, []).append(entry)

    # Categories are printed in the order they were first encountered.
    for error_type, messages in grouped.items():
        print(f"{error_type} errors ({len(messages)}):")
        for msg in messages:
            print(f" - {msg.validation_message}")
# Analyze errors
validation_messages = validate_full_spdx_document(document)
# Only produce a report when the validator returned at least one message.
if validation_messages:
    analyze_validation_errors(validation_messages)
import sys
from spdx_tools.spdx.parser.parse_anything import parse_file
from spdx_tools.spdx.validation.document_validator import validate_full_spdx_document
def validate_spdx_file(file_path: str) -> bool:
    """Parse and validate one SPDX file, printing the outcome.

    Intended for CI/CD pipelines: every outcome is reported on standard
    output and summarized as a boolean.

    Args:
        file_path: Path of the SPDX file to check.

    Returns:
        True if the file parsed and validation returned no messages.
        False if validation returned messages or any exception was raised
        while processing the file.
    """
    try:
        document = parse_file(file_path)
        findings = validate_full_spdx_document(document)
        # Happy path first: nothing reported means the file is valid.
        if not findings:
            print(f"✅ {file_path} is valid")
            return True
        print(f"❌ Validation failed for {file_path}:")
        for message in findings:
            print(f" - {message.validation_message}")
        return False
    except Exception as e:
        # Any failure counts as an invalid file rather than aborting the
        # caller.
        print(f"❌ Error processing {file_path}: {e}")
        return False
# Use in CI script: validate every path given on the command line and exit
# non-zero if any of them is invalid.
if __name__ == "__main__":
    files_to_validate = sys.argv[1:]
    # Build the complete result list before calling all(). Passing a generator
    # would stop at the first invalid file, so later files would never be
    # validated and their problems never reported.
    results = [validate_spdx_file(f) for f in files_to_validate]
    all_valid = all(results)
    # With no paths given, all([]) is True and the exit status is 0.
    sys.exit(0 if all_valid else 1)
from typing import List, Optional
from dataclasses import dataclass
from enum import Enum
@dataclass
class ValidationMessage:
    """Validation error or warning message.

    Declared as a dataclass, like ValidationContext, so that instances can be
    built from their fields and compared by value.

    Attributes:
        validation_message: Text of the message.
        context: Context the message was raised in.
    """

    validation_message: str
    # Quoted because ValidationContext is declared after this class; an
    # unquoted name would be evaluated while the class body runs and raise
    # NameError.
    context: "ValidationContext"
@dataclass
class ValidationContext:
    """Context information for validation messages.

    Attributes:
        spdx_id: SPDX ID attached to the context; may be None.
        element_type: Type of SPDX element the context refers to.
    """

    spdx_id: Optional[str]
    # Quoted because SpdxElementType is declared after this class; an unquoted
    # name would be evaluated while the class body runs and raise NameError.
    element_type: "SpdxElementType"
class SpdxElementType(Enum):
    """Types of SPDX elements for validation context."""

    # Each member's value is a string naming the element type.
    DOCUMENT = "Document"
    PACKAGE = "Package"
    FILE = "File"
    SNIPPET = "Snippet"
    RELATIONSHIP = "Relationship"
    ANNOTATION = "Annotation"
    EXTRACTED_LICENSING_INFO = "ExtractedLicensingInfo"

Install with Tessl CLI
npx tessl i tessl/pypi-spdx-tools