CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-spdx-tools

Python library to parse, validate and create SPDX documents

Pending
Overview
Eval results
Files

docs/parsing.md

Document Parsing

Comprehensive parsing functionality for SPDX documents supporting all major formats with automatic format detection, robust error handling, and encoding support.

Capabilities

Universal File Parsing

Parse SPDX documents from files with automatic format detection based on file extension.

# Import path (per the usage examples): spdx_tools.spdx.parser.parse_anything
def parse_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse an SPDX file in any supported format.

    The format is detected from the file extension:
    - .spdx, .tag -> Tag/Value format
    - .json -> JSON format
    - .yaml, .yml -> YAML format
    - .xml -> XML format
    - .rdf, .rdf.xml -> RDF/XML format

    NOTE(review): a name ending in ".rdf.xml" also ends in ".xml"; presumably
    the RDF/XML rule takes precedence -- confirm against the library.

    Args:
        file_name: Path to the SPDX file; its extension selects the parser.
        encoding: File encoding (default: "utf-8", which is the recommended
            encoding).

    Returns:
        Document: Parsed SPDX document object

    Raises:
        SPDXParsingError: If parsing fails; the detailed messages are
            available through get_messages().
        FileNotFoundError: If the file doesn't exist

    NOTE(review): the error-handling example also catches JSONDecodeError,
    ExpatError, SAXParseException and ScannerError around this call, so
    format-specific decoder errors presumably propagate as well -- confirm.
    """

JSON Parsing

Parse SPDX documents from JSON format files.

# Import path (per the usage examples): spdx_tools.spdx.parser.json.json_parser
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse an SPDX document from a JSON file.

    Args:
        file_name: Path to JSON file
        encoding: File encoding (default: "utf-8")

    Returns:
        Document: Parsed SPDX document

    Raises:
        SPDXParsingError: If JSON parsing fails
        JSONDecodeError: If the JSON is invalid (json.JSONDecodeError from
            the standard library)
    """

XML Parsing

Parse SPDX documents from XML format files.

# Import path (per the usage examples): spdx_tools.spdx.parser.xml.xml_parser
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse an SPDX document from an XML file.

    Args:
        file_name: Path to XML file
        encoding: File encoding (default: "utf-8")

    Returns:
        Document: Parsed SPDX document

    Raises:
        SPDXParsingError: If XML parsing fails
        ExpatError: If the XML is malformed (xml.parsers.expat.ExpatError
            from the standard library)
    """

YAML Parsing

Parse SPDX documents from YAML format files.

# Import path (per the usage examples): spdx_tools.spdx.parser.yaml.yaml_parser
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse an SPDX document from a YAML file.

    Args:
        file_name: Path to YAML file
        encoding: File encoding (default: "utf-8")

    Returns:
        Document: Parsed SPDX document

    Raises:
        SPDXParsingError: If YAML parsing fails
        ScannerError: If the YAML is invalid (yaml.scanner.ScannerError)
    """

Tag-Value Parsing

Parse SPDX documents from Tag-Value format files (the original SPDX format).

# NOTE(review): this page never shows the import path for the Tag-Value parser;
# presumably spdx_tools.spdx.parser.tagvalue.tagvalue_parser, mirroring the
# json/xml/yaml modules -- confirm against the installed package.
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse an SPDX document from a Tag-Value file.

    Args:
        file_name: Path to Tag-Value file (.spdx or .tag extension, the two
            extensions the format detection maps to Tag/Value)
        encoding: File encoding (default: "utf-8")

    Returns:
        Document: Parsed SPDX document

    Raises:
        SPDXParsingError: If Tag-Value parsing fails
    """

RDF Parsing

Parse SPDX documents from RDF/XML format files.

# NOTE(review): this page never shows the import path for the RDF parser;
# presumably spdx_tools.spdx.parser.rdf.rdf_parser, mirroring the
# json/xml/yaml modules -- confirm against the installed package.
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse an SPDX document from an RDF/XML file.

    Args:
        file_name: Path to RDF file (.rdf or .rdf.xml extension)
        encoding: File encoding (default: "utf-8")

    Returns:
        Document: Parsed SPDX document

    Raises:
        SPDXParsingError: If RDF parsing fails
        SAXParseException: If the RDF/XML is malformed
            (xml.sax.SAXParseException from the standard library)
    """

Format Detection

Determine SPDX file format from filename extension.

# NOTE(review): import path is not shown on this page; presumably
# spdx_tools.spdx.formats -- confirm against the installed package.
def file_name_to_format(file_name: str) -> FileFormat:
    """
    Detect the SPDX file format from the filename extension.

    Only the name is inspected; the signature takes no file content.

    Supported extensions:
    - .rdf, .rdf.xml -> RDF_XML
    - .tag, .spdx -> TAG_VALUE
    - .json -> JSON
    - .xml -> XML
    - .yaml, .yml -> YAML

    NOTE(review): ".rdf.xml" also matches ".xml"; the list order above
    suggests the RDF_XML rule is applied first -- confirm.

    Args:
        file_name: File path or name

    Returns:
        FileFormat: Detected format enum value

    Raises:
        SPDXParsingError: If file extension is not supported
    """

Error Handling

Parsing failures are reported through SPDXParsingError, which carries a list of detailed error messages that can be retrieved with get_messages().

class SPDXParsingError(Exception):
    """
    Exception raised when SPDX parsing fails.

    Carries detailed error messages about the parsing failure; read them
    with get_messages(). Import path (per the usage examples):
    spdx_tools.spdx.parser.error.
    """

    def get_messages(self) -> List[str]:
        """
        Get the list of detailed parsing error messages.

        Returns:
            List of error message strings
        """

Usage Examples

Basic File Parsing

from spdx_tools.spdx.parser.parse_anything import parse_file

# Parse any supported format; the parser is chosen from the file extension
# (".spdx" maps to Tag/Value).
try:
    document = parse_file("example.spdx")
    # Document-level metadata is read from creation_info; packages and files
    # are collections on the document itself.
    print(f"Parsed document: {document.creation_info.name}")
    print(f"SPDX version: {document.creation_info.spdx_version}")
    print(f"Packages: {len(document.packages)}")
    print(f"Files: {len(document.files)}")
except Exception as e:
    # Catch-all keeps this example short. parse_file documents
    # SPDXParsingError and FileNotFoundError; catch those specifically in
    # real code so unrelated bugs are not hidden.
    print(f"Parsing failed: {e}")

Format-Specific Parsing

from spdx_tools.spdx.parser.json import json_parser
from spdx_tools.spdx.parser.xml import xml_parser
from spdx_tools.spdx.parser.yaml import yaml_parser

# Parse specific formats. Each parser module exposes the same
# parse_from_file(file_name, encoding="utf-8") entry point, so this bypasses
# the extension-based detection done by parse_file.
# NOTE(review): Tag-Value and RDF parsers are documented on this page but not
# imported here; presumably they live in sibling modules -- confirm.
json_doc = json_parser.parse_from_file("document.json")
xml_doc = xml_parser.parse_from_file("document.xml")
yaml_doc = yaml_parser.parse_from_file("document.yaml")

Error Handling

from spdx_tools.spdx.parser.parse_anything import parse_file
from spdx_tools.spdx.parser.error import SPDXParsingError
from json import JSONDecodeError
from xml.parsers.expat import ExpatError
from xml.sax import SAXParseException
from yaml.scanner import ScannerError

# One handler per documented failure mode. Which decoder error can occur
# depends on the parser selected by the file extension; for a ".spdx"
# (Tag-Value) input the documented errors are SPDXParsingError and
# FileNotFoundError.
try:
    document = parse_file("problematic.spdx")
except SPDXParsingError as e:
    # SPDX-level failure: may carry several messages, so print each one.
    print("SPDX parsing errors:")
    for message in e.get_messages():
        print(f"  - {message}")
except JSONDecodeError as e:
    # Documented for the JSON parser: the file is not valid JSON.
    print(f"Invalid JSON: {e}")
except ExpatError as e:
    # Documented for the XML parser: the XML is malformed.
    print(f"Invalid XML: {e}")
except SAXParseException as e:
    # Documented for the RDF parser: the RDF/XML is malformed.
    print(f"Invalid RDF/XML: {e}")
except ScannerError as e:
    # Documented for the YAML parser: the YAML is invalid.
    print(f"Invalid YAML: {e}")
except FileNotFoundError as e:
    # e.filename is the path that could not be opened.
    print(f"File not found: {e.filename}")

Custom Encoding

# Parse file with specific encoding. parse_file is imported from
# spdx_tools.spdx.parser.parse_anything (see Basic File Parsing). The default
# "utf-8" is the recommended encoding; pass another codec only when the file
# is known to use it.
document = parse_file("document.spdx", encoding="latin-1")

Types

from enum import Enum
from typing import List

class FileFormat(Enum):
    """Supported SPDX file formats for parsing."""
    # NOTE(review): the string values are as shown on this page; the installed
    # library may define the member values differently (e.g. via enum.auto()).
    # Compare members (FileFormat.JSON) rather than relying on .value, and
    # confirm against the library source.
    JSON = "json"
    YAML = "yaml"
    XML = "xml"
    TAG_VALUE = "tag_value"
    RDF_XML = "rdf_xml"

class SPDXParsingError(Exception):
    """Exception for SPDX parsing failures.

    Type summary only; the Error Handling section documents the same class.
    """
    # Returns the detailed parsing error messages as a list of strings.
    def get_messages(self) -> List[str]: ...

Install with Tessl CLI

npx tessl i tessl/pypi-spdx-tools

docs

cli.md

index.md

models.md

parsing.md

spdx3.md

validation.md

writing.md

tile.json