CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-license-expression

A comprehensive utility library to parse, compare, simplify and normalize license expressions using boolean logic

Pending
Overview
Eval results
Files

docs/licensing.md

Core Licensing Operations

The Licensing class provides the main interface for parsing, validating, and manipulating license expressions. It extends the boolean algebra system to provide license-specific functionality including expression parsing, validation, equivalence testing, and containment analysis.

Capabilities

Licensing Class

The primary class for working with license expressions, providing comprehensive parsing, validation, and comparison capabilities.

class Licensing:
    """
    Main entry point for license expression operations.
    Extends boolean.BooleanAlgebra with license-specific functionality.

    Methods that take an ``expression`` accept either an expression string
    or a LicenseExpression object unless their signature restricts it to str.
    """
    
    def __init__(self, symbols=None, quiet: bool = True):
        """
        Initialize Licensing with optional license symbols.
        
        Parameters:
        - symbols: Iterable of LicenseSymbol objects or strings
        - quiet: If True (the default), do not print warnings and errors found in symbols
        """
    
    def parse(self, expression: str, validate: bool = False, strict: bool = False, simple: bool = False, **kwargs) -> LicenseExpression:
        """
        Parse a license expression string into a LicenseExpression object.
        
        Parameters:
        - expression: License expression string to parse
        - validate: Whether to validate symbols against known licenses (default: False)
        - strict: Use strict validation for WITH expressions (default: False)
        - simple: Use the simple tokenizer, which assumes no spaces in license symbols (default: False)
        - **kwargs: Additional keyword arguments
        
        Returns:
        LicenseExpression object representing the parsed expression
        
        Raises:
        - ExpressionParseError: If the expression cannot be parsed, including
          strict=True failures on WITH expressions
        - ExpressionError: If validate=True and the expression contains unknown symbols
        """
    
    def validate(self, expression: str, strict: bool = True, **kwargs) -> ExpressionInfo:
        """
        Validate a license expression and return detailed validation information.

        Problems are reported on the returned ExpressionInfo (its ``errors``
        and ``invalid_symbols`` attributes) so callers can inspect them.
        
        Parameters:
        - expression: License expression string to validate
        - strict: Whether to use strict validation (default: True)
        - **kwargs: Additional keyword arguments
        
        Returns:
        ExpressionInfo object containing validation results, errors, and normalized expression
        """
    
    def is_equivalent(self, expr1, expr2) -> bool:
        """
        Test if two license expressions are logically equivalent.
        
        Parameters:
        - expr1: First expression (string or LicenseExpression)
        - expr2: Second expression (string or LicenseExpression)
        
        Returns:
        True if expressions are equivalent, False otherwise
        """
    
    def contains(self, expr1, expr2) -> bool:
        """
        Test if the first expression contains (implies) the second expression.
        
        Parameters:
        - expr1: Container expression (string or LicenseExpression)
        - expr2: Contained expression (string or LicenseExpression)
        
        Returns:
        True if expr1 contains expr2, False otherwise
        """
    
    def license_symbols(self, expression, unique: bool = True, decompose: bool = True, **kwargs) -> list:
        """
        Extract all license symbols from an expression.
        
        Parameters:
        - expression: License expression (string or LicenseExpression)
        - unique: Only return unique symbols (default: True)
        - decompose: Decompose composite symbols like LicenseWithExceptionSymbol (default: True)
        - **kwargs: Additional keyword arguments
        
        Returns:
        List of LicenseSymbol objects found in the expression
        """
    
    def primary_license_symbol(self, expression, decompose: bool = True, **kwargs):
        """
        Return the left-most license symbol of an expression or None.
        
        Parameters:
        - expression: License expression (string or LicenseExpression)
        - decompose: Only return license symbol from decomposed composite symbols (default: True)
        - **kwargs: Additional keyword arguments
        
        Returns:
        First LicenseSymbol object found in the expression, or None
        """
    
    def primary_license_key(self, expression, **kwargs) -> str:
        """
        Return the left-most license key of an expression or None.
        
        Parameters:
        - expression: License expression (string or LicenseExpression)
        - **kwargs: Additional keyword arguments
        
        Returns:
        First license key found in the expression, or None
        (callers must handle None despite the ``str`` annotation)
        """
    
    def license_keys(self, expression, unique: bool = True, **kwargs) -> list:
        """
        Return a list of license keys used in an expression.
        
        Parameters:
        - expression: License expression (string or LicenseExpression)
        - unique: Only return unique keys (default: True)
        - **kwargs: Additional keyword arguments
        
        Returns:
        List of license key strings found in the expression
        """
    
    def unknown_license_symbols(self, expression, unique: bool = True, **kwargs) -> list:
        """
        Return a list of unknown license symbols used in an expression.
        
        Parameters:
        - expression: License expression (string or LicenseExpression)
        - unique: Only return unique symbols (default: True)
        - **kwargs: Additional keyword arguments
        
        Returns:
        List of unknown LicenseSymbol objects found in the expression
        """
    
    def unknown_license_keys(self, expression, unique: bool = True, **kwargs) -> list:
        """
        Return a list of unknown license keys used in an expression.
        
        Parameters:
        - expression: License expression (string or LicenseExpression)
        - unique: Only return unique keys (default: True)
        - **kwargs: Additional keyword arguments
        
        Returns:
        List of unknown license key strings found in the expression
        """
    
    def validate_license_keys(self, expression):
        """
        Validate license keys in an expression, raising ExpressionError for unknown keys.
        
        Parameters:
        - expression: License expression (string or LicenseExpression)
        
        Raises:
        - ExpressionError: If unknown license keys are found
        """
    
    def dedup(self, expression):
        """
        Return a deduplicated LicenseExpression given a license expression.
        
        Deduplication is similar to simplification but specialized for license expressions.
        Unlike simplification, choices (OR expressions) are kept as-is to preserve options.
        
        Parameters:
        - expression: License expression (string or LicenseExpression)
        
        Returns:
        Deduplicated LicenseExpression object
        """
    
    def tokenize(self, expression: str, strict: bool = False, simple: bool = False):
        """
        Return an iterable of 3-tuples describing each token in an expression string.
        
        Parameters:
        - expression: License expression string to tokenize
        - strict: Use strict validation for WITH expressions (default: False)
        - simple: Use simple tokenizer assuming no spaces in license symbols (default: False)
        
        Returns:
        Iterator of (token_obj, token_string, position) tuples
        """
    
    def get_advanced_tokenizer(self):
        """
        Return an AdvancedTokenizer instance for this Licensing.
        
        Returns:
        AdvancedTokenizer instance that recognizes known symbol keys and aliases
        """
    
    def simple_tokenizer(self, expression: str):
        """
        Return an iterable of Token from an expression string using simple tokenization.
        
        Parameters:
        - expression: License expression string to tokenize
        
        Returns:
        Iterator of Token objects from simple tokenization
        """
    
    def advanced_tokenizer(self, expression: str):
        """
        Return an iterable of Token from an expression string using advanced tokenization.
        
        Parameters:
        - expression: License expression string to tokenize
        
        Returns:
        Iterator of Token objects from advanced tokenization
        """

Usage Examples

Basic Parsing

from license_expression import Licensing

# No symbols are passed, so no license keys are registered as known.
licensing = Licensing()
# Operators may be written in lower case; the rendered expression uses upper case.
expr = licensing.parse('MIT or Apache-2.0')
print(str(expr))  # 'MIT OR Apache-2.0'

Validation with Error Handling

from license_expression import ExpressionError, ExpressionParseError

# Validate an expression with unknown licenses. validate() reports problems
# on the returned ExpressionInfo, so the results can be inspected directly.
# NOTE(review): the outputs below assume `licensing` knows the 'MIT' key
# (e.g. an instance from get_spdx_licensing()) — confirm.
result = licensing.validate('MIT and UnknownLicense')
print(result.errors)  # ['Unknown license key(s): UnknownLicense']
print(result.invalid_symbols)  # ['UnknownLicense']

# parse() raises instead of collecting errors. Unknown symbols with
# validate=True raise ExpressionError, while syntax and strict WITH
# failures raise ExpressionParseError, so catch both.
try:
    licensing.parse('MIT and UnknownLicense', validate=True, strict=True)
except (ExpressionParseError, ExpressionError) as e:
    print(f"Parse error: {e}")

Expression Comparison

# Test equivalence: the two strings differ in operand order and grouping only
expr1 = 'MIT or (Apache-2.0 and GPL-2.0)'
expr2 = '(GPL-2.0 and Apache-2.0) or MIT'
print(licensing.is_equivalent(expr1, expr2))  # True

# Test containment: the first argument is the container, the second the contained expression
broad_expr = 'MIT or Apache-2.0 or GPL-2.0'
specific_expr = 'MIT'
print(licensing.contains(broad_expr, specific_expr))  # True

Symbol Extraction

# license_symbols() defaults to unique=True and decompose=True, so the
# composite 'MIT with Exception' is split into its separate symbols.
expression = 'MIT with Exception or (Apache-2.0 and GPL-2.0+)'
symbols = licensing.license_symbols(expression)
for symbol in symbols:
    print(f"Key: {symbol.key}, Exception: {symbol.is_exception}")

Boolean Operations

The Licensing class provides boolean operators for combining expressions:

class Licensing:
    # Excerpt: only the boolean-operator members of Licensing are listed here.
    AND: type  # Boolean AND operator class
    OR: type   # Boolean OR operator class
    
    # NOTE(review): none of the usage examples call the instance directly;
    # confirm __call__ against the upstream API before relying on it.
    def __call__(self, *args, **kwargs):
        """Create boolean expressions using the licensing instance as callable."""

Boolean Operation Examples

# Parse the operands first; the operator classes take expression objects
mit = licensing.parse('MIT')
apache = licensing.parse('Apache-2.0')

# Combine with AND
combined_and = licensing.AND(mit, apache)
print(str(combined_and))  # 'MIT AND Apache-2.0'

# Combine with OR
combined_or = licensing.OR(mit, apache)
print(str(combined_or))  # 'MIT OR Apache-2.0'

Expression Simplification

# Simplify complex expressions: duplicates are dropped and
# 'MIT and Apache-2.0' is absorbed by the plain 'MIT' alternative
complex_expr = licensing.parse('MIT or (MIT and Apache-2.0) or MIT')
simplified = complex_expr.simplify()
print(str(simplified))  # 'MIT'

# Simplify with normalization: '(A and B)' and '(B and A)' are the same
# term, and both are absorbed by the plain 'A' alternative
redundant = licensing.parse('(A and B) or (B and A) or A')
simplified = redundant.simplify()
print(str(simplified))  # 'A'

Advanced License Analysis

from license_expression import get_spdx_licensing

# Rebinds `licensing` to an instance built by get_spdx_licensing()
licensing = get_spdx_licensing()
expression = 'MIT or (Apache-2.0 and GPL-2.0+) or MIT'

# Get primary license information (the left-most symbol / key)
primary_symbol = licensing.primary_license_symbol(expression)
primary_key = licensing.primary_license_key(expression)
print(f"Primary license: {primary_key}")  # 'MIT'

# Get all license keys (with and without duplicates)
# NOTE(review): with SPDX licensing the deprecated 'GPL-2.0+' may be reported
# under its current key ('GPL-2.0-or-later') — confirm the outputs below.
all_keys = licensing.license_keys(expression, unique=False)
unique_keys = licensing.license_keys(expression, unique=True)
print(f"All keys: {all_keys}")      # ['MIT', 'Apache-2.0', 'GPL-2.0+', 'MIT']
print(f"Unique keys: {unique_keys}")  # ['MIT', 'Apache-2.0', 'GPL-2.0+']

# Find unknown licenses
unknown_keys = licensing.unknown_license_keys('MIT and UnknownLicense')
print(f"Unknown keys: {unknown_keys}")  # ['UnknownLicense']

Expression Deduplication

# Deduplication preserves license choices (unlike simplification).
# dedup() is called on the Licensing object and accepts the string directly.
complex_expr = 'MIT or Apache-2.0 or MIT or (GPL-2.0 and MIT)'
deduplicated = licensing.dedup(complex_expr)
print(str(deduplicated))  # Removes MIT duplicates but preserves structure

# Compare with simplification, which is a method of the parsed expression
simplified = licensing.parse(complex_expr).simplify()
print(str(simplified))    # May be more aggressively simplified

Advanced Tokenization

expression = 'MIT WITH Classpath-exception-2.0 OR Apache-2.0'

# tokenize() yields (token_obj, token_string, position) 3-tuples;
# strict and simple are left at their defaults (False)
for token_obj, token_string, position in licensing.tokenize(expression):
    print(f"Token: '{token_string}' at position {position}, type: {type(token_obj)}")

# Get advanced tokenizer for reuse
tokenizer = licensing.get_advanced_tokenizer()

Install with Tessl CLI

npx tessl i tessl/pypi-license-expression

docs

constants.md

expressions.md

factories.md

index.md

licensing.md

symbols.md

tile.json