A comprehensive utility library to parse, compare, simplify and normalize license expressions using boolean logic
—
The Licensing class provides the main interface for parsing, validating, and manipulating license expressions. It extends the boolean algebra system to provide license-specific functionality including expression parsing, validation, equivalence testing, and containment analysis.
The primary class for working with license expressions, providing comprehensive parsing, validation, and comparison capabilities.
class Licensing:
"""
Main entry point for license expression operations.
Extends boolean.BooleanAlgebra with license-specific functionality.
"""
def __init__(self, symbols=None, quiet: bool = True):
"""
Initialize Licensing with optional license symbols.
Parameters:
- symbols: Iterable of LicenseSymbol objects or strings
- quiet: Print warnings and errors found in symbols unless True (default: True)
"""
def parse(self, expression: str, validate: bool = False, strict: bool = False, simple: bool = False, **kwargs) -> LicenseExpression:
"""
Parse a license expression string into a LicenseExpression object.
Parameters:
- expression: License expression string to parse
- validate: Whether to validate symbols against known licenses
- strict: Whether to use strict validation for WITH expressions (default: False)
- simple: Whether to use the simple tokenizer, which assumes no spaces in license symbols (default: False)
- **kwargs: Additional keyword arguments
Returns:
LicenseExpression object representing the parsed expression
Raises:
- ExpressionParseError: If parsing fails, including when strict=True rejects an invalid WITH expression
- ExpressionError: If validate=True and the expression contains unknown symbols
"""
def validate(self, expression: str, strict: bool = True, **kwargs) -> ExpressionInfo:
"""
Validate a license expression and return detailed validation information.
Parameters:
- expression: License expression string to validate
- strict: Whether to use strict validation (default: True)
- **kwargs: Additional keyword arguments
Returns:
ExpressionInfo object containing validation results, errors, and normalized expression
"""
def is_equivalent(self, expr1, expr2) -> bool:
"""
Test if two license expressions are logically equivalent.
Parameters:
- expr1: First expression (string or LicenseExpression)
- expr2: Second expression (string or LicenseExpression)
Returns:
True if expressions are equivalent, False otherwise
"""
def contains(self, expr1, expr2) -> bool:
"""
Test if the first expression contains (implies) the second expression.
Parameters:
- expr1: Container expression (string or LicenseExpression)
- expr2: Contained expression (string or LicenseExpression)
Returns:
True if expr1 contains expr2, False otherwise
"""
def license_symbols(self, expression, unique: bool = True, decompose: bool = True, **kwargs) -> list:
"""
Extract all license symbols from an expression.
Parameters:
- expression: License expression (string or LicenseExpression)
- unique: Only return unique symbols (default: True)
- decompose: Decompose composite symbols like LicenseWithExceptionSymbol (default: True)
- **kwargs: Additional keyword arguments
Returns:
List of LicenseSymbol objects found in the expression
"""
def primary_license_symbol(self, expression, decompose: bool = True, **kwargs):
"""
Return the left-most license symbol of an expression or None.
Parameters:
- expression: License expression (string or LicenseExpression)
- decompose: Only return license symbol from decomposed composite symbols (default: True)
- **kwargs: Additional keyword arguments
Returns:
First LicenseSymbol object found in the expression, or None
"""
def primary_license_key(self, expression, **kwargs) -> str:
"""
Return the left-most license key of an expression or None.
Parameters:
- expression: License expression (string or LicenseExpression)
- **kwargs: Additional keyword arguments
Returns:
First license key found in the expression, or None
"""
def license_keys(self, expression, unique: bool = True, **kwargs) -> list:
"""
Return a list of license keys used in an expression.
Parameters:
- expression: License expression (string or LicenseExpression)
- unique: Only return unique keys (default: True)
- **kwargs: Additional keyword arguments
Returns:
List of license key strings found in the expression
"""
def unknown_license_symbols(self, expression, unique: bool = True, **kwargs) -> list:
"""
Return a list of unknown license symbols used in an expression.
Parameters:
- expression: License expression (string or LicenseExpression)
- unique: Only return unique symbols (default: True)
- **kwargs: Additional keyword arguments
Returns:
List of unknown LicenseSymbol objects found in the expression
"""
def unknown_license_keys(self, expression, unique: bool = True, **kwargs) -> list:
"""
Return a list of unknown license keys used in an expression.
Parameters:
- expression: License expression (string or LicenseExpression)
- unique: Only return unique keys (default: True)
- **kwargs: Additional keyword arguments
Returns:
List of unknown license key strings found in the expression
"""
def validate_license_keys(self, expression):
"""
Validate license keys in an expression, raising ExpressionError for unknown keys.
Parameters:
- expression: License expression (string or LicenseExpression)
Raises:
- ExpressionError: If unknown license keys are found
"""
def dedup(self, expression):
"""
Return a deduplicated LicenseExpression given a license expression.
Deduplication is similar to simplification but specialized for license expressions.
Unlike simplification, choices (OR expressions) are kept as-is to preserve options.
Parameters:
- expression: License expression (string or LicenseExpression)
Returns:
Deduplicated LicenseExpression object
"""
def tokenize(self, expression: str, strict: bool = False, simple: bool = False):
"""
Return an iterable of 3-tuple describing each token in an expression string.
Parameters:
- expression: License expression string to tokenize
- strict: Use strict validation for WITH expressions (default: False)
- simple: Use simple tokenizer assuming no spaces in license symbols (default: False)
Returns:
Iterator of (token_obj, token_string, position) tuples
"""
def get_advanced_tokenizer(self):
"""
Return an AdvancedTokenizer instance for this Licensing.
Returns:
AdvancedTokenizer instance that recognizes known symbol keys and aliases
"""
def simple_tokenizer(self, expression: str):
"""
Return an iterable of Token from an expression string using simple tokenization.
Parameters:
- expression: License expression string to tokenize
Returns:
Iterator of Token objects from simple tokenization
"""
def advanced_tokenizer(self, expression: str):
"""
Return an iterable of Token from an expression string using advanced tokenization.
Parameters:
- expression: License expression string to tokenize
Returns:
Iterator of Token objects from advanced tokenization
"""
from license_expression import Licensing
licensing = Licensing()
expr = licensing.parse('MIT or Apache-2.0')
print(str(expr)) # 'MIT OR Apache-2.0'
# Validate an expression with unknown licenses
result = licensing.validate('MIT and UnknownLicense')
print(result.errors) # ['Unknown license key(s): UnknownLicense']
print(result.invalid_symbols) # ['UnknownLicense']
# Strict parsing raises exceptions
try:
licensing.parse('MIT and UnknownLicense', validate=True, strict=True)
except ExpressionError as e:
print(f"Parse error: {e}")
# Test equivalence
expr1 = 'MIT or (Apache-2.0 and GPL-2.0)'
expr2 = '(GPL-2.0 and Apache-2.0) or MIT'
print(licensing.is_equivalent(expr1, expr2)) # True
# Test containment
broad_expr = 'MIT or Apache-2.0 or GPL-2.0'
specific_expr = 'MIT'
print(licensing.contains(broad_expr, specific_expr)) # True
expression = 'MIT with Exception or (Apache-2.0 and GPL-2.0+)'
symbols = licensing.license_symbols(expression)
for symbol in symbols:
print(f"Key: {symbol.key}, Exception: {symbol.is_exception}")
The Licensing class provides boolean operators for combining expressions:
class Licensing:
AND: type # Boolean AND operator class
OR: type # Boolean OR operator class
def __call__(self, *args, **kwargs):
"""Create boolean expressions using the licensing instance as callable."""
# Create expressions using boolean operations
mit = licensing.parse('MIT')
apache = licensing.parse('Apache-2.0')
# Combine with AND
combined_and = licensing.AND(mit, apache)
print(str(combined_and)) # 'MIT AND Apache-2.0'
# Combine with OR
combined_or = licensing.OR(mit, apache)
print(str(combined_or)) # 'MIT OR Apache-2.0'
# Simplify complex expressions
complex_expr = licensing.parse('MIT or (MIT and Apache-2.0) or MIT')
simplified = complex_expr.simplify()
print(str(simplified)) # 'MIT'
# Simplify with normalization
redundant = licensing.parse('(A and B) or (B and A) or A')
simplified = redundant.simplify()
print(str(simplified)) # 'A' (absorption removes the redundant AND terms)
from license_expression import get_spdx_licensing
licensing = get_spdx_licensing()
expression = 'MIT or (Apache-2.0 and GPL-2.0+) or MIT'
# Get primary license information
primary_symbol = licensing.primary_license_symbol(expression)
primary_key = licensing.primary_license_key(expression)
print(f"Primary license: {primary_key}") # 'MIT'
# Get all license keys (with and without duplicates)
all_keys = licensing.license_keys(expression, unique=False)
unique_keys = licensing.license_keys(expression, unique=True)
print(f"All keys: {all_keys}") # ['MIT', 'Apache-2.0', 'GPL-2.0+', 'MIT']
print(f"Unique keys: {unique_keys}") # ['MIT', 'Apache-2.0', 'GPL-2.0+']
# Find unknown licenses
unknown_keys = licensing.unknown_license_keys('MIT and UnknownLicense')
print(f"Unknown keys: {unknown_keys}") # ['UnknownLicense']
# Deduplication preserves license choices (unlike simplification)
complex_expr = 'MIT or Apache-2.0 or MIT or (GPL-2.0 and MIT)'
deduplicated = licensing.dedup(complex_expr)
print(str(deduplicated)) # Removes MIT duplicates but preserves structure
# Compare with simplification
simplified = licensing.parse(complex_expr).simplify()
print(str(simplified)) # May be more aggressively simplified
expression = 'MIT WITH Classpath-exception-2.0 OR Apache-2.0'
# Tokenize with different methods
for token_obj, token_string, position in licensing.tokenize(expression):
print(f"Token: '{token_string}' at position {position}, type: {type(token_obj)}")
# Get advanced tokenizer for reuse
tokenizer = licensing.get_advanced_tokenizer()
Install with Tessl CLI
npx tessl i tessl/pypi-license-expression