CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-voluptuous

Python data validation library for validating nested data structures with comprehensive error reporting

Pending
Overview
Eval results
Files

docs/string-pattern-validators.md

String & Pattern Validation

Regular expression matching, string replacement, email and URL validation, file system path validation, and date/time format validation. These validators handle text patterns, network addresses, file system paths, and date and datetime strings.

Capabilities

Regular Expression Matching

Validate strings against regular expression patterns.

class Match:
    def __init__(self, pattern, msg=None):
        """
        Validate that a string matches a regular expression.

        Parameters:
        - pattern: Regular expression pattern (string or compiled regex)
        - msg: Custom error message used if the pattern doesn't match
          (defaults to None)

        Returns:
        Original string, unchanged, if the pattern matches

        Raises:
        MatchInvalid: If the string doesn't match the pattern

        NOTE(review): this signature has no flags parameter, so flags such
        as re.IGNORECASE presumably have to be supplied through a compiled
        pattern. The library also appears to apply the pattern with
        re.match semantics (anchored at the start of the string, not a
        search) -- confirm both against the voluptuous documentation.
        """

Usage Examples:

from voluptuous import Schema, Match, All

# One regex per field; each Match validator hands back the input unchanged
# when its pattern matches.
USERNAME_PATTERN = r'^[a-zA-Z0-9_]{3,20}$'               # 3-20 ASCII letters, digits or underscores
PHONE_PATTERN = r'^\+?[\d\s\-\(\)]{10,}$'                # flexible phone number format
VERSION_PATTERN = r'^\d+\.\d+\.\d+(-[a-zA-Z0-9\-]+)?$'   # semantic versioning

username_validator = Match(USERNAME_PATTERN)
phone_validator = Match(PHONE_PATTERN)
version_validator = Match(VERSION_PATTERN)

# Every field must be a str before its pattern is applied
user_schema = Schema({
    'username': All(str, username_validator),
    'phone': All(str, phone_validator),
    'version': All(str, version_validator),
})

# A payload that satisfies every field of the schema
valid_user = {
    'username': 'john_doe123',
    'phone': '+1 (555) 123-4567',
    'version': '1.2.3-beta',
}
user_schema(valid_user)

String Pattern Replacement

Perform regex-based string substitution during validation.

class Replace:
    def __init__(self, pattern, substitution, msg=None):
        """
        Perform a regex substitution on the validated string.

        Unlike a pure check, this validator transforms its input: the value
        passed on to later validators is the substituted string.

        Parameters:
        - pattern: Regular expression pattern to match
        - substitution: Replacement string (can include regex groups like \\1, \\2)
        - msg: Custom error message if replacement fails (defaults to None)

        Returns:
        String with every match of pattern replaced by substitution

        Raises:
        Invalid: If input is not a string or regex operation fails

        NOTE(review): this signature has no flags parameter; flags such as
        re.IGNORECASE presumably have to be supplied through a compiled
        pattern -- confirm against the voluptuous documentation.
        """

Usage Examples:

from voluptuous import Schema, Replace, Match, All  # Match is needed by phone_schema below

# Normalize phone numbers by removing formatting
normalize_phone = Replace(r'[\s\-\(\)]', '')  # Remove spaces, hyphens, parentheses

# Clean up whitespace (multiple spaces -> single space)
normalize_whitespace = Replace(r'\s+', ' ')

# Extract and reformat dates (MM/DD/YYYY -> YYYY-MM-DD)
reformat_date = Replace(r'(\d{2})/(\d{2})/(\d{4})', r'\3-\1-\2')

# Sanitize user input
sanitize_username = Replace(r'[^a-zA-Z0-9_]', '_')  # Replace invalid chars with underscore

# Combined transformations.
# All() feeds each validator the previous one's output, so the Replace step
# must come before the Match step that checks the cleaned-up value.
phone_schema = Schema(All(
    str,
    normalize_phone,              # Remove formatting
    Match(r'^\+?\d{10,}$'),      # Validate clean number
))

text_schema = Schema(All(
    str,
    normalize_whitespace,         # Collapse runs of whitespace to one space
    lambda s: s.strip(),         # Remove leading/trailing spaces
))

# Usage:
phone_schema('+1 (555) 123-4567')  # -> '+15551234567'
text_schema('hello    world   ')   # -> 'hello world'

Email Validation

Comprehensive email address format validation.

def Email(v):
    """
    Validate email address format.

    Parameters:
    - v: String to validate as an email address

    Validation includes:
    - User part: alphanumeric, dots, hyphens, underscores, plus signs
    - Domain part: valid domain name or IP address
    - Overall format: user@domain structure

    Returns:
    Original email string if valid

    Raises:
    EmailInvalid: If email format is invalid

    NOTE(review): the usage examples on this page build the validator with
    a call -- Email() -- and then apply the result to the value, e.g.
    Email()('user@example.com'). This signature therefore seems to describe
    the inner check rather than the public call form; confirm against the
    voluptuous documentation.
    """

Usage Examples:

from voluptuous import Schema, Email, All, Lower

def _strip(text):
    """Return text without leading or trailing whitespace."""
    return text.strip()


# Single-field schema: the value under 'email' must be a valid address
user_schema = Schema({'email': Email()})

# Normalize first, validate last: type check, lowercase, trim, then Email
normalized_email = Schema(All(str, Lower, _strip, Email()))

# A list schema: every element must be a valid email address
email_list_schema = Schema([Email()])

# Addresses that pass validation
for address in (
    'user@example.com',
    'first.last+tag@subdomain.example.org',
    'user123@example-site.com',
):
    Email()(address)

# Addresses that would raise EmailInvalid:
# Email()('invalid.email')      # Missing @
# Email()('@example.com')       # Missing user part
# Email()('user@')              # Missing domain

URL Validation

Validate URL format with scheme and netloc requirements.

def Url(v):
    """
    Validate URL format.

    Parameters:
    - v: String to validate as a URL

    Requirements:
    - Must have scheme (http, https, ftp, etc.)
    - Must have netloc (domain/host)
    - Validates overall URL structure

    Returns:
    Original URL string if valid

    Raises:
    UrlInvalid: If URL format is invalid

    NOTE(review): the usage examples on this page build the validator with
    a call -- Url() -- and then apply the result to the value, e.g.
    Url()('https://example.com/api/v1'); confirm the public call form
    against the voluptuous documentation.
    """

def FqdnUrl(v):
    """
    Validate a URL whose host is a fully qualified domain name.

    Parameters:
    - v: String to validate as an FQDN URL

    Requirements:
    - Same as Url() requirements (scheme and netloc present)
    - Domain must contain dots (be fully qualified), so a host such as
      'localhost' is rejected

    Returns:
    Original URL string if valid

    Raises:
    UrlInvalid: If URL format is invalid or domain not fully qualified

    NOTE(review): the usage examples on this page build the validator with
    a call -- FqdnUrl() -- before applying it to the value; confirm the
    public call form against the voluptuous documentation.
    """

Usage Examples:

from voluptuous import Schema, Url, FqdnUrl, Any, Optional

# Basic URL validation.
# Optional is a key marker: it wraps the dictionary KEY to say the key may be
# absent. The value validator stays a plain Url().
api_schema = Schema({
    'endpoint': Url(),
    Optional('callback_url'): Url(),
})

# Strict FQDN URL validation
external_service_schema = Schema({
    'webhook_url': FqdnUrl(),     # Must be fully qualified domain
})

# Flexible URL validation
link_schema = Schema({
    'homepage': Any(None, Url()),  # Either None or a valid URL
})

# Valid URLs:
Url()('https://example.com/api/v1')
Url()('http://localhost:8080/path')
Url()('ftp://files.example.org/')

FqdnUrl()('https://api.service.com/webhook')  # Valid FQDN
# FqdnUrl()('http://localhost/path')          # Invalid - not FQDN

File System Validation

Validate file system paths and check for existence.

def IsFile(v):
    """
    Verify that a path points to an existing file.

    Parameters:
    - v: String path to validate

    Returns:
    Original path string if file exists

    Raises:
    FileInvalid: If path doesn't exist or is not a file

    NOTE(review): sibling validators on this page (Email, Url) are
    instantiated with a call before use, e.g. Url(); verify whether this
    validator must likewise be written as IsFile() inside a schema.
    """

def IsDir(v):
    """
    Verify that a path points to an existing directory.

    Parameters:
    - v: String path to validate

    Returns:
    Original path string if directory exists

    Raises:
    DirInvalid: If path doesn't exist or is not a directory

    NOTE(review): sibling validators on this page (Email, Url) are
    instantiated with a call before use, e.g. Url(); verify whether this
    validator must likewise be written as IsDir() inside a schema.
    """

def PathExists(v):
    """
    Verify that a path exists, whether it is a file or a directory.

    Parameters:
    - v: String path to validate

    Returns:
    Original path string if path exists

    Raises:
    PathInvalid: If path doesn't exist

    NOTE(review): sibling validators on this page (Email, Url) are
    instantiated with a call before use, e.g. Url(); verify whether this
    validator must likewise be written as PathExists() inside a schema.
    """

Usage Examples:

from voluptuous import Schema, IsFile, IsDir, PathExists, Any
import os

# NOTE: IsFile, IsDir and PathExists are validator factories, like Email()
# and Url(). They must be called -- IsFile() -- to obtain the validator.
# A bare IsFile placed in a schema would receive the value as its custom
# message argument and hand back a function without checking any path.

# Configuration file validation
config_schema = Schema({
    'config_file': IsFile(),      # Must be existing file
    'log_dir': IsDir(),           # Must be existing directory
    'data_path': PathExists(),    # Must exist (file or directory)
})


def parent_dir_exists(path):
    """
    Accept a path whose parent directory exists, even if the path does not.

    Parameters:
    - path: String path to validate

    Returns:
    The original path, unchanged (not the parent directory)

    Raises:
    DirInvalid: If the parent directory does not exist
    ValueError: If path is not a path-like value
    """
    try:
        # A bare file name has an empty dirname; its parent is the current directory.
        parent = os.path.dirname(path) or os.curdir
    except TypeError:
        # Report non-path input as a validation failure instead of crashing.
        raise ValueError('expected a path string') from None
    IsDir()(parent)
    return path


# Flexible path validation
backup_schema = Schema({
    'source': PathExists(),       # Source must exist
    'destination': Any(IsDir(), parent_dir_exists),  # Dest dir, or its parent, must exist
})

# Development environment validation
dev_schema = Schema({
    'project_root': IsDir(),
    'requirements_file': IsFile(),
    'virtual_env': Any(None, IsDir()),  # Optional virtual environment
})

# Valid paths (assuming they exist):
config_schema({
    'config_file': '/etc/myapp/config.yaml',
    'log_dir': '/var/log/myapp',
    'data_path': '/opt/myapp/data',
})

Date and Time Validation

Validate date and datetime string formats with custom or default patterns.

class Datetime:
    def __init__(self, format=None, msg=None):
        """
        Validate that a string is a datetime in the expected format.

        Parameters:
        - format: Datetime format string (default: '%Y-%m-%dT%H:%M:%S.%fZ');
          the examples on this page use strptime-style directives such as
          '%Y-%m-%d %H:%M:%S'
        - msg: Custom error message (defaults to None)

        Returns:
        Original datetime string if valid format (the string is checked,
        not converted to a datetime object)

        Raises:
        DatetimeInvalid: If string doesn't match expected datetime format
        """

class Date:
    def __init__(self, format=None, msg=None):
        """
        Validate that a string is a date in the expected format.

        Parameters:
        - format: Date format string (default: '%Y-%m-%d'); the examples on
          this page use strptime-style directives such as '%m/%d/%Y'
        - msg: Custom error message (defaults to None)

        Returns:
        Original date string if valid format (the string is checked, not
        converted to a date object)

        Raises:
        DateInvalid: If string doesn't match expected date format
        """

Usage Examples:

from voluptuous import Schema, Datetime, Date, All, Any  # Any is needed by flexible_date_schema

# Default ISO format validation
event_schema = Schema({
    'start_time': Datetime(),        # ISO 8601 format: 2023-12-25T14:30:00.000Z
    'end_date': Date(),              # ISO date format: 2023-12-25
})

# Custom format validation
custom_format_schema = Schema({
    'created_at': Datetime('%Y-%m-%d %H:%M:%S'),     # Custom datetime format
    'birth_date': Date('%m/%d/%Y'),                  # US date format
    'log_time': Datetime('%Y%m%d_%H%M%S'),          # Compact format
})

# Flexible date validation: the value passes if any one format accepts it
flexible_date_schema = Schema({
    'date': Any(
        Date(),                      # ISO format
        Date('%m/%d/%Y'),           # US format
        Date('%d-%m-%Y'),           # European format
    )
})

# Valid examples:
event_schema({
    'start_time': '2023-12-25T14:30:00.000Z',       # Valid ISO datetime
    'end_date': '2023-12-26',                       # Valid ISO date
})

custom_format_schema({
    'created_at': '2023-12-25 14:30:00',            # Custom datetime format
    'birth_date': '12/25/1990',                     # US date format
    'log_time': '20231225_143000',                  # Compact format
})

Common Validation Patterns

Common patterns for complex text validation.

Multi-stage Text Validation:

from voluptuous import Schema, All, Match, Replace, Length

def _strip(text):
    """Return text without leading or trailing whitespace."""
    return text.strip()


# Steps shared by the pipelines below, named for readability
_collapse_whitespace = Replace(r'\s+', ' ')
_replace_unsafe_chars = Replace(r'[^\w\-_\.]', '_')

# User text: type check, trim, normalize internal whitespace, then enforce
# the length limits and reject HTML-like brackets
clean_text_validator = All(
    str,
    _strip,
    _collapse_whitespace,
    Length(min=1, max=1000),
    Match(r'^[^<>{}]*$'),
)

# File names: replace unsafe characters first, then require no leading dot
# and no path separators, within reasonable length limits
safe_filename = All(
    str,
    _replace_unsafe_chars,
    Match(r'^[^\.][^/\\]*$'),
    Length(min=1, max=255),
)

Flexible Format Validation:

from voluptuous import Schema, Any, Match, All

# Identifier shapes; a value is accepted if it matches any one of them
ID_PATTERNS = (
    r'^\d+$',                                                             # Numeric ID
    r'^[A-Z]{2,3}-\d+$',                                                  # Prefixed ID (AB-123, ABC-456)
    r'^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$',    # UUID
)
flexible_id = Any(*[Match(pattern) for pattern in ID_PATTERNS])

# Date shapes; these check digit layout only, not calendar validity
DATE_PATTERNS = (
    r'^\d{4}-\d{2}-\d{2}$',   # ISO format (YYYY-MM-DD)
    r'^\d{2}/\d{2}/\d{4}$',   # US format (MM/DD/YYYY)
    r'^\d{2}-\d{2}-\d{4}$',   # EU format (DD-MM-YYYY)
)
flexible_date = Any(*[Match(pattern) for pattern in DATE_PATTERNS])

Input Sanitization:

import re

from voluptuous import Schema, All, Length, Replace  # Length is needed by safe_user_input

# SECURITY NOTE: the regex-based transforms below are basic input clean-up,
# not a complete defense. Use parameterized queries against SQL injection
# and context-aware output escaping against XSS; treat these validators as
# an additional layer only.

# HTML tag removal
strip_html = Replace(r'<[^>]*>', '')

# SQL special character escaping (basic; prefer parameterized queries)
escape_sql_chars = Replace(r"[';\"\\]", lambda m: '\\' + m.group(0))

# Script block removal. Replace has no `flags` argument, so the flags are
# supplied through a pre-compiled pattern. If combined with strip_html, run
# this first: once the tags are gone, the script body can no longer be found.
sanitize_js = Replace(
    re.compile(r'<script[^>]*>.*?</script>', re.IGNORECASE | re.DOTALL),
    '',
)

# Combined sanitization
safe_user_input = All(
    str,
    strip_html,                   # Remove HTML tags
    Replace(r'[^\w\s\-_@.]', ''), # Keep only safe characters
    lambda s: s.strip(),         # Remove whitespace
    Length(min=1, max=500),      # Reasonable limits
)

Advanced Pattern Matching:

import re

from voluptuous import Schema, Match, All


def case_insensitive_match(pattern):
    """
    Build a Match validator that ignores letter case.

    Match's second argument is the error message, not regex flags, so the
    flags are applied by compiling the pattern first.

    Parameters:
    - pattern: Regular expression pattern string

    Returns:
    Match validator; calling it with a value returns the value or raises
    MatchInvalid
    """
    return Match(re.compile(pattern, re.IGNORECASE))


def multiline_match(pattern):
    """
    Build a Match validator for multi-line text.

    Compiles the pattern with re.MULTILINE (^ and $ match at line
    boundaries) and re.DOTALL ('.' also matches newlines).

    Parameters:
    - pattern: Regular expression pattern string

    Returns:
    Match validator; calling it with a value returns the value or raises
    MatchInvalid
    """
    return Match(re.compile(pattern, re.MULTILINE | re.DOTALL))


# Complex validation combining multiple patterns.
# Match anchors the pattern at the start of the string (re.match, not
# re.search), so "contains" checks need a leading '.*', and the "must not
# contain" check is written as a negative lookahead that passes only when no
# character is repeated three or more times in a row.
complex_validator = All(
    str,
    Match(r'^[A-Z]'),            # Must start with uppercase
    Match(r'.*[a-z]'),           # Must contain lowercase
    Match(r'.*\d'),              # Must contain digit
    Match(r'^.{8,}$'),           # Must be at least 8 characters
    Match(r'^(?!.*(.)\1{2})'),   # No more than 2 consecutive same chars
)

Install with Tessl CLI

npx tessl i tessl/pypi-voluptuous

docs

core-schema.md

error-handling.md

index.md

range-collection-validators.md

string-pattern-validators.md

type-validators.md

utility-transformers.md

validation-composers.md

tile.json