CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-vcrpy

Automatically mock your HTTP interactions to simplify and speed up testing

Overview
Eval results
Files

docs/data-filtering.md

Data Filtering

Functions for removing or replacing sensitive data in requests and responses before recording to cassettes. VCR.py provides comprehensive filtering capabilities to sanitize sensitive information while maintaining test functionality.

Capabilities

Header Filtering

Functions for modifying request and response headers before recording.

def replace_headers(request: Request, replacements: list) -> Request:
    """
    Replace headers in request according to replacements list.
    
    Args:
        request: Request object to modify
        replacements: List of (key, value) tuples where value can be:
            - str: Simple replacement value
            - None: Remove the header entirely
            - callable: Function(key, value, request) -> str or None
              NOTE(review): upstream vcr/filters.py appears to invoke the
              callable with keyword arguments (key=..., value=...,
              request=...), so its parameter names matter -- confirm
              against the installed vcrpy version.
            
    Returns:
        Request: Modified request object
    """

def remove_headers(request: Request, headers_to_remove: list) -> Request:
    """
    Remove specified headers from request.
    
    Args:
        request: Request object to modify
        headers_to_remove: List of header names (strings) to remove
        
    Returns:
        Request: Request with specified headers removed
    """

Query Parameter Filtering

Functions for sanitizing URL query parameters.

def replace_query_parameters(request: Request, replacements: list) -> Request:
    """
    Replace or remove query parameters from request URI.
    
    Args:
        request: Request object to modify
        replacements: List of (param_name, value) tuples where value can be:
            - str: Replacement value
            - None: Remove parameter entirely
            - callable: Function(key, value, request) -> str or None
              NOTE(review): upstream appears to call this with keyword
              arguments (key=..., value=..., request=...) -- confirm
              against the installed vcrpy version.
            
    Returns:
        Request: Request with modified query parameters
    """

POST Data Filtering

Functions for sanitizing form data and request body parameters.

def replace_post_data_parameters(request: Request, replacements: list) -> Request:
    """
    Replace or remove POST data parameters from request body.
    
    Args:
        request: Request object to modify  
        replacements: List of (param_name, value) tuples where value can be:
            - str: Replacement value
            - None: Remove parameter entirely
            - callable: Function(key, value, request) -> str or None
              NOTE(review): upstream appears to call this with keyword
              arguments (key=..., value=..., request=...) -- confirm
              against the installed vcrpy version.
            
    Returns:
        Request: Request with modified POST data
    """

Response Filtering

Functions for processing response content before recording.

def decode_response(response) -> Response:
    """
    Decode compressed response content (gzip, deflate).
    
    Args:
        response: Response object to decode (the usage examples in this
            document index responses like a dict, e.g.
            response['body']['string'])
        
    Returns:
        Response: Response with decoded content
    """

Usage Examples

Basic Header Filtering

import requests
import vcr

# Filter sensitive headers: each listed header is removed from every request
# before it is written to the cassette.
my_vcr = vcr.VCR(
    filter_headers=['authorization', 'x-api-key', 'cookie']
)

@my_vcr.use_cassette('filtered.yaml')
def test_with_filtered_headers():
    """Send an authenticated request whose token must not reach the cassette."""
    # Authorization headers will be removed from recorded cassette
    response = requests.get(
        'https://api.example.com/data',
        headers={'Authorization': 'Bearer secret-token'}
    )

Header Replacement with Custom Values

# Replace headers with static values
# Each entry is a (header_name, replacement) tuple: a string replaces the
# recorded value, None drops the header.
my_vcr = vcr.VCR(
    filter_headers=[
        ('authorization', 'Bearer REDACTED'),
        ('x-api-key', 'FILTERED'),
        ('user-agent', None)  # Remove entirely
    ]
)

Dynamic Header Filtering

def sanitize_auth_header(key, value, request):
    """Return a placeholder for an authorization header, keeping only its scheme.

    Args:
        key: Header name being filtered (not inspected).
        value: Original header value.
        request: Request the header belongs to (not inspected).

    Returns:
        str: ``Bearer``/``Basic`` placeholder when the scheme is recognised,
        otherwise a generic placeholder.
    """
    scheme_placeholders = (
        ('Bearer ', 'Bearer [FILTERED-TOKEN]'),
        ('Basic ', 'Basic [FILTERED-CREDENTIALS]'),
    )
    for prefix, placeholder in scheme_placeholders:
        if value.startswith(prefix):
            return placeholder
    # Unknown scheme: hide the whole value.
    return '[FILTERED-AUTH]'

# Callable replacements are invoked by vcrpy with keyword arguments
# (key=..., value=..., request=...), so the parameters must carry exactly
# those names -- a lambda declared as (k, v, r) raises TypeError.
my_vcr = vcr.VCR(
    filter_headers=[
        ('authorization', sanitize_auth_header),
        ('x-session-id', lambda key, value, request: 'session-redacted')
    ]
)

Query Parameter Filtering

# Strip sensitive query parameters from every recorded URL
my_vcr = vcr.VCR(filter_query_parameters=['api_key', 'access_token', 'session_id'])

@my_vcr.use_cassette('no_secrets.yaml')
def test_filtered_query_params():
    """Request a URL carrying an API key; the key must not reach the cassette."""
    url = 'https://api.example.com/data?api_key=secret123&user_id=456'
    # api_key is listed in filter_query_parameters, user_id is not.
    # Recorded URL: https://api.example.com/data?user_id=456
    response = requests.get(url)

Query Parameter Replacement

# Callable replacements are invoked by vcrpy with keyword arguments
# (key=..., value=..., request=...), so the lambda parameters must use
# exactly those names.
my_vcr = vcr.VCR(
    filter_query_parameters=[
        ('api_key', 'REDACTED'),
        ('timestamp', lambda key, value, request: '1234567890'),  # Fixed timestamp
        ('nonce', None)  # Remove entirely
    ]
)

POST Data Filtering

# Drop sensitive form fields from every recorded request body
my_vcr = vcr.VCR(filter_post_data_parameters=['password', 'credit_card', 'ssn'])

@my_vcr.use_cassette('safe_posts.yaml')
def test_filtered_post_data():
    """Submit a form containing a password; the password must not be recorded."""
    form_fields = {'username': 'john', 'password': 'secret123', 'email': 'john@example.com'}
    # 'password' is listed in filter_post_data_parameters and is removed from
    # the recording; the other fields are kept.
    # Recorded body (form-encoded): username=john&email=john%40example.com
    response = requests.post('https://api.example.com/submit', data=form_fields)

Custom POST Data Processing

def mask_credit_card(key, value, request):
    """Mask a credit card number, keeping only its last four digits.

    Spaces and hyphens are ignored when deciding whether the value is a
    card number, so ``4111-1111-1111-1111`` and ``4111 1111 1111 1111`` are
    masked just like ``4111111111111111``. Lengths of 13 to 19 digits are
    accepted so that non-16-digit cards (e.g. 15-digit ones) do not slip
    through unmasked.

    Args:
        key: Name of the parameter being filtered (not inspected).
        value: Parameter value; may be ``str``, ``bytes`` or a number
            (e.g. when it comes from a JSON body).
        request: Request being recorded (not inspected).

    Returns:
        ``"****-****-****-NNNN"`` when the value looks like a card number,
        otherwise ``value`` unchanged.
    """
    # Normalise to text first: len()/isdigit() on an int would raise.
    if isinstance(value, (bytes, bytearray)):
        text = bytes(value).decode('utf-8', 'replace')
    else:
        text = str(value)

    digits = text.replace(' ', '').replace('-', '')
    if 13 <= len(digits) <= 19 and digits.isdigit():
        return f"****-****-****-{digits[-4:]}"
    return value

# 'password' is replaced with a fixed placeholder; 'card_number' values are
# passed through mask_credit_card.
my_vcr = vcr.VCR(
    filter_post_data_parameters=[
        ('password', '[FILTERED]'),
        ('card_number', mask_credit_card)
    ]
)

Response Content Filtering

def filter_response_data(response):
    """Scrub sensitive fields from a JSON response body before recording.

    Removes ``user.email`` and ``user.phone`` and replaces every entry of
    ``api_keys`` with a placeholder. Bodies that are missing, not valid
    JSON, binary, or not a JSON object are left untouched.

    Args:
        response: Response dict; the body is read from and written back to
            ``response['body']['string']``.

    Returns:
        The same ``response`` object, modified in place when applicable.
    """
    import json

    try:
        body = response['body']['string']
        data = json.loads(body)
    except (ValueError, KeyError, TypeError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for binary (non-UTF-8) bodies.
        return response

    if not isinstance(data, dict):
        # JSON arrays/scalars have no named fields to scrub.
        return response

    # Remove sensitive fields (only when 'user' really is an object)
    user = data.get('user')
    if isinstance(user, dict):
        user.pop('email', None)
        user.pop('phone', None)

    # Mask API keys in response, keeping the number of entries
    if 'api_keys' in data:
        api_keys = data['api_keys']
        count = len(api_keys) if hasattr(api_keys, '__len__') else 1
        data['api_keys'] = ['[REDACTED]'] * count

    # Write the body back with the type it arrived in: a bytes body must
    # stay bytes, otherwise consumers expecting bytes break on playback.
    serialized = json.dumps(data)
    if isinstance(body, (bytes, bytearray)):
        serialized = serialized.encode('utf-8')
    response['body']['string'] = serialized

    return response

# Run filter_response_data on each response before it is recorded.
my_vcr = vcr.VCR(
    before_record_response=filter_response_data,
    decode_compressed_response=True  # Decode gzipped responses first
)

Comprehensive Request Filtering

def comprehensive_request_filter(request):
    """Apply header, query-string and JSON-body filtering to a request.

    Args:
        request: Request-like object exposing ``headers`` (mapping), ``uri``
            (str) and ``body``; it is modified in place.

    Returns:
        The same ``request`` object after filtering.
    """
    import json
    from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse

    # Filter headers: overwrite rather than delete so the header's presence
    # stays visible in the recording.
    for header in ('authorization', 'cookie', 'x-api-key'):
        if header in request.headers:
            request.headers[header] = '[FILTERED]'

    # Filter query parameters. parse_qsl keeps one pair per occurrence, so
    # repeated parameters (tag=a&tag=b) and blank values (flag=) survive;
    # rebuilding from parse_qs()[key][0] would silently drop them.
    parsed_url = urlparse(request.uri)
    sensitive_params = {'api_key', 'access_token', 'session'}
    kept_pairs = [
        (name, param_value)
        for name, param_value in parse_qsl(parsed_url.query, keep_blank_values=True)
        if name not in sensitive_params
    ]

    # Reconstruct URL
    request.uri = urlunparse((
        parsed_url.scheme, parsed_url.netloc, parsed_url.path,
        parsed_url.params, urlencode(kept_pairs), parsed_url.fragment
    ))

    # Filter JSON body content
    if request.body and request.headers.get('content-type', '').startswith('application/json'):
        try:
            data = json.loads(request.body)
        except (ValueError, TypeError):
            # ValueError covers JSONDecodeError and UnicodeDecodeError
            # (body that is not valid UTF-8); leave such bodies unchanged.
            data = None
        if isinstance(data, dict):
            # Remove sensitive fields
            data.pop('password', None)
            data.pop('api_secret', None)
            request.body = json.dumps(data)

    return request

# Run comprehensive_request_filter on each request before it is recorded.
my_vcr = vcr.VCR(before_record_request=comprehensive_request_filter)

Advanced Filtering Patterns

Conditional Filtering

def smart_header_filter(key, value, request):
    """Choose a header replacement depending on which host the request targets.

    Args:
        key: Header name being filtered.
        value: Original header value.
        request: Request being recorded; only ``request.host`` is read.

    Returns:
        str: ``'[FILTERED]'`` for any host other than the internal API; for
        the internal API, ``'[INTERNAL-AUTH]'`` for the authorization header
        and the untouched value for every other header.
    """
    # External APIs: strict filtering, nothing is kept.
    if request.host != 'internal-api.company.com':
        return '[FILTERED]'

    # Internal API: only the authorization header is masked.
    if key.lower() == 'authorization':
        return '[INTERNAL-AUTH]'
    return value

# Route the authorization header through smart_header_filter.
my_vcr = vcr.VCR(
    filter_headers=[('authorization', smart_header_filter)]
)

Environment-Based Filtering

import os

def get_filter_config():
    """Build VCR filter keyword arguments from the VCR_FILTER_MODE variable.

    Returns:
        dict: Filter settings for ``'strict'`` or ``'minimal'`` mode, or an
        empty dict when the variable is unset or holds any other value.
    """
    mode = os.getenv('VCR_FILTER_MODE')

    if mode == 'strict':
        return {
            'filter_headers': ['authorization', 'cookie', 'x-api-key', 'user-agent'],
            'filter_query_parameters': ['api_key', 'token', 'session', 'timestamp'],
            'filter_post_data_parameters': ['password', 'secret', 'key'],
        }

    if mode == 'minimal':
        return {
            'filter_headers': ['authorization'],
            'filter_query_parameters': ['api_key'],
            'filter_post_data_parameters': ['password'],
        }

    # Unset or unrecognised mode: no filtering options.
    return {}

# Expand the selected configuration dict into VCR keyword arguments.
my_vcr = vcr.VCR(**get_filter_config())

Chain Filtering

def create_filter_chain(*filters):
    """Compose several filter callables into one.

    Args:
        *filters: Callables taking a request or response and returning the
            (possibly modified) object, or None.

    Returns:
        A callable that feeds its argument through every filter in order and
        stops as soon as one of them returns None.
    """
    def chain_filter(request_or_response):
        current = request_or_response
        for step in filters:
            current = step(current)
            if current is None:
                # A filter returned None: skip the remaining filters.
                return None
        return current

    return chain_filter

# Individual filter functions
def remove_auth(request):
    """Delete the 'authorization' header from the request, if present."""
    request.headers.pop('authorization', None)
    return request

def sanitize_urls(request):
    """Placeholder filter: returns the request unchanged."""
    # Custom URL sanitization logic
    return request

def mask_body_secrets(request):
    """Placeholder filter: returns the request unchanged."""
    # Custom body masking logic
    return request

# Combine filters
# Filters run in the order listed here.
combined_filter = create_filter_chain(
    remove_auth,
    sanitize_urls, 
    mask_body_secrets
)

# Apply the combined chain to each request before it is recorded.
my_vcr = vcr.VCR(before_record_request=combined_filter)

Binary Content Handling

def handle_binary_responses(response):
    """Replace binary response bodies with short placeholders.

    Args:
        response: Response dict with optional ``'headers'`` (header name ->
            list of values) and ``'body'`` -> ``'string'`` entries.

    Returns:
        The same ``response`` object; its body is replaced in place when the
        content type is an image, a PDF, a zip archive or an octet stream.
    """
    # Header names may arrive in any casing ('Content-Type' is the usual
    # spelling), so a literal 'content-type' lookup on a plain dict would
    # miss them; compare case-insensitively instead.
    content_type = ''
    for name, values in (response.get('headers') or {}).items():
        if name.lower() == 'content-type':
            if isinstance(values, (list, tuple)):
                # Guard against an empty value list instead of raising IndexError.
                content_type = values[0] if values else ''
            else:
                content_type = values
            break

    if isinstance(content_type, bytes):
        content_type = content_type.decode('latin-1')
    # Media types are case-insensitive.
    content_type = (content_type or '').lower()

    if content_type.startswith('image/'):
        # Replace image data with placeholder
        response['body']['string'] = b'[BINARY-IMAGE-DATA-REMOVED]'
    elif content_type.startswith('application/pdf'):
        # Replace PDF data with placeholder
        response['body']['string'] = b'[BINARY-PDF-DATA-REMOVED]'
    elif 'zip' in content_type or 'octet-stream' in content_type:
        # Replace binary data with size information
        original_size = len(response['body']['string'])
        response['body']['string'] = f'[BINARY-DATA-{original_size}-BYTES]'.encode()

    return response

# Run handle_binary_responses on each response before it is recorded.
my_vcr = vcr.VCR(before_record_response=handle_binary_responses)

Install with Tessl CLI

npx tessl i tessl/pypi-vcrpy

docs

core-configuration.md

data-filtering.md

error-handling.md

index.md

record-modes.md

request-matching.md

request-response.md

serialization.md

test-integration.md

tile.json