CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-raven

Legacy Python client for Sentry error monitoring service with framework integration and exception tracking capabilities.

Pending
Overview
Eval results
Files

docs/data-processing.md

Data Processing

Raven provides a comprehensive data processing pipeline for sanitizing sensitive information, transforming data structures, and controlling what information is sent to Sentry servers.

Capabilities

Base Processor

Foundation class for all data processors.

from raven.processors import Processor

class Processor:
    def __init__(self, client):
        """
        Create the base data processor that all other processors build on.

        Parameters:
        - client (Client): Sentry client instance
        """

    def process(self, data, **kwargs):
        """
        Run the processor over a single event payload.

        Parameters:
        - data (dict): Event data to process
        - **kwargs: Additional processing options

        Returns:
        dict: Processed event data
        """

Password Sanitization

Masks sensitive values such as passwords, authentication tokens, and credit-card-like numbers in event data.

from raven.processors import SanitizePasswordsProcessor

class SanitizePasswordsProcessor(SanitizeKeysProcessor):
    # Field names whose values are treated as sensitive.
    KEYS = frozenset({
        'password',
        'secret',
        'passwd',
        'authorization',
        'api_key',
        'apikey',
        'sentry_dsn',
        'access_token',
    })

    # Matches a value made of 13 to 16 digits, each optionally followed by
    # spaces or dashes (the credit card number shape).
    VALUES_RE = re.compile(r'^(?:\d[ -]*?){13,16}$')

    # Replacement text: eight asterisks.
    MASK = 8 * '*'

    def sanitize(self, item, value):
        """
        Mask a single field value when it is a password or a credit card number.

        Parameters:
        - item (str): Field name/key
        - value: Field value

        Returns:
        Sanitized value with sensitive data masked
        """

    def process(self, data, **kwargs):
        """
        Strip sensitive fields out of the event data.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: Sanitized event data
        """

Key-Based Sanitization

Removes data matching configurable key patterns using regular expressions.

from raven.processors import SanitizeKeysProcessor

class SanitizeKeysProcessor(Processor):
    # Key names listed for this processor.
    KEYS = frozenset({
        'password',
        'secret',
        'passwd',
        'token',
        'api_key',
        'access_token',
        'auth_token',
        'credentials',
    })

    def __init__(self, client, sanitize_keys=None):
        """
        Create a sanitizer driven by key patterns.

        Parameters:
        - client (Client): Sentry client instance
        - sanitize_keys (list): List of regex patterns for keys to sanitize
        """

    def process(self, data, **kwargs):
        """
        Apply key-pattern sanitization to the event data.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: Sanitized event data
        """

POST Data Removal

Removes HTTP POST data from request information.

from raven.processors import RemovePostDataProcessor

class RemovePostDataProcessor(Processor):
    def process(self, data, **kwargs):
        """
        Strip HTTP POST data out of the event.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: Event data with POST data removed
        """

Stack Locals Removal

Removes local variables from stack trace frames to reduce data size and prevent sensitive information leakage.

from raven.processors import RemoveStackLocalsProcessor

class RemoveStackLocalsProcessor(Processor):
    def process(self, data, **kwargs):
        """
        Strip local variables out of the stack trace frames.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: Event data with stack locals removed
        """

Data Transformation Utilities

Core utilities for data processing and serialization.

from raven.utils.serializer import transform, register

def transform(data, **kwargs):
    """
    Convert data into a form that can be serialized.

    Parameters:
    - data: Data to transform
    - **kwargs: Transformation options

    Returns:
    Serializable data structure
    """

def register(type_class, serializer):
    """
    Associate a custom serializer with a data type.

    Parameters:
    - type_class (type): Data type to serialize
    - serializer (callable): Serialization function
    """

Usage Examples

Basic Processor Configuration

from raven import Client
from raven.processors import SanitizePasswordsProcessor, RemovePostDataProcessor

# Build a client with two processors enabled.
# The DSN below is a placeholder value.
client = Client(
    dsn='https://your-dsn@sentry.io/project-id',
    processors=[
        SanitizePasswordsProcessor,
        RemovePostDataProcessor,
    ]
)

# Example payload; the 'password' entry is the one marked for masking
user_data = {
    'username': 'john_doe',
    'password': 'secret123',  # Will be masked
    'email': 'john@example.com'
}

# Attach the payload as extra context, then capture a message event.
client.extra_context({'user_data': user_data})
client.captureMessage('User login attempt')

Custom Sanitization Keys

from raven import Client
from raven.processors import SanitizeKeysProcessor

class CustomSanitizeProcessor(SanitizeKeysProcessor):
    # Extended set of key names to treat as sensitive, adding
    # financial and identity fields to the usual credential names.
    KEYS = frozenset({
        'password',
        'secret',
        'token',
        'api_key',
        'credit_card',
        'ssn',
        'social_security',
        'bank_account',
        'routing_number',
    })

# Use the custom key set by passing the subclass as the only processor.
client = Client(
    dsn='https://your-dsn@sentry.io/project-id',
    processors=[CustomSanitizeProcessor]
)

Pattern-Based Sanitization

from raven import Client
from raven.processors import SanitizeKeysProcessor
import re

class RegexSanitizeProcessor(SanitizeKeysProcessor):
    """Mask any value whose key matches one of several case-insensitive patterns."""

    def __init__(self, client):
        """
        Parameters:
        - client (Client): Sentry client instance
        """
        super().__init__(client)
        # Compiled once per instance so sanitize() does no regex compilation.
        self.sanitize_patterns = [
            re.compile(r'.*password.*', re.IGNORECASE),
            re.compile(r'.*secret.*', re.IGNORECASE),
            re.compile(r'.*token.*', re.IGNORECASE),
            re.compile(r'.*key.*', re.IGNORECASE),
            re.compile(r'.*auth.*', re.IGNORECASE),
        ]

    def sanitize(self, key, value):
        """
        Return the mask when the key looks sensitive, otherwise the value.

        Parameters:
        - key: Field name/key
        - value: Field value

        Returns:
        self.MASK if the key matches a pattern, else the unchanged value
        """
        # The str patterns raise TypeError on non-text keys (None, ints,
        # bytes), so such keys are passed through untouched.
        if not isinstance(key, str):
            return value
        # NOTE(review): assumes the base class provides MASK -- confirm.
        if any(pattern.match(key) for pattern in self.sanitize_patterns):
            return self.MASK
        return value

# Enable the regex-based sanitizer as the only processor.
client = Client(
    dsn='https://your-dsn@sentry.io/project-id',
    processors=[RegexSanitizeProcessor]
)

Custom Data Processor

from raven.processors import Processor

class EmailSanitizeProcessor(Processor):
    """Partially mask e-mail-looking strings found anywhere in the event data."""

    def process(self, data, **kwargs):
        """
        Return a copy of the event data with e-mail addresses masked.

        Dicts and lists are walked recursively; every other value is
        returned unchanged.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: Event data with addresses rewritten as "<first char>***@<domain>"
        """
        def sanitize_emails(obj):
            if isinstance(obj, dict):
                return {
                    key: sanitize_emails(value)
                    for key, value in obj.items()
                }
            if isinstance(obj, list):
                return [sanitize_emails(item) for item in obj]
            # Simple email detection: exactly one '@' and a dotted domain.
            if isinstance(obj, str) and obj.count('@') == 1:
                user, _, domain = obj.partition('@')
                if '.' in domain:
                    # user[:1] rather than user[0]: a string such as
                    # "@example.com" has an empty local part, and indexing
                    # it would raise IndexError.
                    return f"{user[:1]}***@{domain}"
            return obj

        return sanitize_emails(data)

class PIISanitizeProcessor(Processor):
    """Replace SSN, credit card and phone number patterns in string values."""

    def process(self, data, **kwargs):
        """
        Return a copy of the event data with PII patterns masked.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: Event data with matching substrings replaced by placeholders
        """
        import re

        # (pattern, placeholder) pairs, applied to each string in this order.
        substitutions = (
            # SSN pattern (XXX-XX-XXXX)
            (r'\d{3}-\d{2}-\d{4}', 'XXX-XX-XXXX'),
            # Credit card pattern: four groups of four digits
            (r'\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}', 'XXXX-XXXX-XXXX-XXXX'),
            # Phone numbers written as (NNN) NNN-NNNN
            (r'\(\d{3}\)\s?\d{3}-\d{4}', '(XXX) XXX-XXXX'),
        )

        def sanitize_pii(node):
            if isinstance(node, str):
                for pattern, placeholder in substitutions:
                    node = re.sub(pattern, placeholder, node)
                return node
            if isinstance(node, dict):
                return {name: sanitize_pii(child) for name, child in node.items()}
            if isinstance(node, list):
                return [sanitize_pii(child) for child in node]
            # Anything else is left as-is.
            return node

        return sanitize_pii(data)

# Chain the two custom processors with the password sanitizer.
# NOTE(review): this list mixes class objects with a dotted-path string;
# confirm which forms Client accepts for `processors`.
client = Client(
    dsn='https://your-dsn@sentry.io/project-id',
    processors=[
        EmailSanitizeProcessor,
        PIISanitizeProcessor,
        'raven.processors.SanitizePasswordsProcessor'
    ]
)

Environment-Specific Processing

import os
from raven import Client
from raven.processors import (
    SanitizePasswordsProcessor,
    RemovePostDataProcessor,
    RemoveStackLocalsProcessor
)

def get_processors():
    """
    Pick the processor list for the current environment.

    Password sanitization is always included. When the ENVIRONMENT
    variable equals 'production', POST data and stack locals are
    removed as well.

    Returns:
    list: Processor classes, in the order they are listed
    """
    if os.getenv('ENVIRONMENT') != 'production':
        return [SanitizePasswordsProcessor]

    # More aggressive sanitization in production
    return [
        SanitizePasswordsProcessor,
        RemovePostDataProcessor,
        RemoveStackLocalsProcessor,
    ]

# The processor list is chosen once, when the client is constructed.
client = Client(
    dsn='https://your-dsn@sentry.io/project-id',
    processors=get_processors()
)

Custom Serializer Registration

from raven.utils.serializer import register, transform
from decimal import Decimal
import datetime

# Custom serializers for non-JSON types
def serialize_decimal(obj):
    """Convert a Decimal (or anything float() accepts) to a float."""
    as_float = float(obj)
    return as_float

def serialize_datetime(obj):
    """Render a datetime as its ISO 8601 string via ``isoformat()``."""
    iso_text = obj.isoformat()
    return iso_text

def serialize_custom_class(obj):
    """
    Describe an arbitrary object as a plain dict.

    Parameters:
    - obj: Object to serialize

    Returns:
    dict: 'type' (class name), 'value' (str(obj)) and 'attributes'
    (instance attributes whose names do not start with an underscore)
    """
    # Objects declared with __slots__ may have no __dict__; fall back to an
    # empty mapping instead of raising AttributeError.
    instance_attrs = getattr(obj, '__dict__', {})
    return {
        'type': obj.__class__.__name__,
        'value': str(obj),
        'attributes': {
            k: v for k, v in instance_attrs.items() if not k.startswith('_')
        },
    }

# Register custom serializers
# NOTE(review): MyCustomClass is not defined or imported in this snippet;
# it stands in for an application-defined class.
register(Decimal, serialize_decimal)
register(datetime.datetime, serialize_datetime)
register(MyCustomClass, serialize_custom_class)

# Now these types will be properly serialized
data = {
    'price': Decimal('19.99'),
    'timestamp': datetime.datetime.now(),
    'custom_obj': MyCustomClass()
}

# NOTE(review): `client` is not created in this snippet -- presumably a
# configured Client instance; confirm.
client.extra_context({'data': data})
client.captureMessage('Custom data types')

Processor Performance Optimization

from raven.processors import Processor
import time

class PerformanceTrackingProcessor(Processor):
    """Processor wrapper that records how long each process() call takes."""

    def __init__(self, client):
        """
        Parameters:
        - client (Client): Sentry client instance
        """
        super().__init__(client)
        # One entry (seconds, float) per processed event. The list is never
        # trimmed here.
        self.processing_times = []

    def process(self, data, **kwargs):
        """
        Process the event and record the elapsed time.

        Parameters:
        - data (dict): Event data

        Returns:
        Whatever _process_internal returns for the event
        """
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump when the system clock is adjusted, giving negative or
        # inflated durations.
        start_time = time.perf_counter()

        # Process data here
        processed_data = self._process_internal(data)

        processing_time = time.perf_counter() - start_time
        self.processing_times.append(processing_time)

        # Log slow processing
        if processing_time > 0.1:  # 100ms threshold
            print(f"Slow data processing: {processing_time:.3f}s")

        return processed_data

    def _process_internal(self, data):
        """Hook for the real processing logic; returns the data unchanged."""
        # Your actual processing logic
        return data

class ConditionalProcessor(Processor):
    """Processor that skips sanitization for selected events."""

    def process(self, data, **kwargs):
        """
        Sanitize the event unless it belongs to a skipped category.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: The original data for skipped events, otherwise the result
        of _sanitize_data
        """
        # Skip processing for certain event types
        if data.get('logger') == 'performance':
            return data

        # Skip for low-priority events
        if data.get('level') == 'debug':
            return data

        return self._sanitize_data(data)

    def _sanitize_data(self, data):
        """Hook for the real sanitization logic; returns the data unchanged."""
        # The example called this method without defining it, which would
        # raise AttributeError on the first non-skipped event.
        # Your actual sanitization logic
        return data

Install with Tessl CLI

npx tessl i tessl/pypi-raven

docs

breadcrumb-system.md

context-management.md

core-client.md

data-processing.md

framework-integrations.md

index.md

logging-integration.md

transport-layer.md

tile.json