0
# Data Processing
1
2
Raven provides a comprehensive data processing pipeline for sanitizing sensitive information, transforming data structures, and controlling what information is sent to Sentry servers.
3
4
## Capabilities
5
6
### Base Processor
7
8
Foundation class for all data processors.
9
10
```python { .api }
11
from raven.processors import Processor
12
13
class Processor:
    """Foundation class for all data processors."""

    def __init__(self, client):
        """
        Base data processor.

        Parameters:
        - client (Client): Sentry client instance
        """

    def process(self, data, **kwargs):
        """
        Process event data.

        Parameters:
        - data (dict): Event data to process
        - **kwargs: Additional processing options

        Returns:
        dict: Processed event data
        """
33
```
34
35
### Password Sanitization
36
37
Masks sensitive values such as passwords, authentication tokens, and credit-card-like numbers in event data.
38
39
```python { .api }
40
from raven.processors import SanitizePasswordsProcessor
41
42
class SanitizePasswordsProcessor(SanitizeKeysProcessor):
    """Mask values stored under sensitive keys and credit-card-like values."""

    # Key names whose values are treated as sensitive.
    KEYS = frozenset([
        'password', 'secret', 'passwd', 'authorization', 'api_key',
        'apikey', 'sentry_dsn', 'access_token',
    ])

    # Matches a value made of 13 to 16 digits, each optionally followed by
    # spaces or dashes. NOTE: this needs the standard-library ``re`` module,
    # which the import line above does not bring in.
    VALUES_RE = re.compile(r'^(?:\d[ -]*?){13,16}$')
    # Fixed-length replacement written in place of a sensitive value.
    MASK = '*' * 8

    def sanitize(self, item, value):
        """
        Sanitize field values, masking passwords and credit card numbers.

        Parameters:
        - item (str): Field name/key
        - value: Field value

        Returns:
        Sanitized value with sensitive data masked
        """

    def process(self, data, **kwargs):
        """
        Mask sensitive fields in event data.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: Sanitized event data
        """
73
```
74
75
### Key-Based Sanitization
76
77
Sanitizes event data whose keys match configurable regular-expression patterns.
78
79
```python { .api }
80
from raven.processors import SanitizeKeysProcessor
81
82
class SanitizeKeysProcessor(Processor):
    """Sanitizer driven by a set of key names/patterns."""

    # Key names listed for this processor.
    KEYS = frozenset([
        'password', 'secret', 'passwd', 'token', 'api_key',
        'access_token', 'auth_token', 'credentials',
    ])

    def __init__(self, client, sanitize_keys=None):
        """
        Key pattern-based sanitizer.

        Parameters:
        - client (Client): Sentry client instance
        - sanitize_keys (list): List of regex patterns for keys to sanitize
        """

    def process(self, data, **kwargs):
        """
        Sanitize data based on key patterns.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: Sanitized event data
        """
107
```
108
109
### POST Data Removal
110
111
Removes HTTP POST data from request information.
112
113
```python { .api }
114
from raven.processors import RemovePostDataProcessor
115
116
class RemovePostDataProcessor(Processor):
    """Processor that strips HTTP POST data from request information."""

    def process(self, data, **kwargs):
        """
        Remove HTTP POST data from event.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: Event data with POST data removed
        """
127
```
128
129
### Stack Locals Removal
130
131
Removes local variables from stack trace frames to reduce data size and prevent sensitive information leakage.
132
133
```python { .api }
134
from raven.processors import RemoveStackLocalsProcessor
135
136
class RemoveStackLocalsProcessor(Processor):
    """Processor that strips local variables from stack trace frames."""

    def process(self, data, **kwargs):
        """
        Remove local variables from stack traces.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: Event data with stack locals removed
        """
147
```
148
149
### Data Transformation Utilities
150
151
Core utilities for data processing and serialization.
152
153
```python { .api }
154
from raven.utils.serializer import transform, register
155
156
def transform(data, **kwargs):
    """
    Transform data for serialization.

    Parameters:
    - data: Data to transform
    - **kwargs: Transformation options

    Returns:
    Serializable data structure
    """
167
168
def register(type_class, serializer):
    """
    Register custom serializer for data type.

    Parameters:
    - type_class (type): Data type to serialize
    - serializer (callable): Serialization function
    """
176
```
177
178
## Usage Examples
179
180
### Basic Processor Configuration
181
182
```python
183
from raven import Client
184
from raven.processors import SanitizePasswordsProcessor, RemovePostDataProcessor
185
186
# NOTE(review): raven's configuration docs list processors as dotted-path
# strings (e.g. 'raven.processors.SanitizePasswordsProcessor'); confirm that
# class objects are accepted here.
client = Client(
    dsn='https://your-dsn@sentry.io/project-id',
    processors=[
        SanitizePasswordsProcessor,
        RemovePostDataProcessor,
    ],
)

# These fields will be sanitized automatically
user_data = {
    'username': 'john_doe',
    'password': 'secret123',  # Will be masked
    'email': 'john@example.com',
}

client.extra_context({'user_data': user_data})
client.captureMessage('User login attempt')
203
```
204
205
### Custom Sanitization Keys
206
207
```python
208
from raven import Client
209
from raven.processors import SanitizeKeysProcessor
210
211
class CustomSanitizeProcessor(SanitizeKeysProcessor):
    """Key-based sanitizer with an application-specific set of key names."""

    KEYS = frozenset([
        'password', 'secret', 'token', 'api_key',
        'credit_card', 'ssn', 'social_security',
        'bank_account', 'routing_number',
    ])
217
218
client = Client(
    dsn='https://your-dsn@sentry.io/project-id',
    processors=[CustomSanitizeProcessor],
)
222
```
223
224
### Pattern-Based Sanitization
225
226
```python
227
from raven import Client
228
from raven.processors import SanitizeKeysProcessor
229
import re
230
231
class RegexSanitizeProcessor(SanitizeKeysProcessor):
    """Mask any value whose key matches one of several case-insensitive patterns."""

    def __init__(self, client):
        super().__init__(client)
        # Broad, case-insensitive patterns. Note that '.*key.*' and
        # '.*auth.*' also match unrelated names such as 'keyboard' or
        # 'author'; tighten them if that is too aggressive for your data.
        self.sanitize_patterns = [
            re.compile(r'.*password.*', re.IGNORECASE),
            re.compile(r'.*secret.*', re.IGNORECASE),
            re.compile(r'.*token.*', re.IGNORECASE),
            re.compile(r'.*key.*', re.IGNORECASE),
            re.compile(r'.*auth.*', re.IGNORECASE),
        ]

    def sanitize(self, key, value):
        """
        Return the mask when ``key`` matches a sensitive pattern.

        Parameters:
        - key: Field name/key
        - value: Field value

        Returns:
        ``self.MASK`` for matching keys, otherwise ``value`` unchanged
        """
        # Decode byte keys so they can be matched against the str patterns.
        if isinstance(key, bytes):
            key = key.decode('utf-8', 'replace')
        # Pattern.match() raises TypeError for non-string keys (None, ints,
        # ...), so pass their values through untouched.
        if not isinstance(key, str):
            return value
        if any(pattern.match(key) for pattern in self.sanitize_patterns):
            return self.MASK
        return value
246
247
client = Client(
    dsn='https://your-dsn@sentry.io/project-id',
    processors=[RegexSanitizeProcessor],
)
251
```
252
253
### Custom Data Processor
254
255
```python
256
from raven.processors import Processor
257
258
class EmailSanitizeProcessor(Processor):
    """Partially mask email-like strings found anywhere in the event data."""

    def process(self, data, **kwargs):
        """
        Return ``data`` with email-like strings masked.

        Dicts and lists are rebuilt recursively; any other value that is not
        an email-like string is returned unchanged.

        Parameters:
        - data: Event data (nested dicts/lists/strings)

        Returns:
        The same structure with e.g. 'john@example.com' -> 'j***@example.com'
        """

        def sanitize_emails(obj):
            if isinstance(obj, dict):
                return {
                    key: sanitize_emails(value)
                    for key, value in obj.items()
                }
            if isinstance(obj, list):
                return [sanitize_emails(item) for item in obj]
            # Simple email detection and masking: exactly one '@' and a dot
            # somewhere in the part after it.
            if isinstance(obj, str) and obj.count('@') == 1:
                user, _, domain = obj.partition('@')
                if '.' in domain:
                    # user[:1] instead of user[0]: an empty local part such
                    # as '@example.com' is masked rather than raising
                    # IndexError.
                    return f"{user[:1]}***@{domain}"
            return obj

        return sanitize_emails(data)
276
277
class PIISanitizeProcessor(Processor):
    """Mask SSN, credit card and phone number patterns inside string values."""

    def process(self, data, **kwargs):
        """
        Return ``data`` with PII patterns in string values replaced.

        Dicts and lists are rebuilt recursively; values of any other
        non-string type are returned unchanged.

        Parameters:
        - data: Event data (nested dicts/lists/strings)

        Returns:
        The same structure with matching substrings replaced by placeholders
        """
        import re

        def sanitize_pii(obj):
            if isinstance(obj, dict):
                return {
                    key: sanitize_pii(value)
                    for key, value in obj.items()
                }
            elif isinstance(obj, list):
                return [sanitize_pii(item) for item in obj]
            elif isinstance(obj, str):
                # Sanitize SSN pattern (XXX-XX-XXXX)
                obj = re.sub(r'\d{3}-\d{2}-\d{4}', 'XXX-XX-XXXX', obj)
                # Sanitize credit card pattern
                obj = re.sub(r'\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}',
                             'XXXX-XXXX-XXXX-XXXX', obj)
                # Sanitize phone numbers
                obj = re.sub(r'\(\d{3}\)\s?\d{3}-\d{4}', '(XXX) XXX-XXXX', obj)
            # Reached for sanitized strings and for every other type.
            return obj

        return sanitize_pii(data)
300
301
from raven import Client  # this snippet uses Client but did not import it

# NOTE(review): this list mixes class objects with a dotted-path string.
# raven's configuration docs list processors as dotted-path strings; confirm
# that class objects are accepted before relying on this form.
client = Client(
    dsn='https://your-dsn@sentry.io/project-id',
    processors=[
        EmailSanitizeProcessor,
        PIISanitizeProcessor,
        'raven.processors.SanitizePasswordsProcessor',
    ],
)
309
```
310
311
### Environment-Specific Processing
312
313
```python
314
import os
315
from raven import Client
316
from raven.processors import (
317
SanitizePasswordsProcessor,
318
RemovePostDataProcessor,
319
RemoveStackLocalsProcessor
320
)
321
322
def get_processors():
    """
    Build the processor list for the current environment.

    Password sanitization is always enabled. When the ``ENVIRONMENT``
    environment variable equals ``'production'``, POST data and stack locals
    are removed as well.

    Returns:
    list: Processor classes to pass to ``Client(processors=...)``
    """
    processors = [SanitizePasswordsProcessor]

    if os.getenv('ENVIRONMENT') == 'production':
        # More aggressive sanitization in production
        processors.extend([
            RemovePostDataProcessor,
            RemoveStackLocalsProcessor,
        ])

    return processors
333
334
client = Client(
    dsn='https://your-dsn@sentry.io/project-id',
    processors=get_processors(),
)
338
```
339
340
### Custom Serializer Registration
341
342
```python
343
from raven.utils.serializer import register, transform
344
from decimal import Decimal
345
import datetime
346
347
# Custom serializers for non-JSON types
348
def serialize_decimal(obj):
    """Convert a ``Decimal`` to a float (may lose precision beyond float range)."""
    return float(obj)
350
351
def serialize_datetime(obj):
    """Convert a datetime to its ISO 8601 string form."""
    return obj.isoformat()
353
354
def serialize_custom_class(obj):
    """
    Describe an arbitrary object as a plain dict.

    Returns:
    dict with the class name ('type'), ``str(obj)`` ('value') and the
    instance attributes whose names do not start with an underscore
    ('attributes').
    """
    # Objects without an instance dict (``__slots__`` classes, builtins)
    # have no ``__dict__``; treat them as having no attributes instead of
    # raising AttributeError.
    instance_attrs = getattr(obj, '__dict__', {})
    return {
        'type': obj.__class__.__name__,
        'value': str(obj),
        'attributes': {
            k: v for k, v in instance_attrs.items() if not k.startswith('_')
        },
    }
360
361
# Example application type; replace it with your own class.
class MyCustomClass:
    def __init__(self):
        self.label = 'example'


# Register custom serializers
register(Decimal, serialize_decimal)
register(datetime.datetime, serialize_datetime)
register(MyCustomClass, serialize_custom_class)

# Now these types will be properly serialized
data = {
    'price': Decimal('19.99'),
    'timestamp': datetime.datetime.now(),
    'custom_obj': MyCustomClass(),
}

# `client` is a configured raven Client, created as in the earlier examples.
client.extra_context({'data': data})
client.captureMessage('Custom data types')
375
```
376
377
### Processor Performance Optimization
378
379
```python
380
from raven.processors import Processor
381
import time
382
383
class PerformanceTrackingProcessor(Processor):
    """Record how long each event takes to process and report slow ones."""

    # Cap on retained samples so ``processing_times`` cannot grow without
    # limit in a long-running process.
    MAX_SAMPLES = 1000
    # Durations above this many seconds are reported (100ms threshold).
    SLOW_THRESHOLD = 0.1

    def __init__(self, client):
        super().__init__(client)
        self.processing_times = []

    def process(self, data, **kwargs):
        """
        Process ``data`` and record the elapsed time.

        Parameters:
        - data (dict): Event data
        - **kwargs: Additional processing options (unused here)

        Returns:
        Whatever ``_process_internal`` returns for ``data``
        """
        # perf_counter() is monotonic, so a wall-clock adjustment cannot
        # skew the measured duration or make it negative.
        start_time = time.perf_counter()

        # Process data here
        processed_data = self._process_internal(data)

        processing_time = time.perf_counter() - start_time
        self.processing_times.append(processing_time)
        # Keep only the newest MAX_SAMPLES entries (no-op below the cap).
        del self.processing_times[:-self.MAX_SAMPLES]

        # Log slow processing
        if processing_time > self.SLOW_THRESHOLD:
            print(f"Slow data processing: {processing_time:.3f}s")

        return processed_data

    def _process_internal(self, data):
        # Your actual processing logic
        return data
406
407
class ConditionalProcessor(Processor):
    """Skip sanitization for events that do not need it."""

    def process(self, data, **kwargs):
        """
        Sanitize ``data`` unless the event is exempt.

        Events whose 'logger' is 'performance' or whose 'level' is 'debug'
        are returned untouched.

        Parameters:
        - data (dict): Event data

        Returns:
        dict: The original or sanitized event data
        """
        # Skip processing for certain event types
        if data.get('logger') == 'performance':
            return data

        # Skip for low-priority events
        if data.get('level') == 'debug':
            return data

        return self._sanitize_data(data)

    def _sanitize_data(self, data):
        # Your actual sanitization logic. Defined here because process()
        # calls it; without it every non-skipped event raises AttributeError.
        return data
418
```